Merge b05b2e78cd into f7cde17919

Merge pull request #6292 from Little-LittleProgrammer/feature/alibaba-omni-support
feat(alibaba): Added alibaba vision model and omni model support
2025-11-01 14:53:43 +08:00 · 2025-03-06 14:50:50 +01:00 · 2025-03-01 10:25:16 +08:00 · 2025-03-01 10:24:38 +08:00 · 2025-02-28 19:48:09 +08:00 · 2025-02-28 10:47:52 +05:00
5 changed files with 64 additions and 20 deletions
--- a/README.md
+++ b/README.md
@@ -22,7 +22,6 @@ English / [简体中文](./README_CN.md)
 [![MacOS][MacOS-image]][download-url]
 [![Linux][Linux-image]][download-url]

-[NextChatAI](https://nextchat.dev/chat?utm_source=readme) / [Web App Demo](https://app.nextchat.dev) / [Desktop App](https://github.com/Yidadaa/ChatGPT-Next-Web/releases) 
 [NextChatAI](https://nextchat.club?utm_source=readme) / [Web App Demo](https://app.nextchat.dev) / [Desktop App](https://github.com/Yidadaa/ChatGPT-Next-Web/releases) / [Discord](https://discord.gg/YCkeafCafC) / [Enterprise Edition](#enterprise-edition) / [Twitter](https://twitter.com/NextChatDev)


@@ -130,7 +129,7 @@ For enterprise inquiries, please contact: **business@nextchat.dev**
 - 🚀 v2.15.8 Now supports Realtime Chat [#5672](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/issues/5672)
 - 🚀 v2.15.4 The Application supports using Tauri fetch LLM API, MORE SECURITY! [#5379](https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web/issues/5379)
 - 🚀 v2.15.0 Now supports Plugins! Read this: [NextChat-Awesome-Plugins](https://github.com/ChatGPTNextWeb/NextChat-Awesome-Plugins)
-- 🚀 v2.14.0 Now supports  Artifacts & SD 
+- 🚀 v2.14.0 Now supports  Artifacts & SD.
 - 🚀 v2.10.1 support Google Gemini Pro model.
 - 🚀 v2.9.11 you can use azure endpoint now.
 - 🚀 v2.8 now we have a client that runs across all platforms!
@@ -338,7 +337,7 @@ For ByteDance: use `modelName@bytedance=deploymentName` to customize model name

 ### `DEFAULT_MODEL` （optional）

-Change default model
+Change default model.

 ### `VISION_MODELS` (optional)

@@ -369,7 +368,7 @@ Customize Stability API url.

 ### `ENABLE_MCP` (optional)

-Enable MCP（Model Context Protocol）Feature
+Enable MCP（Model Context Protocol）Feature.

 ### `SILICONFLOW_API_KEY` (optional)

--- a/app/client/api.ts
+++ b/app/client/api.ts
@@ -40,6 +40,11 @@ export interface MultimodalContent {
  };
 }

+export interface MultimodalContentForAlibaba {
+  text?: string;
+  image?: string;
+}
+
 export interface RequestMessage {
  role: MessageRole;
  content: string | MultimodalContent[];
--- a/app/client/platforms/alibaba.ts
+++ b/app/client/platforms/alibaba.ts
@@ -7,7 +7,10 @@ import {
  ChatMessageTool,
  usePluginStore,
 } from "@/app/store";
-import { streamWithThink } from "@/app/utils/chat";
+import {
+  preProcessImageContentForAlibabaDashScope,
+  streamWithThink,
+} from "@/app/utils/chat";
 import {
  ChatOptions,
  getHeaders,
@@ -15,12 +18,14 @@ import {
  LLMModel,
  SpeechOptions,
  MultimodalContent,
+  MultimodalContentForAlibaba,
 } from "../api";
 import { getClientConfig } from "@/app/config/client";
 import {
  getMessageTextContent,
  getMessageTextContentWithoutThinking,
  getTimeoutMSByModel,
+  isVisionModel,
 } from "@/app/utils";
 import { fetch } from "@/app/utils/stream";

@@ -89,14 +94,6 @@ export class QwenApi implements LLMApi {
  }

  async chat(options: ChatOptions) {
-    const messages = options.messages.map((v) => ({
-      role: v.role,
-      content:
-        v.role === "assistant"
-          ? getMessageTextContentWithoutThinking(v)
-          : getMessageTextContent(v),
-    }));
-
    const modelConfig = {
      ...useAppConfig.getState().modelConfig,
      ...useChatStore.getState().currentSession().mask.modelConfig,
@@ -105,6 +102,21 @@ export class QwenApi implements LLMApi {
      },
    };

+    const visionModel = isVisionModel(options.config.model);
+
+    const messages: ChatOptions["messages"] = [];
+    for (const v of options.messages) {
+      const content = (
+        visionModel
+          ? await preProcessImageContentForAlibabaDashScope(v.content)
+          : v.role === "assistant"
+          ? getMessageTextContentWithoutThinking(v)
+          : getMessageTextContent(v)
+      ) as any;
+
+      messages.push({ role: v.role, content });
+    }
+
    const shouldStream = !!options.config.stream;
    const requestPayload: RequestPayload = {
      model: modelConfig.model,
@@ -129,7 +141,7 @@ export class QwenApi implements LLMApi {
        "X-DashScope-SSE": shouldStream ? "enable" : "disable",
      };

-      const chatPath = this.path(Alibaba.ChatPath);
+      const chatPath = this.path(Alibaba.ChatPath(modelConfig.model));
      const chatPayload = {
        method: "POST",
        body: JSON.stringify(requestPayload),
@@ -162,7 +174,7 @@ export class QwenApi implements LLMApi {
            const json = JSON.parse(text);
            const choices = json.output.choices as Array<{
              message: {
-                content: string | null;
+                content: string | null | MultimodalContentForAlibaba[];
                tool_calls: ChatMessageTool[];
                reasoning_content: string | null;
              };
@@ -212,7 +224,9 @@ export class QwenApi implements LLMApi {
            } else if (content && content.length > 0) {
              return {
                isThinking: false,
-                content: content,
+                content: Array.isArray(content)
+                  ? content.map((item) => item.text).join(",")
+                  : content,
              };
            }

--- a/app/constant.ts
+++ b/app/constant.ts
@@ -221,7 +221,12 @@ export const ByteDance = {

 export const Alibaba = {
  ExampleEndpoint: ALIBABA_BASE_URL,
-  ChatPath: "v1/services/aigc/text-generation/generation",
+  ChatPath: (modelName: string) => {
+    if (modelName.includes("vl") || modelName.includes("omni")) {
+      return "v1/services/aigc/multimodal-generation/generation";
+    }
+    return `v1/services/aigc/text-generation/generation`;
+  },
 };

 export const Tencent = {
@@ -570,6 +575,9 @@ const alibabaModes = [
  "qwen-max-0403",
  "qwen-max-0107",
  "qwen-max-longcontext",
+  "qwen-omni-turbo",
+  "qwen-vl-plus",
+  "qwen-vl-max",
 ];

 const tencentModels = [
--- a/app/utils/chat.ts
+++ b/app/utils/chat.ts
@@ -3,7 +3,7 @@ import {
  UPLOAD_URL,
  REQUEST_TIMEOUT_MS,
 } from "@/app/constant";
-import { RequestMessage } from "@/app/client/api";
+import { MultimodalContent, RequestMessage } from "@/app/client/api";
 import Locale from "@/app/locales";
 import {
  EventStreamContentType,
@@ -70,8 +70,9 @@ export function compressImage(file: Blob, maxSize: number): Promise<string> {
  });
 }

-export async function preProcessImageContent(
+export async function preProcessImageContentBase(
  content: RequestMessage["content"],
+  transformImageUrl: (url: string) => Promise<{ [key: string]: any }>,
 ) {
  if (typeof content === "string") {
    return content;
@@ -81,7 +82,7 @@ export async function preProcessImageContent(
    if (part?.type == "image_url" && part?.image_url?.url) {
      try {
        const url = await cacheImageToBase64Image(part?.image_url?.url);
-        result.push({ type: part.type, image_url: { url } });
+        result.push(await transformImageUrl(url));
      } catch (error) {
        console.error("Error processing image URL:", error);
      }
@@ -92,6 +93,23 @@ export async function preProcessImageContent(
  return result;
 }

+export async function preProcessImageContent(
+  content: RequestMessage["content"],
+) {
+  return preProcessImageContentBase(content, async (url) => ({
+    type: "image_url",
+    image_url: { url },
+  })) as Promise<MultimodalContent[] | string>;
+}
+
+export async function preProcessImageContentForAlibabaDashScope(
+  content: RequestMessage["content"],
+) {
+  return preProcessImageContentBase(content, async (url) => ({
+    image: url,
+  }));
+}
+
 const imageCaches: Record<string, string> = {};
 export function cacheImageToBase64Image(imageUrl: string) {
  if (imageUrl.includes(CACHE_URL_PREFIX)) {
Author	SHA1	Message	Date
Shay Molcho	bac68466e1	Merge `b05b2e78cd` into `f7cde17919`	2025-03-06 14:50:50 +01:00
RiverRay	f7cde17919	Merge pull request #6292 from Little-LittleProgrammer/feature/alibaba-omni-support Some checks failed Run Tests / test (push) Has been cancelled Details feat(alibaba): Added alibaba vision model and omni model support	2025-03-01 10:25:16 +08:00
RiverRay	570cbb34b6	Merge pull request #6310 from agi-dude/patch-1 Remove duplicate links	2025-03-01 10:24:38 +08:00
RiverRay	7aa9ae0a3e	Merge pull request #6311 from ChatGPTNextWeb/6305-bugthe-first-message-except-the-system-message-of-deepseek-reasoner-must-be-a-user-message-but-an-assistant-message-detected Some checks are pending Run Tests / test (push) Waiting to run Details fix: enforce that the first message (excluding system messages) is a …	2025-02-28 19:48:09 +08:00
Mr. AGI	ad6666eeaf	Update README.md	2025-02-28 10:47:52 +05:00
EvanWu	a2c4e468a0	fix(app/utils/chat.ts): fix type error	2025-02-26 19:58:32 +08:00
EvanWu	0a25a1a8cb	refactor(app/utils/chat.ts): optimize function preProcessImageContentBase	2025-02-25 09:22:47 +08:00
EvanWu	b709ee3983	feat(alibaba): Added alibaba vision model and omni model support	2025-02-24 20:18:07 +08:00
Shay Molcho	b05b2e78cd	Added missing periods (.) in multiple places. This commit adds missing periods (.) in several places to ensure consistency and improve readability in the code and documentation.	2025-01-22 20:23:51 +02:00