Merge branch 'main' into tts-stt

2025-11-17 14:33:41 +08:00 · 2024-10-11 19:20:30 +08:00
parent 76faff03ff c5074f0aa4
commit 8ec4df4b25
94 changed files with 3477 additions and 647 deletions
--- a/app/client/platforms/alibaba.ts
+++ b/app/client/platforms/alibaba.ts
@@ -13,7 +13,6 @@ import {
  LLMApi,
  LLMModel,
  SpeechOptions,
-  TranscriptionOptions,
  MultimodalContent,
 } from "../api";
 import Locale from "../../locales";
@@ -24,6 +23,7 @@ import {
 import { prettyObject } from "@/app/utils/format";
 import { getClientConfig } from "@/app/config/client";
 import { getMessageTextContent } from "@/app/utils";
+import { fetch } from "@/app/utils/stream";

 export interface OpenAIListModelResponse {
  object: string;
@@ -88,9 +88,6 @@ export class QwenApi implements LLMApi {
  speech(options: SpeechOptions): Promise<ArrayBuffer> {
    throw new Error("Method not implemented.");
  }
-  transcription(options: TranscriptionOptions): Promise<string> {
-    throw new Error("Method not implemented.");
-  }

  async chat(options: ChatOptions) {
    const messages = options.messages.map((v) => ({
@@ -182,6 +179,7 @@ export class QwenApi implements LLMApi {
        controller.signal.onabort = finish;

        fetchEventSource(chatPath, {
+          fetch: fetch as any,
          ...chatPayload,
          async onopen(res) {
            clearTimeout(requestTimeoutId);
--- a/app/client/platforms/anthropic.ts
+++ b/app/client/platforms/anthropic.ts
@@ -1,12 +1,5 @@
-import { ACCESS_CODE_PREFIX, Anthropic, ApiPath } from "@/app/constant";
-import {
-  ChatOptions,
-  getHeaders,
-  LLMApi,
-  MultimodalContent,
-  SpeechOptions,
-  TranscriptionOptions,
-} from "../api";
+import { Anthropic, ApiPath } from "@/app/constant";
+import { ChatOptions, getHeaders, LLMApi, SpeechOptions } from "../api";
 import {
  useAccessStore,
  useAppConfig,
@@ -15,14 +8,7 @@ import {
  ChatMessageTool,
 } from "@/app/store";
 import { getClientConfig } from "@/app/config/client";
-import { DEFAULT_API_HOST } from "@/app/constant";
-import {
-  EventStreamContentType,
-  fetchEventSource,
-} from "@fortaine/fetch-event-source";
-
-import Locale from "../../locales";
-import { prettyObject } from "@/app/utils/format";
+import { ANTHROPIC_BASE_URL } from "@/app/constant";
 import { getMessageTextContent, isVisionModel } from "@/app/utils";
 import { preProcessImageContent, stream } from "@/app/utils/chat";
 import { cloudflareAIGatewayUrl } from "@/app/utils/cloudflare";
@@ -90,9 +76,6 @@ export class ClaudeApi implements LLMApi {
  speech(options: SpeechOptions): Promise<ArrayBuffer> {
    throw new Error("Method not implemented.");
  }
-  transcription(options: TranscriptionOptions): Promise<string> {
-    throw new Error("Method not implemented.");
-  }

  extractMessage(res: any) {
    console.log("[Response] claude response: ", res);
@@ -405,9 +388,7 @@ export class ClaudeApi implements LLMApi {
    if (baseUrl.trim().length === 0) {
      const isApp = !!getClientConfig()?.isApp;

-      baseUrl = isApp
-        ? DEFAULT_API_HOST + "/api/proxy/anthropic"
-        : ApiPath.Anthropic;
+      baseUrl = isApp ? ANTHROPIC_BASE_URL : ApiPath.Anthropic;
    }

    if (!baseUrl.startsWith("http") && !baseUrl.startsWith("/api")) {
--- a/app/client/platforms/baidu.ts
+++ b/app/client/platforms/baidu.ts
@@ -15,7 +15,6 @@ import {
  LLMModel,
  MultimodalContent,
  SpeechOptions,
-  TranscriptionOptions,
 } from "../api";
 import Locale from "../../locales";
 import {
@@ -25,6 +24,7 @@ import {
 import { prettyObject } from "@/app/utils/format";
 import { getClientConfig } from "@/app/config/client";
 import { getMessageTextContent } from "@/app/utils";
+import { fetch } from "@/app/utils/stream";

 export interface OpenAIListModelResponse {
  object: string;
@@ -80,9 +80,6 @@ export class ErnieApi implements LLMApi {
  speech(options: SpeechOptions): Promise<ArrayBuffer> {
    throw new Error("Method not implemented.");
  }
-  transcription(options: TranscriptionOptions): Promise<string> {
-    throw new Error("Method not implemented.");
-  }

  async chat(options: ChatOptions) {
    const messages = options.messages.map((v) => ({
@@ -201,6 +198,7 @@ export class ErnieApi implements LLMApi {
        controller.signal.onabort = finish;

        fetchEventSource(chatPath, {
+          fetch: fetch as any,
          ...chatPayload,
          async onopen(res) {
            clearTimeout(requestTimeoutId);
--- a/app/client/platforms/bytedance.ts
+++ b/app/client/platforms/bytedance.ts
@@ -14,7 +14,6 @@ import {
  LLMModel,
  MultimodalContent,
  SpeechOptions,
-  TranscriptionOptions,
 } from "../api";
 import Locale from "../../locales";
 import {
@@ -24,6 +23,7 @@ import {
 import { prettyObject } from "@/app/utils/format";
 import { getClientConfig } from "@/app/config/client";
 import { getMessageTextContent } from "@/app/utils";
+import { fetch } from "@/app/utils/stream";

 export interface OpenAIListModelResponse {
  object: string;
@@ -82,9 +82,6 @@ export class DoubaoApi implements LLMApi {
  speech(options: SpeechOptions): Promise<ArrayBuffer> {
    throw new Error("Method not implemented.");
  }
-  transcription(options: TranscriptionOptions): Promise<string> {
-    throw new Error("Method not implemented.");
-  }

  async chat(options: ChatOptions) {
    const messages = options.messages.map((v) => ({
@@ -169,6 +166,7 @@ export class DoubaoApi implements LLMApi {
        controller.signal.onabort = finish;

        fetchEventSource(chatPath, {
+          fetch: fetch as any,
          ...chatPayload,
          async onopen(res) {
            clearTimeout(requestTimeoutId);
--- a/app/client/platforms/google.ts
+++ b/app/client/platforms/google.ts
@@ -6,23 +6,27 @@ import {
  LLMModel,
  LLMUsage,
  SpeechOptions,
-  TranscriptionOptions,
 } from "../api";
-import { useAccessStore, useAppConfig, useChatStore } from "@/app/store";
-import { getClientConfig } from "@/app/config/client";
-import { DEFAULT_API_HOST } from "@/app/constant";
-import Locale from "../../locales";
 import {
-  EventStreamContentType,
-  fetchEventSource,
-} from "@fortaine/fetch-event-source";
-import { prettyObject } from "@/app/utils/format";
+  useAccessStore,
+  useAppConfig,
+  useChatStore,
+  usePluginStore,
+  ChatMessageTool,
+} from "@/app/store";
+import { stream } from "@/app/utils/chat";
+import { getClientConfig } from "@/app/config/client";
+import { GEMINI_BASE_URL } from "@/app/constant";
+
 import {
  getMessageTextContent,
  getMessageImages,
  isVisionModel,
 } from "@/app/utils";
 import { preProcessImageContent } from "@/app/utils/chat";
+import { nanoid } from "nanoid";
+import { RequestPayload } from "./openai";
+import { fetch } from "@/app/utils/stream";

 export class GeminiProApi implements LLMApi {
  path(path: string): string {
@@ -35,7 +39,7 @@ export class GeminiProApi implements LLMApi {

    const isApp = !!getClientConfig()?.isApp;
    if (baseUrl.length === 0) {
-      baseUrl = isApp ? DEFAULT_API_HOST + `/api/proxy/google` : ApiPath.Google;
+      baseUrl = isApp ? GEMINI_BASE_URL : ApiPath.Google;
    }
    if (baseUrl.endsWith("/")) {
      baseUrl = baseUrl.slice(0, baseUrl.length - 1);
@@ -49,10 +53,6 @@ export class GeminiProApi implements LLMApi {
    let chatPath = [baseUrl, path].join("/");

    chatPath += chatPath.includes("?") ? "&alt=sse" : "?alt=sse";
-    // if chatPath.startsWith('http') then add key in query string
-    if (chatPath.startsWith("http") && accessStore.googleApiKey) {
-      chatPath += `&key=${accessStore.googleApiKey}`;
-    }
    return chatPath;
  }
  extractMessage(res: any) {
@@ -67,9 +67,7 @@ export class GeminiProApi implements LLMApi {
  speech(options: SpeechOptions): Promise<ArrayBuffer> {
    throw new Error("Method not implemented.");
  }
-  transcription(options: TranscriptionOptions): Promise<string> {
-    throw new Error("Method not implemented.");
-  }
+
  async chat(options: ChatOptions): Promise<void> {
    const apiClient = this;
    let multimodal = false;
@@ -184,114 +182,81 @@ export class GeminiProApi implements LLMApi {
      );

      if (shouldStream) {
-        let responseText = "";
-        let remainText = "";
-        let finished = false;
+        const [tools, funcs] = usePluginStore
+          .getState()
+          .getAsTools(
+            useChatStore.getState().currentSession().mask?.plugin || [],
+          );
+        return stream(
+          chatPath,
+          requestPayload,
+          getHeaders(),
+          // @ts-ignore
+          [{ functionDeclarations: tools.map((tool) => tool.function) }],
+          funcs,
+          controller,
+          // parseSSE
+          (text: string, runTools: ChatMessageTool[]) => {
+            // console.log("parseSSE", text, runTools);
+            const chunkJson = JSON.parse(text);

-        const finish = () => {
-          if (!finished) {
-            finished = true;
-            options.onFinish(responseText + remainText);
-          }
-        };
-
-        // animate response to make it looks smooth
-        function animateResponseText() {
-          if (finished || controller.signal.aborted) {
-            responseText += remainText;
-            finish();
-            return;
-          }
-
-          if (remainText.length > 0) {
-            const fetchCount = Math.max(1, Math.round(remainText.length / 60));
-            const fetchText = remainText.slice(0, fetchCount);
-            responseText += fetchText;
-            remainText = remainText.slice(fetchCount);
-            options.onUpdate?.(responseText, fetchText);
-          }
-
-          requestAnimationFrame(animateResponseText);
-        }
-
-        // start animaion
-        animateResponseText();
-
-        controller.signal.onabort = finish;
-
-        fetchEventSource(chatPath, {
-          ...chatPayload,
-          async onopen(res) {
-            clearTimeout(requestTimeoutId);
-            const contentType = res.headers.get("content-type");
-            console.log(
-              "[Gemini] request response content type: ",
-              contentType,
+            const functionCall = chunkJson?.candidates
+              ?.at(0)
+              ?.content.parts.at(0)?.functionCall;
+            if (functionCall) {
+              const { name, args } = functionCall;
+              runTools.push({
+                id: nanoid(),
+                type: "function",
+                function: {
+                  name,
+                  arguments: JSON.stringify(args), // utils.chat call function, using JSON.parse
+                },
+              });
+            }
+            return chunkJson?.candidates?.at(0)?.content.parts.at(0)?.text;
+          },
+          // processToolMessage, include tool_calls message and tool call results
+          (
+            requestPayload: RequestPayload,
+            toolCallMessage: any,
+            toolCallResult: any[],
+          ) => {
+            // @ts-ignore
+            requestPayload?.contents?.splice(
+              // @ts-ignore
+              requestPayload?.contents?.length,
+              0,
+              {
+                role: "model",
+                parts: toolCallMessage.tool_calls.map(
+                  (tool: ChatMessageTool) => ({
+                    functionCall: {
+                      name: tool?.function?.name,
+                      args: JSON.parse(tool?.function?.arguments as string),
+                    },
+                  }),
+                ),
+              },
+              // @ts-ignore
+              ...toolCallResult.map((result) => ({
+                role: "function",
+                parts: [
+                  {
+                    functionResponse: {
+                      name: result.name,
+                      response: {
+                        name: result.name,
+                        content: result.content, // TODO just text content...
+                      },
+                    },
+                  },
+                ],
+              })),
            );
-
-            if (contentType?.startsWith("text/plain")) {
-              responseText = await res.clone().text();
-              return finish();
-            }
-
-            if (
-              !res.ok ||
-              !res.headers
-                .get("content-type")
-                ?.startsWith(EventStreamContentType) ||
-              res.status !== 200
-            ) {
-              const responseTexts = [responseText];
-              let extraInfo = await res.clone().text();
-              try {
-                const resJson = await res.clone().json();
-                extraInfo = prettyObject(resJson);
-              } catch {}
-
-              if (res.status === 401) {
-                responseTexts.push(Locale.Error.Unauthorized);
-              }
-
-              if (extraInfo) {
-                responseTexts.push(extraInfo);
-              }
-
-              responseText = responseTexts.join("\n\n");
-
-              return finish();
-            }
          },
-          onmessage(msg) {
-            if (msg.data === "[DONE]" || finished) {
-              return finish();
-            }
-            const text = msg.data;
-            try {
-              const json = JSON.parse(text);
-              const delta = apiClient.extractMessage(json);
-
-              if (delta) {
-                remainText += delta;
-              }
-
-              const blockReason = json?.promptFeedback?.blockReason;
-              if (blockReason) {
-                // being blocked
-                console.log(`[Google] [Safety Ratings] result:`, blockReason);
-              }
-            } catch (e) {
-              console.error("[Request] parse error", text, msg);
-            }
-          },
-          onclose() {
-            finish();
-          },
-          onerror(e) {
-            options.onError?.(e);
-            throw e;
-          },
-          openWhenHidden: true,
-        });
+          options,
+        );
      } else {
        const res = await fetch(chatPath, chatPayload);
        clearTimeout(requestTimeoutId);
--- a/app/client/platforms/iflytek.ts
+++ b/app/client/platforms/iflytek.ts
@@ -1,7 +1,7 @@
 "use client";
 import {
  ApiPath,
-  DEFAULT_API_HOST,
+  IFLYTEK_BASE_URL,
  Iflytek,
  REQUEST_TIMEOUT_MS,
 } from "@/app/constant";
@@ -13,7 +13,6 @@ import {
  LLMApi,
  LLMModel,
  SpeechOptions,
-  TranscriptionOptions,
 } from "../api";
 import Locale from "../../locales";
 import {
@@ -23,8 +22,9 @@ import {
 import { prettyObject } from "@/app/utils/format";
 import { getClientConfig } from "@/app/config/client";
 import { getMessageTextContent } from "@/app/utils";
+import { fetch } from "@/app/utils/stream";

-import { OpenAIListModelResponse, RequestPayload } from "./openai";
+import { RequestPayload } from "./openai";

 export class SparkApi implements LLMApi {
  private disableListModels = true;
@@ -41,7 +41,7 @@ export class SparkApi implements LLMApi {
    if (baseUrl.length === 0) {
      const isApp = !!getClientConfig()?.isApp;
      const apiPath = ApiPath.Iflytek;
-      baseUrl = isApp ? DEFAULT_API_HOST + "/proxy" + apiPath : apiPath;
+      baseUrl = isApp ? IFLYTEK_BASE_URL : apiPath;
    }

    if (baseUrl.endsWith("/")) {
@@ -63,9 +63,6 @@ export class SparkApi implements LLMApi {
  speech(options: SpeechOptions): Promise<ArrayBuffer> {
    throw new Error("Method not implemented.");
  }
-  transcription(options: TranscriptionOptions): Promise<string> {
-    throw new Error("Method not implemented.");
-  }

  async chat(options: ChatOptions) {
    const messages: ChatOptions["messages"] = [];
@@ -153,6 +150,7 @@ export class SparkApi implements LLMApi {
        controller.signal.onabort = finish;

        fetchEventSource(chatPath, {
+          fetch: fetch as any,
          ...chatPayload,
          async onopen(res) {
            clearTimeout(requestTimeoutId);
--- a/app/client/platforms/moonshot.ts
+++ b/app/client/platforms/moonshot.ts
@@ -2,11 +2,9 @@
 // azure and openai, using same models. so using same LLMApi.
 import {
  ApiPath,
-  DEFAULT_API_HOST,
-  DEFAULT_MODELS,
+  MOONSHOT_BASE_URL,
  Moonshot,
  REQUEST_TIMEOUT_MS,
-  ServiceProvider,
 } from "@/app/constant";
 import {
  useAccessStore,
@@ -15,30 +13,17 @@ import {
  ChatMessageTool,
  usePluginStore,
 } from "@/app/store";
-import { collectModelsWithDefaultModel } from "@/app/utils/model";
-import { preProcessImageContent, stream } from "@/app/utils/chat";
-import { cloudflareAIGatewayUrl } from "@/app/utils/cloudflare";
-
+import { stream } from "@/app/utils/chat";
 import {
  ChatOptions,
  getHeaders,
  LLMApi,
  LLMModel,
-  LLMUsage,
-  MultimodalContent,
  SpeechOptions,
-  TranscriptionOptions,
 } from "../api";
-import Locale from "../../locales";
-import {
-  EventStreamContentType,
-  fetchEventSource,
-} from "@fortaine/fetch-event-source";
-import { prettyObject } from "@/app/utils/format";
 import { getClientConfig } from "@/app/config/client";
 import { getMessageTextContent } from "@/app/utils";
-
-import { OpenAIListModelResponse, RequestPayload } from "./openai";
+import { RequestPayload } from "./openai";

 export class MoonshotApi implements LLMApi {
  private disableListModels = true;
@@ -55,7 +40,7 @@ export class MoonshotApi implements LLMApi {
    if (baseUrl.length === 0) {
      const isApp = !!getClientConfig()?.isApp;
      const apiPath = ApiPath.Moonshot;
-      baseUrl = isApp ? DEFAULT_API_HOST + "/proxy" + apiPath : apiPath;
+      baseUrl = isApp ? MOONSHOT_BASE_URL : apiPath;
    }

    if (baseUrl.endsWith("/")) {
@@ -77,9 +62,6 @@ export class MoonshotApi implements LLMApi {
  speech(options: SpeechOptions): Promise<ArrayBuffer> {
    throw new Error("Method not implemented.");
  }
-  transcription(options: TranscriptionOptions): Promise<string> {
-    throw new Error("Method not implemented.");
-  }

  async chat(options: ChatOptions) {
    const messages: ChatOptions["messages"] = [];
--- a/app/client/platforms/openai.ts
+++ b/app/client/platforms/openai.ts
@@ -2,7 +2,7 @@
 // azure and openai, using same models. so using same LLMApi.
 import {
  ApiPath,
-  DEFAULT_API_HOST,
+  OPENAI_BASE_URL,
  DEFAULT_MODELS,
  OpenaiPath,
  Azure,
@@ -34,18 +34,11 @@ import {
  LLMUsage,
  MultimodalContent,
  SpeechOptions,
-  TranscriptionOptions,
 } from "../api";
 import Locale from "../../locales";
-import {
-  EventStreamContentType,
-  fetchEventSource,
-} from "@fortaine/fetch-event-source";
-import { prettyObject } from "@/app/utils/format";
 import { getClientConfig } from "@/app/config/client";
 import {
  getMessageTextContent,
-  getMessageImages,
  isVisionModel,
  isDalle3 as _isDalle3,
 } from "@/app/utils";
@@ -86,7 +79,7 @@ export interface DalleRequestPayload {
 export class ChatGPTApi implements LLMApi {
  private disableListModels = true;

-  path(path: string, model?: string): string {
+  path(path: string): string {
    const accessStore = useAccessStore.getState();

    let baseUrl = "";
@@ -105,7 +98,7 @@ export class ChatGPTApi implements LLMApi {
    if (baseUrl.length === 0) {
      const isApp = !!getClientConfig()?.isApp;
      const apiPath = isAzure ? ApiPath.Azure : ApiPath.OpenAI;
-      baseUrl = isApp ? DEFAULT_API_HOST + "/proxy" + apiPath : apiPath;
+      baseUrl = isApp ? OPENAI_BASE_URL : apiPath;
    }

    if (baseUrl.endsWith("/")) {
@@ -164,7 +157,7 @@ export class ChatGPTApi implements LLMApi {
    options.onController?.(controller);

    try {
-      const speechPath = this.path(OpenaiPath.SpeechPath, options.model);
+      const speechPath = this.path(OpenaiPath.SpeechPath);
      const speechPayload = {
        method: "POST",
        body: JSON.stringify(requestPayload),
@@ -325,6 +318,7 @@ export class ChatGPTApi implements LLMApi {
        );
      }
      if (shouldStream) {
+        let index = -1;
        const [tools, funcs] = usePluginStore
          .getState()
          .getAsTools(
@@ -350,10 +344,10 @@ export class ChatGPTApi implements LLMApi {
            }>;
            const tool_calls = choices[0]?.delta?.tool_calls;
            if (tool_calls?.length > 0) {
-              const index = tool_calls[0]?.index;
              const id = tool_calls[0]?.id;
              const args = tool_calls[0]?.function?.arguments;
              if (id) {
+                index += 1;
                runTools.push({
                  id,
                  type: tool_calls[0]?.type,
@@ -375,6 +369,8 @@ export class ChatGPTApi implements LLMApi {
            toolCallMessage: any,
            toolCallResult: any[],
          ) => {
+            // reset index value
+            index = -1;
            // @ts-ignore
            requestPayload?.messages?.splice(
              // @ts-ignore
--- a/app/client/platforms/tencent.ts
+++ b/app/client/platforms/tencent.ts
@@ -1,5 +1,5 @@
 "use client";
-import { ApiPath, DEFAULT_API_HOST, REQUEST_TIMEOUT_MS } from "@/app/constant";
+import { ApiPath, TENCENT_BASE_URL, REQUEST_TIMEOUT_MS } from "@/app/constant";
 import { useAccessStore, useAppConfig, useChatStore } from "@/app/store";

 import {
@@ -9,7 +9,6 @@ import {
  LLMModel,
  MultimodalContent,
  SpeechOptions,
-  TranscriptionOptions,
 } from "../api";
 import Locale from "../../locales";
 import {
@@ -23,6 +22,7 @@ import mapKeys from "lodash-es/mapKeys";
 import mapValues from "lodash-es/mapValues";
 import isArray from "lodash-es/isArray";
 import isObject from "lodash-es/isObject";
+import { fetch } from "@/app/utils/stream";

 export interface OpenAIListModelResponse {
  object: string;
@@ -71,9 +71,7 @@ export class HunyuanApi implements LLMApi {

    if (baseUrl.length === 0) {
      const isApp = !!getClientConfig()?.isApp;
-      baseUrl = isApp
-        ? DEFAULT_API_HOST + "/api/proxy/tencent"
-        : ApiPath.Tencent;
+      baseUrl = isApp ? TENCENT_BASE_URL : ApiPath.Tencent;
    }

    if (baseUrl.endsWith("/")) {
@@ -94,9 +92,6 @@ export class HunyuanApi implements LLMApi {
  speech(options: SpeechOptions): Promise<ArrayBuffer> {
    throw new Error("Method not implemented.");
  }
-  transcription(options: TranscriptionOptions): Promise<string> {
-    throw new Error("Method not implemented.");
-  }

  async chat(options: ChatOptions) {
    const visionModel = isVisionModel(options.config.model);
@@ -183,6 +178,7 @@ export class HunyuanApi implements LLMApi {
        controller.signal.onabort = finish;

        fetchEventSource(chatPath, {
+          fetch: fetch as any,
          ...chatPayload,
          async onopen(res) {
            clearTimeout(requestTimeoutId);