mirror of https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web.git
synced 2025-11-14 05:03:43 +08:00

Merge remote-tracking branch 'upstream/main'
app/client/platforms/google.ts

@@ -1,9 +1,4 @@
-import {
-  ApiPath,
-  Google,
-  REQUEST_TIMEOUT_MS,
-  ServiceProvider,
-} from "@/app/constant";
+import { Google, REQUEST_TIMEOUT_MS } from "@/app/constant";
 import {
   AgentChatOptions,
   ChatOptions,
@@ -14,13 +9,13 @@ import {
   SpeechOptions,
 } from "../api";
 import { useAccessStore, useAppConfig, useChatStore } from "@/app/store";
-import axios from "axios";
-
-const getImageBase64Data = async (url: string) => {
-  const response = await axios.get(url, { responseType: "arraybuffer" });
-  const base64 = Buffer.from(response.data, "binary").toString("base64");
-  return base64;
-};
+import { getClientConfig } from "@/app/config/client";
+import { DEFAULT_API_HOST } from "@/app/constant";
+import {
+  getMessageTextContent,
+  getMessageImages,
+  isVisionModel,
+} from "@/app/utils";
 
 export class GeminiProApi implements LLMApi {
   speech(options: SpeechOptions): Promise<ArrayBuffer> {
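Note: the removed getImageBase64Data helper fetched the image over HTTP with axios and encoded it via Node's Buffer, which only exists in the browser through a bundler polyfill; the merged code sidesteps both by carrying images as data URLs. For reference, a dependency-free browser sketch of the same conversion (illustrative, not part of this commit):

    // Sketch: fetch an image URL and return its base64 payload without axios/Buffer.
    // Assumes a runtime with fetch and btoa available (i.e. a browser).
    const toBase64 = async (url: string): Promise<string> => {
      const res = await fetch(url);
      const bytes = new Uint8Array(await res.arrayBuffer());
      let binary = "";
      for (const b of bytes) binary += String.fromCharCode(b);
      return btoa(binary);
    };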
@@ -39,32 +34,34 @@ export class GeminiProApi implements LLMApi {
     );
   }
   async chat(options: ChatOptions): Promise<void> {
-    const messages: any[] = [];
-    if (options.config.model.includes("vision")) {
-      for (const v of options.messages) {
-        let message: any = {
-          role: v.role.replace("assistant", "model").replace("system", "user"),
-          parts: [{ text: v.content }],
-        };
-        if (v.image_url) {
-          var base64Data = await getImageBase64Data(v.image_url);
-          message.parts.push({
-            inline_data: {
-              mime_type: "image/jpeg",
-              data: base64Data,
-            },
-          });
-        }
-        messages.push(message);
-      }
-    } else {
-      options.messages.map((v) =>
-        messages.push({
-          role: v.role.replace("assistant", "model").replace("system", "user"),
-          parts: [{ text: v.content }],
-        }),
-      );
-    }
+    // const apiClient = this;
+    const visionModel = isVisionModel(options.config.model);
+    let multimodal = false;
+    const messages = options.messages.map((v) => {
+      let parts: any[] = [{ text: getMessageTextContent(v) }];
+      if (visionModel) {
+        const images = getMessageImages(v);
+        if (images.length > 0) {
+          multimodal = true;
+          parts = parts.concat(
+            images.map((image) => {
+              const imageType = image.split(";")[0].split(":")[1];
+              const imageData = image.split(",")[1];
+              return {
+                inline_data: {
+                  mime_type: imageType,
+                  data: imageData,
+                },
+              };
+            }),
+          );
+        }
+      }
+      return {
+        role: v.role.replace("assistant", "model").replace("system", "user"),
+        parts: parts,
+      };
+    });
 
     // google requires that role in neighboring messages must not be the same
     for (let i = 0; i < messages.length - 1; ) {
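Note: in the merged code each attached image is a data URL, so the two split calls recover the mime type and the raw base64 payload directly. Worked example (illustrative value):

    // A data URL carries its own mime type and base64 payload:
    const image = "data:image/png;base64,iVBORw0KGgo=";
    const imageType = image.split(";")[0].split(":")[1]; // "image/png"
    const imageData = image.split(",")[1]; // "iVBORw0KGgo="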
@@ -79,7 +76,9 @@ export class GeminiProApi implements LLMApi {
       i++;
     }
   }
-
+  // if (visionModel && messages.length > 1) {
+  //   options.onError?.(new Error("Multiturn chat is not enabled for models/gemini-pro-vision"));
+  // }
   const modelConfig = {
     ...useAppConfig.getState().modelConfig,
     ...useChatStore.getState().currentSession().mask.modelConfig,
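Note: most of the de-duplication loop sits outside this hunk; per the comment above it, Gemini rejects two adjacent turns with the same role, so neighboring messages get merged. A standalone sketch of that normalization (assumed shape, not the exact code in this file):

    type GeminiMessage = { role: string; parts: { text?: string }[] };

    // Merge neighboring messages that share a role, since the Gemini API
    // rejects conversations where adjacent turns repeat a role.
    function mergeNeighborRoles(messages: GeminiMessage[]): GeminiMessage[] {
      const out: GeminiMessage[] = [];
      for (const m of messages) {
        const last = out[out.length - 1];
        if (last && last.role === m.role) {
          last.parts = last.parts.concat(m.parts);
        } else {
          out.push({ ...m, parts: [...m.parts] });
        }
      }
      return out;
    }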
@@ -118,15 +117,28 @@ export class GeminiProApi implements LLMApi {
       ],
     };
 
     console.log("[Request] google payload: ", requestPayload);
+    const accessStore = useAccessStore.getState();
+    let baseUrl = accessStore.googleUrl;
+    const isApp = !!getClientConfig()?.isApp;
 
-    const shouldStream = !!options.config.stream;
+    let shouldStream = !!options.config.stream;
     const controller = new AbortController();
     options.onController?.(controller);
     try {
-      const chatPath = this.path(
-        Google.ChatPath.replace("{{model}}", options.config.model),
-      );
+      let googleChatPath = visionModel
+        ? Google.VisionChatPath
+        : Google.ChatPath;
+      let chatPath = this.path(googleChatPath);
+
+      if (!baseUrl) {
+        baseUrl = isApp
+          ? DEFAULT_API_HOST + "/api/proxy/google/" + googleChatPath
+          : chatPath;
+      }
+
+      if (isApp) {
+        baseUrl += `?key=${accessStore.googleApiKey}`;
+      }
       const chatPayload = {
         method: "POST",
         body: JSON.stringify(requestPayload),
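Note: the request URL is now resolved in three steps: pick the model-specific path, fall back to either the desktop-app proxy or the client-side route when no custom googleUrl is set, and append the key as a query parameter only in the app build (the web build lets the server route inject it). Illustrative trace for the app build (values are stand-ins, not from this diff):

    const DEFAULT_API_HOST = "https://proxy.example.com"; // stand-in value
    const googleChatPath = "v1beta/models/gemini-pro-vision:generateContent"; // assumed constant
    const googleApiKey = "AIza-stand-in-key";
    let baseUrl = DEFAULT_API_HOST + "/api/proxy/google/" + googleChatPath;
    baseUrl += `?key=${googleApiKey}`;
    // => https://proxy.example.com/api/proxy/google/v1beta/models/gemini-pro-vision:generateContent?key=AIza-stand-in-key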
@@ -142,10 +154,6 @@ export class GeminiProApi implements LLMApi {
       if (shouldStream) {
         let responseText = "";
         let remainText = "";
-        let streamChatPath = chatPath.replace(
-          "generateContent",
-          "streamGenerateContent",
-        );
         let finished = false;
 
         let existingTexts: string[] = [];
@@ -175,11 +183,12 @@ export class GeminiProApi implements LLMApi {
 
         // start animaion
         animateResponseText();
-        fetch(streamChatPath, chatPayload)
-          .then(async (response) => {
-            if (!response.ok) {
-              throw new Error(await response?.text());
-            }
+
+        fetch(
+          baseUrl.replace("generateContent", "streamGenerateContent"),
+          chatPayload,
+        )
+          .then((response) => {
             const reader = response?.body?.getReader();
             const decoder = new TextDecoder();
             let partialData = "";
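Note: before and after the merge, streaming swaps Gemini's unary endpoint for its streaming counterpart by rewriting the final path segment; the change is only where that rewrite happens (on baseUrl rather than a precomputed streamChatPath). The endpoint pair, with an assumed model path:

    const unary = "v1beta/models/gemini-pro:generateContent"; // assumed constant
    const streaming = unary.replace("generateContent", "streamGenerateContent");
    // => "v1beta/models/gemini-pro:streamGenerateContent"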
@@ -189,6 +198,19 @@ export class GeminiProApi implements LLMApi {
               value,
             }): Promise<any> {
               if (done) {
+                if (response.status !== 200) {
+                  try {
+                    let data = JSON.parse(ensureProperEnding(partialData));
+                    if (data && data[0].error) {
+                      options.onError?.(new Error(data[0].error.message));
+                    } else {
+                      options.onError?.(new Error("Request failed"));
+                    }
+                  } catch (_) {
+                    options.onError?.(new Error("Request failed"));
+                  }
+                }
+
                 console.log("Stream complete");
                 // options.onFinish(responseText + remainText);
                 finished = true;
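Note: the new error branch runs partialData through ensureProperEnding before JSON.parse because a failed stream usually breaks off mid-array. That helper is outside this diff; presumably it just closes an unterminated JSON array, along these lines:

    // Assumed sketch of the helper used above: Gemini streams a JSON array,
    // so a truncated buffer like `[{"error": ...}` parses once the closing
    // bracket is restored.
    function ensureProperEnding(str: string): string {
      if (str.startsWith("[") && !str.endsWith("]")) {
        return str + "]";
      }
      return str;
    }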
@@ -230,7 +252,7 @@ export class GeminiProApi implements LLMApi {
             options.onError?.(error as Error);
           });
       } else {
-        const res = await fetch(chatPath, chatPayload);
+        const res = await fetch(baseUrl, chatPayload);
         clearTimeout(requestTimeoutId);
 
         const resJson = await res.json();
@@ -259,30 +281,7 @@ export class GeminiProApi implements LLMApi {
     return [];
   }
   path(path: string): string {
-    const accessStore = useAccessStore.getState();
-    const isGoogle =
-      accessStore.useCustomConfig &&
-      accessStore.provider === ServiceProvider.Google;
-
-    if (isGoogle && !accessStore.isValidGoogle()) {
-      throw Error(
-        "incomplete google config, please check it in your settings page",
-      );
-    }
-
-    let baseUrl = isGoogle ? accessStore.googleBaseUrl : ApiPath.GoogleAI;
-
-    if (baseUrl.length === 0) {
-      baseUrl = ApiPath.GoogleAI;
-    }
-    if (baseUrl.endsWith("/")) {
-      baseUrl = baseUrl.slice(0, baseUrl.length - 1);
-    }
-    if (!baseUrl.startsWith("http") && !baseUrl.startsWith(ApiPath.GoogleAI)) {
-      baseUrl = "https://" + baseUrl;
-    }
-
-    return [baseUrl, path].join("/");
+    return "/api/google/" + path;
   }
 }
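Note: after the merge, path() always routes through the app's own /api/google handler (which can attach the key server-side), dropping the custom-endpoint handling built on ServiceProvider and ApiPath. Usage then reduces to simple concatenation (assumed constant value):

    // e.g. this.path(Google.ChatPath)
    const chatPath = "/api/google/" + "v1beta/models/gemini-pro:generateContent";
    // => "/api/google/v1beta/models/gemini-pro:generateContent"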
app/client/platforms/openai.ts

@@ -16,6 +16,7 @@ import {
   LLMApi,
   LLMModel,
   LLMUsage,
+  MultimodalContent,
   SpeechOptions,
 } from "../api";
 import Locale from "../../locales";
@@ -26,7 +27,11 @@ import {
 import { prettyObject } from "@/app/utils/format";
 import { getClientConfig } from "@/app/config/client";
 import { makeAzurePath } from "@/app/azure";
-import axios from "axios";
+import {
+  getMessageTextContent,
+  getMessageImages,
+  isVisionModel,
+} from "@/app/utils";
 
 export interface OpenAIListModelResponse {
   object: string;
@@ -120,49 +125,11 @@ export class ChatGPTApi implements LLMApi {
   }
 
   async chat(options: ChatOptions) {
-    const messages: any[] = [];
-
-    const getImageBase64Data = async (url: string) => {
-      const response = await axios.get(url, { responseType: "arraybuffer" });
-      const base64 = Buffer.from(response.data, "binary").toString("base64");
-      return base64;
-    };
-    if (options.config.model === "gpt-4-vision-preview") {
-      for (const v of options.messages) {
-        let message: {
-          role: string;
-          content: {
-            type: string;
-            text?: string;
-            image_url?: { url: string };
-          }[];
-        } = {
-          role: v.role,
-          content: [],
-        };
-        message.content.push({
-          type: "text",
-          text: v.content,
-        });
-        if (v.image_url) {
-          var base64Data = await getImageBase64Data(v.image_url);
-          message.content.push({
-            type: "image_url",
-            image_url: {
-              url: `data:image/jpeg;base64,${base64Data}`,
-            },
-          });
-        }
-        messages.push(message);
-      }
-    } else {
-      options.messages.map((v) =>
-        messages.push({
-          role: v.role,
-          content: v.content,
-        }),
-      );
-    }
+    const visionModel = isVisionModel(options.config.model);
+    const messages = options.messages.map((v) => ({
+      role: v.role,
+      content: visionModel ? v.content : getMessageTextContent(v),
+    }));
 
     const modelConfig = {
       ...useAppConfig.getState().modelConfig,
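Note: the rewrite relies on message content already being either a plain string or an array of multimodal parts (hence the new MultimodalContent import): vision models get the content untouched, everything else is flattened to text. A sketch of the shapes involved (assumed from the imports; the real type lives in ../api):

    // Assumed part shape, mirroring the OpenAI chat-completions format:
    type Part =
      | { type: "text"; text: string }
      | { type: "image_url"; image_url: { url: string } };

    // Non-vision models should receive only the concatenated text parts:
    function textOnly(content: string | Part[]): string {
      if (typeof content === "string") return content;
      return content
        .filter((p): p is { type: "text"; text: string } => p.type === "text")
        .map((p) => p.text)
        .join("\n");
    }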
@@ -186,6 +153,16 @@ export class ChatGPTApi implements LLMApi {
       // Please do not ask me why not send max_tokens, no reason, this param is just shit, I dont want to explain anymore.
     };
 
+    // add max_tokens to vision model
+    if (visionModel) {
+      Object.defineProperty(requestPayload, "max_tokens", {
+        enumerable: true,
+        configurable: true,
+        writable: true,
+        value: modelConfig.max_tokens,
+      });
+    }
+
     console.log("[Request] openai payload: ", requestPayload);
 
     const shouldStream = !!options.config.stream;
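Note: with enumerable, configurable and writable all true, Object.defineProperty here behaves exactly like a plain property assignment, and the field serializes through JSON.stringify either way. A simpler equivalent (an observation, not a change in this commit):

    const requestPayload: Record<string, unknown> = {
      model: "gpt-4-vision-preview", // stand-in payload
    };
    const maxTokens = 4000; // stand-in for modelConfig.max_tokens
    requestPayload["max_tokens"] = maxTokens; // same observable effect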