Hk-Gosuto
2025-03-07 16:20:06 +08:00
parent f704e7a271
commit 48fe84024f
7 changed files with 293 additions and 103 deletions

View File

@@ -31,13 +31,13 @@ async function handle(req: NextRequest) {
   // const pineconeIndex = pinecone.Index(serverConfig.pineconeIndex!);
   const apiKey = getOpenAIApiKey(token);
   const baseUrl = getOpenAIBaseUrl(reqBody.baseUrl);
-  const embeddings = new OpenAIEmbeddings(
-    {
-      modelName: serverConfig.ragEmbeddingModel ?? "text-embedding-3-large",
-      openAIApiKey: apiKey,
-    },
-    { basePath: baseUrl },
-  );
+  const embeddings = new OpenAIEmbeddings({
+    modelName: serverConfig.ragEmbeddingModel ?? "text-embedding-3-large",
+    openAIApiKey: apiKey,
+    configuration: {
+      baseURL: baseUrl,
+    },
+  });
   // const vectorStore = await PineconeStore.fromExistingIndex(embeddings, {
   //   pineconeIndex,
   // });
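For context on the pattern repeated throughout this commit: current releases of @langchain/openai accept a single options object and nest the underlying OpenAI client settings under configuration, whereas the removed second positional { basePath } argument belonged to the legacy openai v3 SDK configuration. A minimal sketch of the new call shape (values are placeholders, only the argument shape matters):

import { OpenAIEmbeddings } from "@langchain/openai";

// Placeholder values; only the argument shape matters here.
const embeddings = new OpenAIEmbeddings({
  modelName: "text-embedding-3-large",
  openAIApiKey: process.env.OPENAI_API_KEY,
  configuration: {
    baseURL: "https://api.openai.com/v1", // replaces the old { basePath } second argument
  },
});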

View File

@@ -94,13 +94,11 @@ async function handle(req: NextRequest) {
       baseUrl: process.env.OLLAMA_BASE_URL,
     });
   } else {
-    embeddings = new OpenAIEmbeddings(
-      {
-        modelName: serverConfig.ragEmbeddingModel,
-        openAIApiKey: apiKey,
-      },
-      { basePath: baseUrl },
-    );
+    embeddings = new OpenAIEmbeddings({
+      modelName: serverConfig.ragEmbeddingModel,
+      openAIApiKey: apiKey,
+      configuration: { baseURL: baseUrl },
+    });
   }
   // https://js.langchain.com/docs/integrations/vectorstores/pinecone
   // https://js.langchain.com/docs/integrations/vectorstores/qdrant
@@ -181,12 +179,10 @@ async function handle(req: NextRequest) {
 }

 function bufferToBlob(buffer: Buffer, mimeType?: string): Blob {
-  const arrayBuffer: ArrayBuffer = buffer.buffer.slice(
-    buffer.byteOffset,
-    buffer.byteOffset + buffer.byteLength,
-  );
+  const arrayBuffer = new Uint8Array(buffer).buffer;
   return new Blob([arrayBuffer], { type: mimeType || "" });
 }

 function getOpenAIApiKey(token: string) {
   const serverConfig = getServerSideConfig();
   const isApiKey = !token.startsWith(ACCESS_CODE_PREFIX);
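A note on the bufferToBlob change above: small Node Buffers are carved out of a shared allocation pool, so buffer.buffer can be a much larger ArrayBuffer than the Buffer itself, which is why the old code sliced by byteOffset; copying through a fresh Uint8Array produces an ArrayBuffer of exactly the right length. A quick illustration (the 8192-byte pool size is the Node default and may vary):

const buf = Buffer.from("hello");
console.log(buf.byteLength);        // 5
console.log(buf.buffer.byteLength); // typically 8192: the shared pool, not just "hello"

// Copying the five bytes gives them their own, exactly-sized ArrayBuffer.
const exact = new Uint8Array(buf).buffer;
console.log(exact.byteLength);      // 5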

View File

@@ -39,7 +39,11 @@ import {
   ChatPromptTemplate,
   MessagesPlaceholder,
 } from "@langchain/core/prompts";
-import { ChatOpenAI, OpenAIEmbeddings } from "@langchain/openai";
+import {
+  AzureChatOpenAI,
+  ChatOpenAI,
+  OpenAIEmbeddings,
+} from "@langchain/openai";
 import { ChatAnthropic } from "@langchain/anthropic";
 import {
   BaseMessage,
@@ -74,6 +78,8 @@ export interface RequestBody {
   returnIntermediateSteps: boolean;
   useTools: (undefined | string)[];
   provider: ServiceProvider;
+  max_tokens?: number;
+  max_completion_tokens?: number;
 }

 export class ResponseBody {
@@ -254,14 +260,14 @@ export class AgentApi {
         },
       });
     }
-    return new ChatOpenAI(
-      {
-        temperature: 0,
-        modelName: reqBody.model,
-        openAIApiKey: apiKey,
-      },
-      { basePath: baseUrl },
-    );
+    return new ChatOpenAI({
+      temperature: 0,
+      modelName: reqBody.model,
+      openAIApiKey: apiKey,
+      configuration: {
+        baseURL: baseUrl,
+      },
+    });
   }

   getToolEmbeddings(reqBody: RequestBody, apiKey: string, baseUrl: string) {
@@ -275,19 +281,19 @@ export class AgentApi {
         return null;
       }
     }
-    return new OpenAIEmbeddings(
-      {
-        openAIApiKey: apiKey,
-      },
-      { basePath: baseUrl },
-    );
+    return new OpenAIEmbeddings({
+      openAIApiKey: apiKey,
+      configuration: {
+        baseURL: baseUrl,
+      },
+    });
   }

   getLLM(reqBody: RequestBody, apiKey: string, baseUrl: string) {
     const serverConfig = getServerSideConfig();
     if (reqBody.isAzure || serverConfig.isAzure) {
       console.log("[use Azure ChatOpenAI]");
-      return new ChatOpenAI({
+      return new AzureChatOpenAI({
         temperature: reqBody.temperature,
         streaming: reqBody.stream,
         topP: reqBody.top_p,
@@ -299,22 +305,26 @@ export class AgentApi {
           : serverConfig.azureApiVersion,
         azureOpenAIApiDeploymentName: reqBody.model,
         azureOpenAIBasePath: baseUrl,
+        maxTokens: reqBody.max_tokens,
+        maxCompletionTokens: reqBody.max_completion_tokens,
       });
     }
     if (reqBody.provider === ServiceProvider.OpenAI) {
       console.log("[use ChatOpenAI]");
-      return new ChatOpenAI(
-        {
-          modelName: reqBody.model,
-          openAIApiKey: apiKey,
-          temperature: reqBody.temperature,
-          streaming: reqBody.stream,
-          topP: reqBody.top_p,
-          presencePenalty: reqBody.presence_penalty,
-          frequencyPenalty: reqBody.frequency_penalty,
-        },
-        { basePath: baseUrl },
-      );
+      return new ChatOpenAI({
+        modelName: reqBody.model,
+        openAIApiKey: apiKey,
+        temperature: reqBody.temperature,
+        streaming: reqBody.stream,
+        topP: reqBody.top_p,
+        presencePenalty: reqBody.presence_penalty,
+        frequencyPenalty: reqBody.frequency_penalty,
+        maxTokens: reqBody.max_tokens,
+        maxCompletionTokens: reqBody.max_completion_tokens,
+        configuration: {
+          baseURL: baseUrl,
+        },
+      });
     }
     if (reqBody.provider === ServiceProvider.Anthropic) {
       console.log("[use ChatAnthropic]");
@@ -439,11 +449,16 @@ export class AgentApi {
     });

     const pastMessages = new Array();
+    const isO1OrO3 =
+      reqBody.model.startsWith("o1") || reqBody.model.startsWith("o3");

     reqBody.messages
       .slice(0, reqBody.messages.length - 1)
       .forEach((message) => {
-        if (message.role === "system" && typeof message.content === "string")
+        if (
+          !isO1OrO3 &&
+          message.role === "system" &&
+          typeof message.content === "string"
+        )
           pastMessages.push(new SystemMessage(message.content));
         if (message.role === "user")
           typeof message.content === "string"
@@ -458,6 +473,15 @@ export class AgentApi {
           pastMessages.push(new AIMessage(message.content));
       });

+    reqBody.temperature = !isO1OrO3 ? reqBody.temperature : 1;
+    reqBody.presence_penalty = !isO1OrO3 ? reqBody.presence_penalty : 0;
+    reqBody.frequency_penalty = !isO1OrO3 ? reqBody.frequency_penalty : 0;
+    reqBody.top_p = !isO1OrO3 ? reqBody.top_p : 1;
+    if (isO1OrO3) {
+      reqBody.max_completion_tokens = reqBody.max_tokens;
+    }
+
     let llm = this.getLLM(reqBody, apiKey, baseUrl);

     const MEMORY_KEY = "chat_history";
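The o1/o3 handling above mutates reqBody in place right before this.getLLM is called: OpenAI's o1/o3 reasoning models reject non-default sampling parameters and budget their output through max_completion_tokens. The same normalization written as a standalone helper, for clarity (a hypothetical function, reusing the RequestBody fields from this file):

function normalizeForReasoningModels(reqBody: RequestBody): RequestBody {
  const isO1OrO3 =
    reqBody.model.startsWith("o1") || reqBody.model.startsWith("o3");
  if (!isO1OrO3) return reqBody;
  return {
    ...reqBody,
    // mirror the in-place assignments above: fixed sampling parameters
    temperature: 1,
    top_p: 1,
    presence_penalty: 0,
    frequency_penalty: 0,
    // o1/o3 take max_completion_tokens rather than max_tokens
    max_completion_tokens: reqBody.max_tokens,
  };
}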

View File

@@ -49,14 +49,11 @@ async function handle(req: NextRequest) {
       baseUrl: process.env.OLLAMA_BASE_URL,
     });
   } else {
-    ragEmbeddings = new OpenAIEmbeddings(
-      {
-        modelName:
-          process.env.RAG_EMBEDDING_MODEL ?? "text-embedding-3-large",
-        openAIApiKey: apiKey,
-      },
-      { basePath: baseUrl },
-    );
+    ragEmbeddings = new OpenAIEmbeddings({
+      modelName: process.env.RAG_EMBEDDING_MODEL ?? "text-embedding-3-large",
+      openAIApiKey: apiKey,
+      configuration: { baseURL: baseUrl },
+    });
   }

   var dalleCallback = async (data: string) => {

View File

@@ -21,6 +21,7 @@ import {
   preProcessImageAndWebReferenceContent,
   preProcessImageContent,
   stream,
+  streamWithThink,
 } from "@/app/utils/chat";
 import { cloudflareAIGatewayUrl } from "@/app/utils/cloudflare";
 import { DalleSize, DalleQuality, DalleStyle } from "@/app/typing";
@@ -49,6 +50,7 @@ import {
   isVisionModel,
   isDalle3 as _isDalle3,
   getWebReferenceMessageTextContent,
+  getTimeoutMSByModel,
 } from "@/app/utils";

 export interface OpenAIListModelResponse {
@@ -73,6 +75,7 @@ export interface RequestPayload {
   frequency_penalty: number;
   top_p: number;
   max_tokens?: number;
+  max_completion_tokens?: number;
 }

 export interface DalleRequestPayload {
@@ -223,7 +226,9 @@ export class ChatGPTApi implements LLMApi {
     let requestPayload: RequestPayload | DalleRequestPayload;

     const isDalle3 = _isDalle3(options.config.model);
-    const isO1 = options.config.model.startsWith("o1");
+    const isO1OrO3 =
+      options.config.model.startsWith("o1") ||
+      options.config.model.startsWith("o3");
     if (isDalle3) {
       const prompt = getMessageTextContent(
         options.messages.slice(-1)?.pop() as any,
@@ -245,23 +250,28 @@ export class ChatGPTApi implements LLMApi {
         const content = visionModel
           ? await preProcessImageAndWebReferenceContent(v)
           : getWebReferenceMessageTextContent(v);
-        if (!(isO1 && v.role === "system"))
+        if (!(isO1OrO3 && v.role === "system"))
           messages.push({ role: v.role, content });
       }

       // O1 not support image, tools (plugin in ChatGPTNextWeb) and system, stream, logprobs, temperature, top_p, n, presence_penalty, frequency_penalty yet.
       requestPayload = {
         messages,
-        stream: !isO1 ? options.config.stream : false,
+        stream: options.config.stream,
         model: modelConfig.model,
-        temperature: !isO1 ? modelConfig.temperature : 1,
-        presence_penalty: !isO1 ? modelConfig.presence_penalty : 0,
-        frequency_penalty: !isO1 ? modelConfig.frequency_penalty : 0,
-        top_p: !isO1 ? modelConfig.top_p : 1,
+        temperature: !isO1OrO3 ? modelConfig.temperature : 1,
+        presence_penalty: !isO1OrO3 ? modelConfig.presence_penalty : 0,
+        frequency_penalty: !isO1OrO3 ? modelConfig.frequency_penalty : 0,
+        top_p: !isO1OrO3 ? modelConfig.top_p : 1,
         // max_tokens: Math.max(modelConfig.max_tokens, 1024),
         // Please do not ask me why not send max_tokens, no reason, this param is just shit, I dont want to explain anymore.
       };

+      // O1 uses max_completion_tokens to control the output token budget (https://platform.openai.com/docs/guides/reasoning#controlling-costs)
+      if (isO1OrO3) {
+        requestPayload["max_completion_tokens"] = modelConfig.max_tokens;
+      }
+
       // add max_tokens to vision model
       if (visionModel) {
         requestPayload["max_tokens"] = Math.max(modelConfig.max_tokens, 4000);
@@ -270,7 +280,7 @@ export class ChatGPTApi implements LLMApi {
     console.log("[Request] openai payload: ", requestPayload);

-    const shouldStream = !isDalle3 && !!options.config.stream && !isO1;
+    const shouldStream = !isDalle3 && !!options.config.stream;
     const controller = new AbortController();
     options.onController?.(controller);
@@ -307,15 +317,16 @@ export class ChatGPTApi implements LLMApi {
       );
     }
     if (shouldStream) {
+      let index = -1;
       // const [tools, funcs] = usePluginStore
       //   .getState()
       //   .getAsTools(
       //     useChatStore.getState().currentSession().mask?.plugin || [],
       //   );
-      // console.log("getAsTools", tools, funcs);
       const tools = null;
       const funcs: Record<string, Function> = {};
-      stream(
+      // console.log("getAsTools", tools, funcs);
+      streamWithThink(
         chatPath,
         requestPayload,
         getHeaders(),
@@ -330,14 +341,18 @@ export class ChatGPTApi implements LLMApi {
               delta: {
                 content: string;
                 tool_calls: ChatMessageTool[];
+                reasoning_content: string | null;
               };
             }>;
+
+            if (!choices?.length) return { isThinking: false, content: "" };
+
             const tool_calls = choices[0]?.delta?.tool_calls;
             if (tool_calls?.length > 0) {
-              const index = tool_calls[0]?.index;
               const id = tool_calls[0]?.id;
               const args = tool_calls[0]?.function?.arguments;
               if (id) {
+                index += 1;
                 runTools.push({
                   id,
                   type: tool_calls[0]?.type,
@@ -351,7 +366,37 @@ export class ChatGPTApi implements LLMApi {
                 runTools[index]["function"]["arguments"] += args;
               }
             }
-            return choices[0]?.delta?.content;
+
+            const reasoning = choices[0]?.delta?.reasoning_content;
+            const content = choices[0]?.delta?.content;
+
+            // Skip if both content and reasoning_content are empty or null
+            if (
+              (!reasoning || reasoning.length === 0) &&
+              (!content || content.length === 0)
+            ) {
+              return {
+                isThinking: false,
+                content: "",
+              };
+            }
+
+            if (reasoning && reasoning.length > 0) {
+              return {
+                isThinking: true,
+                content: reasoning,
+              };
+            } else if (content && content.length > 0) {
+              return {
+                isThinking: false,
+                content: content,
+              };
+            }
+
+            return {
+              isThinking: false,
+              content: "",
+            };
           },
           // processToolMessage, include tool_calls message and tool call results
           (
@@ -359,6 +404,8 @@ export class ChatGPTApi implements LLMApi {
             toolCallMessage: any,
             toolCallResult: any[],
           ) => {
+            // reset index value
+            index = -1;
             // @ts-ignore
             requestPayload?.messages?.splice(
               // @ts-ignore
@@ -381,7 +428,7 @@ export class ChatGPTApi implements LLMApi {
       // make a fetch request
       const requestTimeoutId = setTimeout(
         () => controller.abort(),
-        isDalle3 || isO1 ? REQUEST_TIMEOUT_MS * 2 : REQUEST_TIMEOUT_MS, // dalle3 using b64_json is slow.
+        getTimeoutMSByModel(options.config.model),
       );
       const res = await fetch(chatPath, chatPayload);
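Finally, the switch from stream to streamWithThink means each SSE delta is parsed into a small "thinking or answering" result, so reasoning_content can be rendered separately from the ordinary completion text. A condensed sketch of the per-chunk contract used in the callback above (ThinkChunk and parseDelta are illustrative names, not identifiers from this codebase):

interface ThinkChunk {
  isThinking: boolean; // true while the delta carries reasoning_content
  content: string;
}

function parseDelta(delta: {
  content?: string | null;
  reasoning_content?: string | null;
}): ThinkChunk {
  if (delta.reasoning_content) {
    return { isThinking: true, content: delta.reasoning_content };
  }
  // otherwise fall back to the ordinary completion text, or an empty chunk
  return { isThinking: false, content: delta.content ?? "" };
}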