Merge a87ec75ba6 into fb3af2a08f

Merge pull request #6515 from dupl/main
Removed deprecated Gemini models
2025-10-09 03:26:38 +08:00 · 2025-06-16 00:43:27 +08:00 · 2025-06-14 13:35:32 +08:00 · 2025-06-12 15:34:03 +08:00 · 2025-06-12 11:19:24 +08:00 · 2025-06-12 11:13:31 +08:00
4 changed files with 60 additions and 31 deletions
--- a/app/api/common.ts
+++ b/app/api/common.ts
@@ -90,6 +90,14 @@ export async function requestOpenai(req: NextRequest) {

  const fetchUrl = cloudflareAIGatewayUrl(`${baseUrl}/${path}`);
  console.log("fetchUrl", fetchUrl);
+
+  let payload = await req.text();
+  if (baseUrl.includes("openrouter.ai")) {
+    const body = JSON.parse(payload);
+    body["include_reasoning"] = true;
+    payload = JSON.stringify(body);
+  }
+
  const fetchOptions: RequestInit = {
    headers: {
      "Content-Type": "application/json",
@@ -100,7 +108,7 @@ export async function requestOpenai(req: NextRequest) {
      }),
    },
    method: req.method,
-    body: req.body,
+    body: payload,
    // to fix #2485: https://stackoverflow.com/questions/55920957/cloudflare-worker-typeerror-one-time-use-body
    redirect: "manual",
    // @ts-ignore
@@ -111,10 +119,7 @@ export async function requestOpenai(req: NextRequest) {
  // #1815 try to refuse gpt4 request
  if (serverConfig.customModels && req.body) {
    try {
-      const clonedBody = await req.text();
-      fetchOptions.body = clonedBody;
-
-      const jsonBody = JSON.parse(clonedBody) as { model?: string };
+      const jsonBody = JSON.parse(payload) as { model?: string };

      // not undefined and is false
      if (
--- a/app/client/platforms/openai.ts
+++ b/app/client/platforms/openai.ts
@@ -2,10 +2,10 @@
 // azure and openai, using same models. so using same LLMApi.
 import {
  ApiPath,
-  OPENAI_BASE_URL,
-  DEFAULT_MODELS,
-  OpenaiPath,
  Azure,
+  DEFAULT_MODELS,
+  OPENAI_BASE_URL,
+  OpenaiPath,
  REQUEST_TIMEOUT_MS,
  ServiceProvider,
 } from "@/app/constant";
@@ -18,13 +18,13 @@ import {
 } from "@/app/store";
 import { collectModelsWithDefaultModel } from "@/app/utils/model";
 import {
-  preProcessImageContent,
-  uploadImage,
  base64Image2Blob,
+  preProcessImageContent,
  streamWithThink,
+  uploadImage,
 } from "@/app/utils/chat";
 import { cloudflareAIGatewayUrl } from "@/app/utils/cloudflare";
-import { ModelSize, DalleQuality, DalleStyle } from "@/app/typing";
+import { DalleQuality, DalleStyle, ModelSize } from "@/app/typing";

 import {
  ChatOptions,
@@ -39,9 +39,9 @@ import Locale from "../../locales";
 import { getClientConfig } from "@/app/config/client";
 import {
  getMessageTextContent,
-  isVisionModel,
-  isDalle3 as _isDalle3,
  getTimeoutMSByModel,
+  isDalle3 as _isDalle3,
+  isVisionModel,
 } from "@/app/utils";
 import { fetch } from "@/app/utils/stream";

@@ -56,7 +56,7 @@ export interface OpenAIListModelResponse {

 export interface RequestPayload {
  messages: {
-    role: "system" | "user" | "assistant";
+    role: "developer" | "system" | "user" | "assistant";
    content: string | MultimodalContent[];
  }[];
  stream?: boolean;
@@ -238,8 +238,16 @@ export class ChatGPTApi implements LLMApi {
        // Please do not ask me why not send max_tokens, no reason, this param is just shit, I dont want to explain anymore.
      };

-      // O1 使用 max_completion_tokens 控制token数 (https://platform.openai.com/docs/guides/reasoning#controlling-costs)
      if (isO1OrO3) {
+        // by default the o1/o3 models will not attempt to produce output that includes markdown formatting
+        // manually add "Formatting re-enabled" developer message to encourage markdown inclusion in model responses
+        // (https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/reasoning?tabs=python-secure#markdown-output)
+        requestPayload["messages"].unshift({
+          role: "developer",
+          content: "Formatting re-enabled",
+        });
+
+        // o1/o3 uses max_completion_tokens to control the number of tokens (https://platform.openai.com/docs/guides/reasoning#controlling-costs)
        requestPayload["max_completion_tokens"] = modelConfig.max_tokens;
      }

@@ -295,6 +303,13 @@ export class ChatGPTApi implements LLMApi {
            useChatStore.getState().currentSession().mask?.plugin || [],
          );
        // console.log("getAsTools", tools, funcs);
+
+        // Add "include_reasoning" for OpenRouter: https://openrouter.ai/announcements/reasoning-tokens-for-thinking-models
+        if (chatPath.includes("openrouter.ai")) {
+          // @ts-ignore
+          requestPayload["include_reasoning"] = true;
+        }
+
        streamWithThink(
          chatPath,
          requestPayload,
@@ -311,6 +326,7 @@ export class ChatGPTApi implements LLMApi {
                content: string;
                tool_calls: ChatMessageTool[];
                reasoning_content: string | null;
+                reasoning: string | null;
              };
            }>;

@@ -336,7 +352,9 @@ export class ChatGPTApi implements LLMApi {
              }
            }

-            const reasoning = choices[0]?.delta?.reasoning_content;
+            const reasoning =
+              choices[0]?.delta?.reasoning_content ||
+              choices[0]?.delta?.reasoning;
            const content = choices[0]?.delta?.content;

            // Skip if both content and reasoning_content are empty or null
@@ -412,6 +430,7 @@ export class ChatGPTApi implements LLMApi {
      options.onError?.(e as Error);
    }
  }
+
  async usage() {
    const formatDate = (d: Date) =>
      `${d.getFullYear()}-${(d.getMonth() + 1).toString().padStart(2, "0")}-${d
@@ -515,4 +534,5 @@ export class ChatGPTApi implements LLMApi {
    }));
  }
 }
+
 export { OpenaiPath };
--- a/app/constant.ts
+++ b/app/constant.ts
@@ -523,20 +523,15 @@ const openaiModels = [
 ];

 const googleModels = [
-  "gemini-1.0-pro", // Deprecated on 2/15/2025
  "gemini-1.5-pro-latest",
  "gemini-1.5-pro",
  "gemini-1.5-pro-002",
-  "gemini-1.5-pro-exp-0827",
  "gemini-1.5-flash-latest",
  "gemini-1.5-flash-8b-latest",
  "gemini-1.5-flash",
  "gemini-1.5-flash-8b",
  "gemini-1.5-flash-002",
-  "gemini-1.5-flash-exp-0827",
  "learnlm-1.5-pro-experimental",
-  "gemini-exp-1114",
-  "gemini-exp-1121",
  "gemini-exp-1206",
  "gemini-2.0-flash",
  "gemini-2.0-flash-exp",
@@ -546,6 +541,7 @@ const googleModels = [
  "gemini-2.0-flash-thinking-exp-01-21",
  "gemini-2.0-pro-exp",
  "gemini-2.0-pro-exp-02-05",
+  "gemini-2.5-pro-preview-06-05",
 ];

 const anthropicModels = [
@@ -632,6 +628,18 @@ const xAIModes = [
  "grok-2-vision-1212",
  "grok-2-vision",
  "grok-2-vision-latest",
+  "grok-3-mini-fast-beta",
+  "grok-3-mini-fast",
+  "grok-3-mini-fast-latest",
+  "grok-3-mini-beta",
+  "grok-3-mini",
+  "grok-3-mini-latest",
+  "grok-3-fast-beta",
+  "grok-3-fast",
+  "grok-3-fast-latest",
+  "grok-3-beta",
+  "grok-3",
+  "grok-3-latest",
 ];

 const chatglmModels = [
--- a/app/utils/chat.ts
+++ b/app/utils/chat.ts
@@ -1,7 +1,7 @@
 import {
  CACHE_URL_PREFIX,
-  UPLOAD_URL,
  REQUEST_TIMEOUT_MS,
+  UPLOAD_URL,
 } from "@/app/constant";
 import { MultimodalContent, RequestMessage } from "@/app/client/api";
 import Locale from "@/app/locales";
@@ -111,6 +111,7 @@ export async function preProcessImageContentForAlibabaDashScope(
 }

 const imageCaches: Record<string, string> = {};
+
 export function cacheImageToBase64Image(imageUrl: string) {
  if (imageUrl.includes(CACHE_URL_PREFIX)) {
    if (!imageCaches[imageUrl]) {
@@ -385,6 +386,7 @@ export function stream(
      openWhenHidden: true,
    });
  }
+
  console.debug("[ChatAPI] start");
  chatApi(chatPath, headers, requestPayload, tools); // call fetchEventSource
 }
@@ -627,16 +629,9 @@ export function streamWithThink(
              if (remainText.length > 0) {
                remainText += "\n";
              }
-              remainText += "> " + chunk.content;
-            } else {
-              // Handle newlines in thinking content
-              if (chunk.content.includes("\n\n")) {
-                const lines = chunk.content.split("\n\n");
-                remainText += lines.join("\n\n> ");
-              } else {
-                remainText += chunk.content;
-              }
+              remainText += "> ";
            }
+            remainText += chunk.content.replaceAll("\n", "\n> ");
          } else {
            // If in normal mode
            if (isInThinkingMode || isThinkingChanged) {
@@ -662,6 +657,7 @@ export function streamWithThink(
      openWhenHidden: true,
    });
  }
+
  console.debug("[ChatAPI] start");
  chatApi(chatPath, headers, requestPayload, tools); // call fetchEventSource
 }
Author	SHA1	Message	Date
Xu Chenxi	cb27f923c7	Merge `a87ec75ba6` into `fb3af2a08f`	2025-06-16 00:43:27 +08:00
RiverRay	fb3af2a08f	Merge pull request #6515 from dupl/main: Removed deprecated Gemini models [Some checks failed: Run Tests / test (push) has been cancelled]	2025-06-14 13:35:32 +08:00
dupl	eb193ac0ff	Removed deprecated Gemini models	2025-06-12 15:34:03 +08:00
RiverRay	c30ddfbb07	Merge pull request #6425 from yunlingz/o_model_md_response: Fix: Encourage markdown inclusion in model responses for o1/o3 [Some checks failed: Run Tests / test (push) has been cancelled]	2025-06-12 11:19:24 +08:00
RiverRay	a2f0149786	Merge pull request #6460 from dreamsafari/main 加入Grok3模型列表	2025-06-12 11:13:31 +08:00
GH Action - Upstream Sync	03d36f96ed	Merge branch 'main' of https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web	2025-06-12 01:53:30 +00:00
RiverRay	705dffc664	Merge pull request #6514 from KevinShiCN/patch-1: Add gemini-2.5-pro-preview-06-05 into constant.ts [Some checks failed: Run Tests / test (push) has been cancelled]	2025-06-11 16:14:09 +08:00
KevinShiCN	02f7e6de98	Add gemini-2.5-pro-preview-06-05 into constant.ts	2025-06-08 23:59:49 +08:00
dreamsafari	843dc52efa	加入Grok3模型列表	2025-04-22 13:06:54 +08:00
Yunling Zhu	c261ebc82c	use unshift to improve perf	2025-04-06 16:56:54 +08:00
Yunling Zhu	f7c747c65f	encourage markdown inclusion for o1/o3	2025-04-03 22:11:59 +08:00
xsun2001	a87ec75ba6	Support OpenRouter reasoning when using env var	2025-02-25 16:35:03 +08:00
xsun2001	6e082ad7ac	Support reasoning for OpenRouter using OpenAI provider	2025-02-18 14:41:59 +08:00