Merge c3b50a9c93 into c30ddfbb07

Merge pull request #6425 from yunlingz/o_model_md_response
Fix: Encourage markdown inclusion in model responses for o1/o3
2025-10-10 20:16:37 +08:00 · 2025-06-13 18:36:35 +08:00 · 2025-06-12 11:19:24 +08:00 · 2025-06-12 11:13:31 +08:00 · 2025-06-12 01:53:30 +00:00 · 2025-04-22 13:06:54 +08:00
3 changed files with 113 additions and 18 deletions
--- a/app/client/platforms/openai.ts
+++ b/app/client/platforms/openai.ts
@@ -56,7 +56,7 @@ export interface OpenAIListModelResponse {

 export interface RequestPayload {
  messages: {
-    role: "system" | "user" | "assistant";
+    role: "developer" | "system" | "user" | "assistant";
    content: string | MultimodalContent[];
  }[];
  stream?: boolean;
@@ -238,8 +238,16 @@ export class ChatGPTApi implements LLMApi {
        // Please do not ask me why not send max_tokens, no reason, this param is just shit, I dont want to explain anymore.
      };

-      // O1 使用 max_completion_tokens 控制token数 (https://platform.openai.com/docs/guides/reasoning#controlling-costs)
      if (isO1OrO3) {
+        // by default the o1/o3 models will not attempt to produce output that includes markdown formatting
+        // manually add "Formatting re-enabled" developer message to encourage markdown inclusion in model responses
+        // (https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/reasoning?tabs=python-secure#markdown-output)
+        requestPayload["messages"].unshift({
+          role: "developer",
+          content: "Formatting re-enabled",
+        });
+
+        // o1/o3 uses max_completion_tokens to control the number of tokens (https://platform.openai.com/docs/guides/reasoning#controlling-costs)
        requestPayload["max_completion_tokens"] = modelConfig.max_tokens;
      }

--- a/app/components/tts-config.tsx
+++ b/app/components/tts-config.tsx
@@ -1,19 +1,76 @@
 import { TTSConfig, TTSConfigValidator } from "../store";
+import React, { useState } from "react";

 import Locale from "../locales";
 import { ListItem, Select } from "./ui-lib";
 import {
+  ModelProvider,
  DEFAULT_TTS_ENGINE,
  DEFAULT_TTS_ENGINES,
  DEFAULT_TTS_MODELS,
  DEFAULT_TTS_VOICES,
 } from "../constant";
 import { InputRange } from "./input-range";
+import { IconButton } from "./button";
+import SpeakIcon from "../icons/speak.svg";
+import SpeakStopIcon from "../icons/speak-stop.svg";
+import { createTTSPlayer } from "../utils/audio";
+import { useAppConfig } from "../store";
+import { ClientApi } from "../client/api";
+import { showToast } from "../components/ui-lib";

+const ttsPlayer = createTTSPlayer();
 export function TTSConfigList(props: {
  ttsConfig: TTSConfig;
  updateConfig: (updater: (config: TTSConfig) => void) => void;
 }) {
+  const [speechLoading, setSpeechLoading] = useState(false);
+  const [speechStatus, setSpeechStatus] = useState(false);
+
+  const config = useAppConfig.getState();
+
+  function stopSpeech() {
+    ttsPlayer.stop();
+    setSpeechStatus(false);
+  }
+
+  async function playSpeech(text: string, ttsConfig: TTSConfig) {
+    try {
+      const api = new ClientApi(ModelProvider.GPT);
+      setSpeechLoading(true);
+      ttsPlayer.init();
+
+      const audioBuffer = await api.llm.speech({
+        model: ttsConfig.model,
+        input: text,
+        voice: ttsConfig.voice,
+        speed: ttsConfig.speed,
+      });
+
+      setSpeechStatus(true);
+      await ttsPlayer.play(audioBuffer, () => {
+        setSpeechStatus(false);
+      });
+    } catch (error) {
+      console.error("[OpenAI Speech]", error);
+      setSpeechStatus(false);
+      // Implement user-facing error notification here
+      if (typeof (error as Error).message === "string") {
+        showToast((error as Error).message);
+      }
+    } finally {
+      setSpeechLoading(false);
+    }
+  }
+
+  async function openaiSpeech(text: string) {
+    if (speechStatus) {
+      stopSpeech();
+    } else {
+      await playSpeech(text, config.ttsConfig);
+    }
+  }
+
  return (
    <>
      <ListItem
@@ -88,23 +145,41 @@ export function TTSConfigList(props: {
            title={Locale.Settings.TTS.Voice.Title}
            subTitle={Locale.Settings.TTS.Voice.SubTitle}
          >
-            <Select
-              value={props.ttsConfig.voice}
-              onChange={(e) => {
-                props.updateConfig(
-                  (config) =>
-                    (config.voice = TTSConfigValidator.voice(
+            <div style={{ display: "flex", gap: "10px" }}>
+              <IconButton
+                aria={Locale.Chat.Actions.Speech}
+                icon={speechStatus ? <SpeakStopIcon /> : <SpeakIcon />}
+                text={
+                  speechLoading
+                    ? "Loading..."
+                    : speechStatus
+                    ? Locale.Chat.Actions.Stop
+                    : Locale.Chat.Actions.Speech
+                }
+                onClick={() => {
+                  openaiSpeech(
+                    "NextChat,Unleash your imagination, experience the future of AI conversation.",
+                  );
+                }}
+              />
+
+              <Select
+                value={props.ttsConfig.voice}
+                onChange={(e) => {
+                  props.updateConfig((config) => {
+                    config.voice = TTSConfigValidator.voice(
                      e.currentTarget.value,
-                    )),
-                );
-              }}
-            >
-              {DEFAULT_TTS_VOICES.map((v, i) => (
-                <option value={v} key={i}>
-                  {v}
-                </option>
-              ))}
-            </Select>
+                    );
+                  });
+                }}
+              >
+                {DEFAULT_TTS_VOICES.map((v, i) => (
+                  <option value={v} key={i}>
+                    {v}
+                  </option>
+                ))}
+              </Select>
+            </div>
          </ListItem>
          <ListItem
            title={Locale.Settings.TTS.Speed.Title}
--- a/app/constant.ts
+++ b/app/constant.ts
@@ -633,6 +633,18 @@ const xAIModes = [
  "grok-2-vision-1212",
  "grok-2-vision",
  "grok-2-vision-latest",
+  "grok-3-mini-fast-beta",
+  "grok-3-mini-fast",
+  "grok-3-mini-fast-latest",
+  "grok-3-mini-beta",
+  "grok-3-mini",
+  "grok-3-mini-latest",
+  "grok-3-fast-beta",
+  "grok-3-fast",
+  "grok-3-fast-latest",
+  "grok-3-beta",
+  "grok-3",
+  "grok-3-latest",
 ];

 const chatglmModels = [
Author	SHA1	Message	Date
Dakai	5870ce528b	Merge `c3b50a9c93` into `c30ddfbb07`	2025-06-13 18:36:35 +08:00
RiverRay	c30ddfbb07	Merge pull request #6425 from yunlingz/o_model_md_response — Fix: Encourage markdown inclusion in model responses for o1/o3 (CI: some checks failed; Run Tests / test (push) was cancelled)	2025-06-12 11:19:24 +08:00
RiverRay	a2f0149786	Merge pull request #6460 from dreamsafari/main 加入Grok3模型列表	2025-06-12 11:13:31 +08:00
GH Action - Upstream Sync	03d36f96ed	Merge branch 'main' of https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web	2025-06-12 01:53:30 +00:00
dreamsafari	843dc52efa	加入Grok3模型列表	2025-04-22 13:06:54 +08:00
Yunling Zhu	c261ebc82c	use unshift to improve perf	2025-04-06 16:56:54 +08:00
Yunling Zhu	f7c747c65f	encourage markdown inclusion for o1/o3	2025-04-03 22:11:59 +08:00
dakai	c3b50a9c93	refactor: optimize playSpeech function in TTSConfigList component	2024-10-27 12:42:16 +08:00
dakai	ab4bf3ba67	remove redundancy code	2024-10-16 11:34:20 +08:00
dakai	67192a7946	follow the rabbit to fix potential issues	2024-10-14 03:46:51 +08:00
dakai	bcd50b89c8	feat: add voice audio preview button in tts-config option	2024-10-14 03:25:44 +08:00