Merge c3b50a9c93 into 705dffc664

refactor: optimize playSpeech function in TTSConfigList component
remove redundancy code
2025-10-12 13:03:43 +08:00 · 2025-06-11 19:43:09 +08:00 · 2024-10-27 12:42:16 +08:00 · 2024-10-16 11:34:20 +08:00 · 2024-10-14 03:46:51 +08:00 · 2024-10-14 03:25:44 +08:00
6 changed files with 105 additions and 45 deletions
--- a/README.md
+++ b/README.md
@@ -22,12 +22,12 @@ English / [简体中文](./README_CN.md)
 [![MacOS][MacOS-image]][download-url]
 [![Linux][Linux-image]][download-url]

-[NextChatAI](https://nextchat.club?utm_source=readme) / [iOS APP](https://apps.apple.com/us/app/nextchat-ai/id6743085599) / [Web App Demo](https://app.nextchat.club) / [Desktop App](https://github.com/Yidadaa/ChatGPT-Next-Web/releases) / [Enterprise Edition](#enterprise-edition) 
+[NextChatAI](https://nextchat.club?utm_source=readme) / [iOS APP](https://apps.apple.com/us/app/nextchat-ai/id6743085599) / [Web App Demo](https://app.nextchat.dev) / [Desktop App](https://github.com/Yidadaa/ChatGPT-Next-Web/releases) / [Enterprise Edition](#enterprise-edition) 


 [saas-url]: https://nextchat.club?utm_source=readme
 [saas-image]: https://img.shields.io/badge/NextChat-Saas-green?logo=microsoftedge
-[web-url]: https://app.nextchat.club/
+[web-url]: https://app.nextchat.dev/
 [download-url]: https://github.com/Yidadaa/ChatGPT-Next-Web/releases
 [Web-image]: https://img.shields.io/badge/Web-PWA-orange?logo=microsoftedge
 [Windows-image]: https://img.shields.io/badge/-Windows-blue?logo=windows
--- a/app/client/platforms/openai.ts
+++ b/app/client/platforms/openai.ts
@@ -56,7 +56,7 @@ export interface OpenAIListModelResponse {

 export interface RequestPayload {
  messages: {
-    role: "developer" | "system" | "user" | "assistant";
+    role: "system" | "user" | "assistant";
    content: string | MultimodalContent[];
  }[];
  stream?: boolean;
@@ -238,16 +238,8 @@ export class ChatGPTApi implements LLMApi {
        // Please do not ask me why not send max_tokens, no reason, this param is just shit, I dont want to explain anymore.
      };

+      // o1/o3 uses max_completion_tokens to control the number of tokens (https://platform.openai.com/docs/guides/reasoning#controlling-costs)
      if (isO1OrO3) {
-        // by default the o1/o3 models will not attempt to produce output that includes markdown formatting
-        // manually add "Formatting re-enabled" developer message to encourage markdown inclusion in model responses
-        // (https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/reasoning?tabs=python-secure#markdown-output)
-        requestPayload["messages"].unshift({
-          role: "developer",
-          content: "Formatting re-enabled",
-        });
-
-        // o1/o3 uses max_completion_tokens to control the number of tokens (https://platform.openai.com/docs/guides/reasoning#controlling-costs)
        requestPayload["max_completion_tokens"] = modelConfig.max_tokens;
      }

--- a/app/components/tts-config.tsx
+++ b/app/components/tts-config.tsx
@@ -1,19 +1,76 @@
 import { TTSConfig, TTSConfigValidator } from "../store";
+import React, { useState } from "react";

 import Locale from "../locales";
 import { ListItem, Select } from "./ui-lib";
 import {
+  ModelProvider,
  DEFAULT_TTS_ENGINE,
  DEFAULT_TTS_ENGINES,
  DEFAULT_TTS_MODELS,
  DEFAULT_TTS_VOICES,
 } from "../constant";
 import { InputRange } from "./input-range";
+import { IconButton } from "./button";
+import SpeakIcon from "../icons/speak.svg";
+import SpeakStopIcon from "../icons/speak-stop.svg";
+import { createTTSPlayer } from "../utils/audio";
+import { useAppConfig } from "../store";
+import { ClientApi } from "../client/api";
+import { showToast } from "../components/ui-lib";

+const ttsPlayer = createTTSPlayer();
 export function TTSConfigList(props: {
  ttsConfig: TTSConfig;
  updateConfig: (updater: (config: TTSConfig) => void) => void;
 }) {
+  const [speechLoading, setSpeechLoading] = useState(false);
+  const [speechStatus, setSpeechStatus] = useState(false);
+
+  const config = useAppConfig.getState();
+
+  function stopSpeech() {
+    ttsPlayer.stop();
+    setSpeechStatus(false);
+  }
+
+  async function playSpeech(text: string, ttsConfig: TTSConfig) {
+    try {
+      const api = new ClientApi(ModelProvider.GPT);
+      setSpeechLoading(true);
+      ttsPlayer.init();
+
+      const audioBuffer = await api.llm.speech({
+        model: ttsConfig.model,
+        input: text,
+        voice: ttsConfig.voice,
+        speed: ttsConfig.speed,
+      });
+
+      setSpeechStatus(true);
+      await ttsPlayer.play(audioBuffer, () => {
+        setSpeechStatus(false);
+      });
+    } catch (error) {
+      console.error("[OpenAI Speech]", error);
+      setSpeechStatus(false);
+      // Show the error message to the user when one is available
+      if (typeof (error as Error).message === "string") {
+        showToast((error as Error).message);
+      }
+    } finally {
+      setSpeechLoading(false);
+    }
+  }
+
+  async function openaiSpeech(text: string) {
+    if (speechStatus) {
+      stopSpeech();
+    } else {
+      await playSpeech(text, config.ttsConfig);
+    }
+  }
+
  return (
    <>
      <ListItem
@@ -88,23 +145,41 @@ export function TTSConfigList(props: {
            title={Locale.Settings.TTS.Voice.Title}
            subTitle={Locale.Settings.TTS.Voice.SubTitle}
          >
-            <Select
-              value={props.ttsConfig.voice}
-              onChange={(e) => {
-                props.updateConfig(
-                  (config) =>
-                    (config.voice = TTSConfigValidator.voice(
+            <div style={{ display: "flex", gap: "10px" }}>
+              <IconButton
+                aria={Locale.Chat.Actions.Speech}
+                icon={speechStatus ? <SpeakStopIcon /> : <SpeakIcon />}
+                text={
+                  speechLoading
+                    ? "Loading..."
+                    : speechStatus
+                    ? Locale.Chat.Actions.Stop
+                    : Locale.Chat.Actions.Speech
+                }
+                onClick={() => {
+                  openaiSpeech(
+                    "NextChat,Unleash your imagination, experience the future of AI conversation.",
+                  );
+                }}
+              />
+
+              <Select
+                value={props.ttsConfig.voice}
+                onChange={(e) => {
+                  props.updateConfig((config) => {
+                    config.voice = TTSConfigValidator.voice(
                      e.currentTarget.value,
-                    )),
-                );
-              }}
-            >
-              {DEFAULT_TTS_VOICES.map((v, i) => (
-                <option value={v} key={i}>
-                  {v}
-                </option>
-              ))}
-            </Select>
+                    );
+                  });
+                }}
+              >
+                {DEFAULT_TTS_VOICES.map((v, i) => (
+                  <option value={v} key={i}>
+                    {v}
+                  </option>
+                ))}
+              </Select>
+            </div>
          </ListItem>
          <ListItem
            title={Locale.Settings.TTS.Speed.Title}
--- a/app/constant.ts
+++ b/app/constant.ts
@@ -523,15 +523,20 @@ const openaiModels = [
 ];

 const googleModels = [
+  "gemini-1.0-pro", // Deprecated on 2/15/2025
  "gemini-1.5-pro-latest",
  "gemini-1.5-pro",
  "gemini-1.5-pro-002",
+  "gemini-1.5-pro-exp-0827",
  "gemini-1.5-flash-latest",
  "gemini-1.5-flash-8b-latest",
  "gemini-1.5-flash",
  "gemini-1.5-flash-8b",
  "gemini-1.5-flash-002",
+  "gemini-1.5-flash-exp-0827",
  "learnlm-1.5-pro-experimental",
+  "gemini-exp-1114",
+  "gemini-exp-1121",
  "gemini-exp-1206",
  "gemini-2.0-flash",
  "gemini-2.0-flash-exp",
@@ -628,18 +633,6 @@ const xAIModes = [
  "grok-2-vision-1212",
  "grok-2-vision",
  "grok-2-vision-latest",
-  "grok-3-mini-fast-beta",
-  "grok-3-mini-fast",
-  "grok-3-mini-fast-latest",
-  "grok-3-mini-beta",
-  "grok-3-mini",
-  "grok-3-mini-latest",
-  "grok-3-fast-beta",
-  "grok-3-fast",
-  "grok-3-fast-latest",
-  "grok-3-beta",
-  "grok-3",
-  "grok-3-latest",
 ];

 const chatglmModels = [
--- a/package.json
+++ b/package.json
@@ -83,7 +83,7 @@
    "jest": "^29.7.0",
    "jest-environment-jsdom": "^29.7.0",
    "lint-staged": "^13.2.2",
-    "prettier": "^3.6.2",
+    "prettier": "^3.0.2",
    "ts-node": "^10.9.2",
    "tsx": "^4.16.0",
    "typescript": "5.2.2",
--- a/yarn.lock
+++ b/yarn.lock
@@ -7076,10 +7076,10 @@ prettier-linter-helpers@^1.0.0:
  dependencies:
    fast-diff "^1.1.2"

-prettier@^3.6.2:
-  version "3.6.2"
-  resolved "https://registry.yarnpkg.com/prettier/-/prettier-3.6.2.tgz#ccda02a1003ebbb2bfda6f83a074978f608b9393"
-  integrity sha512-I7AIg5boAr5R0FFtJ6rCfD+LFsWHp81dolrFD8S79U9tb8Az2nGrJncnMSnys+bpQJfRUzqs9hnA81OAA3hCuQ==
+prettier@^3.0.2:
+  version "3.0.2"
+  resolved "https://registry.yarnpkg.com/prettier/-/prettier-3.0.2.tgz#78fcecd6d870551aa5547437cdae39d4701dca5b"
+  integrity sha512-o2YR9qtniXvwEZlOKbveKfDQVyqxbEIWn48Z8m3ZJjBjcCmUy3xZGIv+7AkaeuaTr6yPXJjwv07ZWlsWbEy1rQ==

 pretty-format@^27.0.2:
  version "27.5.1"
Author	SHA1	Message	Date
Dakai	9b4b57748f	Merge `c3b50a9c93` into `705dffc664`	2025-06-11 19:43:09 +08:00
dakai	c3b50a9c93	refactor: optimize playSpeech function in TTSConfigList component	2024-10-27 12:42:16 +08:00
dakai	ab4bf3ba67	remove redundancy code	2024-10-16 11:34:20 +08:00
dakai	67192a7946	follow the rabbit to fix potential issues	2024-10-14 03:46:51 +08:00
dakai	bcd50b89c8	feat: add voice audio preview button in tts-config option	2024-10-14 03:25:44 +08:00