feat: support openai tts

2025-11-13 20:53:45 +08:00 · 2024-03-03 15:15:23 +08:00
parent d2733a9128
commit c3656609ab
17 changed files with 475 additions and 20 deletions
--- a/app/components/chat.tsx
+++ b/app/components/chat.tsx
@@ -14,6 +14,8 @@ import RenameIcon from "../icons/rename.svg";
 import ExportIcon from "../icons/share.svg";
 import ReturnIcon from "../icons/return.svg";
 import CopyIcon from "../icons/copy.svg";
+import SpeakIcon from "../icons/speak.svg";
+import SpeakStopIcon from "../icons/speak-stop.svg";
 import LoadingIcon from "../icons/three-dots.svg";
 import PromptIcon from "../icons/prompt.svg";
 import MaskIcon from "../icons/mask.svg";
@@ -83,6 +85,7 @@ import {
  CHAT_PAGE_SIZE,
  LAST_INPUT_IMAGE_KEY,
  LAST_INPUT_KEY,
+  ModelProvider,
  Path,
  REQUEST_TIMEOUT_MS,
  UNFINISHED_INPUT,
@@ -97,6 +100,9 @@ import { getClientConfig } from "../config/client";
 import { useAllModels } from "../utils/hooks";
 import Image from "next/image";
 import { ClientApi } from "../client/api";
+import { createTTSPlayer } from "../utils/audio";
+
+const ttsPlayer = createTTSPlayer(); // module-level player used by openaiSpeech

 const Markdown = dynamic(async () => (await import("./markdown")).Markdown, {
  loading: () => <LoadingIcon />,
@@ -1008,6 +1014,37 @@ function _Chat() {
    });
  };

+  const [speechStatus, setSpeechStatus] = useState(false);
+  const [speechLoading, setSpeechLoading] = useState(false);
+  /**
+   * Toggles speech for a message: stops active playback, otherwise requests
+   * audio for `text` and plays it. Failures are logged and shown as a toast.
+   * @param text - The message text sent as the speech request input.
+   */
+  async function openaiSpeech(text: string) {
+    if (speechStatus) {
+      ttsPlayer.stop();
+      setSpeechStatus(false);
+      return;
+    }
+    const api = new ClientApi(ModelProvider.GPT);
+    const { model, voice, speed } = useAppConfig.getState().ttsConfig;
+    setSpeechLoading(true);
+    try {
+      // Awaited inside try: a rejected request must not leave the loading
+      // flag stuck at true or surface as an unhandled rejection.
+      const audio = await api.llm.speech({ model, input: text, voice, speed });
+      setSpeechStatus(true);
+      await ttsPlayer.play(audio, () => setSpeechStatus(false));
+    } catch (e) {
+      console.error("[OpenAI Speech]", e);
+      showToast(prettyObject(e));
+      setSpeechStatus(false);
+    } finally {
+      setSpeechLoading(false);
+    }
+  }
+
  const context: RenderMessage[] = useMemo(() => {
    return session.mask.hideContext ? [] : session.mask.context.slice();
  }, [session.mask.context, session.mask.hideContext]);
@@ -1361,6 +1398,24 @@ function _Chat() {
                                icon={<CopyIcon />}
                                onClick={() => copyToClipboard(message.content)}
                              />
+                              {config.ttsConfig.enable && (
+                                <ChatAction
+                                  text={
+                                    speechStatus
+                                      ? Locale.Chat.Actions.StopSpeech
+                                      : Locale.Chat.Actions.Speech
+                                  }
+                                  loding={speechLoading}
+                                  icon={
+                                    speechStatus ? (
+                                      <SpeakStopIcon />
+                                    ) : (
+                                      <SpeakIcon />
+                                    )
+                                  }
+                                  onClick={() => openaiSpeech(message.content)}
+                                />
+                              )}
                            </>
                          )}
                        </div>