Mirror of https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web.git (synced 2025-11-18 06:53:41 +08:00)
feat: #3110 add voice control
app/components/voice/voice.tsx (new file, 117 lines added)
@@ -0,0 +1,117 @@
import { useChatStore } from "@/app/store";
import style from "./voice.module.scss";
import { useEffect, useMemo, useRef, useState } from "react";
import SpeechRecognition, {
  useSpeechRecognition,
} from "react-speech-recognition";
import { IconButton } from "../button";
import { api } from "@/app/client/api";

// Return the last element of `array` that matches `predictor`, or null.
function findLast<T>(array: T[], predictor: (_: T) => boolean) {
  for (let i = array.length - 1; i >= 0; i -= 1) {
    if (predictor(array[i])) {
      return array[i];
    }
  }

  return null;
}

export function VoicePage() {
  const chatStore = useChatStore();
  const session = chatStore.currentSession();
  const lastAssistantMessage = useMemo(
    () => findLast(session.messages, (m) => m.role === "assistant"),
    [session.messages],
  );
  const lastUserMessage = useMemo(
    () => findLast(session.messages, (m) => m.role === "user"),
    [session.messages],
  );
  const speech = useSpeechRecognition({
    clearTranscriptOnListen: true,
  });

  if (!speech.browserSupportsSpeechRecognition) {
    throw Error("your browser does not support speech recognition api");
  }

  function startVoice() {
    SpeechRecognition.startListening({
      language: "zh-CN",
    });
    // stop any TTS audio that is still playing
    sourceNodeRef.current?.stop();
  }

  function stopVoice() {
    SpeechRecognition.stopListening();
  }

  // when listening stops, send the final transcript as a new user message,
  // unless it is empty or duplicates the previous user message
  useEffect(() => {
    if (!speech.listening) {
      if (
        speech.finalTranscript.length > 0 &&
        speech.finalTranscript !== lastUserMessage?.content
      ) {
        chatStore.onUserInput(speech.finalTranscript);
      }
    }
  }, [speech.listening]);

  const [loadingTTS, setLoadingTTS] = useState(false);
  const sourceNodeRef = useRef<AudioBufferSourceNode>();

  // fetch TTS audio for the last assistant message and play it via the Web Audio API
  function speak() {
    const content = lastAssistantMessage?.content;
    if (!content) return;
    setLoadingTTS(true);
    api.llm.speech(content).then(async (arrayBuffer) => {
      const audioContext = new (window.AudioContext ||
        (window as any).webkitAudioContext)();
      const source = audioContext.createBufferSource();
      try {
        sourceNodeRef.current?.stop();
      } catch {}
      sourceNodeRef.current = source;
      // set the buffer property of the audio source
      source.buffer = await audioContext.decodeAudioData(arrayBuffer);
      // connect to the default output device (usually the speakers)
      source.connect(audioContext.destination);
      // start playback
      setLoadingTTS(false);
      source.start(0);
    });
  }

  // auto-speak once the assistant reply has finished streaming
  const lastStream = useRef(false);
  useEffect(() => {
    if (
      lastAssistantMessage?.streaming !== lastStream.current &&
      lastStream.current
    ) {
      speak();
    }
    lastStream.current = !!lastAssistantMessage?.streaming;
  }, [lastAssistantMessage?.streaming]);

  return (
    <div className={style["voice-page"]}>
      <div className={style["top"] + ` ${style["active"]}`} onClick={speak}>
        {lastAssistantMessage?.content}
      </div>
      <div className={style["center"]}></div>
      <div
        className={style["bottom"] + ` ${speech.listening && style["active"]}`}
        onClick={() => {
          if (speech.listening) {
            stopVoice();
          } else {
            startVoice();
          }
        }}
      >
        {speech.transcript || lastUserMessage?.content}
      </div>
    </div>
  );
}
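For context, here is a minimal sketch of how the new VoicePage could be mounted behind a route. The "/voice" path and the react-router-dom wiring are assumptions for illustration only and are not part of this commit:

// hypothetical wiring sketch, not part of this commit
import { HashRouter, Route, Routes } from "react-router-dom";
import { VoicePage } from "./components/voice/voice";

export function AppRoutes() {
  return (
    <HashRouter>
      <Routes>
        {/* "/voice" is an assumed path; adjust it to the app's own route constants */}
        <Route path="/voice" element={<VoicePage />} />
      </Routes>
    </HashRouter>
  );
}

Note that the component throws at render time when the browser lacks the Speech Recognition API, so whatever hosts it may want an error boundary around this route.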