// ChatGPT-Next-Web/app/components/voice/voice.tsx

import { useChatStore } from "@/app/store";
import style from "./voice.module.scss";
import { useEffect, useMemo, useRef, useState } from "react";
import SpeechRecognition, {
  useSpeechRecognition,
} from "react-speech-recognition";
import { IconButton } from "../button";
import { api } from "@/app/client/api";
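
// Returns the last element of `array` matching `predicate`, or null if none
// does (a stand-in for Array.prototype.findLast, which requires ES2023).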
function findLast<T>(array: T[], predicate: (_: T) => boolean) {
  for (let i = array.length - 1; i >= 0; i -= 1) {
    if (predicate(array[i])) {
      return array[i];
    }
  }
  return null;
}
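
// Voice chat page: the latest assistant reply is shown on top (tap it to
// replay via TTS) and a push-to-talk area at the bottom drives speech input.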
export function VoicePage() {
  const chatStore = useChatStore();
  const session = chatStore.currentSession();
  const lastAssistantMessage = useMemo(
    () => findLast(session.messages, (m) => m.role === "assistant"),
    [session.messages],
  );
  const lastUserMessage = useMemo(
    () => findLast(session.messages, (m) => m.role === "user"),
    [session.messages],
  );
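  // Hook into react-speech-recognition; clearTranscriptOnListen drops the
  // previous transcript each time a new listening session starts.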
  const speech = useSpeechRecognition({
    clearTranscriptOnListen: true,
  });
  if (!speech.browserSupportsSpeechRecognition) {
    throw new Error("Your browser does not support the Speech Recognition API");
  }
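  // Toggle the recognizer on and off; starting also silences any TTS clip
  // that is still playing.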
  function startVoice() {
    SpeechRecognition.startListening({
      language: "zh-CN",
    });
    // stop() throws if the node was never started, so guard it like speak() does.
    try {
      sourceNodeRef.current?.stop();
    } catch {}
  }
  function stopVoice() {
    SpeechRecognition.stopListening();
  }
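  // Once listening ends, commit the final transcript as a user message,
  // skipping empty results and exact duplicates of the previous input.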
  useEffect(() => {
    if (!speech.listening) {
      if (
        speech.finalTranscript.length > 0 &&
        speech.finalTranscript !== lastUserMessage?.content
      ) {
        chatStore.onUserInput(speech.finalTranscript);
      }
    }
  }, [speech.listening]);
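  // TTS playback state: a loading flag plus a ref to the source node that is
  // currently playing, so it can be stopped before a new clip starts.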
  const [loadingTTS, setLoadingTTS] = useState(false);
  const sourceNodeRef = useRef<AudioBufferSourceNode>();
  function speak() {
    const content = lastAssistantMessage?.content;
    if (!content) return;
    setLoadingTTS(true);
    api.llm
      .speech(content)
      .then(async (arrayBuffer) => {
        const audioContext = new (window.AudioContext ||
          (window as any).webkitAudioContext)();
        const source = audioContext.createBufferSource();
        try {
          sourceNodeRef.current?.stop();
        } catch {}
        sourceNodeRef.current = source;
        // Set the buffer on the audio source
        source.buffer = await audioContext.decodeAudioData(arrayBuffer);
        // Connect to the default output device (usually the speakers)
        source.connect(audioContext.destination);
        // Start playback
        setLoadingTTS(false);
        source.start(0);
      })
      // Clear the loading flag if the request or decoding fails.
      .catch(() => setLoadingTTS(false));
  }
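  // Remember the previous streaming flag; when it flips from true to false the
  // assistant reply has just finished, so read it out automatically.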
  const lastStream = useRef(false);
  useEffect(() => {
    if (
      lastAssistantMessage?.streaming !== lastStream.current &&
      lastStream.current
    ) {
      speak();
    }
    lastStream.current = !!lastAssistantMessage?.streaming;
  }, [lastAssistantMessage?.streaming]);
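  // Assistant reply on top, mic toggle at the bottom; the bottom pane shows
  // the live transcript while listening, or the last user message otherwise.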
  return (
    <div className={style["voice-page"]}>
      <div className={style["top"] + ` ${style["active"]}`} onClick={speak}>
        {lastAssistantMessage?.content}
      </div>
      <div className={style["center"]}></div>
      <div
        className={
          style["bottom"] + (speech.listening ? ` ${style["active"]}` : "")
        }
        onClick={() => {
          if (speech.listening) {
            stopVoice();
          } else {
            startVoice();
          }
        }}
      >
        {speech.transcript || lastUserMessage?.content}
      </div>
    </div>
  );
}