增加语音输入

This commit is contained in:
sijinhui 2024-03-28 00:48:40 +08:00
parent 7ffabb77f9
commit 8620df325d
10 changed files with 217 additions and 110 deletions

View File

@ -1,16 +0,0 @@
import { Flex } from "antd";
import VoiceInput from "@/app/components/voice-input";
export default async function UsersPage() {
// const users: User[] = await getData();
// console.log("data", data);
return (
<>
<Flex gap="middle" vertical>
<VoiceInput />
</Flex>
</>
);
}

View File

@ -643,7 +643,7 @@
background-color: var(--white); background-color: var(--white);
color: var(--black); color: var(--black);
font-family: inherit; font-family: inherit;
padding: 10px 90px 10px 14px; padding: 10px 120px 10px 14px;
resize: none; resize: none;
outline: none; outline: none;
box-sizing: border-box; box-sizing: border-box;
@ -661,6 +661,14 @@
bottom: 32px; bottom: 32px;
} }
.chat-input-send-area {
color: white;
position: absolute;
right: 100px;
bottom: 32px;
}
@media only screen and (max-width: 600px) { @media only screen and (max-width: 600px) {
.chat-input { .chat-input {
font-size: 16px; font-size: 16px;

View File

@ -98,11 +98,14 @@ import { ChatCommandPrefix, useChatCommand, useCommand } from "../command";
import { prettyObject } from "../utils/format"; import { prettyObject } from "../utils/format";
import { ExportMessageModal } from "./exporter"; import { ExportMessageModal } from "./exporter";
import { getClientConfig } from "../config/client"; import { getClientConfig } from "../config/client";
import { Button } from "emoji-picker-react/src/components/atoms/Button";
import Image from "next/image"; import Image from "next/image";
import { useAllModels } from "../utils/hooks"; import { useAllModels } from "../utils/hooks";
import { MultimodalContent } from "../client/api"; import { MultimodalContent } from "../client/api";
import { getTokenLength } from "@/lib/utils"; import { getTokenLength } from "@/lib/utils";
import VoiceInput from "@/app/components/voice-input";
// const VoiceInput = dynamic(
// () => import('@/app/components/voice-input'), { ssr: false });
const Markdown = dynamic(async () => (await import("./markdown")).Markdown, { const Markdown = dynamic(async () => (await import("./markdown")).Markdown, {
loading: () => <LoadingIcon />, loading: () => <LoadingIcon />,
@ -1251,6 +1254,21 @@ function _Chat() {
setAttachImages(images); setAttachImages(images);
} }
// const [ voiceInputText, setVoiceInputText ] = useState("");
// const [ voiceInputLoading, setVoiceInputLoading ] = useState(false);
// useEffect(() => {
// if (voiceInputLoading) {
// // 正在进行语音输入,输入框应该显示原有文本加上语音输入的。
// setUserInput(userInput + voiceInputText);
// } else {
// // 但是语音输入结束,应该清理多余字符。
// console.log('end', userInput, voiceInputText)
// }
//
// // eslint-disable-next-line react-hooks/exhaustive-deps
// }, [voiceInputLoading, voiceInputText]);
return ( return (
<div className={styles.chat} key={session.id}> <div className={styles.chat} key={session.id}>
<div className="window-header" data-tauri-drag-region> <div className="window-header" data-tauri-drag-region>
@ -1688,12 +1706,16 @@ function _Chat() {
})} })}
</div> </div>
)} )}
<IconButton <div className={styles["chat-input-send-area"]}>
icon={<SendWhiteIcon />} <VoiceInput
text={Locale.Chat.Send} // voiceInputText={voiceInputText}
type="primary" // setVoiceInputText={setVoiceInputText}
onClick={() => doSubmit(userInput)} // voiceInputLoading={voiceInputLoading}
/> // setVoiceInputLoading={setVoiceInputLoading}
userInput={userInput}
setUserInput={setUserInput}
/>
</div>
<IconButton <IconButton
icon={<SendWhiteIcon />} icon={<SendWhiteIcon />}
text={Locale.Chat.Send} text={Locale.Chat.Send}

View File

@ -1,82 +1,172 @@
"use client"; // "use client";
import { Button, Input, Space } from "antd"; import { Button, Input, Space } from "antd";
import { useEffect, useMemo, useRef, useState } from "react"; import {
Dispatch,
SetStateAction,
useEffect,
useMemo,
useRef,
useState,
} from "react";
import { AudioOutlined, LoadingOutlined } from "@ant-design/icons"; import { AudioOutlined, LoadingOutlined } from "@ant-design/icons";
import * as ms_audio_sdk from "microsoft-cognitiveservices-speech-sdk";
import {
Recognizer,
SpeechRecognitionCanceledEventArgs,
SpeechRecognitionEventArgs,
SpeechRecognitionResult,
} from "microsoft-cognitiveservices-speech-sdk/distrib/lib/src/sdk/Exports";
import { useAccessStore } from "@/app/store";
export default function VoiceInput() { interface VoiceInputInterface {
const [userInput, setUserInput] = useState(""); userInput: string;
const [loading, setLoading] = useState(false); setUserInput: Dispatch<SetStateAction<string>>;
const recognition = useRef(null); }
const lastLength = useRef(0); // @ts-ignore
export default function VoiceInput({
userInput,
setUserInput,
}: VoiceInputInterface) {
const [voiceInputText, setVoiceInputText] = useState("");
const [voiceInputLoading, setVoiceInputLoading] = useState(false);
// const recognition = useRef(null);
const recognizer = useRef<ms_audio_sdk.SpeechRecognizer | undefined>();
const [tempUserInput, setTempUserInput] = useState("");
const accessStore = useAccessStore();
// const lastLength = useRef(0);
useEffect(() => { // useEffect(() => {
if ("webkitSpeechRecognition" in window) { //
if (recognition.current === null) { // function onresult(event: any) {
recognition.current = new window.webkitSpeechRecognition(); // // 这个事件会把前面识别的结果都返回回来,所以需要取最后一个识别结果
} // const length = event.results.length;
} else { // // 没有新的识别结果的时候,事件也会触发,所以这里判断一下如果没有新的识别结果,就不取最后一个识别结果了。
console.error("此浏览器不支持webkitSpeechRecognition。"); // if (lastLength.current === length) {
return; // return;
} // }
if (!recognition.current) return; //
// 设置语言 // lastLength.current = length;
recognition.current.lang = "zh"; //
// 开启连续识别 // console.log(event.results);
recognition.current.continuous = true; //
// 开启实时识别 // // 获取最后一个识别结果
recognition.current.interimResults = true; // const transcript = event.results[length - 1]?.[0]?.transcript;
//
// // 将最后一个识别结果添加到文本
// if (transcript) {
// setVoiceInputText((voiceInputText) => voiceInputText + transcript);
// }
// }
//
// }, []);
function onresult(event: any) { function onRecognizedResult(result: SpeechRecognitionResult) {
// 这个事件会把前面识别的结果都返回回来,所以需要取最后一个识别结果 // setVoiceInputText("");
const length = event.results.length; setVoiceInputText(`${result.text}`);
// 没有新的识别结果的时候,事件也会触发,所以这里判断一下如果没有新的识别结果,就不取最后一个识别结果了。
if (lastLength.current === length) {
return;
}
lastLength.current = length; let intentJson = result.properties.getProperty(
ms_audio_sdk.PropertyId.LanguageUnderstandingServiceResponse_JsonResult,
console.log(event.results); );
if (intentJson) {
// 获取最后一个识别结果 setVoiceInputText(voiceInputText + `${intentJson}`);
const transcript = event.results[length - 1]?.[0]?.transcript;
// 将最后一个识别结果添加到文本
if (transcript) {
setUserInput((userInput) => userInput + transcript);
}
} }
// 监听语音识别结果 // setTempUserInput("");
recognition.current.addEventListener("result", onresult); console.log("3333", tempUserInput, "2", voiceInputText);
return () => { // if (result?.translations) {
if (recognition.current) { // let resultJson = JSON.parse(result.json);
recognition.current.removeEventListener("result", onresult); // resultJson['privTranslationPhrase']['Translation']['Translations'].forEach(
} // function (translation: { Language: any; Text: any; }) {
}; // setVoiceInputText(voiceInputText + ` [${translation.Language}] ${translation.Text}\r\n`);
}, []); // });
// }
}
function onCanceled(
sender: Recognizer,
event: SpeechRecognitionCanceledEventArgs,
) {
console.log(event);
function click() { // 展示取消事件
if (loading) { // statusDiv.innerHTML += "(cancel) Reason: " + ms_audio_sdk.CancellationReason[event.reason];
recognition.current.stop(); // if (event.reason === ms_audio_sdk.CancellationReason.Error) {
setLoading(false); // statusDiv.innerHTML += ": " + event.errorDetails;
return; // }
} // statusDiv.innerHTML += "\r\n";
setLoading(true); }
function onRecognizing(
sender: Recognizer,
event: SpeechRecognitionEventArgs,
) {
let result = event.result;
setUserInput(
tempUserInput +
voiceInputText.replace(/(.*)(^|[\r\n]+).*\[\.\.\.][\r\n]+/, "$1$2") +
`${result.text} [...]`,
);
lastLength.current = 0; setVoiceInputText(
recognition.current.start(); voiceInputText.replace(/(.*)(^|[\r\n]+).*\[\.\.\.][\r\n]+/, "$1$2") +
`${result.text} [...]`,
);
} }
const startRecognition = () => {
if (voiceInputLoading) {
recognizer.current?.close();
setVoiceInputLoading(false);
// setVoiceInputText("");
// setUserInput(tempUserInput);
return;
}
setVoiceInputLoading(true);
setTempUserInput(userInput); // 开始的时候拷贝一份用于复原
setVoiceInputText("");
const speechConfig = ms_audio_sdk.SpeechConfig.fromSubscription(
accessStore.azureVoiceKey,
"eastasia",
);
const audioConfig = ms_audio_sdk.AudioConfig.fromDefaultMicrophoneInput();
speechConfig.speechRecognitionLanguage = "zh-CN";
speechConfig.setProperty(
ms_audio_sdk.PropertyId.SpeechServiceConnection_EndSilenceTimeoutMs,
"2500",
);
recognizer.current = new ms_audio_sdk.SpeechRecognizer(
speechConfig,
audioConfig,
);
recognizer.current.recognizing = onRecognizing; // 自定义分段显示
recognizer.current.canceled = onCanceled; // 自定义中断
recognizer.current.recognizeOnceAsync(
(result) => {
// onRecognizedResult(result);
setVoiceInputText(`${result.text}`);
console.log("3333", tempUserInput, "2", voiceInputText);
setUserInput(tempUserInput + voiceInputText + `${result.text}`);
// setVoiceInputText(result.text);
console.log("result", result.text);
setVoiceInputLoading(false);
// recognizer.close();
},
(err) => {
console.error("Recognition error: ", err); // 错误处理
setVoiceInputLoading(false);
},
);
};
const icon = useMemo(() => { const icon = useMemo(() => {
if (loading) { if (voiceInputLoading) {
return ( return (
<LoadingOutlined <LoadingOutlined
style={{ style={{
fontSize: 16, fontSize: 16,
color: "#ffffff", color: "rgb(234, 149, 24)",
}} }}
/> />
); );
@ -85,17 +175,17 @@ export default function VoiceInput() {
<AudioOutlined <AudioOutlined
style={{ style={{
fontSize: 16, fontSize: 16,
color: "#ffffff", color: "rgb(234, 149, 24)",
}} }}
/> />
); );
}, [loading]); }, [voiceInputLoading]);
return ( return (
<div style={{ textAlign: "center", marginTop: 200 }}> <div>
<Space.Compact style={{ width: 600 }}> <Space.Compact>
<Input size="large" value={userInput} /> {/*<Input value={voiceInputText} />*/}
<Button size="large" type="primary" onClick={click} icon={icon} /> <Button type="text" onClick={startRecognition} icon={icon} />
</Space.Compact> </Space.Compact>
</div> </div>
); );

View File

@ -27,6 +27,7 @@ declare global {
AZURE_URL?: string; // https://{azure-url}/openai/deployments/{deploy-name} AZURE_URL?: string; // https://{azure-url}/openai/deployments/{deploy-name}
AZURE_API_KEY?: string; AZURE_API_KEY?: string;
AZURE_API_VERSION?: string; AZURE_API_VERSION?: string;
AZURE_VOICE_KEY?: string;
// google only // google only
GOOGLE_API_KEY?: string; GOOGLE_API_KEY?: string;
@ -93,6 +94,7 @@ export const getServerSideConfig = () => {
azureUrl: process.env.AZURE_URL ?? "", azureUrl: process.env.AZURE_URL ?? "",
azureApiKey: process.env.AZURE_API_KEY ?? "", azureApiKey: process.env.AZURE_API_KEY ?? "",
azureApiVersion: process.env.AZURE_API_VERSION ?? "", azureApiVersion: process.env.AZURE_API_VERSION ?? "",
azureVoiceKey: process.env.AZURE_VOICE_KEY ?? "",
isGoogle, isGoogle,
googleApiKey: process.env.GOOGLE_API_KEY, googleApiKey: process.env.GOOGLE_API_KEY,

View File

@ -107,7 +107,7 @@ Latex inline: $x^2$
Latex block: $$e=mc^2$$ Latex block: $$e=mc^2$$
`; `;
export const SUMMARIZE_MODEL = "gpt-3.5-turbo-1106"; export const SUMMARIZE_MODEL = "gpt-3.5-turbo-0125";
export const GEMINI_SUMMARIZE_MODEL = "gemini-pro"; export const GEMINI_SUMMARIZE_MODEL = "gemini-pro";
export const KnowledgeCutOffDate: Record<string, string> = { export const KnowledgeCutOffDate: Record<string, string> = {
@ -132,8 +132,8 @@ export const DEFAULT_MODELS = [
// available: false, // available: false,
// }, // },
{ {
name: "gpt-3.5-turbo-1106", name: "gpt-3.5-turbo-0125",
describe: "GPT-3,最快,,最便宜", describe: "GPT-3,最快,效果一般,最便宜",
available: true, available: true,
provider: { provider: {
id: "openai", id: "openai",

View File

@ -58,7 +58,7 @@ export const CN_MASKS: BuiltinMask[] = [
}, },
], ],
modelConfig: { modelConfig: {
model: "gpt-3.5-turbo-1106", model: "gpt-3.5-turbo-0125",
temperature: 1, temperature: 1,
max_tokens: 2000, max_tokens: 2000,
presence_penalty: 0, presence_penalty: 0,
@ -84,7 +84,7 @@ export const CN_MASKS: BuiltinMask[] = [
}, },
], ],
modelConfig: { modelConfig: {
model: "gpt-3.5-turbo-1106", model: "gpt-3.5-turbo-0125",
temperature: 1, temperature: 1,
max_tokens: 2000, max_tokens: 2000,
presence_penalty: 0, presence_penalty: 0,
@ -110,7 +110,7 @@ export const CN_MASKS: BuiltinMask[] = [
}, },
], ],
modelConfig: { modelConfig: {
model: "gpt-3.5-turbo-1106", model: "gpt-3.5-turbo-0125",
temperature: 1, temperature: 1,
max_tokens: 2000, max_tokens: 2000,
presence_penalty: 0, presence_penalty: 0,
@ -136,7 +136,7 @@ export const CN_MASKS: BuiltinMask[] = [
}, },
], ],
modelConfig: { modelConfig: {
model: "gpt-3.5-turbo-1106", model: "gpt-3.5-turbo-0125",
temperature: 1, temperature: 1,
max_tokens: 2000, max_tokens: 2000,
presence_penalty: 0, presence_penalty: 0,
@ -162,7 +162,7 @@ export const CN_MASKS: BuiltinMask[] = [
}, },
], ],
modelConfig: { modelConfig: {
model: "gpt-3.5-turbo-1106", model: "gpt-3.5-turbo-0125",
temperature: 1, temperature: 1,
max_tokens: 2000, max_tokens: 2000,
presence_penalty: 0, presence_penalty: 0,
@ -188,7 +188,7 @@ export const CN_MASKS: BuiltinMask[] = [
}, },
], ],
modelConfig: { modelConfig: {
model: "gpt-3.5-turbo-1106", model: "gpt-3.5-turbo-0125",
temperature: 1, temperature: 1,
max_tokens: 2000, max_tokens: 2000,
presence_penalty: 0, presence_penalty: 0,
@ -214,7 +214,7 @@ export const CN_MASKS: BuiltinMask[] = [
}, },
], ],
modelConfig: { modelConfig: {
model: "gpt-3.5-turbo-1106", model: "gpt-3.5-turbo-0125",
temperature: 1, temperature: 1,
max_tokens: 2000, max_tokens: 2000,
presence_penalty: 0, presence_penalty: 0,
@ -240,7 +240,7 @@ export const CN_MASKS: BuiltinMask[] = [
}, },
], ],
modelConfig: { modelConfig: {
model: "gpt-3.5-turbo-1106", model: "gpt-3.5-turbo-0125",
temperature: 1, temperature: 1,
max_tokens: 2000, max_tokens: 2000,
presence_penalty: 0, presence_penalty: 0,
@ -272,7 +272,7 @@ export const CN_MASKS: BuiltinMask[] = [
}, },
], ],
modelConfig: { modelConfig: {
model: "gpt-3.5-turbo-1106", model: "gpt-3.5-turbo-0125",
temperature: 0.5, temperature: 0.5,
max_tokens: 2000, max_tokens: 2000,
presence_penalty: 0, presence_penalty: 0,
@ -298,7 +298,7 @@ export const CN_MASKS: BuiltinMask[] = [
}, },
], ],
modelConfig: { modelConfig: {
model: "gpt-3.5-turbo-1106", model: "gpt-3.5-turbo-0125",
temperature: 1, temperature: 1,
max_tokens: 2000, max_tokens: 2000,
presence_penalty: 0, presence_penalty: 0,
@ -331,7 +331,7 @@ export const CN_MASKS: BuiltinMask[] = [
}, },
], ],
modelConfig: { modelConfig: {
model: "gpt-3.5-turbo-1106", model: "gpt-3.5-turbo-0125",
temperature: 1, temperature: 1,
max_tokens: 2000, max_tokens: 2000,
presence_penalty: 0, presence_penalty: 0,
@ -364,7 +364,7 @@ export const CN_MASKS: BuiltinMask[] = [
}, },
], ],
modelConfig: { modelConfig: {
model: "gpt-3.5-turbo-1106", model: "gpt-3.5-turbo-0125",
temperature: 1, temperature: 1,
max_tokens: 2000, max_tokens: 2000,
presence_penalty: 0, presence_penalty: 0,
@ -422,7 +422,7 @@ export const CN_MASKS: BuiltinMask[] = [
}, },
], ],
modelConfig: { modelConfig: {
model: "gpt-3.5-turbo-1106", model: "gpt-3.5-turbo-0125",
temperature: 1, temperature: 1,
max_tokens: 2000, max_tokens: 2000,
presence_penalty: 0, presence_penalty: 0,

View File

@ -86,7 +86,7 @@ export const EN_MASKS: BuiltinMask[] = [
}, },
], ],
modelConfig: { modelConfig: {
model: "gpt-3.5-turbo-1106", model: "gpt-3.5-turbo-0125",
temperature: 0.5, temperature: 0.5,
max_tokens: 2000, max_tokens: 2000,
presence_penalty: 0, presence_penalty: 0,

View File

@ -30,6 +30,7 @@ const DEFAULT_ACCESS_STATE = {
azureUrl: "", azureUrl: "",
azureApiKey: "", azureApiKey: "",
azureApiVersion: "2023-05-15", azureApiVersion: "2023-05-15",
azureVoiceKey: "",
// google ai studio // google ai studio
googleUrl: "", googleUrl: "",

View File

@ -51,7 +51,7 @@ export const DEFAULT_CONFIG = {
dontUseModel: DISABLE_MODELS, dontUseModel: DISABLE_MODELS,
modelConfig: { modelConfig: {
model: "gpt-3.5-turbo-1106" as ModelType, model: "gpt-3.5-turbo-0125" as ModelType,
temperature: 0.8, temperature: 0.8,
top_p: 1, top_p: 1,
max_tokens: 2000, max_tokens: 2000,
@ -137,7 +137,7 @@ export const useAppConfig = createPersistStore(
}), }),
{ {
name: StoreKey.Config, name: StoreKey.Config,
version: 3.8991, version: 3.8992,
migrate(persistedState, version) { migrate(persistedState, version) {
const state = persistedState as ChatConfig; const state = persistedState as ChatConfig;
@ -168,7 +168,7 @@ export const useAppConfig = createPersistStore(
if (version < 3.8) { if (version < 3.8) {
state.lastUpdate = Date.now(); state.lastUpdate = Date.now();
} }
if (version < 3.8991) { if (version < 3.8992) {
state.lastUpdate = Date.now(); state.lastUpdate = Date.now();
return { ...DEFAULT_CONFIG }; return { ...DEFAULT_CONFIG };
} }