Add voice input

sijinhui 2024-03-28 00:48:40 +08:00
parent 7ffabb77f9
commit 8620df325d
10 changed files with 217 additions and 110 deletions

View File

@@ -1,16 +0,0 @@
import { Flex } from "antd";
import VoiceInput from "@/app/components/voice-input";
export default async function UsersPage() {
// const users: User[] = await getData();
// console.log("data", data);
return (
<>
<Flex gap="middle" vertical>
<VoiceInput />
</Flex>
</>
);
}

View File

@@ -643,7 +643,7 @@
background-color: var(--white);
color: var(--black);
font-family: inherit;
padding: 10px 90px 10px 14px;
padding: 10px 120px 10px 14px;
resize: none;
outline: none;
box-sizing: border-box;
@@ -661,6 +661,14 @@
bottom: 32px;
}
.chat-input-send-area {
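// Holds the new voice-input button just left of the send button; the
// textarea's right padding grows from 90px to 120px above to make room.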
color: white;
position: absolute;
right: 100px;
bottom: 32px;
}
@media only screen and (max-width: 600px) {
.chat-input {
font-size: 16px;

View File

@@ -98,11 +98,14 @@ import { ChatCommandPrefix, useChatCommand, useCommand } from "../command";
import { prettyObject } from "../utils/format";
import { ExportMessageModal } from "./exporter";
import { getClientConfig } from "../config/client";
import { Button } from "emoji-picker-react/src/components/atoms/Button";
import Image from "next/image";
import { useAllModels } from "../utils/hooks";
import { MultimodalContent } from "../client/api";
import { getTokenLength } from "@/lib/utils";
import VoiceInput from "@/app/components/voice-input";
// const VoiceInput = dynamic(
// () => import('@/app/components/voice-input'), { ssr: false });
const Markdown = dynamic(async () => (await import("./markdown")).Markdown, {
loading: () => <LoadingIcon />,
@@ -1251,6 +1254,21 @@ function _Chat() {
setAttachImages(images);
}
// const [ voiceInputText, setVoiceInputText ] = useState("");
// const [ voiceInputLoading, setVoiceInputLoading ] = useState(false);
// useEffect(() => {
// if (voiceInputLoading) {
// // Voice input is in progress; the input box should show the original text plus the recognized speech.
// setUserInput(userInput + voiceInputText);
// } else {
// // But when voice input ends, the extra characters should be cleaned up.
// console.log('end', userInput, voiceInputText)
// }
//
// // eslint-disable-next-line react-hooks/exhaustive-deps
// }, [voiceInputLoading, voiceInputText]);
return (
<div className={styles.chat} key={session.id}>
<div className="window-header" data-tauri-drag-region>
@@ -1688,12 +1706,16 @@ function _Chat() {
})}
</div>
)}
<IconButton
icon={<SendWhiteIcon />}
text={Locale.Chat.Send}
type="primary"
onClick={() => doSubmit(userInput)}
<div className={styles["chat-input-send-area"]}>
<VoiceInput
// voiceInputText={voiceInputText}
// setVoiceInputText={setVoiceInputText}
// voiceInputLoading={voiceInputLoading}
// setVoiceInputLoading={setVoiceInputLoading}
userInput={userInput}
setUserInput={setUserInput}
/>
</div>
<IconButton
icon={<SendWhiteIcon />}
text={Locale.Chat.Send}
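The hunk cuts off above; the net effect is that a voice button now sits beside the send button. A sketch of the resulting JSX, reconstructed from the removed and added lines (illustrative, not verbatim from the file):

<div className={styles["chat-input-send-area"]}>
  {/* VoiceInput appends recognized speech to the shared input state */}
  <VoiceInput userInput={userInput} setUserInput={setUserInput} />
</div>
<IconButton
  icon={<SendWhiteIcon />}
  text={Locale.Chat.Send}
  type="primary"
  onClick={() => doSubmit(userInput)}
/>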

View File

@@ -1,82 +1,172 @@
"use client";
// "use client";
import { Button, Input, Space } from "antd";
import { useEffect, useMemo, useRef, useState } from "react";
import {
Dispatch,
SetStateAction,
useEffect,
useMemo,
useRef,
useState,
} from "react";
import { AudioOutlined, LoadingOutlined } from "@ant-design/icons";
import * as ms_audio_sdk from "microsoft-cognitiveservices-speech-sdk";
import {
Recognizer,
SpeechRecognitionCanceledEventArgs,
SpeechRecognitionEventArgs,
SpeechRecognitionResult,
} from "microsoft-cognitiveservices-speech-sdk/distrib/lib/src/sdk/Exports";
import { useAccessStore } from "@/app/store";
export default function VoiceInput() {
const [userInput, setUserInput] = useState("");
const [loading, setLoading] = useState(false);
const recognition = useRef(null);
interface VoiceInputInterface {
userInput: string;
setUserInput: Dispatch<SetStateAction<string>>;
}
const lastLength = useRef(0);
// @ts-ignore
export default function VoiceInput({
userInput,
setUserInput,
}: VoiceInputInterface) {
const [voiceInputText, setVoiceInputText] = useState("");
const [voiceInputLoading, setVoiceInputLoading] = useState(false);
// const recognition = useRef(null);
const recognizer = useRef<ms_audio_sdk.SpeechRecognizer | undefined>();
const [tempUserInput, setTempUserInput] = useState("");
const accessStore = useAccessStore();
// const lastLength = useRef(0);
useEffect(() => {
if ("webkitSpeechRecognition" in window) {
if (recognition.current === null) {
recognition.current = new window.webkitSpeechRecognition();
// useEffect(() => {
//
// function onresult(event: any) {
// // This event returns all earlier recognition results as well, so take only the last one
// const length = event.results.length;
// // The event also fires when there is no new result, so skip taking the last result in that case.
// if (lastLength.current === length) {
// return;
// }
//
// lastLength.current = length;
//
// console.log(event.results);
//
// // Get the last recognition result
// const transcript = event.results[length - 1]?.[0]?.transcript;
//
// // Append the last recognition result to the text
// if (transcript) {
// setVoiceInputText((voiceInputText) => voiceInputText + transcript);
// }
// }
//
// }, []);
function onRecognizedResult(result: SpeechRecognitionResult) {
// setVoiceInputText("");
setVoiceInputText(`${result.text}`);
let intentJson = result.properties.getProperty(
ms_audio_sdk.PropertyId.LanguageUnderstandingServiceResponse_JsonResult,
);
if (intentJson) {
setVoiceInputText(voiceInputText + `${intentJson}`);
}
} else {
console.error("此浏览器不支持webkitSpeechRecognition。");
return;
}
if (!recognition.current) return;
// Set the language
recognition.current.lang = "zh";
// Enable continuous recognition
recognition.current.continuous = true;
// Enable interim (real-time) results
recognition.current.interimResults = true;
function onresult(event: any) {
// This event returns all earlier recognition results as well, so take only the last one
const length = event.results.length;
// The event also fires when there is no new result, so skip taking the last result in that case.
if (lastLength.current === length) {
// setTempUserInput("");
console.log("3333", tempUserInput, "2", voiceInputText);
// if (result?.translations) {
// let resultJson = JSON.parse(result.json);
// resultJson['privTranslationPhrase']['Translation']['Translations'].forEach(
// function (translation: { Language: any; Text: any; }) {
// setVoiceInputText(voiceInputText + ` [${translation.Language}] ${translation.Text}\r\n`);
// });
// }
}
function onCanceled(
sender: Recognizer,
event: SpeechRecognitionCanceledEventArgs,
) {
console.log(event);
// Display the cancellation event
// statusDiv.innerHTML += "(cancel) Reason: " + ms_audio_sdk.CancellationReason[event.reason];
// if (event.reason === ms_audio_sdk.CancellationReason.Error) {
// statusDiv.innerHTML += ": " + event.errorDetails;
// }
// statusDiv.innerHTML += "\r\n";
}
function onRecognizing(
sender: Recognizer,
event: SpeechRecognitionEventArgs,
) {
let result = event.result;
setUserInput(
tempUserInput +
voiceInputText.replace(/(.*)(^|[\r\n]+).*\[\.\.\.][\r\n]+/, "$1$2") +
`${result.text} [...]`,
);
setVoiceInputText(
voiceInputText.replace(/(.*)(^|[\r\n]+).*\[\.\.\.][\r\n]+/, "$1$2") +
`${result.text} [...]`,
);
}
const startRecognition = () => {
if (voiceInputLoading) {
recognizer.current?.close();
setVoiceInputLoading(false);
// setVoiceInputText("");
// setUserInput(tempUserInput);
return;
}
lastLength.current = length;
setVoiceInputLoading(true);
setTempUserInput(userInput); // Copy the input at the start so it can be restored
setVoiceInputText("");
console.log(event.results);
// Get the last recognition result
const transcript = event.results[length - 1]?.[0]?.transcript;
// Append the last recognition result to the text
if (transcript) {
setUserInput((userInput) => userInput + transcript);
}
}
// Listen for speech recognition results
recognition.current.addEventListener("result", onresult);
return () => {
if (recognition.current) {
recognition.current.removeEventListener("result", onresult);
}
const speechConfig = ms_audio_sdk.SpeechConfig.fromSubscription(
accessStore.azureVoiceKey,
"eastasia",
);
const audioConfig = ms_audio_sdk.AudioConfig.fromDefaultMicrophoneInput();
speechConfig.speechRecognitionLanguage = "zh-CN";
speechConfig.setProperty(
ms_audio_sdk.PropertyId.SpeechServiceConnection_EndSilenceTimeoutMs,
"2500",
);
recognizer.current = new ms_audio_sdk.SpeechRecognizer(
speechConfig,
audioConfig,
);
recognizer.current.recognizing = onRecognizing; // Custom interim-segment display
recognizer.current.canceled = onCanceled; // Custom cancellation handling
recognizer.current.recognizeOnceAsync(
(result) => {
// onRecognizedResult(result);
setVoiceInputText(`${result.text}`);
console.log("3333", tempUserInput, "2", voiceInputText);
setUserInput(tempUserInput + voiceInputText + `${result.text}`);
// setVoiceInputText(result.text);
console.log("result", result.text);
setVoiceInputLoading(false);
// recognizer.close();
},
(err) => {
console.error("Recognition error: ", err); // 错误处理
setVoiceInputLoading(false);
},
);
};
}, []);
function click() {
if (loading) {
recognition.current.stop();
setLoading(false);
return;
}
setLoading(true);
lastLength.current = 0;
recognition.current.start();
}
const icon = useMemo(() => {
if (loading) {
if (voiceInputLoading) {
return (
<LoadingOutlined
style={{
fontSize: 16,
color: "#ffffff",
color: "rgb(234, 149, 24)",
}}
/>
);
@@ -85,17 +175,17 @@ export default function VoiceInput() {
<AudioOutlined
style={{
fontSize: 16,
color: "#ffffff",
color: "rgb(234, 149, 24)",
}}
/>
);
}, [loading]);
}, [voiceInputLoading]);
return (
<div style={{ textAlign: "center", marginTop: 200 }}>
<Space.Compact style={{ width: 600 }}>
<Input size="large" value={userInput} />
<Button size="large" type="primary" onClick={click} icon={icon} />
<div>
<Space.Compact>
{/*<Input value={voiceInputText} />*/}
<Button type="text" onClick={startRecognition} icon={icon} />
</Space.Compact>
</div>
);
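For context, a minimal sketch of the recognize-once flow the rewritten component wraps, assuming a valid Azure Speech subscription key; the region "eastasia" and the 2.5s end-silence timeout follow the commit, while the recognizeOnce helper name is illustrative:

import * as ms_audio_sdk from "microsoft-cognitiveservices-speech-sdk";

// Capture one utterance from the default microphone, streaming interim
// hypotheses through onInterim and resolving with the final transcript.
function recognizeOnce(
  key: string,
  onInterim: (text: string) => void,
): Promise<string> {
  const speechConfig = ms_audio_sdk.SpeechConfig.fromSubscription(key, "eastasia");
  speechConfig.speechRecognitionLanguage = "zh-CN";
  // Stop listening after 2.5s of trailing silence.
  speechConfig.setProperty(
    ms_audio_sdk.PropertyId.SpeechServiceConnection_EndSilenceTimeoutMs,
    "2500",
  );
  const audioConfig = ms_audio_sdk.AudioConfig.fromDefaultMicrophoneInput();
  const recognizer = new ms_audio_sdk.SpeechRecognizer(speechConfig, audioConfig);
  // "recognizing" fires repeatedly with partial text while the user speaks.
  recognizer.recognizing = (_sender, event) => onInterim(event.result.text);
  return new Promise((resolve, reject) => {
    // recognizeOnceAsync delivers one final result after end-of-speech.
    recognizer.recognizeOnceAsync(
      (result) => {
        recognizer.close();
        resolve(result.text);
      },
      (err) => {
        recognizer.close();
        reject(err);
      },
    );
  });
}

While partials arrive, the component appends them to the input with a trailing "[...]" marker; the regex replace in onRecognizing strips the previous marker before appending the next partial, so only one in-progress segment is shown at a time.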

View File

@@ -27,6 +27,7 @@ declare global {
AZURE_URL?: string; // https://{azure-url}/openai/deployments/{deploy-name}
AZURE_API_KEY?: string;
AZURE_API_VERSION?: string;
AZURE_VOICE_KEY?: string;
// google only
GOOGLE_API_KEY?: string;
@@ -93,6 +94,7 @@ export const getServerSideConfig = () => {
azureUrl: process.env.AZURE_URL ?? "",
azureApiKey: process.env.AZURE_API_KEY ?? "",
azureApiVersion: process.env.AZURE_API_VERSION ?? "",
azureVoiceKey: process.env.AZURE_VOICE_KEY ?? "",
isGoogle,
googleApiKey: process.env.GOOGLE_API_KEY,
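The new key rides the existing Azure config path. A minimal sketch of the client-side consumption, assuming azureVoiceKey is populated from the server config like the other Azure fields; the useSpeechConfig helper is hypothetical, not part of this commit:

import * as ms_audio_sdk from "microsoft-cognitiveservices-speech-sdk";
import { useAccessStore } from "@/app/store";

// Hypothetical hook: build a SpeechConfig from the stored voice key
// (the region "eastasia" is hardcoded in this commit's voice-input.tsx).
export function useSpeechConfig(): ms_audio_sdk.SpeechConfig {
  const accessStore = useAccessStore();
  return ms_audio_sdk.SpeechConfig.fromSubscription(
    accessStore.azureVoiceKey,
    "eastasia",
  );
}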

View File

@@ -107,7 +107,7 @@ Latex inline: $x^2$
Latex block: $$e=mc^2$$
`;
export const SUMMARIZE_MODEL = "gpt-3.5-turbo-1106";
export const SUMMARIZE_MODEL = "gpt-3.5-turbo-0125";
export const GEMINI_SUMMARIZE_MODEL = "gemini-pro";
export const KnowledgeCutOffDate: Record<string, string> = {
@@ -132,8 +132,8 @@ export const DEFAULT_MODELS = [
// available: false,
// },
{
name: "gpt-3.5-turbo-1106",
describe: "GPT-3,最快,,最便宜",
name: "gpt-3.5-turbo-0125",
describe: "GPT-3,最快,效果一般,最便宜",
available: true,
provider: {
id: "openai",

View File

@@ -58,7 +58,7 @@ export const CN_MASKS: BuiltinMask[] = [
},
],
modelConfig: {
model: "gpt-3.5-turbo-1106",
model: "gpt-3.5-turbo-0125",
temperature: 1,
max_tokens: 2000,
presence_penalty: 0,
@@ -84,7 +84,7 @@ export const CN_MASKS: BuiltinMask[] = [
},
],
modelConfig: {
model: "gpt-3.5-turbo-1106",
model: "gpt-3.5-turbo-0125",
temperature: 1,
max_tokens: 2000,
presence_penalty: 0,
@@ -110,7 +110,7 @@ export const CN_MASKS: BuiltinMask[] = [
},
],
modelConfig: {
model: "gpt-3.5-turbo-1106",
model: "gpt-3.5-turbo-0125",
temperature: 1,
max_tokens: 2000,
presence_penalty: 0,
@@ -136,7 +136,7 @@ export const CN_MASKS: BuiltinMask[] = [
},
],
modelConfig: {
model: "gpt-3.5-turbo-1106",
model: "gpt-3.5-turbo-0125",
temperature: 1,
max_tokens: 2000,
presence_penalty: 0,
@@ -162,7 +162,7 @@ export const CN_MASKS: BuiltinMask[] = [
},
],
modelConfig: {
model: "gpt-3.5-turbo-1106",
model: "gpt-3.5-turbo-0125",
temperature: 1,
max_tokens: 2000,
presence_penalty: 0,
@@ -188,7 +188,7 @@ export const CN_MASKS: BuiltinMask[] = [
},
],
modelConfig: {
model: "gpt-3.5-turbo-1106",
model: "gpt-3.5-turbo-0125",
temperature: 1,
max_tokens: 2000,
presence_penalty: 0,
@@ -214,7 +214,7 @@ export const CN_MASKS: BuiltinMask[] = [
},
],
modelConfig: {
model: "gpt-3.5-turbo-1106",
model: "gpt-3.5-turbo-0125",
temperature: 1,
max_tokens: 2000,
presence_penalty: 0,
@@ -240,7 +240,7 @@ export const CN_MASKS: BuiltinMask[] = [
},
],
modelConfig: {
model: "gpt-3.5-turbo-1106",
model: "gpt-3.5-turbo-0125",
temperature: 1,
max_tokens: 2000,
presence_penalty: 0,
@@ -272,7 +272,7 @@ export const CN_MASKS: BuiltinMask[] = [
},
],
modelConfig: {
model: "gpt-3.5-turbo-1106",
model: "gpt-3.5-turbo-0125",
temperature: 0.5,
max_tokens: 2000,
presence_penalty: 0,
@@ -298,7 +298,7 @@ export const CN_MASKS: BuiltinMask[] = [
},
],
modelConfig: {
model: "gpt-3.5-turbo-1106",
model: "gpt-3.5-turbo-0125",
temperature: 1,
max_tokens: 2000,
presence_penalty: 0,
@@ -331,7 +331,7 @@ export const CN_MASKS: BuiltinMask[] = [
},
],
modelConfig: {
model: "gpt-3.5-turbo-1106",
model: "gpt-3.5-turbo-0125",
temperature: 1,
max_tokens: 2000,
presence_penalty: 0,
@@ -364,7 +364,7 @@ export const CN_MASKS: BuiltinMask[] = [
},
],
modelConfig: {
model: "gpt-3.5-turbo-1106",
model: "gpt-3.5-turbo-0125",
temperature: 1,
max_tokens: 2000,
presence_penalty: 0,
@@ -422,7 +422,7 @@ export const CN_MASKS: BuiltinMask[] = [
},
],
modelConfig: {
model: "gpt-3.5-turbo-1106",
model: "gpt-3.5-turbo-0125",
temperature: 1,
max_tokens: 2000,
presence_penalty: 0,

View File

@@ -86,7 +86,7 @@ export const EN_MASKS: BuiltinMask[] = [
},
],
modelConfig: {
model: "gpt-3.5-turbo-1106",
model: "gpt-3.5-turbo-0125",
temperature: 0.5,
max_tokens: 2000,
presence_penalty: 0,

View File

@@ -30,6 +30,7 @@ const DEFAULT_ACCESS_STATE = {
azureUrl: "",
azureApiKey: "",
azureApiVersion: "2023-05-15",
azureVoiceKey: "",
// google ai studio
googleUrl: "",

View File

@@ -51,7 +51,7 @@ export const DEFAULT_CONFIG = {
dontUseModel: DISABLE_MODELS,
modelConfig: {
model: "gpt-3.5-turbo-1106" as ModelType,
model: "gpt-3.5-turbo-0125" as ModelType,
temperature: 0.8,
top_p: 1,
max_tokens: 2000,
@@ -137,7 +137,7 @@ export const useAppConfig = createPersistStore(
}),
{
name: StoreKey.Config,
version: 3.8991,
version: 3.8992,
migrate(persistedState, version) {
const state = persistedState as ChatConfig;
@@ -168,7 +168,7 @@ export const useAppConfig = createPersistStore(
if (version < 3.8) {
state.lastUpdate = Date.now();
}
if (version < 3.8991) {
if (version < 3.8992) {
state.lastUpdate = Date.now();
return { ...DEFAULT_CONFIG };
}
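Because persisted configs carry a version, bumping 3.8991 to 3.8992 re-arms the reset branch, so existing clients are migrated onto the new defaults (including the gpt-3.5-turbo-0125 model). A simplified sketch of that branch, with the rest of the store machinery elided:

// Sketch: any state persisted before this version is replaced wholesale
// with DEFAULT_CONFIG, which now defaults to gpt-3.5-turbo-0125.
function migrate(persistedState: unknown, version: number) {
  const state = persistedState as ChatConfig;
  if (version < 3.8992) {
    state.lastUpdate = Date.now();
    return { ...DEFAULT_CONFIG };
  }
  return state;
}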