mirror of https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web.git
Add voice input
This commit is contained in:
parent 7ffabb77f9
commit 8620df325d
@@ -1,16 +0,0 @@
-import { Flex } from "antd";
-import VoiceInput from "@/app/components/voice-input";
-
-export default async function UsersPage() {
-  // const users: User[] = await getData();
-
-  // console.log("data", data);
-
-  return (
-    <>
-      <Flex gap="middle" vertical>
-        <VoiceInput />
-      </Flex>
-    </>
-  );
-}
@@ -643,7 +643,7 @@
   background-color: var(--white);
   color: var(--black);
   font-family: inherit;
-  padding: 10px 90px 10px 14px;
+  padding: 10px 120px 10px 14px;
   resize: none;
   outline: none;
   box-sizing: border-box;
@@ -661,6 +661,14 @@
   bottom: 32px;
 }
 
+.chat-input-send-area {
+  color: white;
+
+  position: absolute;
+  right: 100px;
+  bottom: 32px;
+}
+
 @media only screen and (max-width: 600px) {
   .chat-input {
     font-size: 16px;
@@ -98,11 +98,14 @@ import { ChatCommandPrefix, useChatCommand, useCommand } from "../command";
 import { prettyObject } from "../utils/format";
 import { ExportMessageModal } from "./exporter";
 import { getClientConfig } from "../config/client";
 import { Button } from "emoji-picker-react/src/components/atoms/Button";
 import Image from "next/image";
 import { useAllModels } from "../utils/hooks";
 import { MultimodalContent } from "../client/api";
 import { getTokenLength } from "@/lib/utils";
+import VoiceInput from "@/app/components/voice-input";
+// const VoiceInput = dynamic(
+//   () => import('@/app/components/voice-input'), { ssr: false });
 
 const Markdown = dynamic(async () => (await import("./markdown")).Markdown, {
   loading: () => <LoadingIcon />,
@@ -1251,6 +1254,21 @@ function _Chat() {
     setAttachImages(images);
   }
 
+  // const [ voiceInputText, setVoiceInputText ] = useState("");
+  // const [ voiceInputLoading, setVoiceInputLoading ] = useState(false);
+
+  // useEffect(() => {
+  //   if (voiceInputLoading) {
+  //     // Voice input is in progress: the input box should show the original text plus the transcript.
+  //     setUserInput(userInput + voiceInputText);
+  //   } else {
+  //     // Voice input has ended: the leftover characters should be cleaned up.
+  //     console.log('end', userInput, voiceInputText)
+  //   }
+  //
+  //   // eslint-disable-next-line react-hooks/exhaustive-deps
+  // }, [voiceInputLoading, voiceInputText]);
+
   return (
     <div className={styles.chat} key={session.id}>
       <div className="window-header" data-tauri-drag-region>
@@ -1688,12 +1706,16 @@ function _Chat() {
           })}
         </div>
       )}
-      <IconButton
-        icon={<SendWhiteIcon />}
-        text={Locale.Chat.Send}
-        type="primary"
-        onClick={() => doSubmit(userInput)}
-      />
+      <div className={styles["chat-input-send-area"]}>
+        <VoiceInput
+          // voiceInputText={voiceInputText}
+          // setVoiceInputText={setVoiceInputText}
+          // voiceInputLoading={voiceInputLoading}
+          // setVoiceInputLoading={setVoiceInputLoading}
+          userInput={userInput}
+          setUserInput={setUserInput}
+        />
+      </div>
       <IconButton
         icon={<SendWhiteIcon />}
         text={Locale.Chat.Send}
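For context, a minimal sketch (assumed, not code from this commit) of the contract the hunk above relies on: the chat component owns the input state and passes both the value and its setter to VoiceInput, so recognized speech lands in the same state the textarea reads. Only the userInput/setUserInput props and the import path are taken from the diff.

import { useState } from "react";
import VoiceInput from "@/app/components/voice-input";

function ChatInputSketch() {
  const [userInput, setUserInput] = useState("");

  return (
    <div>
      {/* The textarea and VoiceInput read and write the same state. */}
      <textarea
        value={userInput}
        onChange={(e) => setUserInput(e.target.value)}
      />
      <VoiceInput userInput={userInput} setUserInput={setUserInput} />
    </div>
  );
}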
@@ -1,82 +1,172 @@
-"use client";
+// "use client";
 import { Button, Input, Space } from "antd";
-import { useEffect, useMemo, useRef, useState } from "react";
+import {
+  Dispatch,
+  SetStateAction,
+  useEffect,
+  useMemo,
+  useRef,
+  useState,
+} from "react";
 import { AudioOutlined, LoadingOutlined } from "@ant-design/icons";
+import * as ms_audio_sdk from "microsoft-cognitiveservices-speech-sdk";
+import {
+  Recognizer,
+  SpeechRecognitionCanceledEventArgs,
+  SpeechRecognitionEventArgs,
+  SpeechRecognitionResult,
+} from "microsoft-cognitiveservices-speech-sdk/distrib/lib/src/sdk/Exports";
+import { useAccessStore } from "@/app/store";
 
-export default function VoiceInput() {
-  const [userInput, setUserInput] = useState("");
-  const [loading, setLoading] = useState(false);
-  const recognition = useRef(null);
-
-  const lastLength = useRef(0);
+interface VoiceInputInterface {
+  userInput: string;
+  setUserInput: Dispatch<SetStateAction<string>>;
+}
 
+// @ts-ignore
+export default function VoiceInput({
+  userInput,
+  setUserInput,
+}: VoiceInputInterface) {
+  const [voiceInputText, setVoiceInputText] = useState("");
+  const [voiceInputLoading, setVoiceInputLoading] = useState(false);
+  // const recognition = useRef(null);
+  const recognizer = useRef<ms_audio_sdk.SpeechRecognizer | undefined>();
+  const [tempUserInput, setTempUserInput] = useState("");
+  const accessStore = useAccessStore();
+  // const lastLength = useRef(0);
 
-  useEffect(() => {
-    if ("webkitSpeechRecognition" in window) {
-      if (recognition.current === null) {
-        recognition.current = new window.webkitSpeechRecognition();
-      }
-    } else {
-      console.error("This browser does not support webkitSpeechRecognition.");
-      return;
-    }
-    if (!recognition.current) return;
-    // Set the recognition language
-    recognition.current.lang = "zh";
-    // Enable continuous recognition
-    recognition.current.continuous = true;
-    // Enable interim (real-time) results
-    recognition.current.interimResults = true;
-
-    function onresult(event: any) {
-      // This event also returns all earlier results, so take only the last one.
-      const length = event.results.length;
-      // The event fires even when there is no new result, so skip if nothing new arrived.
-      if (lastLength.current === length) {
-        return;
-      }
-
-      lastLength.current = length;
-
-      console.log(event.results);
-
-      // Take the last recognition result
-      const transcript = event.results[length - 1]?.[0]?.transcript;
-
-      // Append the last recognition result to the text
-      if (transcript) {
-        setUserInput((userInput) => userInput + transcript);
-      }
-    }
-
-    // Listen for speech recognition results
-    recognition.current.addEventListener("result", onresult);
-
-    return () => {
-      if (recognition.current) {
-        recognition.current.removeEventListener("result", onresult);
-      }
-    };
-  }, []);
+  // useEffect(() => {
+  //
+  //   function onresult(event: any) {
+  //     // This event also returns all earlier results, so take only the last one.
+  //     const length = event.results.length;
+  //     // The event fires even when there is no new result, so skip if nothing new arrived.
+  //     if (lastLength.current === length) {
+  //       return;
+  //     }
+  //
+  //     lastLength.current = length;
+  //
+  //     console.log(event.results);
+  //
+  //     // Take the last recognition result
+  //     const transcript = event.results[length - 1]?.[0]?.transcript;
+  //
+  //     // Append the last recognition result to the text
+  //     if (transcript) {
+  //       setVoiceInputText((voiceInputText) => voiceInputText + transcript);
+  //     }
+  //   }
+  //
+  // }, []);
 
-  function click() {
-    if (loading) {
-      recognition.current.stop();
-      setLoading(false);
-      return;
-    }
-    setLoading(true);
-
-    lastLength.current = 0;
-    recognition.current.start();
-  }
+  function onRecognizedResult(result: SpeechRecognitionResult) {
+    // setVoiceInputText("");
+    setVoiceInputText(`${result.text}`);
+
+    let intentJson = result.properties.getProperty(
+      ms_audio_sdk.PropertyId.LanguageUnderstandingServiceResponse_JsonResult,
+    );
+    if (intentJson) {
+      setVoiceInputText(voiceInputText + `${intentJson}`);
+    }
+
+    // setTempUserInput("");
+    console.log("3333", tempUserInput, "2", voiceInputText);
+
+    // if (result?.translations) {
+    //   let resultJson = JSON.parse(result.json);
+    //   resultJson['privTranslationPhrase']['Translation']['Translations'].forEach(
+    //     function (translation: { Language: any; Text: any; }) {
+    //       setVoiceInputText(voiceInputText + ` [${translation.Language}] ${translation.Text}\r\n`);
+    //     });
+    // }
+  }
+
+  function onCanceled(
+    sender: Recognizer,
+    event: SpeechRecognitionCanceledEventArgs,
+  ) {
+    console.log(event);
+    // Display the cancellation event
+    // statusDiv.innerHTML += "(cancel) Reason: " + ms_audio_sdk.CancellationReason[event.reason];
+    // if (event.reason === ms_audio_sdk.CancellationReason.Error) {
+    //   statusDiv.innerHTML += ": " + event.errorDetails;
+    // }
+    // statusDiv.innerHTML += "\r\n";
+  }
+
+  function onRecognizing(
+    sender: Recognizer,
+    event: SpeechRecognitionEventArgs,
+  ) {
+    let result = event.result;
+    setUserInput(
+      tempUserInput +
+        voiceInputText.replace(/(.*)(^|[\r\n]+).*\[\.\.\.][\r\n]+/, "$1$2") +
+        `${result.text} [...]`,
+    );
+    setVoiceInputText(
+      voiceInputText.replace(/(.*)(^|[\r\n]+).*\[\.\.\.][\r\n]+/, "$1$2") +
+        `${result.text} [...]`,
+    );
+  }
+
+  const startRecognition = () => {
+    if (voiceInputLoading) {
+      recognizer.current?.close();
+      setVoiceInputLoading(false);
+      // setVoiceInputText("");
+      // setUserInput(tempUserInput);
+      return;
+    }
+
+    setVoiceInputLoading(true);
+    setTempUserInput(userInput); // Copy the current input at the start so it can be restored.
+    setVoiceInputText("");
+
+    const speechConfig = ms_audio_sdk.SpeechConfig.fromSubscription(
+      accessStore.azureVoiceKey,
+      "eastasia",
+    );
+    const audioConfig = ms_audio_sdk.AudioConfig.fromDefaultMicrophoneInput();
+    speechConfig.speechRecognitionLanguage = "zh-CN";
+    speechConfig.setProperty(
+      ms_audio_sdk.PropertyId.SpeechServiceConnection_EndSilenceTimeoutMs,
+      "2500",
+    );
+    recognizer.current = new ms_audio_sdk.SpeechRecognizer(
+      speechConfig,
+      audioConfig,
+    );
+    recognizer.current.recognizing = onRecognizing; // Custom incremental display
+    recognizer.current.canceled = onCanceled; // Custom cancellation handling
+    recognizer.current.recognizeOnceAsync(
+      (result) => {
+        // onRecognizedResult(result);
+        setVoiceInputText(`${result.text}`);
+        console.log("3333", tempUserInput, "2", voiceInputText);
+        setUserInput(tempUserInput + voiceInputText + `${result.text}`);
+        // setVoiceInputText(result.text);
+        console.log("result", result.text);
+        setVoiceInputLoading(false);
+        // recognizer.close();
+      },
+      (err) => {
+        console.error("Recognition error: ", err); // Error handling
+        setVoiceInputLoading(false);
+      },
+    );
+  };
 
   const icon = useMemo(() => {
-    if (loading) {
+    if (voiceInputLoading) {
       return (
         <LoadingOutlined
           style={{
             fontSize: 16,
-            color: "#ffffff",
+            color: "rgb(234, 149, 24)",
           }}
         />
       );
@@ -85,17 +175,17 @@ export default function VoiceInput() {
       <AudioOutlined
         style={{
           fontSize: 16,
-          color: "#ffffff",
+          color: "rgb(234, 149, 24)",
         }}
       />
     );
-  }, [loading]);
+  }, [voiceInputLoading]);
 
   return (
-    <div style={{ textAlign: "center", marginTop: 200 }}>
-      <Space.Compact style={{ width: 600 }}>
-        <Input size="large" value={userInput} />
-        <Button size="large" type="primary" onClick={click} icon={icon} />
+    <div>
+      <Space.Compact>
+        {/*<Input value={voiceInputText} />*/}
+        <Button type="text" onClick={startRecognition} icon={icon} />
       </Space.Compact>
     </div>
   );
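The rewrite above replaces the browser's webkitSpeechRecognition with the Azure Speech SDK. Below is a self-contained sketch of the same single-utterance flow, using only SDK calls that appear in the diff (fromSubscription, fromDefaultMicrophoneInput, recognizeOnceAsync); the Promise wrapper and the recognizeOnce name are illustrative, while the "eastasia" region, zh-CN language, and 2500 ms silence timeout come from the commit.

import * as sdk from "microsoft-cognitiveservices-speech-sdk";

function recognizeOnce(azureVoiceKey: string): Promise<string> {
  const speechConfig = sdk.SpeechConfig.fromSubscription(
    azureVoiceKey,
    "eastasia",
  );
  speechConfig.speechRecognitionLanguage = "zh-CN";
  // Stop listening after 2.5 s of trailing silence, as in the diff.
  speechConfig.setProperty(
    sdk.PropertyId.SpeechServiceConnection_EndSilenceTimeoutMs,
    "2500",
  );

  const audioConfig = sdk.AudioConfig.fromDefaultMicrophoneInput();
  const recognizer = new sdk.SpeechRecognizer(speechConfig, audioConfig);

  // "recognizing" fires with partial hypotheses while the user speaks;
  // recognizeOnceAsync resolves once with the final text of one utterance.
  recognizer.recognizing = (_sender, event) =>
    console.log("partial:", event.result.text);

  return new Promise((resolve, reject) => {
    recognizer.recognizeOnceAsync(
      (result) => {
        recognizer.close();
        resolve(result.text);
      },
      (err) => {
        recognizer.close();
        reject(err);
      },
    );
  });
}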
@@ -27,6 +27,7 @@ declare global {
       AZURE_URL?: string; // https://{azure-url}/openai/deployments/{deploy-name}
       AZURE_API_KEY?: string;
       AZURE_API_VERSION?: string;
+      AZURE_VOICE_KEY?: string;
 
       // google only
       GOOGLE_API_KEY?: string;
@@ -93,6 +94,7 @@ export const getServerSideConfig = () => {
     azureUrl: process.env.AZURE_URL ?? "",
     azureApiKey: process.env.AZURE_API_KEY ?? "",
     azureApiVersion: process.env.AZURE_API_VERSION ?? "",
+    azureVoiceKey: process.env.AZURE_VOICE_KEY ?? "",
 
     isGoogle,
     googleApiKey: process.env.GOOGLE_API_KEY,
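Taken together with the store change further down, the new setting is expected to flow env var → server config → access store → Speech SDK. A hedged sketch; only AZURE_VOICE_KEY and azureVoiceKey appear in the diff, the rest is simplified stand-in plumbing.

// Server side: read the Azure Speech subscription key, as getServerSideConfig
// does above.
const azureVoiceKey = process.env.AZURE_VOICE_KEY ?? "";

// Client side, inside the voice-input component shown earlier:
//   const accessStore = useAccessStore();
//   ms_audio_sdk.SpeechConfig.fromSubscription(accessStore.azureVoiceKey, "eastasia");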
@@ -107,7 +107,7 @@ Latex inline: $x^2$
 Latex block: $$e=mc^2$$
 `;
 
-export const SUMMARIZE_MODEL = "gpt-3.5-turbo-1106";
+export const SUMMARIZE_MODEL = "gpt-3.5-turbo-0125";
 export const GEMINI_SUMMARIZE_MODEL = "gemini-pro";
 
 export const KnowledgeCutOffDate: Record<string, string> = {
@@ -132,8 +132,8 @@ export const DEFAULT_MODELS = [
   //   available: false,
   // },
   {
-    name: "gpt-3.5-turbo-1106",
-    describe: "GPT-3: fastest, dumb, cheapest",
+    name: "gpt-3.5-turbo-0125",
+    describe: "GPT-3: fastest, average quality, cheapest",
     available: true,
     provider: {
       id: "openai",
@@ -58,7 +58,7 @@ export const CN_MASKS: BuiltinMask[] = [
       },
     ],
     modelConfig: {
-      model: "gpt-3.5-turbo-1106",
+      model: "gpt-3.5-turbo-0125",
       temperature: 1,
       max_tokens: 2000,
       presence_penalty: 0,
@@ -84,7 +84,7 @@ export const CN_MASKS: BuiltinMask[] = [
       },
     ],
     modelConfig: {
-      model: "gpt-3.5-turbo-1106",
+      model: "gpt-3.5-turbo-0125",
       temperature: 1,
       max_tokens: 2000,
       presence_penalty: 0,
@@ -110,7 +110,7 @@ export const CN_MASKS: BuiltinMask[] = [
       },
     ],
     modelConfig: {
-      model: "gpt-3.5-turbo-1106",
+      model: "gpt-3.5-turbo-0125",
       temperature: 1,
       max_tokens: 2000,
       presence_penalty: 0,
@@ -136,7 +136,7 @@ export const CN_MASKS: BuiltinMask[] = [
       },
     ],
     modelConfig: {
-      model: "gpt-3.5-turbo-1106",
+      model: "gpt-3.5-turbo-0125",
       temperature: 1,
       max_tokens: 2000,
       presence_penalty: 0,
@@ -162,7 +162,7 @@ export const CN_MASKS: BuiltinMask[] = [
       },
     ],
     modelConfig: {
-      model: "gpt-3.5-turbo-1106",
+      model: "gpt-3.5-turbo-0125",
       temperature: 1,
       max_tokens: 2000,
       presence_penalty: 0,
@@ -188,7 +188,7 @@ export const CN_MASKS: BuiltinMask[] = [
       },
     ],
     modelConfig: {
-      model: "gpt-3.5-turbo-1106",
+      model: "gpt-3.5-turbo-0125",
       temperature: 1,
       max_tokens: 2000,
       presence_penalty: 0,
@@ -214,7 +214,7 @@ export const CN_MASKS: BuiltinMask[] = [
       },
     ],
     modelConfig: {
-      model: "gpt-3.5-turbo-1106",
+      model: "gpt-3.5-turbo-0125",
       temperature: 1,
       max_tokens: 2000,
       presence_penalty: 0,
@@ -240,7 +240,7 @@ export const CN_MASKS: BuiltinMask[] = [
       },
     ],
     modelConfig: {
-      model: "gpt-3.5-turbo-1106",
+      model: "gpt-3.5-turbo-0125",
       temperature: 1,
       max_tokens: 2000,
       presence_penalty: 0,
@@ -272,7 +272,7 @@ export const CN_MASKS: BuiltinMask[] = [
       },
     ],
     modelConfig: {
-      model: "gpt-3.5-turbo-1106",
+      model: "gpt-3.5-turbo-0125",
       temperature: 0.5,
       max_tokens: 2000,
       presence_penalty: 0,
@@ -298,7 +298,7 @@ export const CN_MASKS: BuiltinMask[] = [
       },
     ],
     modelConfig: {
-      model: "gpt-3.5-turbo-1106",
+      model: "gpt-3.5-turbo-0125",
       temperature: 1,
       max_tokens: 2000,
       presence_penalty: 0,
@@ -331,7 +331,7 @@ export const CN_MASKS: BuiltinMask[] = [
       },
     ],
     modelConfig: {
-      model: "gpt-3.5-turbo-1106",
+      model: "gpt-3.5-turbo-0125",
       temperature: 1,
       max_tokens: 2000,
       presence_penalty: 0,
@@ -364,7 +364,7 @@ export const CN_MASKS: BuiltinMask[] = [
       },
     ],
     modelConfig: {
-      model: "gpt-3.5-turbo-1106",
+      model: "gpt-3.5-turbo-0125",
       temperature: 1,
       max_tokens: 2000,
       presence_penalty: 0,
@@ -422,7 +422,7 @@ export const CN_MASKS: BuiltinMask[] = [
       },
     ],
     modelConfig: {
-      model: "gpt-3.5-turbo-1106",
+      model: "gpt-3.5-turbo-0125",
      temperature: 1,
       max_tokens: 2000,
       presence_penalty: 0,
@@ -86,7 +86,7 @@ export const EN_MASKS: BuiltinMask[] = [
       },
     ],
     modelConfig: {
-      model: "gpt-3.5-turbo-1106",
+      model: "gpt-3.5-turbo-0125",
       temperature: 0.5,
       max_tokens: 2000,
       presence_penalty: 0,
@@ -30,6 +30,7 @@ const DEFAULT_ACCESS_STATE = {
   azureUrl: "",
   azureApiKey: "",
   azureApiVersion: "2023-05-15",
+  azureVoiceKey: "",
 
   // google ai studio
   googleUrl: "",
@@ -51,7 +51,7 @@ export const DEFAULT_CONFIG = {
   dontUseModel: DISABLE_MODELS,
 
   modelConfig: {
-    model: "gpt-3.5-turbo-1106" as ModelType,
+    model: "gpt-3.5-turbo-0125" as ModelType,
     temperature: 0.8,
     top_p: 1,
     max_tokens: 2000,
@@ -137,7 +137,7 @@ export const useAppConfig = createPersistStore(
   }),
   {
     name: StoreKey.Config,
-    version: 3.8991,
+    version: 3.8992,
     migrate(persistedState, version) {
       const state = persistedState as ChatConfig;
 
@@ -168,7 +168,7 @@ export const useAppConfig = createPersistStore(
       if (version < 3.8) {
         state.lastUpdate = Date.now();
       }
-      if (version < 3.8991) {
+      if (version < 3.8992) {
         state.lastUpdate = Date.now();
         return { ...DEFAULT_CONFIG };
       }
 
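The version bump above pairs with the migrate hook: any persisted config saved under an older version is discarded and rebuilt from defaults, which now point at gpt-3.5-turbo-0125. A sketch of that semantics (zustand-persist style, with DEFAULT_CONFIG_SKETCH standing in for the real DEFAULT_CONFIG):

const DEFAULT_CONFIG_SKETCH = { model: "gpt-3.5-turbo-0125" };

function migrate(persistedState: unknown, version: number) {
  if (version < 3.8992) {
    // Saved state predates the model switch: reset to the new defaults.
    return { ...DEFAULT_CONFIG_SKETCH };
  }
  return persistedState;
}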