From 8620df325d86103f2f9331ba2a2d5a9aa27e49c1 Mon Sep 17 00:00:00 2001
From: sijinhui
Date: Thu, 28 Mar 2024 00:48:40 +0800
Subject: [PATCH] Add voice input
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 app/app/(admin)/admin/t/page.tsx |  16 ---
 app/components/chat.module.scss  |  10 +-
 app/components/chat.tsx          |  36 ++++-
 app/components/voice-input.tsx   | 222 ++++++++++++++++++++++---------
 app/config/server.ts             |   2 +
 app/constant.ts                  |   6 +-
 app/masks/cn.ts                  |  26 ++--
 app/masks/en.ts                  |   2 +-
 app/store/access.ts              |   1 +
 app/store/config.ts              |   6 +-
 10 files changed, 217 insertions(+), 110 deletions(-)
 delete mode 100644 app/app/(admin)/admin/t/page.tsx

diff --git a/app/app/(admin)/admin/t/page.tsx b/app/app/(admin)/admin/t/page.tsx
deleted file mode 100644
index 1efac7b9f..000000000
--- a/app/app/(admin)/admin/t/page.tsx
+++ /dev/null
@@ -1,16 +0,0 @@
-import { Flex } from "antd";
-import VoiceInput from "@/app/components/voice-input";
-
-export default async function UsersPage() {
-  // const users: User[] = await getData();
-
-  // console.log("data", data);
-
-  return (
-    <>
-      <Flex>
-        <VoiceInput />
-      </Flex>
-    </>
-  );
-}
diff --git a/app/components/chat.module.scss b/app/components/chat.module.scss
index 500748a9a..2707903be 100644
--- a/app/components/chat.module.scss
+++ b/app/components/chat.module.scss
@@ -643,7 +643,7 @@
   background-color: var(--white);
   color: var(--black);
   font-family: inherit;
-  padding: 10px 90px 10px 14px;
+  padding: 10px 120px 10px 14px;
   resize: none;
   outline: none;
   box-sizing: border-box;
@@ -661,6 +661,14 @@
   bottom: 32px;
 }
 
+.chat-input-send-area {
+  color: white;
+
+  position: absolute;
+  right: 100px;
+  bottom: 32px;
+}
+
 @media only screen and (max-width: 600px) {
   .chat-input {
     font-size: 16px;
diff --git a/app/components/chat.tsx b/app/components/chat.tsx
index ea32eceff..400558077 100644
--- a/app/components/chat.tsx
+++ b/app/components/chat.tsx
@@ -98,11 +98,14 @@ import { ChatCommandPrefix, useChatCommand, useCommand } from "../command";
 import { prettyObject } from "../utils/format";
 import { ExportMessageModal } from "./exporter";
 import { getClientConfig } from "../config/client";
-import { Button } from "emoji-picker-react/src/components/atoms/Button";
 import Image from "next/image";
 import { useAllModels } from "../utils/hooks";
 import { MultimodalContent } from "../client/api";
 import { getTokenLength } from "@/lib/utils";
+import VoiceInput from "@/app/components/voice-input";
+
+// const VoiceInput = dynamic(
+//   () => import('@/app/components/voice-input'), { ssr: false });
 
 const Markdown = dynamic(async () => (await import("./markdown")).Markdown, {
   loading: () => <LoadingIcon />,
@@ -1251,6 +1254,21 @@ function _Chat() {
     setAttachImages(images);
   }
 
+  // const [ voiceInputText, setVoiceInputText ] = useState("");
+  // const [ voiceInputLoading, setVoiceInputLoading ] = useState(false);
+
+  // useEffect(() => {
+  //   if (voiceInputLoading) {
+  //     // While voice input is running, the input box should show the existing text plus the recognized speech.
+  //     setUserInput(userInput + voiceInputText);
+  //   } else {
+  //     // When voice input ends, the extra characters should be cleaned up.
+  //     console.log('end', userInput, voiceInputText)
+  //   }
+  //
+  //   // eslint-disable-next-line react-hooks/exhaustive-deps
+  // }, [voiceInputLoading, voiceInputText]);
+
   return (
     <div
@@ -1688,12 +1706,16 @@ function _Chat() {
           })}
         </div>
       )}
-        <IconButton
-          icon={<SendWhiteIcon />}
-          text={Locale.Chat.Send}
-          type="primary"
-          onClick={() => doSubmit(userInput)}
-        />
+        <div className={styles["chat-input-send-area"]}>
+          <VoiceInput userInput={userInput} setUserInput={setUserInput} />
+        </div>
+        <IconButton
+          icon={<SendWhiteIcon />}
+          text={Locale.Chat.Send}
+          type="primary"
+          onClick={() => doSubmit(userInput)}
+        />
diff --git a/app/components/voice-input.tsx b/app/components/voice-input.tsx
index ab82dfdcf..11f05908e 100644
--- a/app/components/voice-input.tsx
+++ b/app/components/voice-input.tsx
@@ -1,82 +1,172 @@
-"use client";
+// "use client";
 import { Button, Input, Space } from "antd";
-import { useEffect, useMemo, useRef, useState } from "react";
+import {
+  Dispatch,
+  SetStateAction,
+  useEffect,
+  useMemo,
+  useRef,
+  useState,
+} from "react";
 import { AudioOutlined, LoadingOutlined } from "@ant-design/icons";
+import * as ms_audio_sdk from "microsoft-cognitiveservices-speech-sdk";
+import {
+  Recognizer,
+  SpeechRecognitionCanceledEventArgs,
+  SpeechRecognitionEventArgs,
+  SpeechRecognitionResult,
+} from "microsoft-cognitiveservices-speech-sdk/distrib/lib/src/sdk/Exports";
+import { useAccessStore } from "@/app/store";
 
-export default function VoiceInput() {
-  const [userInput, setUserInput] = useState("");
-  const [loading, setLoading] = useState(false);
-  const recognition = useRef(null);
+interface VoiceInputInterface {
+  userInput: string;
+  setUserInput: Dispatch<SetStateAction<string>>;
+}
 
-  const lastLength = useRef(0);
+// @ts-ignore
+export default function VoiceInput({
+  userInput,
+  setUserInput,
+}: VoiceInputInterface) {
+  const [voiceInputText, setVoiceInputText] = useState("");
+  const [voiceInputLoading, setVoiceInputLoading] = useState(false);
+  // const recognition = useRef(null);
+  const recognizer = useRef<ms_audio_sdk.SpeechRecognizer>();
+  const [tempUserInput, setTempUserInput] = useState("");
+  const accessStore = useAccessStore();
+  // const lastLength = useRef(0);
 
-  useEffect(() => {
-    if ("webkitSpeechRecognition" in window) {
-      if (recognition.current === null) {
-        recognition.current = new window.webkitSpeechRecognition();
-      }
-    } else {
-      console.error("此浏览器不支持webkitSpeechRecognition。");
-      return;
-    }
-    if (!recognition.current) return;
-    // 设置语言
-    recognition.current.lang = "zh";
-    // 开启连续识别
-    recognition.current.continuous = true;
-    // 开启实时识别
-    recognition.current.interimResults = true;
+  // useEffect(() => {
+  //
+  //   function onresult(event: any) {
+  //     // This event returns all earlier recognition results as well, so take only the last one.
+  //     const length = event.results.length;
+  //     // The event also fires when there is no new result, so skip taking the last result in that case.
+  //     if (lastLength.current === length) {
+  //       return;
+  //     }
+  //
+  //     lastLength.current = length;
+  //
+  //     console.log(event.results);
+  //
+  //     // Get the last recognition result.
+  //     const transcript = event.results[length - 1]?.[0]?.transcript;
+  //
+  //     // Append the last recognition result to the text.
+  //     if (transcript) {
+  //       setVoiceInputText((voiceInputText) => voiceInputText + transcript);
+  //     }
+  //   }
+  //
+  // }, []);
 
-    function onresult(event: any) {
-      // 这个事件会把前面识别的结果都返回回来,所以需要取最后一个识别结果
-      const length = event.results.length;
-      // 没有新的识别结果的时候,事件也会触发,所以这里判断一下如果没有新的识别结果,就不取最后一个识别结果了。
-      if (lastLength.current === length) {
-        return;
-      }
+  function onRecognizedResult(result: SpeechRecognitionResult) {
+    // setVoiceInputText("");
+    setVoiceInputText(`${result.text}`);
 
-      lastLength.current = length;
-
-      console.log(event.results);
-
-      // 获取最后一个识别结果
-      const transcript = event.results[length - 1]?.[0]?.transcript;
-
-      // 将最后一个识别结果添加到文本
-      if (transcript) {
-        setUserInput((userInput) => userInput + transcript);
-      }
+    let intentJson = result.properties.getProperty(
+      ms_audio_sdk.PropertyId.LanguageUnderstandingServiceResponse_JsonResult,
+    );
+    if (intentJson) {
+      setVoiceInputText(voiceInputText + `${intentJson}`);
     }
-    // 监听语音识别结果
-    recognition.current.addEventListener("result", onresult);
+    // setTempUserInput("");
+    console.log("3333", tempUserInput, "2", voiceInputText);
 
-    return () => {
-      if (recognition.current) {
-        recognition.current.removeEventListener("result", onresult);
-      }
-    };
-  }, []);
+    // if (result?.translations) {
+    //   let resultJson = JSON.parse(result.json);
+    //   resultJson['privTranslationPhrase']['Translation']['Translations'].forEach(
+    //     function (translation: { Language: any; Text: any; }) {
+    //       setVoiceInputText(voiceInputText + ` [${translation.Language}] ${translation.Text}\r\n`);
+    //     });
+    // }
+  }
+  function onCanceled(
+    sender: Recognizer,
+    event: SpeechRecognitionCanceledEventArgs,
+  ) {
+    console.log(event);
 
-  function click() {
-    if (loading) {
-      recognition.current.stop();
-      setLoading(false);
-      return;
-    }
-    setLoading(true);
+    // Show the cancellation event.
+    // statusDiv.innerHTML += "(cancel) Reason: " + ms_audio_sdk.CancellationReason[event.reason];
+    // if (event.reason === ms_audio_sdk.CancellationReason.Error) {
+    //   statusDiv.innerHTML += ": " + event.errorDetails;
+    // }
+    // statusDiv.innerHTML += "\r\n";
+  }
+  function onRecognizing(
+    sender: Recognizer,
+    event: SpeechRecognitionEventArgs,
+  ) {
+    let result = event.result;
+    setUserInput(
+      tempUserInput +
+        voiceInputText.replace(/(.*)(^|[\r\n]+).*\[\.\.\.][\r\n]+/, "$1$2") +
+        `${result.text} [...]`,
+    );
 
-    lastLength.current = 0;
-    recognition.current.start();
+    setVoiceInputText(
+      voiceInputText.replace(/(.*)(^|[\r\n]+).*\[\.\.\.][\r\n]+/, "$1$2") +
+        `${result.text} [...]`,
+    );
   }
 
+  const startRecognition = () => {
+    if (voiceInputLoading) {
+      recognizer.current?.close();
+      setVoiceInputLoading(false);
+      // setVoiceInputText("");
+      // setUserInput(tempUserInput);
+      return;
+    }
+
+    setVoiceInputLoading(true);
+    setTempUserInput(userInput); // Copy the current input at start so it can be restored.
+    setVoiceInputText("");
+
+    const speechConfig = ms_audio_sdk.SpeechConfig.fromSubscription(
+      accessStore.azureVoiceKey,
+      "eastasia",
+    );
+    const audioConfig = ms_audio_sdk.AudioConfig.fromDefaultMicrophoneInput();
+    speechConfig.speechRecognitionLanguage = "zh-CN";
+    speechConfig.setProperty(
+      ms_audio_sdk.PropertyId.SpeechServiceConnection_EndSilenceTimeoutMs,
+      "2500",
+    );
+    recognizer.current = new ms_audio_sdk.SpeechRecognizer(
+      speechConfig,
+      audioConfig,
+    );
+    recognizer.current.recognizing = onRecognizing; // Custom handler for partial (recognizing) results.
+    recognizer.current.canceled = onCanceled; // Custom cancellation handler.
+    recognizer.current.recognizeOnceAsync(
+      (result) => {
+        // onRecognizedResult(result);
+        setVoiceInputText(`${result.text}`);
+        console.log("3333", tempUserInput, "2", voiceInputText);
+        setUserInput(tempUserInput + voiceInputText + `${result.text}`);
+        // setVoiceInputText(result.text);
+        console.log("result", result.text);
+        setVoiceInputLoading(false);
+        // recognizer.close();
+      },
+      (err) => {
+        console.error("Recognition error: ", err); // Error handling.
+        setVoiceInputLoading(false);
+      },
+    );
+  };
+
   const icon = useMemo(() => {
-    if (loading) {
+    if (voiceInputLoading) {
       return (
         <LoadingOutlined />
       );
@@ -85,17 +175,17 @@ export default function VoiceInput() {
       <AudioOutlined />
     );
-  }, [loading]);
+  }, [voiceInputLoading]);
 
   return (
-    <Space>
-      <Button onClick={click} icon={icon} />
-    </Space>
+    <Button onClick={startRecognition} icon={icon} />
   );
 }
diff --git a/app/config/server.ts b/app/config/server.ts
index eea8a18d6..72998cc17 100644
--- a/app/config/server.ts
+++ b/app/config/server.ts
@@ -27,6 +27,7 @@ declare global {
       AZURE_URL?: string; // https://{azure-url}/openai/deployments/{deploy-name}
       AZURE_API_KEY?: string;
       AZURE_API_VERSION?: string;
+      AZURE_VOICE_KEY?: string;
 
       // google only
       GOOGLE_API_KEY?: string;
@@ -93,6 +94,7 @@ export const getServerSideConfig = () => {
     azureUrl: process.env.AZURE_URL ?? "",
     azureApiKey: process.env.AZURE_API_KEY ?? "",
     azureApiVersion: process.env.AZURE_API_VERSION ?? "",
+    azureVoiceKey: process.env.AZURE_VOICE_KEY ?? "",
 
     isGoogle,
     googleApiKey: process.env.GOOGLE_API_KEY,
diff --git a/app/constant.ts b/app/constant.ts
index aa0b6be7e..dce78c587 100644
--- a/app/constant.ts
+++ b/app/constant.ts
@@ -107,7 +107,7 @@ Latex inline: $x^2$
 Latex block: $$e=mc^2$$
 `;
 
-export const SUMMARIZE_MODEL = "gpt-3.5-turbo-1106";
+export const SUMMARIZE_MODEL = "gpt-3.5-turbo-0125";
 export const GEMINI_SUMMARIZE_MODEL = "gemini-pro";
 
 export const KnowledgeCutOffDate: Record<string, string> = {
@@ -132,8 +132,8 @@ export const DEFAULT_MODELS = [
   //   available: false,
   // },
   {
-    name: "gpt-3.5-turbo-1106",
-    describe: "GPT-3,最快,笨,最便宜",
+    name: "gpt-3.5-turbo-0125",
+    describe: "GPT-3,最快,效果一般,最便宜",
     available: true,
     provider: {
       id: "openai",
diff --git a/app/masks/cn.ts b/app/masks/cn.ts
index efeecf802..c040ff4ec 100644
--- a/app/masks/cn.ts
+++ b/app/masks/cn.ts
@@ -58,7 +58,7 @@ export const CN_MASKS: BuiltinMask[] = [
       },
     ],
     modelConfig: {
-      model: "gpt-3.5-turbo-1106",
+      model: "gpt-3.5-turbo-0125",
       temperature: 1,
       max_tokens: 2000,
       presence_penalty: 0,
@@ -84,7 +84,7 @@ export const CN_MASKS: BuiltinMask[] = [
       },
     ],
     modelConfig: {
-      model: "gpt-3.5-turbo-1106",
+      model: "gpt-3.5-turbo-0125",
       temperature: 1,
       max_tokens: 2000,
       presence_penalty: 0,
@@ -110,7 +110,7 @@ export const CN_MASKS: BuiltinMask[] = [
      },
     ],
     modelConfig: {
-      model: "gpt-3.5-turbo-1106",
+      model: "gpt-3.5-turbo-0125",
       temperature: 1,
       max_tokens: 2000,
       presence_penalty: 0,
@@ -136,7 +136,7 @@ export const CN_MASKS: BuiltinMask[] = [
       },
     ],
     modelConfig: {
-      model: "gpt-3.5-turbo-1106",
+      model: "gpt-3.5-turbo-0125",
       temperature: 1,
       max_tokens: 2000,
       presence_penalty: 0,
@@ -162,7 +162,7 @@ export const CN_MASKS: BuiltinMask[] = [
       },
     ],
     modelConfig: {
-      model: "gpt-3.5-turbo-1106",
+      model: "gpt-3.5-turbo-0125",
       temperature: 1,
       max_tokens: 2000,
       presence_penalty: 0,
@@ -188,7 +188,7 @@ export const CN_MASKS: BuiltinMask[] = [
       },
     ],
     modelConfig: {
-      model: "gpt-3.5-turbo-1106",
+      model: "gpt-3.5-turbo-0125",
       temperature: 1,
       max_tokens: 2000,
       presence_penalty: 0,
@@ -214,7 +214,7 @@ export const CN_MASKS: BuiltinMask[] = [
       },
     ],
     modelConfig: {
-      model: "gpt-3.5-turbo-1106",
+      model: "gpt-3.5-turbo-0125",
       temperature: 1,
       max_tokens: 2000,
       presence_penalty: 0,
@@ -240,7 +240,7 @@ export const CN_MASKS: BuiltinMask[] = [
       },
     ],
     modelConfig: {
-      model: "gpt-3.5-turbo-1106",
+      model: "gpt-3.5-turbo-0125",
       temperature: 1,
       max_tokens: 2000,
       presence_penalty: 0,
@@ -272,7 +272,7 @@ export const CN_MASKS: BuiltinMask[] = [
       },
     ],
     modelConfig: {
-      model: "gpt-3.5-turbo-1106",
+      model: "gpt-3.5-turbo-0125",
       temperature: 0.5,
       max_tokens: 2000,
       presence_penalty: 0,
@@ -298,7 +298,7 @@ export const CN_MASKS: BuiltinMask[] = [
       },
     ],
     modelConfig: {
-      model: "gpt-3.5-turbo-1106",
+      model: "gpt-3.5-turbo-0125",
       temperature: 1,
       max_tokens: 2000,
       presence_penalty: 0,
@@ -331,7 +331,7 @@ export const CN_MASKS: BuiltinMask[] = [
       },
     ],
     modelConfig: {
-      model: "gpt-3.5-turbo-1106",
+      model: "gpt-3.5-turbo-0125",
model: "gpt-3.5-turbo-0125", temperature: 1, max_tokens: 2000, presence_penalty: 0, @@ -364,7 +364,7 @@ export const CN_MASKS: BuiltinMask[] = [ }, ], modelConfig: { - model: "gpt-3.5-turbo-1106", + model: "gpt-3.5-turbo-0125", temperature: 1, max_tokens: 2000, presence_penalty: 0, @@ -422,7 +422,7 @@ export const CN_MASKS: BuiltinMask[] = [ }, ], modelConfig: { - model: "gpt-3.5-turbo-1106", + model: "gpt-3.5-turbo-0125", temperature: 1, max_tokens: 2000, presence_penalty: 0, diff --git a/app/masks/en.ts b/app/masks/en.ts index 0a3939de5..d315a5b6a 100644 --- a/app/masks/en.ts +++ b/app/masks/en.ts @@ -86,7 +86,7 @@ export const EN_MASKS: BuiltinMask[] = [ }, ], modelConfig: { - model: "gpt-3.5-turbo-1106", + model: "gpt-3.5-turbo-0125", temperature: 0.5, max_tokens: 2000, presence_penalty: 0, diff --git a/app/store/access.ts b/app/store/access.ts index dd8d45b4a..3e38f695e 100644 --- a/app/store/access.ts +++ b/app/store/access.ts @@ -30,6 +30,7 @@ const DEFAULT_ACCESS_STATE = { azureUrl: "", azureApiKey: "", azureApiVersion: "2023-05-15", + azureVoiceKey: "", // google ai studio googleUrl: "", diff --git a/app/store/config.ts b/app/store/config.ts index 4f32c590d..497f0dd0a 100644 --- a/app/store/config.ts +++ b/app/store/config.ts @@ -51,7 +51,7 @@ export const DEFAULT_CONFIG = { dontUseModel: DISABLE_MODELS, modelConfig: { - model: "gpt-3.5-turbo-1106" as ModelType, + model: "gpt-3.5-turbo-0125" as ModelType, temperature: 0.8, top_p: 1, max_tokens: 2000, @@ -137,7 +137,7 @@ export const useAppConfig = createPersistStore( }), { name: StoreKey.Config, - version: 3.8991, + version: 3.8992, migrate(persistedState, version) { const state = persistedState as ChatConfig; @@ -168,7 +168,7 @@ export const useAppConfig = createPersistStore( if (version < 3.8) { state.lastUpdate = Date.now(); } - if (version < 3.8991) { + if (version < 3.8992) { state.lastUpdate = Date.now(); return { ...DEFAULT_CONFIG }; }