merge

2025-10-01 15:46:39 +08:00 · 2024-09-19 00:07:37 +08:00 · 2024-09-19 00:07:37 +08:00 · 5d6d676b87
commit 5d6d676b87
parent bd172064b5 a8c70d84a9
52 changed files with 3763 additions and 2647 deletions
--- a/.eslintrc.json
+++ b/.eslintrc.json
@ -1,4 +1,7 @@
 {
  "extends": "next/core-web-vitals",
-  "plugins": ["prettier"]
+  "plugins": ["prettier", "unused-imports"],
+  "rules": {
+    "unused-imports/no-unused-imports": "warn"
+  }
 }
--- a/app/api/[provider]/[...path]/route.ts
+++ b/app/api/[provider]/[...path]/route.ts
@ -1,5 +1,5 @@
 import { ApiPath } from "@/app/constant";
-import { NextRequest, NextResponse } from "next/server";
+import { NextRequest } from "next/server";
 import { handle as openaiHandler } from "../../openai";
 import { handle as azureHandler } from "../../azure";
 import { handle as googleHandler } from "../../google";
--- a/app/api/alibaba.ts
+++ b/app/api/alibaba.ts
@ -1,6 +1,5 @@
 import { getServerSideConfig } from "@/app/config/server";
 import {
-  Alibaba,
  ALIBABA_BASE_URL,
  ApiPath,
  ModelProvider,
@ -10,7 +9,6 @@ import { prettyObject } from "@/app/utils/format";
 import { NextRequest, NextResponse } from "next/server";
 import { auth } from "@/app/api/auth";
 import { isModelAvailableInServer } from "@/app/utils/model";
-import type { RequestPayload } from "@/app/client/platforms/openai";

 const serverConfig = getServerSideConfig();

--- a/app/api/anthropic.ts
+++ b/app/api/anthropic.ts
@ -3,7 +3,6 @@ import {
  ANTHROPIC_BASE_URL,
  Anthropic,
  ApiPath,
-  DEFAULT_MODELS,
  ServiceProvider,
  ModelProvider,
 } from "@/app/constant";
@ -98,6 +97,7 @@ async function request(req: NextRequest) {
    headers: {
      "Content-Type": "application/json",
      "Cache-Control": "no-store",
+      "anthropic-dangerous-direct-browser-access": "true",
      [authHeaderName]: authValue,
      "anthropic-version":
        req.headers.get("anthropic-version") ||
--- a/app/api/azure.ts
+++ b/app/api/azure.ts
@ -1,4 +1,3 @@
-import { getServerSideConfig } from "@/app/config/server";
 import { ModelProvider } from "@/app/constant";
 import { prettyObject } from "@/app/utils/format";
 import { NextRequest, NextResponse } from "next/server";
--- a/app/api/baidu.ts
+++ b/app/api/baidu.ts
@ -3,7 +3,6 @@ import {
  BAIDU_BASE_URL,
  ApiPath,
  ModelProvider,
-  BAIDU_OATUH_URL,
  ServiceProvider,
 } from "@/app/constant";
 import { prettyObject } from "@/app/utils/format";
--- a/app/api/common.ts
+++ b/app/api/common.ts
@ -1,11 +1,6 @@
 import { NextRequest, NextResponse } from "next/server";
 import { getServerSideConfig } from "../config/server";
-import {
-  DEFAULT_MODELS,
-  OPENAI_BASE_URL,
-  GEMINI_BASE_URL,
-  ServiceProvider,
-} from "../constant";
+import { OPENAI_BASE_URL, ServiceProvider } from "../constant";

 // import { makeAzurePath } from "../azure";
 import { getIP } from "@/app/api/auth";
--- a/app/api/google.ts
+++ b/app/api/google.ts
@ -1,12 +1,7 @@
 import { NextRequest, NextResponse } from "next/server";
 import { auth } from "./auth";
 import { getServerSideConfig } from "@/app/config/server";
-import {
-  ApiPath,
-  GEMINI_BASE_URL,
-  Google,
-  ModelProvider,
-} from "@/app/constant";
+import { ApiPath, GEMINI_BASE_URL, ModelProvider } from "@/app/constant";
 import { prettyObject } from "@/app/utils/format";

 const serverConfig = getServerSideConfig();
--- a/app/api/iflytek.ts
+++ b/app/api/iflytek.ts
@ -1,6 +1,5 @@
 import { getServerSideConfig } from "@/app/config/server";
 import {
-  Iflytek,
  IFLYTEK_BASE_URL,
  ApiPath,
  ModelProvider,
@ -10,7 +9,6 @@ import { prettyObject } from "@/app/utils/format";
 import { NextRequest, NextResponse } from "next/server";
 import { auth } from "@/app/api/auth";
 import { isModelAvailableInServer } from "@/app/utils/model";
-import type { RequestPayload } from "@/app/client/platforms/openai";
 // iflytek

 const serverConfig = getServerSideConfig();
--- a/app/api/moonshot.ts
+++ b/app/api/moonshot.ts
@ -1,6 +1,5 @@
 import { getServerSideConfig } from "@/app/config/server";
 import {
-  Moonshot,
  MOONSHOT_BASE_URL,
  ApiPath,
  ModelProvider,
@ -10,7 +9,6 @@ import { prettyObject } from "@/app/utils/format";
 import { NextRequest, NextResponse } from "next/server";
 import { auth } from "@/app/api/auth";
 import { isModelAvailableInServer } from "@/app/utils/model";
-import type { RequestPayload } from "@/app/client/platforms/openai";

 const serverConfig = getServerSideConfig();

--- a/app/api/tencent/route.ts
+++ b/app/api/tencent/route.ts
@ -1,15 +1,8 @@
 import { getServerSideConfig } from "@/app/config/server";
-import {
-  TENCENT_BASE_URL,
-  ApiPath,
-  ModelProvider,
-  ServiceProvider,
-  Tencent,
-} from "@/app/constant";
+import { TENCENT_BASE_URL, ModelProvider } from "@/app/constant";
 import { prettyObject } from "@/app/utils/format";
 import { NextRequest, NextResponse } from "next/server";
 import { auth } from "@/app/api/auth";
-import { isModelAvailableInServer } from "@/app/utils/model";
 import { getHeader } from "@/app/utils/tencent";

 const serverConfig = getServerSideConfig();
--- a/app/client/api.ts
+++ b/app/client/api.ts
@ -1,7 +1,6 @@
 import { getClientConfig } from "../config/client";
 import {
  ACCESS_CODE_PREFIX,
-  Azure,
  // AZURE_MODELS,
  ModelProvider,
  ServiceProvider,
@ -26,13 +25,8 @@ import { SparkApi } from "./platforms/iflytek";
 export const ROLES = ["system", "user", "assistant"] as const;
 export type MessageRole = (typeof ROLES)[number];

-export const Models = [
-  "gpt-3.5-turbo-16k",
-  "gpt-4-0613",
-  "gpt-4-32k",
-  "midjourney",
-  "emini-pro",
-] as const;
+export const Models = ["gpt-3.5-turbo", "gpt-4", "midjourney"] as const;
+export const TTSModels = ["tts-1", "tts-1-hd"] as const;
 export type ChatModel = ModelType;

 export interface MultimodalContent {
@ -61,6 +55,15 @@ export interface LLMConfig {
  style?: DalleRequestPayload["style"];
 }

+export interface SpeechOptions {
+  model: string; // TTS model name
+  input: string; // text to synthesize
+  voice: string; // voice name
+  response_format?: string; // presumably the audio format; passed through as-is
+  speed?: number; // passed through as-is
+  onController?: (controller: AbortController) => void; // gets the AbortController
+}
+
 export interface ChatOptions {
  messages: RequestMessage[];
  config: LLMConfig;
@ -96,6 +99,7 @@ export interface LLMModelProvider {

 export abstract class LLMApi {
  abstract chat(options: ChatOptions): Promise<void>;
+  abstract speech(options: SpeechOptions): Promise<ArrayBuffer>;
  abstract usage(): Promise<LLMUsage>;
  abstract models(): Promise<LLMModel[]>;
 }
@ -214,13 +218,16 @@ export function validString(x: string): boolean {
  return x?.length > 0;
 }

-export function getHeaders() {
+export function getHeaders(ignoreHeaders: boolean = false) {
  const accessStore = useAccessStore.getState();
  const chatStore = useChatStore.getState();
-  const headers: Record<string, string> = {
-    "Content-Type": "application/json",
-    Accept: "application/json",
-  };
+  let headers: Record<string, string> = {};
+  if (!ignoreHeaders) {
+    headers = {
+      "Content-Type": "application/json",
+      Accept: "application/json",
+    };
+  }

  const clientConfig = getClientConfig();

--- a/app/client/platforms/alibaba.ts
+++ b/app/client/platforms/alibaba.ts
@ -12,6 +12,7 @@ import {
  getHeaders,
  LLMApi,
  LLMModel,
+  SpeechOptions,
  MultimodalContent,
 } from "../api";
 import Locale from "../../locales";
@ -83,6 +84,10 @@ export class QwenApi implements LLMApi {
    return res?.output?.choices?.at(0)?.message?.content ?? "";
  }

+  async speech(options: SpeechOptions): Promise<ArrayBuffer> {
+    throw new Error("Method not implemented."); // TTS unsupported; rejects
+  }
+
  async chat(options: ChatOptions) {
    const messages = options.messages.map((v) => ({
      role: v.role,
--- a/app/client/platforms/anthropic.ts
+++ b/app/client/platforms/anthropic.ts
@ -1,5 +1,5 @@
-import { ACCESS_CODE_PREFIX, Anthropic, ApiPath } from "@/app/constant";
-import { ChatOptions, getHeaders, LLMApi, MultimodalContent } from "../api";
+import { Anthropic, ApiPath } from "@/app/constant";
+import { ChatOptions, getHeaders, LLMApi, SpeechOptions } from "../api";
 import {
  useAccessStore,
  useAppConfig,
@ -9,13 +9,6 @@ import {
 } from "@/app/store";
 import { getClientConfig } from "@/app/config/client";
 import { DEFAULT_API_HOST } from "@/app/constant";
-import {
-  EventStreamContentType,
-  fetchEventSource,
-} from "@fortaine/fetch-event-source";
-
-import Locale from "../../locales";
-import { prettyObject } from "@/app/utils/format";
 import { getMessageTextContent, isVisionModel } from "@/app/utils";
 import { preProcessImageContent, stream } from "@/app/utils/chat";
 import { cloudflareAIGatewayUrl } from "@/app/utils/cloudflare";
@ -80,6 +73,10 @@ const ClaudeMapper = {
 const keys = ["claude-2, claude-instant-1"];

 export class ClaudeApi implements LLMApi {
+  async speech(options: SpeechOptions): Promise<ArrayBuffer> {
+    throw new Error("Method not implemented."); // TTS unsupported; rejects
+  }
+
  extractMessage(res: any) {
    console.log("[Response] claude response: ", res);

--- a/app/client/platforms/baidu.ts
+++ b/app/client/platforms/baidu.ts
@ -14,6 +14,7 @@ import {
  LLMApi,
  LLMModel,
  MultimodalContent,
+  SpeechOptions,
 } from "../api";
 import Locale from "../../locales";
 import {
@ -75,6 +76,10 @@ export class ErnieApi implements LLMApi {
    return [baseUrl, path].join("/");
  }

+  async speech(options: SpeechOptions): Promise<ArrayBuffer> {
+    throw new Error("Method not implemented."); // TTS unsupported; rejects
+  }
+
  async chat(options: ChatOptions) {
    const messages = options.messages.map((v) => ({
      // "error_code": 336006, "error_msg": "the role of message with even index in the messages must be user or function",
--- a/app/client/platforms/bytedance.ts
+++ b/app/client/platforms/bytedance.ts
@ -13,6 +13,7 @@ import {
  LLMApi,
  LLMModel,
  MultimodalContent,
+  SpeechOptions,
 } from "../api";
 import Locale from "../../locales";
 import {
@ -77,6 +78,10 @@ export class DoubaoApi implements LLMApi {
    return res.choices?.at(0)?.message?.content ?? "";
  }

+  async speech(options: SpeechOptions): Promise<ArrayBuffer> {
+    throw new Error("Method not implemented."); // TTS unsupported; rejects
+  }
+
  async chat(options: ChatOptions) {
    const messages = options.messages.map((v) => ({
      role: v.role,
--- a/app/client/platforms/google.ts
+++ b/app/client/platforms/google.ts
@ -1,5 +1,12 @@
 import { ApiPath, Google, REQUEST_TIMEOUT_MS } from "@/app/constant";
-import { ChatOptions, getHeaders, LLMApi, LLMModel, LLMUsage } from "../api";
+import {
+  ChatOptions,
+  getHeaders,
+  LLMApi,
+  LLMModel,
+  LLMUsage,
+  SpeechOptions,
+} from "../api";
 import { useAccessStore, useAppConfig, useChatStore } from "@/app/store";
 import { getClientConfig } from "@/app/config/client";
 import { DEFAULT_API_HOST } from "@/app/constant";
@ -56,6 +63,10 @@ export class GeminiProApi implements LLMApi {
      ""
    );
  }
+  async speech(options: SpeechOptions): Promise<ArrayBuffer> {
+    throw new Error("Method not implemented."); // TTS unsupported; rejects
+  }
+
  async chat(options: ChatOptions): Promise<void> {
    const apiClient = this;
    let multimodal = false;
--- a/app/client/platforms/iflytek.ts
+++ b/app/client/platforms/iflytek.ts
@ -7,7 +7,13 @@ import {
 } from "@/app/constant";
 import { useAccessStore, useAppConfig, useChatStore } from "@/app/store";

-import { ChatOptions, getHeaders, LLMApi, LLMModel } from "../api";
+import {
+  ChatOptions,
+  getHeaders,
+  LLMApi,
+  LLMModel,
+  SpeechOptions,
+} from "../api";
 import Locale from "../../locales";
 import {
  EventStreamContentType,
@ -17,7 +23,7 @@ import { prettyObject } from "@/app/utils/format";
 import { getClientConfig } from "@/app/config/client";
 import { getMessageTextContent } from "@/app/utils";

-import { OpenAIListModelResponse, RequestPayload } from "./openai";
+import { RequestPayload } from "./openai";

 export class SparkApi implements LLMApi {
  private disableListModels = true;
@ -53,6 +59,10 @@ export class SparkApi implements LLMApi {
    return res.choices?.at(0)?.message?.content ?? "";
  }

+  async speech(options: SpeechOptions): Promise<ArrayBuffer> {
+    throw new Error("Method not implemented."); // TTS unsupported; rejects
+  }
+
  async chat(options: ChatOptions) {
    const messages: ChatOptions["messages"] = [];
    for (const v of options.messages) {
--- a/app/client/platforms/moonshot.ts
+++ b/app/client/platforms/moonshot.ts
@ -3,10 +3,8 @@
 import {
  ApiPath,
  DEFAULT_API_HOST,
-  DEFAULT_MODELS,
  Moonshot,
  REQUEST_TIMEOUT_MS,
-  ServiceProvider,
 } from "@/app/constant";
 import {
  useAccessStore,
@ -15,28 +13,17 @@ import {
  ChatMessageTool,
  usePluginStore,
 } from "@/app/store";
-import { collectModelsWithDefaultModel } from "@/app/utils/model";
-import { preProcessImageContent, stream } from "@/app/utils/chat";
-import { cloudflareAIGatewayUrl } from "@/app/utils/cloudflare";
-
+import { stream } from "@/app/utils/chat";
 import {
  ChatOptions,
  getHeaders,
  LLMApi,
  LLMModel,
-  LLMUsage,
-  MultimodalContent,
+  SpeechOptions,
 } from "../api";
-import Locale from "../../locales";
-import {
-  EventStreamContentType,
-  fetchEventSource,
-} from "@fortaine/fetch-event-source";
-import { prettyObject } from "@/app/utils/format";
 import { getClientConfig } from "@/app/config/client";
 import { getMessageTextContent } from "@/app/utils";
-
-import { OpenAIListModelResponse, RequestPayload } from "./openai";
+import { RequestPayload } from "./openai";

 export class MoonshotApi implements LLMApi {
  private disableListModels = true;
@ -72,6 +59,10 @@ export class MoonshotApi implements LLMApi {
    return res.choices?.at(0)?.message?.content ?? "";
  }

+  async speech(options: SpeechOptions): Promise<ArrayBuffer> {
+    throw new Error("Method not implemented."); // TTS unsupported; rejects
+  }
+
  async chat(options: ChatOptions) {
    const messages: ChatOptions["messages"] = [];
    for (const v of options.messages) {
--- a/app/client/platforms/openai.ts
+++ b/app/client/platforms/openai.ts
@ -34,17 +34,12 @@ import {
  LLMModel,
  LLMUsage,
  MultimodalContent,
+  SpeechOptions,
 } from "../api";
 import Locale from "../../locales";
-import {
-  EventStreamContentType,
-  fetchEventSource,
-} from "@fortaine/fetch-event-source";
-import { prettyObject } from "@/app/utils/format";
 import { getClientConfig } from "@/app/config/client";
 import {
  getMessageTextContent,
-  getMessageImages,
  isVisionModel,
  isDalle3 as _isDalle3,
 } from "@/app/utils";
@ -148,6 +143,44 @@ export class ChatGPTApi implements LLMApi {
    return res.choices?.at(0)?.message?.content ?? res;
  }

+  /** POSTs a text-to-speech request; resolves with the response bytes. */
+  async speech(options: SpeechOptions): Promise<ArrayBuffer> {
+    const requestPayload = {
+      model: options.model,
+      input: options.input,
+      voice: options.voice,
+      response_format: options.response_format,
+      speed: options.speed,
+    };
+    console.log("[Request] openai speech payload: ", requestPayload);
+    // Hand the controller to the caller so it can cancel the request.
+    const controller = new AbortController();
+    options.onController?.(controller);
+
+    // Abort if no response arrives within REQUEST_TIMEOUT_MS.
+    const requestTimeoutId = setTimeout(
+      () => controller.abort(),
+      REQUEST_TIMEOUT_MS,
+    );
+    try {
+      const res = await fetch(this.path(OpenaiPath.SpeechPath), {
+        method: "POST",
+        body: JSON.stringify(requestPayload),
+        signal: controller.signal,
+        headers: getHeaders(),
+      });
+      // Stop the timer once the response arrives; the body read is untimed.
+      clearTimeout(requestTimeoutId);
+      return await res.arrayBuffer();
+    } catch (e) {
+      console.log("[Request] failed to make a speech request", e);
+      throw e;
+    } finally {
+      // Also runs when fetch rejects, so the abort timer never lingers.
+      clearTimeout(requestTimeoutId);
+    }
+  }
+
  async chat(options: ChatOptions) {
    const modelConfig = {
      ...useAppConfig.getState().modelConfig,
--- a/app/client/platforms/tencent.ts
+++ b/app/client/platforms/tencent.ts
@ -8,6 +8,7 @@ import {
  LLMApi,
  LLMModel,
  MultimodalContent,
+  SpeechOptions,
 } from "../api";
 import Locale from "../../locales";
 import {
@ -89,6 +90,10 @@ export class HunyuanApi implements LLMApi {
    return res.Choices?.at(0)?.Message?.Content ?? "";
  }

+  async speech(options: SpeechOptions): Promise<ArrayBuffer> {
+    throw new Error("Method not implemented."); // TTS unsupported; rejects
+  }
+
  async chat(options: ChatOptions) {
    const visionModel = isVisionModel(options.config.model);
    const messages = options.messages.map((v, index) => ({
--- a/app/command.ts
+++ b/app/command.ts
@ -38,6 +38,7 @@ interface ChatCommands {
  next?: Command;
  prev?: Command;
  clear?: Command;
+  fork?: Command;
  del?: Command;
 }

--- a/app/components/artifacts.tsx
+++ b/app/components/artifacts.tsx
@ -7,7 +7,6 @@ import {
  useImperativeHandle,
 } from "react";
 import { useParams } from "react-router";
-import { useWindowSize } from "@/app/utils";
 import { IconButton } from "./button";
 import { nanoid } from "nanoid";
 import ExportIcon from "../icons/share.svg";
--- a/app/components/chat-list.tsx
+++ b/app/components/chat-list.tsx
@ -1,5 +1,4 @@
 import DeleteIcon from "../icons/delete.svg";
-import BotIcon from "../icons/bot.svg";

 import styles from "./home.module.scss";
 import {
@ -12,7 +11,7 @@ import {
 import { useChatStore } from "../store";

 import Locale from "../locales";
-import { Link, useLocation, useNavigate } from "react-router-dom";
+import { useLocation, useNavigate } from "react-router-dom";
 import { Path } from "../constant";
 import { MaskAvatar } from "./mask";
 import { Mask } from "../store/mask";
--- a/app/components/chat.tsx
+++ b/app/components/chat.tsx
@ -15,15 +15,14 @@ import RenameIcon from "../icons/rename.svg";
 import ExportIcon from "../icons/share.svg";
 import ReturnIcon from "../icons/return.svg";
 import CopyIcon from "../icons/copy.svg";
+import SpeakIcon from "../icons/speak.svg";
+import SpeakStopIcon from "../icons/speak-stop.svg";
 import LoadingIcon from "../icons/three-dots.svg";
 import LoadingButtonIcon from "../icons/loading.svg";
-import PromptIcon from "../icons/prompt.svg";
-import MaskIcon from "../icons/mask.svg";
 import MaxIcon from "../icons/max.svg";
 import MinIcon from "../icons/min.svg";
 import ResetIcon from "../icons/reload.svg";
 import BreakIcon from "../icons/break.svg";
-import SettingsIcon from "../icons/chat-settings.svg";
 import DeleteIcon from "../icons/clear.svg";
 import PinIcon from "../icons/pin.svg";
 import EditIcon from "../icons/rename.svg";
@ -43,6 +42,7 @@ import QualityIcon from "../icons/hd.svg";
 import StyleIcon from "../icons/palette.svg";
 import PluginIcon from "../icons/plugin.svg";
 import ShortcutkeyIcon from "../icons/shortcutkey.svg";
+import ReloadIcon from "../icons/reload.svg";
 // import UploadIcon from "../icons/upload.svg";

 import {
@ -57,12 +57,10 @@ import {
  DEFAULT_TOPIC,
  ModelType,
  usePluginStore,
-  ChatSession,
 } from "../store";

 import {
  copyToClipboard,
-  downloadAs,
  selectOrCopy,
  autoGrowTextArea,
  useMobileScreen,
@ -99,7 +97,8 @@ import {
 import { useNavigate } from "react-router-dom";
 import {
  CHAT_PAGE_SIZE,
-  LAST_INPUT_KEY,
+  DEFAULT_TTS_ENGINE,
+  ModelProvider,
  Path,
  REQUEST_TIMEOUT_MS,
  UNFINISHED_INPUT,
@ -118,12 +117,16 @@ import { MultimodalContent } from "../client/api";
 // import { getTokenLength } from "@/lib/utils";
 import VoiceInput from "./voice-input";
 import { Progress, Tooltip } from "antd";
-import { white } from "kleur/colors";
 // import GptPrompts from "./gpt-prompts";
 // const VoiceInput = dynamic(
 //     () => import('@/app/components/voice-input'), { ssr: false });

 const localStorage = safeLocalStorage();
+import { ClientApi } from "../client/api";
+import { createTTSPlayer } from "../utils/audio";
+import { MsEdgeTTS, OUTPUT_FORMAT } from "../utils/ms_edge_tts";
+
+const ttsPlayer = createTTSPlayer();

 const Markdown = dynamic(async () => (await import("./markdown")).Markdown, {
  loading: () => <LoadingIcon />,
@ -465,6 +468,7 @@ export function ChatActions(props: {
  hitBottom: boolean;
  uploading: boolean;
  setShowShortcutKeyModal: React.Dispatch<React.SetStateAction<boolean>>;
+  setUserInput: (input: string) => void;
 }) {
  const config = useAppConfig();
  const navigate = useNavigate();
@ -543,8 +547,8 @@ export function ChatActions(props: {

    // if current model is not available
    // switch to first available model
-    const isUnavaliableModel = !models.some((m) => m.name === currentModel);
-    if (isUnavaliableModel && models.length > 0) {
+    const isUnavailableModel = !models.some((m) => m.name === currentModel);
+    if (isUnavailableModel && models.length > 0) {
      // show next model to default model if exist
      let nextModel = models.find((model) => model.isDefault) || models[0];
      chatStore.updateCurrentSession((session) => {
@ -1008,6 +1012,7 @@ function _Chat() {
      chatStore.updateCurrentSession(
        (session) => (session.clearContextIndex = session.messages.length),
      ),
+    fork: () => chatStore.forkSession(),
    del: () => chatStore.deleteSession(chatStore.currentSessionIndex),
  });

@ -1215,10 +1220,55 @@ function _Chat() {
    });
  };

+  const accessStore = useAccessStore();
+  const [speechStatus, setSpeechStatus] = useState(false);
+  const [speechLoading, setSpeechLoading] = useState(false);
+  // Toggle read-aloud: stop current playback, or synthesize and play `text`.
+  async function openaiSpeech(text: string) {
+    if (speechStatus) {
+      ttsPlayer.stop();
+      setSpeechStatus(false);
+      return;
+    }
+    const api = new ClientApi(ModelProvider.GPT);
+    const config = useAppConfig.getState();
+    setSpeechLoading(true);
+    try {
+      ttsPlayer.init();
+      const { markdownToTxt } = require("markdown-to-txt");
+      const textContent = markdownToTxt(text);
+      let audioBuffer: ArrayBuffer;
+      if (config.ttsConfig.engine !== DEFAULT_TTS_ENGINE) {
+        const edgeVoiceName = accessStore.edgeVoiceName();
+        const tts = new MsEdgeTTS();
+        await tts.setMetadata(
+          edgeVoiceName,
+          OUTPUT_FORMAT.AUDIO_24KHZ_96KBITRATE_MONO_MP3,
+        );
+        audioBuffer = await tts.toArrayBuffer(textContent);
+      } else {
+        audioBuffer = await api.llm.speech({
+          model: config.ttsConfig.model,
+          input: textContent,
+          voice: config.ttsConfig.voice,
+          speed: config.ttsConfig.speed,
+        });
+      }
+      setSpeechStatus(true);
+      await ttsPlayer.play(audioBuffer, () => setSpeechStatus(false));
+    } catch (e) {
+      // Synthesis failures land here too, not only playback failures.
+      console.error("[OpenAI Speech]", e);
+      showToast(prettyObject(e));
+      setSpeechStatus(false);
+    } finally {
+      setSpeechLoading(false);
+    }
+  }
+
  const context: RenderMessage[] = useMemo(() => {
    return session.mask.hideContext ? [] : session.mask.context.slice();
  }, [session.mask.context, session.mask.hideContext]);
-  const accessStore = useAccessStore();

  if (
    context.length === 0 &&
@ -1637,6 +1687,17 @@ function _Chat() {
          </div>
        </div>
        <div className="window-actions">
+          <div className="window-action-button">
+            <IconButton
+              icon={<ReloadIcon />}
+              bordered
+              title={Locale.Chat.Actions.RefreshTitle}
+              onClick={() => {
+                showToast(Locale.Chat.Actions.RefreshToast);
+                chatStore.summarizeSession(true);
+              }}
+            />
+          </div>
          {!isMobileScreen && (
            <div className="window-action-button">
              <IconButton
@ -1776,6 +1837,68 @@ function _Chat() {
                        ? Locale.Chat.IsContext
                        : message.date.toLocaleString()}
                    </div>
+
+                    {showActions && (
+                      <div className={styles["chat-message-actions"]}>
+                        <div className={styles["chat-input-actions"]}>
+                          {message.streaming ? (
+                            <ChatAction
+                              text={Locale.Chat.Actions.Stop}
+                              icon={<StopIcon />}
+                              onClick={() => onUserStop(message.id ?? i)}
+                            />
+                          ) : (
+                            <>
+                              <ChatAction
+                                text={Locale.Chat.Actions.Retry}
+                                icon={<ResetIcon />}
+                                onClick={() => onResend(message)}
+                              />
+
+                              <ChatAction
+                                text={Locale.Chat.Actions.Delete}
+                                icon={<DeleteIcon />}
+                                onClick={() => onDelete(message.id ?? i)}
+                              />
+
+                              <ChatAction
+                                text={Locale.Chat.Actions.Pin}
+                                icon={<PinIcon />}
+                                onClick={() => onPinMessage(message)}
+                              />
+                              <ChatAction
+                                text={Locale.Chat.Actions.Copy}
+                                icon={<CopyIcon />}
+                                onClick={() =>
+                                  copyToClipboard(
+                                    getMessageTextContent(message),
+                                  )
+                                }
+                              />
+                              {config.ttsConfig.enable && (
+                                <ChatAction
+                                  text={
+                                    speechStatus
+                                      ? Locale.Chat.Actions.StopSpeech
+                                      : Locale.Chat.Actions.Speech
+                                  }
+                                  icon={
+                                    speechStatus ? (
+                                      <SpeakStopIcon />
+                                    ) : (
+                                      <SpeakIcon />
+                                    )
+                                  }
+                                  onClick={() =>
+                                    openaiSpeech(getMessageTextContent(message))
+                                  }
+                                />
+                              )}
+                            </>
+                          )}
+                        </div>
+                      </div>
+                    )}
                  </div>
                  {message?.tools?.length == 0 && showTyping && (
                    <div className={styles["chat-message-status"]}>
@ -2027,6 +2150,7 @@ function _Chat() {
            onSearch("");
          }}
          setShowShortcutKeyModal={setShowShortcutKeyModal}
+          setUserInput={setUserInput}
        />
        <label
          className={`${styles["chat-input-panel-inner"]} ${
--- a/app/components/exporter.tsx
+++ b/app/components/exporter.tsx
@ -1,5 +1,5 @@
 /* eslint-disable @next/next/no-img-element */
-import { ChatMessage, ModelType, useAppConfig, useChatStore } from "../store";
+import { ChatMessage, useAppConfig, useChatStore } from "../store";
 import Locale from "../locales";
 import styles from "./exporter.module.scss";
 import {
@ -276,7 +276,7 @@ export function RenderExport(props: {
      return {
        id: i.toString(),
        role: role as any,
-        content: role === "user" ? v.textContent ?? "" : v.innerHTML,
+        content: role === "user" ? (v.textContent ?? "") : v.innerHTML,
        date: "",
      };
    });
--- a/app/components/mask.tsx
+++ b/app/components/mask.tsx
@ -37,7 +37,7 @@ import Locale, { AllLangs, ALL_LANG_OPTIONS, Lang } from "../locales";
 import { useNavigate } from "react-router-dom";

 import chatStyle from "./chat.module.scss";
-import { useEffect, useState } from "react";
+import { useState } from "react";
 import {
  copyToClipboard,
  downloadAs,
@ -48,7 +48,6 @@ import { Updater } from "../typing";
 import { ModelConfigList } from "./model-config";
 import { FileName, Path } from "../constant";
 import { BUILTIN_MASK_STORE } from "../masks";
-import { nanoid } from "nanoid";
 import {
  DragDropContext,
  Droppable,
--- a/app/components/plugin.tsx
+++ b/app/components/plugin.tsx
@ -28,7 +28,7 @@ import {
 } from "./ui-lib";
 import Locale from "../locales";
 import { useNavigate } from "react-router-dom";
-import { useEffect, useState } from "react";
+import { useState } from "react";
 import { getClientConfig } from "../config/client";

 export function PluginPage() {
--- a/app/components/settings.tsx
+++ b/app/components/settings.tsx
@ -2,7 +2,6 @@ import { useState, useEffect, useMemo } from "react";

 import styles from "./settings.module.scss";

-import ResetIcon from "../icons/reload.svg";
 import AddIcon from "../icons/add.svg";
 import CloseIcon from "../icons/close.svg";
 import CopyIcon from "../icons/copy.svg";
@ -12,7 +11,6 @@ import EditIcon from "../icons/edit.svg";
 import EyeIcon from "../icons/eye.svg";
 import DownloadIcon from "../icons/download.svg";
 import UploadIcon from "../icons/upload.svg";
-import ConfigIcon from "../icons/config.svg";
 import ConfirmIcon from "../icons/confirm.svg";

 import ConnectionIcon from "../icons/connection.svg";
@ -28,13 +26,11 @@ import {
  Popover,
  Select,
  showConfirm,
-  showToast,
 } from "./ui-lib";
 import { ModelConfigList } from "./model-config";

 import { IconButton } from "./button";
 import {
-  SubmitKey,
  useChatStore,
  Theme,
  useUpdateStore,
@ -49,7 +45,6 @@ import Locale, {
  getLang,
 } from "../locales";
 import { copyToClipboard } from "../utils";
-import Link from "next/link";
 import {
  Anthropic,
  Azure,
@ -65,7 +60,6 @@ import {
  RELEASE_URL,
  STORAGE_KEY,
  ServiceProvider,
-  SlotID,
  UPDATE_URL,
  Stability,
  Iflytek,
@ -80,6 +74,7 @@ import { useSyncStore } from "../store/sync";
 import { nanoid } from "nanoid";
 import { useMaskStore } from "../store/mask";
 import { ProviderType } from "../utils/cloud";
+import { TTSConfigList } from "./tts-config";

 function EditPromptModal(props: { id: string; onClose: () => void }) {
  const promptStore = usePromptStore();
@ -1646,6 +1641,17 @@ export function Settings() {
          <UserPromptModal onClose={() => setShowPromptModal(false)} />
        )}

+        <List>
+          <TTSConfigList
+            ttsConfig={config.ttsConfig}
+            updateConfig={(updater) => {
+              const ttsConfig = { ...config.ttsConfig };
+              updater(ttsConfig);
+              config.update((config) => (config.ttsConfig = ttsConfig));
+            }}
+          />
+        </List>
+
        <DangerItems />
      </div>
    </ErrorBoundary>
--- a/app/components/sidebar.tsx
+++ b/app/components/sidebar.tsx
@ -6,11 +6,9 @@ import { IconButton } from "./button";
 import SettingsIcon from "../icons/settings.svg";
 import ChatGptIcon from "../icons/chatgpt.svg";
 import AddIcon from "../icons/add.svg";
-import CloseIcon from "../icons/close.svg";
-import DeleteIcon from "../icons/clear.svg";
+import DeleteIcon from "../icons/delete.svg";
 // import MaskIcon from "../icons/mask.svg";
 import CoffeeIcon from "../icons/coffee.svg";
-import MaskIcon from "../icons/mask.svg";
 import DragIcon from "../icons/drag.svg";
 import DiscoveryIcon from "../icons/discovery.svg";

@ -26,7 +24,6 @@ import {
  NARROW_SIDEBAR_WIDTH,
  Path,
  PLUGINS,
-  REPO_URL,
  ServiceProvider,
 } from "../constant";

@ -324,21 +321,14 @@ export function SideBar(props: { className?: string }) {
        </div>
        {showPluginSelector && (
          <Selector
-            items={
-              [
-                {
-                  title: "👇 Please select the plugin you need to use",
-                  value: "-",
-                  disable: true,
-                },
-                ...PLUGINS.map((item) => {
-                  return {
-                    title: item.name,
-                    value: item.path,
-                  };
-                }),
-              ] as any
-            }
+            items={[
+              ...PLUGINS.map((item) => {
+                return {
+                  title: item.name,
+                  value: item.path,
+                };
+              }),
+            ]}
            onClose={() => setShowPluginSelector(false)}
            onSelection={(s) => {
              navigate(s[0] as any, { state: { fromHome: true } });
--- a/app/components/tts-config.tsx
+++ b/app/components/tts-config.tsx
@ -0,0 +1,144 @@
+import { TTSConfig, TTSConfigValidator } from "../store";
+
+import Locale from "../locales";
+import { ListItem, Select } from "./ui-lib";
+import {
+  DEFAULT_TTS_ENGINE,
+  DEFAULT_TTS_ENGINES,
+  DEFAULT_TTS_MODELS,
+  DEFAULT_TTS_VOICES,
+} from "../constant";
+import { InputRange } from "./input-range";
+
+/**
+ * Settings rows for text-to-speech: an enable checkbox and an engine select,
+ * plus model, voice and speed controls that are rendered only while the
+ * selected engine equals `DEFAULT_TTS_ENGINE`.
+ *
+ * @param props.ttsConfig - TTS settings currently shown by the controls.
+ * @param props.updateConfig - Receives an updater that assigns the changed
+ *   field on the config object it is given.
+ */
+export function TTSConfigList(props: {
+  ttsConfig: TTSConfig;
+  updateConfig: (updater: (config: TTSConfig) => void) => void;
+}) {
+  return (
+    <>
+      <ListItem
+        title={Locale.Settings.TTS.Enable.Title}
+        subTitle={Locale.Settings.TTS.Enable.SubTitle}
+      >
+        <input
+          type="checkbox"
+          checked={props.ttsConfig.enable}
+          onChange={(e) =>
+            props.updateConfig(
+              (config) => (config.enable = e.currentTarget.checked),
+            )
+          }
+        />
+      </ListItem>
+      {/* <ListItem
+        title={Locale.Settings.TTS.Autoplay.Title}
+        subTitle={Locale.Settings.TTS.Autoplay.SubTitle}
+      >
+        <input
+          type="checkbox"
+          checked={props.ttsConfig.autoplay}
+          onChange={(e) =>
+            props.updateConfig(
+              (config) => (config.autoplay = e.currentTarget.checked),
+            )
+          }
+        ></input>
+      </ListItem> */}
+      <ListItem title={Locale.Settings.TTS.Engine}>
+        <Select
+          value={props.ttsConfig.engine}
+          onChange={(e) => {
+            props.updateConfig(
+              (config) =>
+                (config.engine = TTSConfigValidator.engine(
+                  e.currentTarget.value,
+                )),
+            );
+          }}
+        >
+          {/* Each list holds unique strings, so the value is a stable key. */}
+          {DEFAULT_TTS_ENGINES.map((v) => (
+            <option value={v} key={v}>
+              {v}
+            </option>
+          ))}
+        </Select>
+      </ListItem>
+      {/* Model, voice and speed are shown for the default engine only. */}
+      {props.ttsConfig.engine === DEFAULT_TTS_ENGINE && (
+        <>
+          <ListItem title={Locale.Settings.TTS.Model}>
+            <Select
+              value={props.ttsConfig.model}
+              onChange={(e) => {
+                props.updateConfig(
+                  (config) =>
+                    (config.model = TTSConfigValidator.model(
+                      e.currentTarget.value,
+                    )),
+                );
+              }}
+            >
+              {DEFAULT_TTS_MODELS.map((v) => (
+                <option value={v} key={v}>
+                  {v}
+                </option>
+              ))}
+            </Select>
+          </ListItem>
+          <ListItem
+            title={Locale.Settings.TTS.Voice.Title}
+            subTitle={Locale.Settings.TTS.Voice.SubTitle}
+          >
+            <Select
+              value={props.ttsConfig.voice}
+              onChange={(e) => {
+                props.updateConfig(
+                  (config) =>
+                    (config.voice = TTSConfigValidator.voice(
+                      e.currentTarget.value,
+                    )),
+                );
+              }}
+            >
+              {DEFAULT_TTS_VOICES.map((v) => (
+                <option value={v} key={v}>
+                  {v}
+                </option>
+              ))}
+            </Select>
+          </ListItem>
+          <ListItem
+            title={Locale.Settings.TTS.Speed.Title}
+            subTitle={Locale.Settings.TTS.Speed.SubTitle}
+          >
+            <InputRange
+              aria={Locale.Settings.TTS.Speed.Title}
+              value={props.ttsConfig.speed?.toFixed(1)}
+              min="0.3"
+              max="4.0"
+              step="0.1"
+              onChange={(e) => {
+                props.updateConfig(
+                  (config) =>
+                    (config.speed = TTSConfigValidator.speed(
+                      e.currentTarget.valueAsNumber,
+                    )),
+                );
+              }}
+            />
+          </ListItem>
+        </>
+      )}
+    </>
+  );
+}
--- a/app/components/tts.module.scss
+++ b/app/components/tts.module.scss
@ -0,0 +1,119 @@
+@import "../styles/animation.scss";
+.plugin-page {
+  height: 100%;
+  display: flex;
+  flex-direction: column;
+  // NOTE(review): plugin-* class names in tts.module.scss — confirm intended.
+  .plugin-page-body {
+    padding: 20px;
+    overflow-y: auto;
+
+    .plugin-filter {
+      width: 100%;
+      max-width: 100%;
+      margin-bottom: 20px;
+      animation: slide-in ease 0.3s;
+      height: 40px;
+
+      display: flex;
+
+      .search-bar {
+        flex-grow: 1;
+        max-width: 100%;
+        min-width: 0;
+        outline: none;
+      }
+
+      .search-bar:focus {
+        border: 1px solid var(--primary);
+      }
+
+      .plugin-filter-lang {
+        height: 100%;
+        margin-left: 10px;
+      }
+
+      .plugin-create {
+        height: 100%;
+        margin-left: 10px;
+        box-sizing: border-box;
+        min-width: 80px;
+      }
+    }
+    // Bordered item rows: adjacent rows share a border, end rows are rounded.
+    .plugin-item {
+      display: flex;
+      justify-content: space-between;
+      padding: 20px;
+      border: var(--border-in-light);
+      animation: slide-in ease 0.3s;
+
+      &:not(:last-child) {
+        border-bottom: 0;
+      }
+
+      &:first-child {
+        border-top-left-radius: 10px;
+        border-top-right-radius: 10px;
+      }
+
+      &:last-child {
+        border-bottom-left-radius: 10px;
+        border-bottom-right-radius: 10px;
+      }
+
+      .plugin-header {
+        display: flex;
+        align-items: center;
+
+        .plugin-icon {
+          display: flex;
+          align-items: center;
+          justify-content: center;
+          margin-right: 10px;
+        }
+
+        .plugin-title {
+          .plugin-name {
+            font-size: 14px;
+            font-weight: bold;
+          }
+          .plugin-info {
+            font-size: 12px;
+          }
+          .plugin-runtime-warning {
+            font-size: 12px;
+            color: #f86c6c;
+          }
+        }
+      }
+
+      .plugin-actions {
+        display: flex;
+        flex-wrap: nowrap;
+        transition: all ease 0.3s;
+        justify-content: center;
+        align-items: center;
+      }
+      // At widths up to 600px each item turns into a stacked card with a shadow.
+      @media screen and (max-width: 600px) {
+        display: flex;
+        flex-direction: column;
+        padding-bottom: 10px;
+        border-radius: 10px;
+        margin-bottom: 20px;
+        box-shadow: var(--card-shadow);
+
+        &:not(:last-child) {
+          border-bottom: var(--border-in-light);
+        }
+
+        .plugin-actions {
+          width: 100%;
+          justify-content: space-between;
+          padding-top: 10px;
+        }
+      }
+    }
+  }
+}
--- a/app/constant.ts
+++ b/app/constant.ts
@ -1,5 +1,3 @@
-import path from "path";
-
 export const OWNER = "ChatGPTNextWeb";
 export const REPO = "ChatGPT-Next-Web";
 export const REPO_URL = `https://github.com/${OWNER}/${REPO}`;
@ -153,6 +151,7 @@ export const Anthropic = {

 export const OpenaiPath = {
  ChatPath: "v1/chat/completions",
+  SpeechPath: "v1/audio/speech",
  ImagePath: "v1/images/generations",
  UsagePath: "dashboard/billing/usage",
  SubsPath: "dashboard/billing/subscription",
@ -260,6 +259,20 @@ export const KnowledgeCutOffDate: Record<string, string> = {
  "gemini-pro-vision": "2023-12",
 };

+export const DEFAULT_TTS_ENGINE = "OpenAI-TTS"; // listed in DEFAULT_TTS_ENGINES
+export const DEFAULT_TTS_ENGINES = ["OpenAI-TTS", "Edge-TTS"];
+export const DEFAULT_TTS_MODEL = "tts-1"; // listed in DEFAULT_TTS_MODELS
+export const DEFAULT_TTS_VOICE = "alloy"; // listed in DEFAULT_TTS_VOICES
+export const DEFAULT_TTS_MODELS = ["tts-1", "tts-1-hd"];
+export const DEFAULT_TTS_VOICES = [
+  "alloy",
+  "echo",
+  "fable",
+  "onyx",
+  "nova",
+  "shimmer",
+];
+
 const openaiModels = [
  "gpt-3.5-turbo",
  "gpt-3.5-turbo-1106",
--- a/app/icons/speak-stop.svg
+++ b/app/icons/speak-stop.svg
@ -0,0 +1 @@
+<svg xmlns="http://www.w3.org/2000/svg" fill="none" width="16" height="16" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" class="w-4 h-4"><path stroke-linecap="round" stroke-linejoin="round" d="M17.25 9.75 19.5 12m0 0 2.25 2.25M19.5 12l2.25-2.25M19.5 12l-2.25 2.25m-10.5-6 4.72-4.72a.75.75 0 0 1 1.28.53v15.88a.75.75 0 0 1-1.28.53l-4.72-4.72H4.51c-.88 0-1.704-.507-1.938-1.354A9.009 9.009 0 0 1 2.25 12c0-.83.112-1.633.322-2.396C2.806 8.756 3.63 8.25 4.51 8.25H6.75Z"></path></svg>
--- a/app/icons/speak.svg
+++ b/app/icons/speak.svg
@ -0,0 +1 @@
+<svg xmlns="http://www.w3.org/2000/svg" fill="none" width="16" height="16" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" class="w-4 h-4"><path stroke-linecap="round" stroke-linejoin="round" d="M19.114 5.636a9 9 0 010 12.728M16.463 8.288a5.25 5.25 0 010 7.424M6.75 8.25l4.72-4.72a.75.75 0 011.28.53v15.88a.75.75 0 01-1.28.53l-4.72-4.72H4.51c-.88 0-1.704-.507-1.938-1.354A9.01 9.01 0 012.25 12c0-.83.112-1.633.322-2.396C2.806 8.756 3.63 8.25 4.51 8.25H6.75z"></path></svg>
--- a/app/icons/voice-white.svg
+++ b/app/icons/voice-white.svg
@ -0,0 +1,16 @@
+<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="16" height="16" fill="none" viewBox="0 0 20 20">
+	<defs>
+		<rect id="path_0" width="20" height="20" x="0" y="0" />
+	</defs>
+	<g opacity="1" transform="translate(0 0) rotate(0 8 8)">
+		<mask id="bg-mask-0" fill="#fff">
+			<use xlink:href="#path_0" />
+		</mask>
+		<g mask="url(#bg-mask-0)">
+			<path d="M7 4a3 3 0 016 0v6a3 3 0 11-6 0V4z" fill="#333333">
+			</path>
+			<path d="M5.5 9.643a.75.75 0 00-1.5 0V10c0 3.06 2.29 5.585 5.25 5.954V17.5h-1.5a.75.75 0 000 1.5h4.5a.75.75 0 000-1.5h-1.5v-1.546A6.001 6.001 0 0016 10v-.357a.75.75 0 00-1.5 0V10a4.5 4.5 0 01-9 0v-.357z" fill="#333333">
+			</path>
+		</g>
+	</g>
+</svg>
--- a/app/locales/cn.ts
+++ b/app/locales/cn.ts
@ -1,4 +1,3 @@
-import { ShortcutKeyModal } from "../components/chat";
 import { getClientConfig } from "../config/client";
 import { SubmitKey } from "../store/config";

@ -89,6 +88,10 @@ const cn = {
      Delete: "删除",
      Edit: "编辑",
      FullScreen: "全屏",
+      RefreshTitle: "刷新标题",
+      RefreshToast: "已发送刷新标题请求",
+      Speech: "朗读",
+      StopSpeech: "停止",
    },
    Commands: {
      new: "新建聊天",
@ -96,6 +99,7 @@ const cn = {
      next: "下一个聊天",
      prev: "上一个聊天",
      clear: "清除上下文",
+      fork: "复制聊天",
      del: "删除聊天",
    },
    InputActions: {
@ -122,6 +126,8 @@ const cn = {
      return inputHints + "，: 快捷功能";
    },
    Send: "发送",
+    StartSpeak: "说话",
+    StopSpeak: "停止",
    Config: {
      Reset: "清除记忆",
      SaveAs: "存为面具",
@ -539,6 +545,26 @@ const cn = {
      Title: "频率惩罚度 (frequency_penalty)",
      SubTitle: "值越大，越有可能降低重复字词",
    },
+    TTS: {
+      Enable: {
+        Title: "启用文本转语音",
+        SubTitle: "启用文本生成语音服务",
+      },
+      Autoplay: {
+        Title: "启用自动朗读",
+        SubTitle: "自动生成语音并播放，需先开启文本转语音开关",
+      },
+      Model: "模型",
+      Engine: "转换引擎",
+      Voice: {
+        Title: "声音",
+        SubTitle: "生成语音时使用的声音",
+      },
+      Speed: {
+        Title: "速度",
+        SubTitle: "生成语音的速度",
+      },
+    },
  },
  Store: {
    DefaultTopic: "新的聊天",
--- a/app/locales/en.ts
+++ b/app/locales/en.ts
@ -90,6 +90,10 @@ const en: LocaleType = {
      Delete: "Delete",
      Edit: "Edit",
      FullScreen: "FullScreen",
+      RefreshTitle: "Refresh Title",
+      RefreshToast: "Title refresh request sent",
+      Speech: "Play",
+      StopSpeech: "Stop",
    },
    Commands: {
      new: "Start a new chat",
@ -97,6 +101,7 @@ const en: LocaleType = {
      next: "Next Chat",
      prev: "Previous Chat",
      clear: "Clear Context",
+      fork: "Copy Chat",
      del: "Delete Chat",
    },
    InputActions: {
@ -123,6 +128,8 @@ const en: LocaleType = {
      return inputHints + ", / to search prompts, : to use commands";
    },
    Send: "Send",
+    StartSpeak: "Start Speak",
+    StopSpeak: "Stop Speak",
    Config: {
      Reset: "Reset to Default",
      SaveAs: "Save as Mask",
@ -545,6 +552,27 @@ const en: LocaleType = {
      SubTitle:
        "A larger value decreasing the likelihood to repeat the same line",
    },
+    TTS: {
+      Enable: {
+        Title: "Enable TTS",
+        SubTitle: "Enable text-to-speech service",
+      },
+      Autoplay: {
+        Title: "Enable Autoplay",
+        SubTitle:
+          "Automatically generate speech and play, you need to enable the text-to-speech switch first",
+      },
+      Model: "Model",
+      Voice: {
+        Title: "Voice",
+        SubTitle: "The voice to use when generating the audio",
+      },
+      Speed: {
+        Title: "Speed",
+        SubTitle: "The speed of the generated audio",
+      },
+      Engine: "TTS Engine",
+    },
  },
  Store: {
    DefaultTopic: "New Conversation",
--- a/app/locales/index.ts
+++ b/app/locales/index.ts
@ -136,3 +136,41 @@ export function getISOLang() {
  const lang = getLang();
  return isoLangString[lang] ?? lang;
 }
+
+const DEFAULT_STT_LANG = "zh-CN";
+export const STT_LANG_MAP: Record<Lang, string> = {
+  cn: "zh-CN",
+  en: "en-US",
+  // pt: "pt-BR",
+  // tw: "zh-TW",
+  // jp: "ja-JP",
+  // ko: "ko-KR",
+  // id: "id-ID",
+  // fr: "fr-FR",
+  // es: "es-ES",
+  // it: "it-IT",
+  // tr: "tr-TR",
+  // de: "de-DE",
+  // vi: "vi-VN",
+  // ru: "ru-RU",
+  // cs: "cs-CZ",
+  // no: "no-NO",
+  // ar: "ar-SA",
+  // bn: "bn-BD",
+  // sk: "sk-SK",
+};
+
+/**
+ * Returns the STT locale mapped to the current UI language.
+ *
+ * Falls back to `DEFAULT_STT_LANG` when `STT_LANG_MAP` has no entry for the
+ * language (a missing key reads as `undefined`; it never throws) or when
+ * `getLang()` throws.
+ */
+export function getSTTLang(): string {
+  try {
+    return STT_LANG_MAP[getLang()] ?? DEFAULT_STT_LANG;
+  } catch {
+    return DEFAULT_STT_LANG;
+  }
+}
--- a/app/locales/tw.ts
+++ b/app/locales/tw.ts
@ -1,527 +0,0 @@
-import { getClientConfig } from "../config/client";
-import { SubmitKey } from "../store/config";
-
-const isApp = !!getClientConfig()?.isApp;
-
-const tw = {
-  WIP: "此功能仍在開發中……",
-  Error: {
-    Unauthorized: isApp
-      ? "偵測到無效的 API Key，請前往[設定](/#/settings)頁面檢查 API Key 是否設定正確。"
-      : "存取密碼不正確或尚未填寫，請前往[登入](/#/auth)頁面輸入正確的存取密碼，或者在[設定](/#/settings)頁面填入你自己的 OpenAI API Key。",
-  },
-
-  Auth: {
-    Title: "需要密碼",
-    Tips: "管理員開啟了密碼驗證，請在下方填入存取密碼",
-    SubTips: "或者輸入你的 OpenAI 或 Google API 金鑰",
-    Input: "在此處填寫存取密碼",
-    Confirm: "確認",
-    Later: "稍候再說",
-  },
-  ChatItem: {
-    ChatItemCount: (count: number) => `${count} 則對話`,
-  },
-  Chat: {
-    SubTitle: (count: number) => `您已經與 ChatGPT 進行了 ${count} 則對話`,
-    EditMessage: {
-      Title: "編輯訊息記錄",
-      Topic: {
-        Title: "聊天主題",
-        SubTitle: "更改目前聊天主題",
-      },
-    },
-    Actions: {
-      ChatList: "檢視訊息列表",
-      CompressedHistory: "檢視壓縮後的歷史 Prompt",
-      Export: "匯出聊天紀錄",
-      Copy: "複製",
-      Stop: "停止",
-      Retry: "重試",
-      Pin: "固定",
-      PinToastContent: "已將 1 條對話固定至預設提示詞",
-      PinToastAction: "檢視",
-      Delete: "刪除",
-      Edit: "編輯",
-    },
-    Commands: {
-      new: "新建聊天",
-      newm: "從角色範本新建聊天",
-      next: "下一個聊天",
-      prev: "上一個聊天",
-      clear: "清除上下文",
-      del: "刪除聊天",
-    },
-    InputActions: {
-      Stop: "停止回應",
-      ToBottom: "移至最新",
-      Theme: {
-        auto: "自動主題",
-        light: "亮色模式",
-        dark: "深色模式",
-      },
-      Prompt: "快捷指令",
-      Masks: "所有角色範本",
-      Clear: "清除聊天",
-      Settings: "對話設定",
-      UploadImage: "上傳圖片",
-    },
-    Rename: "重新命名對話",
-    Typing: "正在輸入…",
-    Input: (submitKey: string) => {
-      var inputHints = `輸入訊息後，按下 ${submitKey} 鍵即可傳送`;
-      if (submitKey === String(SubmitKey.Enter)) {
-        inputHints += "，Shift + Enter 鍵換行";
-      }
-      return inputHints;
-    },
-    Send: "傳送",
-    Config: {
-      Reset: "重設",
-      SaveAs: "另存新檔",
-    },
-    IsContext: "預設提示詞",
-    ShortcutKey: {
-      Title: "鍵盤快捷方式",
-      newChat: "打開新聊天",
-      focusInput: "聚焦輸入框",
-      copyLastMessage: "複製最後一個回覆",
-      copyLastCode: "複製最後一個代碼塊",
-      showShortcutKey: "顯示快捷方式",
-    },
-  },
-  Export: {
-    Title: "將聊天記錄匯出為 Markdown",
-    Copy: "複製全部",
-    Download: "下載檔案",
-    Share: "分享到 ShareGPT",
-    MessageFromYou: "來自您的訊息",
-    MessageFromChatGPT: "來自 ChatGPT 的訊息",
-    Format: {
-      Title: "匯出格式",
-      SubTitle: "可以匯出 Markdown 文字檔或者 PNG 圖片",
-    },
-    IncludeContext: {
-      Title: "包含角色範本上下文",
-      SubTitle: "是否在訊息中顯示角色範本上下文",
-    },
-    Steps: {
-      Select: "選取",
-      Preview: "預覽",
-    },
-    Image: {
-      Toast: "正在產生截圖",
-      Modal: "長按或按右鍵儲存圖片",
-    },
-  },
-  Select: {
-    Search: "查詢訊息",
-    All: "選取全部",
-    Latest: "最近幾條",
-    Clear: "清除選取",
-  },
-  Memory: {
-    Title: "上下文記憶 Prompt",
-    EmptyContent: "尚未記憶",
-    Copy: "複製全部",
-    Send: "傳送記憶",
-    Reset: "重設對話",
-    ResetConfirm: "重設後將清除目前對話記錄以及歷史記憶，確認重設？",
-  },
-  Home: {
-    NewChat: "開新對話",
-    DeleteChat: "確定要刪除選取的對話嗎？",
-    DeleteToast: "已刪除對話",
-    Revert: "撤銷",
-  },
-  Settings: {
-    Title: "設定",
-    SubTitle: "設定選項",
-
-    Danger: {
-      Reset: {
-        Title: "重設所有設定",
-        SubTitle: "重設所有設定項回預設值",
-        Action: "立即重設",
-        Confirm: "確認重設所有設定？",
-      },
-      Clear: {
-        Title: "清除所有資料",
-        SubTitle: "清除所有聊天、設定資料",
-        Action: "立即清除",
-        Confirm: "確認清除所有聊天、設定資料？",
-      },
-    },
-    Lang: {
-      Name: "Language", // ATTENTION: if you wanna add a new translation, please do not translate this value, leave it as `Language`
-      All: "所有語言",
-    },
-    Avatar: "大頭貼",
-    FontSize: {
-      Title: "字型大小",
-      SubTitle: "聊天內容的字型大小",
-    },
-    FontFamily: {
-      Title: "聊天字體",
-      SubTitle: "聊天內容的字體，若置空則應用全局默認字體",
-      Placeholder: "字體名稱",
-    },
-    InjectSystemPrompts: {
-      Title: "匯入系統提示",
-      SubTitle: "強制在每個請求的訊息列表開頭新增一個模擬 ChatGPT 的系統提示",
-    },
-    InputTemplate: {
-      Title: "使用者輸入預處理",
-      SubTitle: "使用者最新的一則訊息會填充到此範本",
-    },
-
-    Update: {
-      Version: (x: string) => `目前版本：${x}`,
-      IsLatest: "已是最新版本",
-      CheckUpdate: "檢查更新",
-      IsChecking: "正在檢查更新...",
-      FoundUpdate: (x: string) => `發現新版本：${x}`,
-      GoToUpdate: "前往更新",
-    },
-    SendKey: "傳送鍵",
-    Theme: "主題",
-    TightBorder: "緊湊邊框",
-    SendPreviewBubble: {
-      Title: "預覽氣泡",
-      SubTitle: "在預覽氣泡中預覽 Markdown 內容",
-    },
-    AutoGenerateTitle: {
-      Title: "自動產生標題",
-      SubTitle: "根據對話內容產生合適的標題",
-    },
-    Sync: {
-      CloudState: "雲端資料",
-      NotSyncYet: "還沒有進行過同步",
-      Success: "同步成功",
-      Fail: "同步失敗",
-
-      Config: {
-        Modal: {
-          Title: "設定雲端同步",
-          Check: "檢查可用性",
-        },
-        SyncType: {
-          Title: "同步類型",
-          SubTitle: "選擇偏好的同步伺服器",
-        },
-        Proxy: {
-          Title: "啟用代理伺服器",
-          SubTitle: "在瀏覽器中同步時，啟用代理伺服器以避免跨域限制",
-        },
-        ProxyUrl: {
-          Title: "代理伺服器位置",
-          SubTitle: "僅適用於本專案內建的跨域代理",
-        },
-
-        WebDav: {
-          Endpoint: "WebDAV 位置",
-          UserName: "使用者名稱",
-          Password: "密碼",
-        },
-
-        UpStash: {
-          Endpoint: "UpStash Redis REST Url",
-          UserName: "備份名稱",
-          Password: "UpStash Redis REST Token",
-        },
-      },
-
-      LocalState: "本機資料",
-      Overview: (overview: any) => {
-        return `${overview.chat} 次對話，${overview.message} 則訊息，${overview.prompt} 條提示詞，${overview.mask} 個角色範本`;
-      },
-      ImportFailed: "匯入失敗",
-    },
-    Mask: {
-      Splash: {
-        Title: "角色範本啟動頁面",
-        SubTitle: "新增聊天時，呈現角色範本啟動頁面",
-      },
-      Builtin: {
-        Title: "隱藏內建角色範本",
-        SubTitle: "在所有角色範本列表中隱藏內建角色範本",
-      },
-    },
-    Prompt: {
-      Disable: {
-        Title: "停用提示詞自動補齊",
-        SubTitle: "在輸入框開頭輸入 / 即可觸發自動補齊",
-      },
-      List: "自訂提示詞列表",
-      ListCount: (builtin: number, custom: number) =>
-        `內建 ${builtin} 條，使用者自訂 ${custom} 條`,
-      Edit: "編輯",
-      Modal: {
-        Title: "提示詞列表",
-        Add: "新增一則",
-        Search: "搜尋提示詞",
-      },
-      EditModal: {
-        Title: "編輯提示詞",
-      },
-    },
-    HistoryCount: {
-      Title: "附帶歷史訊息數",
-      SubTitle: "每次請求附帶的歷史訊息數",
-    },
-    CompressThreshold: {
-      Title: "歷史訊息長度壓縮閾值",
-      SubTitle: "當未壓縮的歷史訊息超過該值時，將進行壓縮",
-    },
-
-    Usage: {
-      Title: "帳戶餘額",
-      SubTitle(used: any, total: any) {
-        return `本月已使用 $${used}，訂閱總額 $${total}`;
-      },
-      IsChecking: "正在檢查…",
-      Check: "重新檢查",
-      NoAccess: "輸入 API Key 檢視餘額",
-    },
-
-    Access: {
-      AccessCode: {
-        Title: "存取密碼",
-        SubTitle: "管理員已開啟加密存取",
-        Placeholder: "請輸入存取密碼",
-      },
-      CustomEndpoint: {
-        Title: "自訂 API 端點 (Endpoint)",
-        SubTitle: "是否使用自訂 Azure 或 OpenAI 服務",
-      },
-      Provider: {
-        Title: "模型供應商",
-        SubTitle: "切換不同的服務供應商",
-      },
-      OpenAI: {
-        ApiKey: {
-          Title: "API Key",
-          SubTitle: "使用自訂 OpenAI Key 繞過密碼存取限制",
-          Placeholder: "OpenAI API Key",
-        },
-
-        Endpoint: {
-          Title: "API 端點 (Endpoint) 位址",
-          SubTitle: "除預設位址外，必須包含 http(s)://",
-        },
-      },
-      Azure: {
-        ApiKey: {
-          Title: "API 金鑰",
-          SubTitle: "使用自訂 Azure Key 繞過密碼存取限制",
-          Placeholder: "Azure API Key",
-        },
-
-        Endpoint: {
-          Title: "API 端點 (Endpoint) 位址",
-          SubTitle: "範例：",
-        },
-
-        ApiVerion: {
-          Title: "API 版本 (azure api version)",
-          SubTitle: "指定一個特定的 API 版本",
-        },
-      },
-      Anthropic: {
-        ApiKey: {
-          Title: "API 金鑰",
-          SubTitle: "從 Anthropic AI 取得您的 API 金鑰",
-          Placeholder: "Anthropic API Key",
-        },
-
-        Endpoint: {
-          Title: "端點位址",
-          SubTitle: "範例：",
-        },
-
-        ApiVerion: {
-          Title: "API 版本 (claude api version)",
-          SubTitle: "指定一個特定的 API 版本",
-        },
-      },
-      Google: {
-        ApiKey: {
-          Title: "API 金鑰",
-          SubTitle: "從 Google AI 取得您的 API 金鑰",
-          Placeholder: "輸入您的 Google AI Studio API 金鑰",
-        },
-
-        Endpoint: {
-          Title: "端點位址",
-          SubTitle: "範例：",
-        },
-
-        ApiVersion: {
-          Title: "API 版本（僅適用於 gemini-pro）",
-          SubTitle: "選擇一個特定的 API 版本",
-        },
-      },
-      CustomModel: {
-        Title: "自訂模型名稱",
-        SubTitle: "增加自訂模型可選擇項目，使用英文逗號隔開",
-      },
-    },
-
-    Model: "模型 (model)",
-    CompressModel: {
-      Title: "壓縮模型",
-      SubTitle: "用於壓縮歷史記錄的模型",
-    },
-    Temperature: {
-      Title: "隨機性 (temperature)",
-      SubTitle: "值越大，回應越隨機",
-    },
-    TopP: {
-      Title: "核心採樣 (top_p)",
-      SubTitle: "與隨機性類似，但不要和隨機性一起更改",
-    },
-    MaxTokens: {
-      Title: "單次回應限制 (max_tokens)",
-      SubTitle: "單次互動所用的最大 Token 數",
-    },
-    PresencePenalty: {
-      Title: "話題新穎度 (presence_penalty)",
-      SubTitle: "值越大，越有可能拓展到新話題",
-    },
-    FrequencyPenalty: {
-      Title: "頻率懲罰度 (frequency_penalty)",
-      SubTitle: "值越大，越有可能降低重複字詞",
-    },
-  },
-  Store: {
-    DefaultTopic: "新的對話",
-    BotHello: "請問需要我的協助嗎？",
-    Error: "出錯了，請稍後再嘗試",
-    Prompt: {
-      History: (content: string) =>
-        "這是 AI 與使用者的歷史聊天總結，作為前情提要：" + content,
-      Topic:
-        "Use the language used by the user (e.g. en for english conversation, zh-hant for chinese conversation, etc.) to generate a title (at most 6 words) summarizing our conversation without any lead-in, quotation marks, preamble like 'Title:', direct text copies, single-word replies, quotation marks, translations, or brackets. Remove enclosing quotation marks. The title should make third-party grasp the essence of the conversation in first sight.",
-      Summarize:
-        "Use the language used by the user (e.g. en-us for english conversation, zh-hant for chinese conversation, etc.) to summarise the conversation in at most 200 words. The summary will be used as prompt for you to continue the conversation in the future.",
-    },
-  },
-  Copy: {
-    Success: "已複製到剪貼簿中",
-    Failed: "複製失敗，請賦予剪貼簿權限",
-  },
-  Download: {
-    Success: "內容已下載到您的目錄。",
-    Failed: "下載失敗。",
-  },
-  Context: {
-    Toast: (x: any) => `已設定 ${x} 條前置上下文`,
-    Edit: "前置上下文和歷史記憶",
-    Add: "新增一則",
-    Clear: "上下文已清除",
-    Revert: "恢復上下文",
-  },
-  Plugin: { Name: "外掛" },
-  FineTuned: { Sysmessage: "你是一個助手" },
-  Mask: {
-    Name: "角色範本",
-    Page: {
-      Title: "預設角色角色範本",
-      SubTitle: (count: number) => `${count} 個預設角色定義`,
-      Search: "搜尋角色角色範本",
-      Create: "新增",
-    },
-    Item: {
-      Info: (count: number) => `包含 ${count} 條預設對話`,
-      Chat: "對話",
-      View: "檢視",
-      Edit: "編輯",
-      Delete: "刪除",
-      DeleteConfirm: "確認刪除？",
-    },
-    EditModal: {
-      Title: (readonly: boolean) =>
-        `編輯預設角色範本 ${readonly ? "（唯讀）" : ""}`,
-      Download: "下載預設值",
-      Clone: "以此預設值建立副本",
-    },
-    Config: {
-      Avatar: "角色頭像",
-      Name: "角色名稱",
-      Sync: {
-        Title: "使用全域設定",
-        SubTitle: "目前對話是否使用全域模型設定",
-        Confirm: "目前對話的自訂設定將會被自動覆蓋，確認啟用全域設定？",
-      },
-      HideContext: {
-        Title: "隱藏預設對話",
-        SubTitle: "隱藏後預設對話不會出現在聊天介面",
-      },
-      Share: {
-        Title: "分享此角色範本",
-        SubTitle: "產生此角色範本的直達連結",
-        Action: "複製連結",
-      },
-    },
-  },
-  SearchChat: {
-    Name: "搜索",
-    Page: {
-      Title: "搜索聊天記錄",
-      Search: "輸入搜索關鍵詞",
-      NoResult: "沒有找到結果",
-      NoData: "沒有數據",
-      Loading: "加載中",
-
-      SubTitle: (count: number) => `找到 ${count} 條結果`,
-    },
-    Item: {
-      View: "查看",
-    },
-  },
-  NewChat: {
-    Return: "返回",
-    Skip: "跳過",
-    NotShow: "不再顯示",
-    ConfirmNoShow: "確認停用？停用後可以隨時在設定中重新啟用。",
-    Title: "挑選一個角色範本",
-    SubTitle: "現在開始，與角色範本背後的靈魂思維碰撞",
-    More: "搜尋更多",
-  },
-  URLCommand: {
-    Code: "偵測到連結中已經包含存取密碼，是否自動填入？",
-    Settings: "偵測到連結中包含了預設設定，是否自動填入？",
-  },
-  UI: {
-    Confirm: "確認",
-    Cancel: "取消",
-    Close: "關閉",
-    Create: "新增",
-    Edit: "編輯",
-    Export: "匯出",
-    Import: "匯入",
-    Sync: "同步",
-    Config: "設定",
-  },
-  Exporter: {
-    Description: {
-      Title: "只有清除上下文之後的訊息會被顯示",
-    },
-    Model: "模型",
-    Messages: "訊息",
-    Topic: "主題",
-    Time: "時間",
-  },
-};
-
-type DeepPartial<T> = T extends object
-  ? {
-      [P in keyof T]?: DeepPartial<T[P]>;
-    }
-  : T;
-
-export type LocaleType = typeof tw;
-export type PartialLocaleType = DeepPartial<typeof tw>;
-
-export default tw;
-// Translated by @chunkiuuu, feel free the submit new pr if there are typo/incorrect translations :D
--- a/app/store/access.ts
+++ b/app/store/access.ts
@ -123,6 +123,9 @@ const DEFAULT_ACCESS_STATE = {
  disableFastLink: false,
  customModels: "",
  defaultModel: "",
+
+  // tts config
+  edgeTTSVoiceName: "zh-CN-YunxiNeural",
 };

 export const useAccessStore = createPersistStore(
@ -135,6 +138,12 @@ export const useAccessStore = createPersistStore(
      return get().needCode;
    },

+    edgeVoiceName() {
+      // fetch() is async: the value returned below is the store's current one.
+      this.fetch();
+      return get().edgeTTSVoiceName;
+    },
+
    isValidOpenAI() {
      return ensure(get(), ["openaiApiKey"]);
    },
@ -197,40 +206,40 @@ export const useAccessStore = createPersistStore(
    fetch() {
      if (fetchState > 0 || getClientConfig()?.buildMode === "export") return;
      fetchState = 1;
-
-      const res = {
-        needCode: false,
-        hideUserApiKey: true,
-        disableGPT4: false,
-        hideBalanceQuery: true,
-      };
-      set(() => ({ ...res }));
-      fetchState = 2; // 设置 fetchState 值为 "获取已完成"
-      // fetch("/api/config", {
-      //   method: "post",
-      //   body: null,
-      //   headers: {
-      //     ...getHeaders(),
-      //   },
-      // })
-      //   .then((res) => res.json())
-      //   .then((res) => {
-      //     // Set default model from env request
-      //     let defaultModel = res.defaultModel ?? "";
-      //     DEFAULT_CONFIG.modelConfig.model =
-      //       defaultModel !== "" ? defaultModel : "gpt-3.5-turbo";
-      //     return res;
-      //   })
-      //   .then((res: DangerConfig) => {
-      //     console.log("[Config] got config from server", res);
-      //     set(() => ({ ...res }));
-      //   })
-      //   .catch(() => {
-      //     console.error("[Config] failed to fetch config");
-      //   })
-      //   .finally(() => {
-      //     fetchState = 2;
-      //   });
+      // const res = {
+      //     needCode: false,
+      //     hideUserApiKey: true,
+      //     disableGPT4: false,
+      //     hideBalanceQuery: true,
+      // };
+      // set(() => ({ ...res }));
+      // fetchState = 2; // mark fetchState as "fetch completed"
+      // TODO: may be problematic — the /api/config request below is re-enabled
+      fetch("/api/config", {
+        method: "post",
+        body: null,
+        headers: {
+          ...getHeaders(),
+        },
+      })
+        .then((res) => res.json())
+        .then((res) => {
+          // Set default model from env request
+          let defaultModel = res.defaultModel ?? "";
+          if (defaultModel !== "")
+            DEFAULT_CONFIG.modelConfig.model = defaultModel;
+          return res;
+        })
+        .then((res: DangerConfig) => {
+          console.log("[Config] got config from server", res);
+          set(() => ({ ...res }));
+        })
+        .catch(() => {
+          console.error("[Config] failed to fetch config");
+        })
+        .finally(() => {
+          fetchState = 2;
+        });
    },
  }),
  {
--- a/app/store/chat.ts
+++ b/app/store/chat.ts
@ -178,6 +178,28 @@ export const useChatStore = createPersistStore(
    }

    const methods = {
+      forkSession() {
+        // Fork the current session into a new one placed first and made current.
+        const currentSession = get().currentSession();
+        if (!currentSession) return;
+        const newSession = createEmptySession();
+        newSession.topic = currentSession.topic;
+        // Copy each message object; sharing them would let an in-place edit
+        // made in one session show up in the other.
+        newSession.messages = currentSession.messages.map((m) => ({ ...m }));
+        newSession.mask = {
+          ...currentSession.mask,
+          modelConfig: {
+            ...currentSession.mask.modelConfig,
+          },
+        };
+
+        set((state) => ({
+          currentSessionIndex: 0,
+          sessions: [newSession, ...state.sessions],
+        }));
+      },
+
      clearSessions() {
        set(() => ({
          sessions: [createEmptySession()],
@ -889,7 +911,7 @@ export const useChatStore = createPersistStore(
        });
      },

-      summarizeSession() {
+      summarizeSession(refreshTitle: boolean = false) {
        const config = useAppConfig.getState();
        const session = get().currentSession();
        const modelConfig = session.mask.modelConfig;
@ -907,16 +929,26 @@ export const useChatStore = createPersistStore(
        // should summarize topic after chating more than 50 words
        const SUMMARIZE_MIN_LEN = 50;
        if (
-          config.enableAutoGenerateTitle &&
-          session.topic === DEFAULT_TOPIC &&
-          countMessages(messages) >= SUMMARIZE_MIN_LEN
+          (config.enableAutoGenerateTitle &&
+            session.topic === DEFAULT_TOPIC &&
+            countMessages(messages) >= SUMMARIZE_MIN_LEN) ||
+          refreshTitle
        ) {
-          const topicMessages = messages.concat(
-            createMessage({
-              role: "user",
-              content: Locale.Store.Prompt.Topic,
-            }),
+          const startIndex = Math.max(
+            0,
+            messages.length - modelConfig.historyMessageCount,
          );
+          const topicMessages = messages
+            .slice(
+              startIndex < messages.length ? startIndex : messages.length - 1,
+              messages.length,
+            )
+            .concat(
+              createMessage({
+                role: "user",
+                content: Locale.Store.Prompt.Topic,
+              }),
+            );
          api.llm.chat({
            messages: topicMessages,
            config: {
@ -942,7 +974,7 @@ export const useChatStore = createPersistStore(

        const historyMsgLength = countMessages(toBeSummarizedMsgs);

-        if (historyMsgLength > modelConfig?.max_tokens ?? 4000) {
+        if (historyMsgLength > (modelConfig?.max_tokens ?? 4000)) {
          const n = toBeSummarizedMsgs.length;
          toBeSummarizedMsgs = toBeSummarizedMsgs.slice(
            Math.max(0, n - modelConfig.historyMessageCount),
--- a/app/store/config.ts
+++ b/app/store/config.ts
@ -5,14 +5,22 @@ import {
  DEFAULT_INPUT_TEMPLATE,
  DEFAULT_MODELS,
  DEFAULT_SIDEBAR_WIDTH,
+  DEFAULT_TTS_ENGINE,
+  DEFAULT_TTS_ENGINES,
+  DEFAULT_TTS_MODEL,
+  DEFAULT_TTS_MODELS,
+  DEFAULT_TTS_VOICE,
+  DEFAULT_TTS_VOICES,
  DISABLE_MODELS,
  StoreKey,
  ServiceProvider,
 } from "../constant";
 import { createPersistStore } from "../utils/store";
-import { get } from "immutable";

 export type ModelType = (typeof DEFAULT_MODELS)[number]["name"];
+export type TTSModelType = (typeof DEFAULT_TTS_MODELS)[number];
+export type TTSVoiceType = (typeof DEFAULT_TTS_VOICES)[number];
+export type TTSEngineType = (typeof DEFAULT_TTS_ENGINES)[number];

 export enum SubmitKey {
  Enter = "Enter",
@ -73,11 +81,21 @@ export const DEFAULT_CONFIG = {
    quality: "standard" as DalleQuality,
    style: "vivid" as DalleStyle,
  },
+
+  ttsConfig: {
+    enable: false,
+    autoplay: false,
+    engine: DEFAULT_TTS_ENGINE,
+    model: DEFAULT_TTS_MODEL,
+    voice: DEFAULT_TTS_VOICE,
+    speed: 1.0,
+  },
 };

 export type ChatConfig = typeof DEFAULT_CONFIG;

 export type ModelConfig = ChatConfig["modelConfig"];
+export type TTSConfig = ChatConfig["ttsConfig"];

 export function limitNumber(
  x: number,
@ -92,6 +110,21 @@ export function limitNumber(
  return Math.min(max, Math.max(min, x));
 }

+/**
+ * Coercion helpers for the individual fields of the TTS configuration.
+ *
+ * `engine`, `model` and `voice` only narrow the static type of the given
+ * string; `speed` is passed through `limitNumber` with the bounds 0.25 and 4.0
+ * and 1.0 as the last argument.
+ */
+export const TTSConfigValidator = {
+  engine: (x: string) => x as TTSEngineType,
+  model: (x: string) => x as TTSModelType,
+  voice: (x: string) => x as TTSVoiceType,
+  speed: (x: number) => limitNumber(x, 0.25, 4.0, 1.0),
+};
+
 export const ModalConfigValidator = {
  model(x: string) {
    return x as ModelType;
@ -148,6 +181,21 @@ export const useAppConfig = createPersistStore(
  {
    name: StoreKey.Config,
    version: 4,
+
+    /**
+     * Combines the persisted config with the current in-memory state.
+     *
+     * All persisted fields override the current ones, except `models`, which is
+     * merged entry by entry: a persisted model replaces the current model with
+     * the same `name` and `provider`, and is appended when no such model exists.
+     *
+     * @param persistedState state read back from storage (may be undefined)
+     * @param currentState state the store was created with
+     * @returns the state to use after rehydration
+     */
+    merge(persistedState, currentState) {
+      const state = persistedState as ChatConfig | undefined;
+      if (!state) return { ...currentState };
+      const models = currentState.models.slice();
+      // Storage content is not guaranteed to contain a `models` array;
+      // fall back to an empty list instead of throwing during rehydration.
+      const persistedModels = Array.isArray(state.models) ? state.models : [];
+      persistedModels.forEach((pModel) => {
+        const idx = models.findIndex(
+          (v) => v.name === pModel.name && v.provider === pModel.provider,
+        );
+        if (idx !== -1) models[idx] = pModel;
+        else models.push(pModel);
+      });
+      return { ...currentState, ...state, models: models };
+    },
+
    migrate(persistedState, version) {
      const state = persistedState as ChatConfig;

--- a/app/store/plugin.ts
+++ b/app/store/plugin.ts
@ -1,5 +1,4 @@
 import OpenAPIClientAxios from "openapi-client-axios";
-import { getLang, Lang } from "../locales";
 import { StoreKey } from "../constant";
 import { nanoid } from "nanoid";
 import { createPersistStore } from "../utils/store";
--- a/app/store/sync.ts
+++ b/app/store/sync.ts
@ -1,5 +1,4 @@
 import { getClientConfig } from "../config/client";
-import { Updater } from "../typing";
 import { ApiPath, STORAGE_KEY, StoreKey } from "../constant";
 import { createPersistStore } from "../utils/store";
 import {
@ -100,15 +99,17 @@ export const useSyncStore = createPersistStore(
        const remoteState = await client.get(config.username);
        if (!remoteState || remoteState === "") {
          await client.set(config.username, JSON.stringify(localState));
-          console.log("[Sync] Remote state is empty, using local state instead.");
-          return
+          console.log(
+            "[Sync] Remote state is empty, using local state instead.",
+          );
+          return;
        } else {
          const parsedRemoteState = JSON.parse(
            await client.get(config.username),
          ) as AppState;
          mergeAppState(localState, parsedRemoteState);
          setLocalAppState(localState);
-       } 
+        }
      } catch (e) {
        console.log("[Sync] failed to get remote state", e);
        throw e;
--- a/app/store/update.ts
+++ b/app/store/update.ts
@ -8,8 +8,6 @@ import { getClientConfig } from "../config/client";
 import { createPersistStore } from "../utils/store";
 import ChatGptIcon from "../icons/chatgpt.png";
 import Locale from "../locales";
-import { use } from "react";
-import { useAppConfig } from ".";
 import { ClientApi } from "../client/api";

 const ONE_MINUTE = 60 * 1000;
--- a/app/utils.ts
+++ b/app/utils.ts
@ -3,8 +3,7 @@ import { showToast } from "./components/ui-lib";
 import Locale from "./locales";
 import { RequestMessage } from "./client/api";
 import { ServiceProvider, REQUEST_TIMEOUT_MS } from "./constant";
-import isObject from "lodash-es/isObject";
-import { fetch as tauriFetch, Body, ResponseType } from "@tauri-apps/api/http";
+import { fetch as tauriFetch, ResponseType } from "@tauri-apps/api/http";

 export function trimTopic(topic: string) {
  // Fix an issue where double quotes still show in the Indonesian language
--- a/app/utils/audio.ts
+++ b/app/utils/audio.ts
@ -0,0 +1,45 @@
+/**
+ * Small audio player for text-to-speech output.
+ */
+type TTSPlayer = {
+  /** Creates the AudioContext and leaves it suspended until `play` is called. */
+  init: () => void;
+  /**
+   * Decodes `audioBuffer` and plays it. `onended` is installed as the source
+   * node's `onended` handler; pass `null` for no handler.
+   */
+  play: (
+    audioBuffer: ArrayBuffer,
+    onended: (() => void) | null,
+  ) => Promise<void>;
+  /** Stops the current playback and closes the AudioContext. */
+  stop: () => void;
+};
+
+/**
+ * Creates a {@link TTSPlayer}. `init()` must be called before `play()`.
+ */
+export function createTTSPlayer(): TTSPlayer {
+  let audioContext: AudioContext | null = null;
+  let audioBufferSourceNode: AudioBufferSourceNode | null = null;
+
+  // Stops and detaches the current source node, if there is one.
+  const releaseSource = () => {
+    const source = audioBufferSourceNode;
+    if (!source) return;
+    audioBufferSourceNode = null;
+    try {
+      source.stop();
+    } catch {
+      // stop() throws InvalidStateError when the node was never started;
+      // there is nothing to stop in that case.
+    }
+    source.disconnect();
+  };
+
+  const init = () => {
+    audioContext = new (window.AudioContext || window.webkitAudioContext)();
+    audioContext.suspend();
+  };
+
+  const play = async (
+    audioBuffer: ArrayBuffer,
+    onended: (() => void) | null,
+  ) => {
+    const context = audioContext;
+    if (!context) {
+      throw new Error("TTS player is not initialized, call init() first");
+    }
+
+    releaseSource();
+
+    const buffer = await context.decodeAudioData(audioBuffer);
+    // stop()/init() may have run while decoding; do not play on a stale context.
+    if (audioContext !== context) return;
+    // Another play() may have finished decoding first; replace its node.
+    releaseSource();
+
+    const source = context.createBufferSource();
+    source.buffer = buffer;
+    source.connect(context.destination);
+    source.onended = onended;
+    audioBufferSourceNode = source;
+
+    context
+      .resume()
+      .then(() => {
+        // Only start the node if it is still the active one.
+        if (audioBufferSourceNode === source) source.start();
+      })
+      .catch((e) => {
+        console.error("[TTS] failed to start playback", e);
+      });
+  };
+
+  const stop = () => {
+    releaseSource();
+    if (audioContext) {
+      audioContext.close();
+      audioContext = null;
+    }
+  };
+
+  return { init, play, stop };
+}
--- a/app/utils/cors.ts
+++ b/app/utils/cors.ts
@ -1,5 +1,5 @@
 import { getClientConfig } from "../config/client";
-import { ApiPath, DEFAULT_API_HOST } from "../constant";
+import { DEFAULT_API_HOST } from "../constant";

 export function corsPath(path: string) {
  const baseUrl = getClientConfig()?.isApp ? `${DEFAULT_API_HOST}` : "";
--- a/app/utils/ms_edge_tts.ts
+++ b/app/utils/ms_edge_tts.ts
@ -0,0 +1,391 @@
+// import axios from "axios";
+import { Buffer } from "buffer";
+import { randomBytes } from "crypto";
+import { Readable } from "stream";
+
+// Modified according to https://github.com/Migushthe2nd/MsEdgeTTS
+
+/**
+ * Named volume levels for the SSML `<prosody volume="...">` attribute.
+ * https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-synthesis-markup-voice#:~:text=Optional-,volume,-Indicates%20the%20volume
+ */
+export enum VOLUME {
+  SILENT = "silent",
+  X_SOFT = "x-soft",
+  SOFT = "soft",
+  MEDIUM = "medium",
+  LOUD = "loud",
+  // Lowercase, matching the documented SSML keyword `x-loud`.
+  X_LOUD = "x-loud",
+  DEFAULT = "default",
+}
+
+/**
+ * Named speaking-rate values for the SSML `<prosody rate="...">` attribute.
+ * https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-synthesis-markup-voice#:~:text=Optional-,rate,-Indicates%20the%20speaking
+ */
+export enum RATE {
+  X_SLOW = "x-slow",
+  SLOW = "slow",
+  MEDIUM = "medium",
+  FAST = "fast",
+  X_FAST = "x-fast",
+  DEFAULT = "default",
+}
+
+/**
+ * Named pitch values for the SSML `<prosody pitch="...">` attribute.
+ * https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-synthesis-markup-voice#:~:text=Optional-,pitch,-Indicates%20the%20baseline
+ */
+export enum PITCH {
+  X_LOW = "x-low",
+  LOW = "low",
+  MEDIUM = "medium",
+  HIGH = "high",
+  X_HIGH = "x-high",
+  DEFAULT = "default",
+}
+
+/**
+ * Audio output formats that can be requested from the synthesis endpoint.
+ * The chosen value is sent as `outputFormat` in the `speech.config` message.
+ * Only a few of the [possible formats](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/rest-text-to-speech#audio-outputs) are accepted.
+ * Members that are commented out below are kept for reference only.
+ */
+export enum OUTPUT_FORMAT {
+  // Streaming =============================
+  // AMR_WB_16000HZ = "amr-wb-16000hz",
+  // AUDIO_16KHZ_16BIT_32KBPS_MONO_OPUS = "audio-16khz-16bit-32kbps-mono-opus",
+  // AUDIO_16KHZ_32KBITRATE_MONO_MP3 = "audio-16khz-32kbitrate-mono-mp3",
+  // AUDIO_16KHZ_64KBITRATE_MONO_MP3 = "audio-16khz-64kbitrate-mono-mp3",
+  // AUDIO_16KHZ_128KBITRATE_MONO_MP3 = "audio-16khz-128kbitrate-mono-mp3",
+  // AUDIO_24KHZ_16BIT_24KBPS_MONO_OPUS = "audio-24khz-16bit-24kbps-mono-opus",
+  // AUDIO_24KHZ_16BIT_48KBPS_MONO_OPUS = "audio-24khz-16bit-48kbps-mono-opus",
+  AUDIO_24KHZ_48KBITRATE_MONO_MP3 = "audio-24khz-48kbitrate-mono-mp3",
+  AUDIO_24KHZ_96KBITRATE_MONO_MP3 = "audio-24khz-96kbitrate-mono-mp3",
+  // AUDIO_24KHZ_160KBITRATE_MONO_MP3 = "audio-24khz-160kbitrate-mono-mp3",
+  // AUDIO_48KHZ_96KBITRATE_MONO_MP3 = "audio-48khz-96kbitrate-mono-mp3",
+  // AUDIO_48KHZ_192KBITRATE_MONO_MP3 = "audio-48khz-192kbitrate-mono-mp3",
+  // OGG_16KHZ_16BIT_MONO_OPUS = "ogg-16khz-16bit-mono-opus",
+  // OGG_24KHZ_16BIT_MONO_OPUS = "ogg-24khz-16bit-mono-opus",
+  // OGG_48KHZ_16BIT_MONO_OPUS = "ogg-48khz-16bit-mono-opus",
+  // RAW_8KHZ_8BIT_MONO_ALAW = "raw-8khz-8bit-mono-alaw",
+  // RAW_8KHZ_8BIT_MONO_MULAW = "raw-8khz-8bit-mono-mulaw",
+  // RAW_8KHZ_16BIT_MONO_PCM = "raw-8khz-16bit-mono-pcm",
+  // RAW_16KHZ_16BIT_MONO_PCM = "raw-16khz-16bit-mono-pcm",
+  // RAW_16KHZ_16BIT_MONO_TRUESILK = "raw-16khz-16bit-mono-truesilk",
+  // RAW_22050HZ_16BIT_MONO_PCM = "raw-22050hz-16bit-mono-pcm",
+  // RAW_24KHZ_16BIT_MONO_PCM = "raw-24khz-16bit-mono-pcm",
+  // RAW_24KHZ_16BIT_MONO_TRUESILK = "raw-24khz-16bit-mono-truesilk",
+  // RAW_44100HZ_16BIT_MONO_PCM = "raw-44100hz-16bit-mono-pcm",
+  // RAW_48KHZ_16BIT_MONO_PCM = "raw-48khz-16bit-mono-pcm",
+  // WEBM_16KHZ_16BIT_MONO_OPUS = "webm-16khz-16bit-mono-opus",
+  // WEBM_24KHZ_16BIT_24KBPS_MONO_OPUS = "webm-24khz-16bit-24kbps-mono-opus",
+  WEBM_24KHZ_16BIT_MONO_OPUS = "webm-24khz-16bit-mono-opus",
+  // Non-streaming =============================
+  // RIFF_8KHZ_8BIT_MONO_ALAW = "riff-8khz-8bit-mono-alaw",
+  // RIFF_8KHZ_8BIT_MONO_MULAW = "riff-8khz-8bit-mono-mulaw",
+  // RIFF_8KHZ_16BIT_MONO_PCM = "riff-8khz-16bit-mono-pcm",
+  // RIFF_22050HZ_16BIT_MONO_PCM = "riff-22050hz-16bit-mono-pcm",
+  // RIFF_24KHZ_16BIT_MONO_PCM = "riff-24khz-16bit-mono-pcm",
+  // RIFF_44100HZ_16BIT_MONO_PCM = "riff-44100hz-16bit-mono-pcm",
+  // RIFF_48KHZ_16BIT_MONO_PCM = "riff-48khz-16bit-mono-pcm",
+}
+
+/**
+ * One entry of the voice list returned by `MsEdgeTTS.getVoices()`.
+ * NOTE(review): field meanings are not visible in this file; `ShortName` is
+ * presumably the value to pass as `voiceName` to `setMetadata` - confirm.
+ */
+export type Voice = {
+  Name: string;
+  ShortName: string;
+  Gender: string;
+  Locale: string;
+  SuggestedCodec: string;
+  FriendlyName: string;
+  Status: string;
+};
+
+/**
+ * Prosody settings that are written into the `<prosody>` element of the
+ * generated SSML. A fresh instance supplies the defaults for any option the
+ * caller does not provide.
+ */
+export class ProsodyOptions {
+  /**
+   * The pitch to use.
+   * Can be any {@link PITCH}, or a relative frequency in Hz (+50Hz), a relative semitone (+2st), or a relative percentage (+50%).
+   * [SSML documentation](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-synthesis-markup-voice#:~:text=Optional-,pitch,-Indicates%20the%20baseline)
+   */
+  pitch?: PITCH | string = "+0Hz";
+  /**
+   * The rate to use.
+   * Can be any {@link RATE}, or a relative number (0.5), or string with a relative percentage (+50%).
+   * [SSML documentation](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-synthesis-markup-voice#:~:text=Optional-,rate,-Indicates%20the%20speaking)
+   */
+  rate?: RATE | string | number = 1.0;
+  /**
+   * The volume to use.
+   * Can be any {@link VOLUME}, or an absolute number (0, 100), a string with a relative number (+50), or a relative percentage (+50%).
+   * [SSML documentation](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-synthesis-markup-voice#:~:text=Optional-,volume,-Indicates%20the%20volume)
+   */
+  volume?: VOLUME | string | number = 100.0;
+}
+
+/**
+ * Client for the Microsoft Edge "read aloud" speech synthesis WebSocket endpoint.
+ *
+ * Call {@link MsEdgeTTS.setMetadata} first to choose voice and output format
+ * and to open the socket, then use {@link MsEdgeTTS.toStream},
+ * {@link MsEdgeTTS.toArrayBuffer} or {@link MsEdgeTTS.rawToStream}.
+ *
+ * NOTE(review): a socket replaced by a later `setMetadata` call is not closed
+ * here; confirm whether callers rely on in-flight streams of the old socket.
+ */
+export class MsEdgeTTS {
+  static OUTPUT_FORMAT = OUTPUT_FORMAT;
+  private static TRUSTED_CLIENT_TOKEN = "6A5AA1D4EAFF4E9FB37E23D68491D6F4";
+  private static VOICES_URL = `https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/voices/list?trustedclienttoken=${MsEdgeTTS.TRUSTED_CLIENT_TOKEN}`;
+  private static SYNTH_URL = `wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1?TrustedClientToken=${MsEdgeTTS.TRUSTED_CLIENT_TOKEN}`;
+  // Marks the end of the text header in a binary audio message.
+  private static BINARY_DELIM = "Path:audio\r\n";
+  private static VOICE_LANG_REGEX = /\w{2}-\w{2}/;
+  private readonly _enableLogger: boolean;
+  private _ws: WebSocket | undefined;
+  private _voice: any;
+  private _voiceLocale: any;
+  private _outputFormat: any;
+  // Open output streams, keyed by the request id sent with each SSML request.
+  private _streams: { [key: string]: Readable } = {};
+  private _startTime = 0;
+
+  /** Logs to the console when the logger was enabled in the constructor. */
+  private _log(...o: any[]) {
+    if (this._enableLogger) {
+      console.log(...o);
+    }
+  }
+
+  /**
+   * Create a new `MsEdgeTTS` instance.
+   *
+   * @param enableLogger=false whether to enable the built-in logger. This logs connections inits, disconnects, and incoming data to the console
+   */
+  public constructor(enableLogger: boolean = false) {
+    this._enableLogger = enableLogger;
+  }
+
+  /** True when a socket exists and is in the OPEN state. */
+  private _isOpen(): boolean {
+    return !!this._ws && this._ws.readyState === this._ws.OPEN;
+  }
+
+  /**
+   * Sends a message over the socket, (re)connecting up to three times when
+   * the socket is not open.
+   *
+   * @throws when the socket is still not open after the connection attempts
+   */
+  private async _send(message: any) {
+    for (let i = 1; i <= 3 && !this._isOpen(); i++) {
+      if (i === 1) {
+        this._startTime = Date.now();
+      }
+      this._log("connecting: ", i);
+      await this._initClient();
+    }
+    if (!this._isOpen()) {
+      // Sending on a closed socket would drop the message silently and leave
+      // the caller's stream waiting forever.
+      throw new Error("WebSocket is not open, message was not sent");
+    }
+    this._ws!.send(message);
+  }
+
+  /**
+   * Opens a new socket, installs the message handlers and sends the
+   * `speech.config` message. The returned promise resolves once the config
+   * message has been sent and rejects on a connection error.
+   */
+  private _initClient() {
+    const ws = new WebSocket(MsEdgeTTS.SYNTH_URL);
+    this._ws = ws;
+
+    ws.binaryType = "arraybuffer";
+    return new Promise((resolve, reject) => {
+      ws.onopen = () => {
+        this._log(
+          "Connected in",
+          (Date.now() - this._startTime) / 1000,
+          "seconds",
+        );
+        const configMessage = `Content-Type:application/json; charset=utf-8\r\nPath:speech.config\r\n\r\n
+                    {
+                        "context": {
+                            "synthesis": {
+                                "audio": {
+                                    "metadataoptions": {
+                                        "sentenceBoundaryEnabled": "false",
+                                        "wordBoundaryEnabled": "false"
+                                    },
+                                    "outputFormat": "${this._outputFormat}" 
+                                }
+                            }
+                        }
+                    }
+                `;
+        // Forward a failed send as well, otherwise the promise never settles.
+        this._send(configMessage).then(resolve, reject);
+      };
+      ws.onmessage = (m: any) => {
+        const buffer = Buffer.from(m.data as ArrayBuffer);
+        const message = buffer.toString();
+        const requestIdMatch = /X-RequestId:(.*?)\r\n/gm.exec(message);
+        if (!requestIdMatch) {
+          // Without a request id the message cannot be routed to a stream.
+          this._log("UNKNOWN MESSAGE", message);
+          return;
+        }
+        const requestId = requestIdMatch[1];
+        if (message.includes("Path:turn.start")) {
+          // start of turn, ignore
+        } else if (message.includes("Path:turn.end")) {
+          // end of turn, close stream (it may already have been destroyed)
+          this._streams[requestId]?.push(null);
+        } else if (message.includes("Path:response")) {
+          // context response, ignore
+        } else if (
+          message.includes("Path:audio") &&
+          m.data instanceof ArrayBuffer
+        ) {
+          this._pushAudioData(buffer, requestId);
+        } else {
+          this._log("UNKNOWN MESSAGE", message);
+        }
+      };
+      ws.onclose = () => {
+        this._log(
+          "disconnected after:",
+          (Date.now() - this._startTime) / 1000,
+          "seconds",
+        );
+        // Iterate over a snapshot: ending a stream may remove it from the map.
+        for (const stream of Object.values(this._streams)) {
+          stream.push(null);
+        }
+      };
+      ws.onerror = (error: any) => {
+        // Reject with an Error so callers get a stack and a usable message.
+        reject(
+          new Error(
+            "Connect Error: " + (error?.message ?? error?.type ?? error),
+          ),
+        );
+      };
+    });
+  }
+
+  /**
+   * Strips the text header from a binary audio message and pushes the
+   * remaining audio bytes to the stream of the given request.
+   */
+  private _pushAudioData(audioBuffer: Buffer, requestId: string) {
+    const delimIndex = audioBuffer.indexOf(MsEdgeTTS.BINARY_DELIM);
+    const stream = this._streams[requestId];
+    if (delimIndex === -1 || !stream) {
+      // No header delimiter, or the stream was already destroyed: pushing
+      // would emit header bytes as audio or throw, so drop the chunk.
+      this._log("UNKNOWN MESSAGE", "audio chunk dropped for", requestId);
+      return;
+    }
+    const audioData = audioBuffer.subarray(
+      delimIndex + MsEdgeTTS.BINARY_DELIM.length,
+    );
+    stream.push(audioData);
+    this._log("received audio chunk, size: ", audioData?.length);
+  }
+
+  /**
+   * Wraps `input` in a `<speak><voice><prosody>` document using the voice and
+   * locale set by `setMetadata`. Missing options fall back to the defaults of
+   * {@link ProsodyOptions}. `input` is inserted as-is (it may contain SSML).
+   */
+  private _SSMLTemplate(input: string, options: ProsodyOptions = {}): string {
+    // in case future updates to the edge API block these elements, we'll be concatenating strings.
+    options = { ...new ProsodyOptions(), ...options };
+    return `<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="https://www.w3.org/2001/mstts" xml:lang="${this._voiceLocale}">
+                <voice name="${this._voice}">
+                    <prosody pitch="${options.pitch}" rate="${options.rate}" volume="${options.volume}">
+                        ${input}
+                    </prosody> 
+                </voice>
+            </speak>`;
+  }
+
+  /**
+   * Fetch the list of voices available in Microsoft Edge.
+   * These, however, are not all. The complete list of voices supported by this module [can be found here](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support) (neural, standard, and preview).
+   *
+   * @throws when the HTTP response status is not ok
+   */
+  async getVoices(): Promise<Voice[]> {
+    const response = await fetch(MsEdgeTTS.VOICES_URL);
+    if (!response.ok) {
+      throw new Error("Network response was not ok");
+    }
+    return (await response.json()) as Voice[];
+  }
+
+  /**
+   * Sets the required information for the speech to be synthesised and inits a new WebSocket connection.
+   * Must be called at least once before text can be synthesised.
+   * Saved in this instance. Can be called at any time to update the metadata.
+   *
+   * @param voiceName a string with any `ShortName`. A list of all available neural voices can be found [here](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support#neural-voices). However, it is not limited to neural voices: standard voices can also be used. A list of standard voices can be found [here](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support#standard-voices)
+   * @param outputFormat any {@link OUTPUT_FORMAT}
+   * @param voiceLocale (optional) any voice locale that is supported by the voice. See the list of all voices for compatibility. If not provided, the locale will be inferred from the `voiceName`
+   * @throws when no locale is given and none can be inferred from `voiceName`
+   */
+  async setMetadata(
+    voiceName: string,
+    outputFormat: OUTPUT_FORMAT,
+    voiceLocale?: string,
+  ) {
+    const oldVoice = this._voice;
+    const oldVoiceLocale = this._voiceLocale;
+    const oldOutputFormat = this._outputFormat;
+
+    this._voice = voiceName;
+    this._voiceLocale = voiceLocale;
+    if (!this._voiceLocale) {
+      const voiceLangMatch = MsEdgeTTS.VOICE_LANG_REGEX.exec(this._voice);
+      if (!voiceLangMatch)
+        throw new Error("Could not infer voiceLocale from voiceName!");
+      this._voiceLocale = voiceLangMatch[0];
+    }
+    this._outputFormat = outputFormat;
+
+    const changed =
+      oldVoice !== this._voice ||
+      oldVoiceLocale !== this._voiceLocale ||
+      oldOutputFormat !== this._outputFormat;
+
+    // create new client
+    if (changed || !this._isOpen()) {
+      this._startTime = Date.now();
+      await this._initClient();
+    }
+  }
+
+  /** Throws when no socket has been created yet, i.e. `setMetadata` was never called. */
+  private _metadataCheck() {
+    if (!this._ws)
+      throw new Error(
+        "Speech synthesis not configured yet. Run setMetadata before calling toStream or toFile.",
+      );
+  }
+
+  /**
+   * Close the WebSocket connection. Does nothing when no connection was opened.
+   */
+  close() {
+    this._ws?.close();
+  }
+
+  /**
+   * Writes raw audio synthesised from text in real-time to a {@link Readable}. Uses a basic {@link _SSMLTemplate SML template}.
+   *
+   * @param input the text to synthesise. Can include SSML elements.
+   * @param options (optional) {@link ProsodyOptions}
+   * @returns {Readable} - a `stream.Readable` with the audio data
+   */
+  toStream(input: string, options?: ProsodyOptions): Readable {
+    const { stream } = this._rawSSMLRequest(this._SSMLTemplate(input, options));
+    return stream;
+  }
+
+  /**
+   * Synthesises `input` and collects the whole audio into one ArrayBuffer.
+   *
+   * @param input the text to synthesise. Can include SSML elements.
+   * @param options (optional) {@link ProsodyOptions}
+   * @returns a promise for the audio bytes; rejects when the stream errors
+   */
+  toArrayBuffer(input: string, options?: ProsodyOptions): Promise<ArrayBuffer> {
+    return new Promise((resolve, reject) => {
+      const chunks: Uint8Array[] = [];
+      const readable = this.toStream(input, options);
+      readable.on("data", (chunk) => {
+        chunks.push(chunk);
+      });
+
+      readable.on("end", () => {
+        const merged = Buffer.concat(chunks);
+        // A Buffer can be a view into a larger (pooled) ArrayBuffer, so cut
+        // out exactly the bytes that belong to it.
+        resolve(
+          merged.buffer.slice(
+            merged.byteOffset,
+            merged.byteOffset + merged.byteLength,
+          ) as ArrayBuffer,
+        );
+      });
+
+      readable.on("error", (err) => {
+        reject(err);
+      });
+    });
+  }
+
+  /**
+   * Writes raw audio synthesised from a request in real-time to a {@link Readable}. Has no SSML template. Basic SSML should be provided in the request.
+   *
+   * @param requestSSML the SSML to send. SSML elements required in order to work.
+   * @returns {Readable} - a `stream.Readable` with the audio data
+   */
+  rawToStream(requestSSML: string): Readable {
+    const { stream } = this._rawSSMLRequest(requestSSML);
+    return stream;
+  }
+
+  /**
+   * Registers a new output stream under a random request id and sends the
+   * SSML request. When sending fails, the stream is destroyed with the error.
+   *
+   * @throws when `setMetadata` has not been called yet
+   */
+  private _rawSSMLRequest(requestSSML: string): {
+    stream: Readable;
+    requestId: string;
+  } {
+    this._metadataCheck();
+
+    const requestId = randomBytes(16).toString("hex");
+    const request =
+      `X-RequestId:${requestId}\r\nContent-Type:application/ssml+xml\r\nPath:ssml\r\n\r\n
+                ` + requestSSML.trim();
+    // https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/speech-synthesis-markup
+    const stream = new Readable({
+      read() {},
+      destroy: (
+        error: Error | null,
+        callback: (error: Error | null) => void,
+      ) => {
+        // Forget the stream so later messages for this id are ignored.
+        delete this._streams[requestId];
+        callback(error);
+      },
+    });
+    this._streams[requestId] = stream;
+    this._send(request).catch((error) => {
+      // Surface the failure to the consumer instead of leaving it waiting.
+      stream.destroy(error instanceof Error ? error : new Error(`${error}`));
+    });
+    return { stream, requestId };
+  }
+}
--- a/package.json
+++ b/package.json
@ -39,6 +39,7 @@
    "html-to-image": "^1.11.11",
    "idb-keyval": "^6.2.1",
    "lodash-es": "^4.17.21",
+    "markdown-to-txt": "^2.0.1",
    "mermaid": "^10.7.0",
    "microsoft-cognitiveservices-speech-sdk": "^1.36.0",
    "nanoid": "^5.0.3",
@ -87,6 +88,7 @@
    "eslint-config-next": "14.2.5",
    "eslint-config-prettier": "^9.1.0",
    "eslint-plugin-prettier": "^5.1.3",
+    "eslint-plugin-unused-imports": "^3.2.0",
    "husky": "^9.0.7",
    "lint-staged": "^15.2.0",
    "postcss": "^8.4.33",
@ -104,4 +106,4 @@
    "strip-ansi": "6.0.1"
  },
  "packageManager": "yarn@1.22.19"
-}
+}
--- a/yarn.lock
+++ b/yarn.lock
				`@ -0,0 +1 @@`
				`<svg xmlns="http://www.w3.org/2000/svg" fill="none" width="16" height="16" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" class="w-4 h-4"><path stroke-linecap="round" stroke-linejoin="round" d="M17.25 9.75 19.5 12m0 0 2.25 2.25M19.5 12l2.25-2.25M19.5 12l-2.25 2.25m-10.5-6 4.72-4.72a.75.75 0 0 1 1.28.53v15.88a.75.75 0 0 1-1.28.53l-4.72-4.72H4.51c-.88 0-1.704-.507-1.938-1.354A9.009 9.009 0 0 1 2.25 12c0-.83.112-1.633.322-2.396C2.806 8.756 3.63 8.25 4.51 8.25H6.75Z"></path></svg>`