Merge pull request #52 from ChatGPTNextWeb/main

[pull] main from ChatGPTNextWeb:main
commit 39bc036e30
lyglgt, 2024-09-23 15:40:06 +08:00 (committed by GitHub)
62 changed files with 1174 additions and 120 deletions

@@ -66,4 +66,4 @@ ANTHROPIC_API_VERSION=
 ANTHROPIC_URL=

 ### (optional)
-WHITE_WEBDEV_ENDPOINTS=
+WHITE_WEBDAV_ENDPOINTS=

@@ -1,4 +1,7 @@
 {
   "extends": "next/core-web-vitals",
-  "plugins": ["prettier"]
+  "plugins": ["prettier", "unused-imports"],
+  "rules": {
+    "unused-imports/no-unused-imports": "warn"
+  }
 }

@@ -49,7 +49,7 @@ jobs:
         run: npm install --global vercel@latest
       - name: Cache dependencies
-        uses: actions/cache@v2
+        uses: actions/cache@v4
        id: cache-npm
        with:
          path: ~/.npm

@@ -340,7 +340,7 @@ For ByteDance: use `modelName@bytedance=deploymentName` to customize model name
 Change default model

-### `WHITE_WEBDEV_ENDPOINTS` (optional)
+### `WHITE_WEBDAV_ENDPOINTS` (optional)

 You can use this option if you want to increase the number of webdav service addresses you are allowed to access, as required by the format:
 - Each address must be a complete endpoint
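For illustration, the variable takes a comma-separated list of full endpoints (the server config shown later in this diff splits it on ","); the hostnames below are hypothetical:

WHITE_WEBDAV_ENDPOINTS=https://dav.example.com,https://webdav.example.org/dav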

@@ -202,7 +202,7 @@ ByteDance Api Url.
 如果你想禁用从链接解析预制设置,将此环境变量设置为 1 即可。

-### `WHITE_WEBDEV_ENDPOINTS` (可选)
+### `WHITE_WEBDAV_ENDPOINTS` (可选)

 如果你想增加允许访问的webdav服务地址,可以使用该选项,格式要求:
 - 每一个地址必须是一个完整的 endpoint

@@ -193,7 +193,7 @@ ByteDance API の URL。
 リンクからのプリセット設定解析を無効にしたい場合は、この環境変数を 1 に設定します。

-### `WHITE_WEBDEV_ENDPOINTS` (オプション)
+### `WHITE_WEBDAV_ENDPOINTS` (オプション)

 アクセス許可を与える WebDAV サービスのアドレスを追加したい場合、このオプションを使用します。フォーマット要件:
 - 各アドレスは完全なエンドポイントでなければなりません。

@@ -1,5 +1,5 @@
 import { ApiPath } from "@/app/constant";
-import { NextRequest, NextResponse } from "next/server";
+import { NextRequest } from "next/server";
 import { handle as openaiHandler } from "../../openai";
 import { handle as azureHandler } from "../../azure";
 import { handle as googleHandler } from "../../google";

@@ -1,6 +1,5 @@
 import { getServerSideConfig } from "@/app/config/server";
 import {
-  Alibaba,
   ALIBABA_BASE_URL,
   ApiPath,
   ModelProvider,
@@ -10,7 +9,6 @@ import { prettyObject } from "@/app/utils/format";
 import { NextRequest, NextResponse } from "next/server";
 import { auth } from "@/app/api/auth";
 import { isModelAvailableInServer } from "@/app/utils/model";
-import type { RequestPayload } from "@/app/client/platforms/openai";

 const serverConfig = getServerSideConfig();

@@ -3,7 +3,6 @@ import {
   ANTHROPIC_BASE_URL,
   Anthropic,
   ApiPath,
-  DEFAULT_MODELS,
   ServiceProvider,
   ModelProvider,
 } from "@/app/constant";

@@ -1,4 +1,3 @@
-import { getServerSideConfig } from "@/app/config/server";
 import { ModelProvider } from "@/app/constant";
 import { prettyObject } from "@/app/utils/format";
 import { NextRequest, NextResponse } from "next/server";

@@ -3,7 +3,6 @@ import {
   BAIDU_BASE_URL,
   ApiPath,
   ModelProvider,
-  BAIDU_OATUH_URL,
   ServiceProvider,
 } from "@/app/constant";
 import { prettyObject } from "@/app/utils/format";

@@ -1,11 +1,6 @@
 import { NextRequest, NextResponse } from "next/server";
 import { getServerSideConfig } from "../config/server";
-import {
-  DEFAULT_MODELS,
-  OPENAI_BASE_URL,
-  GEMINI_BASE_URL,
-  ServiceProvider,
-} from "../constant";
+import { OPENAI_BASE_URL, ServiceProvider } from "../constant";
 import { isModelAvailableInServer } from "../utils/model";
 import { cloudflareAIGatewayUrl } from "../utils/cloudflare";

@@ -1,12 +1,7 @@
 import { NextRequest, NextResponse } from "next/server";
 import { auth } from "./auth";
 import { getServerSideConfig } from "@/app/config/server";
-import {
-  ApiPath,
-  GEMINI_BASE_URL,
-  Google,
-  ModelProvider,
-} from "@/app/constant";
+import { ApiPath, GEMINI_BASE_URL, ModelProvider } from "@/app/constant";
 import { prettyObject } from "@/app/utils/format";

 const serverConfig = getServerSideConfig();

@@ -1,6 +1,5 @@
 import { getServerSideConfig } from "@/app/config/server";
 import {
-  Iflytek,
   IFLYTEK_BASE_URL,
   ApiPath,
   ModelProvider,
@@ -10,7 +9,6 @@ import { prettyObject } from "@/app/utils/format";
 import { NextRequest, NextResponse } from "next/server";
 import { auth } from "@/app/api/auth";
 import { isModelAvailableInServer } from "@/app/utils/model";
-import type { RequestPayload } from "@/app/client/platforms/openai";
 // iflytek

 const serverConfig = getServerSideConfig();

@@ -1,6 +1,5 @@
 import { getServerSideConfig } from "@/app/config/server";
 import {
-  Moonshot,
   MOONSHOT_BASE_URL,
   ApiPath,
   ModelProvider,
@@ -10,7 +9,6 @@ import { prettyObject } from "@/app/utils/format";
 import { NextRequest, NextResponse } from "next/server";
 import { auth } from "@/app/api/auth";
 import { isModelAvailableInServer } from "@/app/utils/model";
-import type { RequestPayload } from "@/app/client/platforms/openai";

 const serverConfig = getServerSideConfig();

@@ -1,15 +1,8 @@
 import { getServerSideConfig } from "@/app/config/server";
-import {
-  TENCENT_BASE_URL,
-  ApiPath,
-  ModelProvider,
-  ServiceProvider,
-  Tencent,
-} from "@/app/constant";
+import { TENCENT_BASE_URL, ModelProvider } from "@/app/constant";
 import { prettyObject } from "@/app/utils/format";
 import { NextRequest, NextResponse } from "next/server";
 import { auth } from "@/app/api/auth";
-import { isModelAvailableInServer } from "@/app/utils/model";
 import { getHeader } from "@/app/utils/tencent";

 const serverConfig = getServerSideConfig();

@@ -6,7 +6,7 @@ const config = getServerSideConfig();
 const mergedAllowedWebDavEndpoints = [
   ...internalAllowedWebDavEndpoints,
-  ...config.allowedWebDevEndpoints,
+  ...config.allowedWebDavEndpoints,
 ].filter((domain) => Boolean(domain.trim()));

 const normalizeUrl = (url: string) => {

@@ -1,7 +1,6 @@
 import { getClientConfig } from "../config/client";
 import {
   ACCESS_CODE_PREFIX,
-  Azure,
   ModelProvider,
   ServiceProvider,
 } from "../constant";
@@ -26,6 +25,7 @@ export const ROLES = ["system", "user", "assistant"] as const;
 export type MessageRole = (typeof ROLES)[number];

 export const Models = ["gpt-3.5-turbo", "gpt-4"] as const;
+export const TTSModels = ["tts-1", "tts-1-hd"] as const;
 export type ChatModel = ModelType;

 export interface MultimodalContent {
@@ -54,6 +54,15 @@ export interface LLMConfig {
   style?: DalleRequestPayload["style"];
 }

+export interface SpeechOptions {
+  model: string;
+  input: string;
+  voice: string;
+  response_format?: string;
+  speed?: number;
+  onController?: (controller: AbortController) => void;
+}
+
 export interface ChatOptions {
   messages: RequestMessage[];
   config: LLMConfig;
@@ -88,6 +97,7 @@ export interface LLMModelProvider {
 export abstract class LLMApi {
   abstract chat(options: ChatOptions): Promise<void>;
+  abstract speech(options: SpeechOptions): Promise<ArrayBuffer>;
   abstract usage(): Promise<LLMUsage>;
   abstract models(): Promise<LLMModel[]>;
 }
@@ -206,13 +216,16 @@ export function validString(x: string): boolean {
   return x?.length > 0;
 }

-export function getHeaders() {
+export function getHeaders(ignoreHeaders: boolean = false) {
   const accessStore = useAccessStore.getState();
   const chatStore = useChatStore.getState();
-  const headers: Record<string, string> = {
-    "Content-Type": "application/json",
-    Accept: "application/json",
-  };
+  let headers: Record<string, string> = {};
+  if (!ignoreHeaders) {
+    headers = {
+      "Content-Type": "application/json",
+      Accept: "application/json",
+    };
+  }

   const clientConfig = getClientConfig();

@@ -12,6 +12,7 @@ import {
   getHeaders,
   LLMApi,
   LLMModel,
+  SpeechOptions,
   MultimodalContent,
 } from "../api";
 import Locale from "../../locales";
@@ -83,6 +84,10 @@ export class QwenApi implements LLMApi {
     return res?.output?.choices?.at(0)?.message?.content ?? "";
   }

+  speech(options: SpeechOptions): Promise<ArrayBuffer> {
+    throw new Error("Method not implemented.");
+  }
+
   async chat(options: ChatOptions) {
     const messages = options.messages.map((v) => ({
       role: v.role,

@@ -1,5 +1,5 @@
-import { ACCESS_CODE_PREFIX, Anthropic, ApiPath } from "@/app/constant";
-import { ChatOptions, getHeaders, LLMApi, MultimodalContent } from "../api";
+import { Anthropic, ApiPath } from "@/app/constant";
+import { ChatOptions, getHeaders, LLMApi, SpeechOptions } from "../api";
 import {
   useAccessStore,
   useAppConfig,
@@ -9,13 +9,6 @@ import {
 } from "@/app/store";
 import { getClientConfig } from "@/app/config/client";
 import { DEFAULT_API_HOST } from "@/app/constant";
-import {
-  EventStreamContentType,
-  fetchEventSource,
-} from "@fortaine/fetch-event-source";
-
-import Locale from "../../locales";
-import { prettyObject } from "@/app/utils/format";
 import { getMessageTextContent, isVisionModel } from "@/app/utils";
 import { preProcessImageContent, stream } from "@/app/utils/chat";
 import { cloudflareAIGatewayUrl } from "@/app/utils/cloudflare";
@@ -80,6 +73,10 @@ const ClaudeMapper = {
 const keys = ["claude-2, claude-instant-1"];

 export class ClaudeApi implements LLMApi {
+  speech(options: SpeechOptions): Promise<ArrayBuffer> {
+    throw new Error("Method not implemented.");
+  }
+
   extractMessage(res: any) {
     console.log("[Response] claude response: ", res);

@@ -14,6 +14,7 @@ import {
   LLMApi,
   LLMModel,
   MultimodalContent,
+  SpeechOptions,
 } from "../api";
 import Locale from "../../locales";
 import {
@@ -75,6 +76,10 @@ export class ErnieApi implements LLMApi {
     return [baseUrl, path].join("/");
   }

+  speech(options: SpeechOptions): Promise<ArrayBuffer> {
+    throw new Error("Method not implemented.");
+  }
+
   async chat(options: ChatOptions) {
     const messages = options.messages.map((v) => ({
       // "error_code": 336006, "error_msg": "the role of message with even index in the messages must be user or function",

@@ -13,6 +13,7 @@ import {
   LLMApi,
   LLMModel,
   MultimodalContent,
+  SpeechOptions,
 } from "../api";
 import Locale from "../../locales";
 import {
@@ -77,6 +78,10 @@ export class DoubaoApi implements LLMApi {
     return res.choices?.at(0)?.message?.content ?? "";
   }

+  speech(options: SpeechOptions): Promise<ArrayBuffer> {
+    throw new Error("Method not implemented.");
+  }
+
   async chat(options: ChatOptions) {
     const messages = options.messages.map((v) => ({
       role: v.role,

@@ -1,5 +1,12 @@
 import { ApiPath, Google, REQUEST_TIMEOUT_MS } from "@/app/constant";
-import { ChatOptions, getHeaders, LLMApi, LLMModel, LLMUsage } from "../api";
+import {
+  ChatOptions,
+  getHeaders,
+  LLMApi,
+  LLMModel,
+  LLMUsage,
+  SpeechOptions,
+} from "../api";
 import { useAccessStore, useAppConfig, useChatStore } from "@/app/store";
 import { getClientConfig } from "@/app/config/client";
 import { DEFAULT_API_HOST } from "@/app/constant";
@@ -56,6 +63,10 @@ export class GeminiProApi implements LLMApi {
       ""
     );
   }
+  speech(options: SpeechOptions): Promise<ArrayBuffer> {
+    throw new Error("Method not implemented.");
+  }
+
   async chat(options: ChatOptions): Promise<void> {
     const apiClient = this;
     let multimodal = false;

@@ -7,7 +7,13 @@ import {
 } from "@/app/constant";
 import { useAccessStore, useAppConfig, useChatStore } from "@/app/store";
-import { ChatOptions, getHeaders, LLMApi, LLMModel } from "../api";
+import {
+  ChatOptions,
+  getHeaders,
+  LLMApi,
+  LLMModel,
+  SpeechOptions,
+} from "../api";
 import Locale from "../../locales";
 import {
   EventStreamContentType,
@@ -17,7 +23,7 @@ import { prettyObject } from "@/app/utils/format";
 import { getClientConfig } from "@/app/config/client";
 import { getMessageTextContent } from "@/app/utils";

-import { OpenAIListModelResponse, RequestPayload } from "./openai";
+import { RequestPayload } from "./openai";

 export class SparkApi implements LLMApi {
   private disableListModels = true;
@@ -53,6 +59,10 @@ export class SparkApi implements LLMApi {
     return res.choices?.at(0)?.message?.content ?? "";
   }

+  speech(options: SpeechOptions): Promise<ArrayBuffer> {
+    throw new Error("Method not implemented.");
+  }
+
   async chat(options: ChatOptions) {
     const messages: ChatOptions["messages"] = [];
     for (const v of options.messages) {

@@ -3,10 +3,8 @@
 import {
   ApiPath,
   DEFAULT_API_HOST,
-  DEFAULT_MODELS,
   Moonshot,
   REQUEST_TIMEOUT_MS,
-  ServiceProvider,
 } from "@/app/constant";
 import {
   useAccessStore,
@@ -15,28 +13,17 @@ import {
   ChatMessageTool,
   usePluginStore,
 } from "@/app/store";
-import { collectModelsWithDefaultModel } from "@/app/utils/model";
-import { preProcessImageContent, stream } from "@/app/utils/chat";
-import { cloudflareAIGatewayUrl } from "@/app/utils/cloudflare";
+import { stream } from "@/app/utils/chat";
 import {
   ChatOptions,
   getHeaders,
   LLMApi,
   LLMModel,
-  LLMUsage,
-  MultimodalContent,
+  SpeechOptions,
 } from "../api";
-import Locale from "../../locales";
-import {
-  EventStreamContentType,
-  fetchEventSource,
-} from "@fortaine/fetch-event-source";
-import { prettyObject } from "@/app/utils/format";
 import { getClientConfig } from "@/app/config/client";
 import { getMessageTextContent } from "@/app/utils";
-import { OpenAIListModelResponse, RequestPayload } from "./openai";
+import { RequestPayload } from "./openai";

 export class MoonshotApi implements LLMApi {
   private disableListModels = true;
@@ -72,6 +59,10 @@ export class MoonshotApi implements LLMApi {
     return res.choices?.at(0)?.message?.content ?? "";
   }

+  speech(options: SpeechOptions): Promise<ArrayBuffer> {
+    throw new Error("Method not implemented.");
+  }
+
   async chat(options: ChatOptions) {
     const messages: ChatOptions["messages"] = [];
     for (const v of options.messages) {

@@ -33,17 +33,12 @@ import {
   LLMModel,
   LLMUsage,
   MultimodalContent,
+  SpeechOptions,
 } from "../api";
 import Locale from "../../locales";
-import {
-  EventStreamContentType,
-  fetchEventSource,
-} from "@fortaine/fetch-event-source";
-import { prettyObject } from "@/app/utils/format";
 import { getClientConfig } from "@/app/config/client";
 import {
   getMessageTextContent,
-  getMessageImages,
   isVisionModel,
   isDalle3 as _isDalle3,
 } from "@/app/utils";
@@ -147,6 +142,44 @@ export class ChatGPTApi implements LLMApi {
     return res.choices?.at(0)?.message?.content ?? res;
   }

+  async speech(options: SpeechOptions): Promise<ArrayBuffer> {
+    const requestPayload = {
+      model: options.model,
+      input: options.input,
+      voice: options.voice,
+      response_format: options.response_format,
+      speed: options.speed,
+    };
+
+    console.log("[Request] openai speech payload: ", requestPayload);
+
+    const controller = new AbortController();
+    options.onController?.(controller);
+
+    try {
+      const speechPath = this.path(OpenaiPath.SpeechPath);
+      const speechPayload = {
+        method: "POST",
+        body: JSON.stringify(requestPayload),
+        signal: controller.signal,
+        headers: getHeaders(),
+      };
+
+      // make a fetch request
+      const requestTimeoutId = setTimeout(
+        () => controller.abort(),
+        REQUEST_TIMEOUT_MS,
+      );
+
+      const res = await fetch(speechPath, speechPayload);
+      clearTimeout(requestTimeoutId);
+      return await res.arrayBuffer();
+    } catch (e) {
+      console.log("[Request] failed to make a speech request", e);
+      throw e;
+    }
+  }
+
   async chat(options: ChatOptions) {
     const modelConfig = {
       ...useAppConfig.getState().modelConfig,
@@ -244,6 +277,7 @@ export class ChatGPTApi implements LLMApi {
       );
     }

     if (shouldStream) {
+      let index = -1;
       const [tools, funcs] = usePluginStore
         .getState()
         .getAsTools(
@@ -269,10 +303,10 @@ export class ChatGPTApi implements LLMApi {
           }>;
           const tool_calls = choices[0]?.delta?.tool_calls;
           if (tool_calls?.length > 0) {
-            const index = tool_calls[0]?.index;
             const id = tool_calls[0]?.id;
             const args = tool_calls[0]?.function?.arguments;
             if (id) {
+              index += 1;
               runTools.push({
                 id,
                 type: tool_calls[0]?.type,
@@ -294,6 +328,8 @@ export class ChatGPTApi implements LLMApi {
           toolCallMessage: any,
           toolCallResult: any[],
         ) => {
+          // reset index value
+          index = -1;
           // @ts-ignore
           requestPayload?.messages?.splice(
             // @ts-ignore

@@ -8,6 +8,7 @@ import {
   LLMApi,
   LLMModel,
   MultimodalContent,
+  SpeechOptions,
 } from "../api";
 import Locale from "../../locales";
 import {
@@ -89,6 +90,10 @@ export class HunyuanApi implements LLMApi {
     return res.Choices?.at(0)?.Message?.Content ?? "";
   }

+  speech(options: SpeechOptions): Promise<ArrayBuffer> {
+    throw new Error("Method not implemented.");
+  }
+
   async chat(options: ChatOptions) {
     const visionModel = isVisionModel(options.config.model);
     const messages = options.messages.map((v, index) => ({

@@ -38,6 +38,7 @@ interface ChatCommands {
   next?: Command;
   prev?: Command;
   clear?: Command;
+  fork?: Command;
   del?: Command;
 }

@@ -7,7 +7,6 @@ import {
   useImperativeHandle,
 } from "react";
 import { useParams } from "react-router";
-import { useWindowSize } from "@/app/utils";
 import { IconButton } from "./button";
 import { nanoid } from "nanoid";
 import ExportIcon from "../icons/share.svg";

@@ -1,5 +1,4 @@
 import DeleteIcon from "../icons/delete.svg";
-import BotIcon from "../icons/bot.svg";

 import styles from "./home.module.scss";
 import {
@@ -12,7 +11,7 @@ import {
 import { useChatStore } from "../store";

 import Locale from "../locales";
-import { Link, useLocation, useNavigate } from "react-router-dom";
+import { useLocation, useNavigate } from "react-router-dom";
 import { Path } from "../constant";
 import { MaskAvatar } from "./mask";
 import { Mask } from "../store/mask";

@@ -15,6 +15,8 @@ import RenameIcon from "../icons/rename.svg";
 import ExportIcon from "../icons/share.svg";
 import ReturnIcon from "../icons/return.svg";
 import CopyIcon from "../icons/copy.svg";
+import SpeakIcon from "../icons/speak.svg";
+import SpeakStopIcon from "../icons/speak-stop.svg";
 import LoadingIcon from "../icons/three-dots.svg";
 import LoadingButtonIcon from "../icons/loading.svg";
 import PromptIcon from "../icons/prompt.svg";
@@ -96,7 +98,8 @@ import {
 import { useNavigate } from "react-router-dom";
 import {
   CHAT_PAGE_SIZE,
-  LAST_INPUT_KEY,
+  DEFAULT_TTS_ENGINE,
+  ModelProvider,
   Path,
   REQUEST_TIMEOUT_MS,
   UNFINISHED_INPUT,
@@ -113,6 +116,11 @@ import { useAllModels } from "../utils/hooks";
 import { MultimodalContent } from "../client/api";

 const localStorage = safeLocalStorage();
+import { ClientApi } from "../client/api";
+import { createTTSPlayer } from "../utils/audio";
+import { MsEdgeTTS, OUTPUT_FORMAT } from "../utils/ms_edge_tts";
+
+const ttsPlayer = createTTSPlayer();

 const Markdown = dynamic(async () => (await import("./markdown")).Markdown, {
   loading: () => <LoadingIcon />,
@@ -443,6 +451,7 @@ export function ChatActions(props: {
   hitBottom: boolean;
   uploading: boolean;
   setShowShortcutKeyModal: React.Dispatch<React.SetStateAction<boolean>>;
+  setUserInput: (input: string) => void;
 }) {
   const config = useAppConfig();
   const navigate = useNavigate();
@@ -981,6 +990,7 @@ function _Chat() {
       chatStore.updateCurrentSession(
         (session) => (session.clearContextIndex = session.messages.length),
       ),
+    fork: () => chatStore.forkSession(),
     del: () => chatStore.deleteSession(chatStore.currentSessionIndex),
   });

@@ -1184,10 +1194,55 @@ function _Chat() {
     });
   };

+  const accessStore = useAccessStore();
+  const [speechStatus, setSpeechStatus] = useState(false);
+  const [speechLoading, setSpeechLoading] = useState(false);
+
+  async function openaiSpeech(text: string) {
+    if (speechStatus) {
+      ttsPlayer.stop();
+      setSpeechStatus(false);
+    } else {
+      var api: ClientApi;
+      api = new ClientApi(ModelProvider.GPT);
+      const config = useAppConfig.getState();
+      setSpeechLoading(true);
+      ttsPlayer.init();
+      let audioBuffer: ArrayBuffer;
+      const { markdownToTxt } = require("markdown-to-txt");
+      const textContent = markdownToTxt(text);
+      if (config.ttsConfig.engine !== DEFAULT_TTS_ENGINE) {
+        const edgeVoiceName = accessStore.edgeVoiceName();
+        const tts = new MsEdgeTTS();
+        await tts.setMetadata(
+          edgeVoiceName,
+          OUTPUT_FORMAT.AUDIO_24KHZ_96KBITRATE_MONO_MP3,
+        );
+        audioBuffer = await tts.toArrayBuffer(textContent);
+      } else {
+        audioBuffer = await api.llm.speech({
+          model: config.ttsConfig.model,
+          input: textContent,
+          voice: config.ttsConfig.voice,
+          speed: config.ttsConfig.speed,
+        });
+      }
+      setSpeechStatus(true);
+      ttsPlayer
+        .play(audioBuffer, () => {
+          setSpeechStatus(false);
+        })
+        .catch((e) => {
+          console.error("[OpenAI Speech]", e);
+          showToast(prettyObject(e));
+          setSpeechStatus(false);
+        })
+        .finally(() => setSpeechLoading(false));
+    }
+  }
+
   const context: RenderMessage[] = useMemo(() => {
     return session.mask.hideContext ? [] : session.mask.context.slice();
   }, [session.mask.context, session.mask.hideContext]);

-  const accessStore = useAccessStore();
-
   if (
     context.length === 0 &&
@@ -1724,6 +1779,25 @@ function _Chat() {
                           )
                         }
                       />
+                      {config.ttsConfig.enable && (
+                        <ChatAction
+                          text={
+                            speechStatus
+                              ? Locale.Chat.Actions.StopSpeech
+                              : Locale.Chat.Actions.Speech
+                          }
+                          icon={
+                            speechStatus ? (
+                              <SpeakStopIcon />
+                            ) : (
+                              <SpeakIcon />
+                            )
+                          }
+                          onClick={() =>
+                            openaiSpeech(getMessageTextContent(message))
+                          }
+                        />
+                      )}
                     </>
                   )}
                 </div>
@@ -1842,6 +1916,7 @@ function _Chat() {
               onSearch("");
             }}
             setShowShortcutKeyModal={setShowShortcutKeyModal}
+            setUserInput={setUserInput}
           />
           <label
             className={`${styles["chat-input-panel-inner"]} ${

@@ -1,5 +1,5 @@
 /* eslint-disable @next/next/no-img-element */
-import { ChatMessage, ModelType, useAppConfig, useChatStore } from "../store";
+import { ChatMessage, useAppConfig, useChatStore } from "../store";
 import Locale from "../locales";
 import styles from "./exporter.module.scss";
 import {

@@ -37,7 +37,7 @@ import Locale, { AllLangs, ALL_LANG_OPTIONS, Lang } from "../locales";
 import { useNavigate } from "react-router-dom";

 import chatStyle from "./chat.module.scss";
-import { useEffect, useState } from "react";
+import { useState } from "react";
 import {
   copyToClipboard,
   downloadAs,
@@ -48,7 +48,6 @@ import { Updater } from "../typing";
 import { ModelConfigList } from "./model-config";
 import { FileName, Path } from "../constant";
 import { BUILTIN_MASK_STORE } from "../masks";
-import { nanoid } from "nanoid";
 import {
   DragDropContext,
   Droppable,

@@ -28,7 +28,7 @@ import {
 } from "./ui-lib";
 import Locale from "../locales";
 import { useNavigate } from "react-router-dom";
-import { useEffect, useState } from "react";
+import { useState } from "react";
 import { getClientConfig } from "../config/client";

 export function PluginPage() {

@@ -80,6 +80,7 @@ import { useSyncStore } from "../store/sync";
 import { nanoid } from "nanoid";
 import { useMaskStore } from "../store/mask";
 import { ProviderType } from "../utils/cloud";
+import { TTSConfigList } from "./tts-config";

 function EditPromptModal(props: { id: string; onClose: () => void }) {
   const promptStore = usePromptStore();
@@ -1646,6 +1647,17 @@ export function Settings() {
         <UserPromptModal onClose={() => setShowPromptModal(false)} />
       )}

+      <List>
+        <TTSConfigList
+          ttsConfig={config.ttsConfig}
+          updateConfig={(updater) => {
+            const ttsConfig = { ...config.ttsConfig };
+            updater(ttsConfig);
+            config.update((config) => (config.ttsConfig = ttsConfig));
+          }}
+        />
+      </List>
+
       <DangerItems />
     </div>
   </ErrorBoundary>

@@ -7,7 +7,6 @@ import SettingsIcon from "../icons/settings.svg";
 import GithubIcon from "../icons/github.svg";
 import ChatGptIcon from "../icons/chatgpt.svg";
 import AddIcon from "../icons/add.svg";
-import CloseIcon from "../icons/close.svg";
 import DeleteIcon from "../icons/delete.svg";
 import MaskIcon from "../icons/mask.svg";
 import DragIcon from "../icons/drag.svg";
@@ -254,11 +253,6 @@ export function SideBar(props: { className?: string }) {
       {showPluginSelector && (
         <Selector
           items={[
-            {
-              title: "👇 Please select the plugin you need to use",
-              value: "-",
-              disable: true,
-            },
             ...PLUGINS.map((item) => {
               return {
                 title: item.name,

@@ -0,0 +1,133 @@
import { TTSConfig, TTSConfigValidator } from "../store";

import Locale from "../locales";
import { ListItem, Select } from "./ui-lib";
import {
  DEFAULT_TTS_ENGINE,
  DEFAULT_TTS_ENGINES,
  DEFAULT_TTS_MODELS,
  DEFAULT_TTS_VOICES,
} from "../constant";
import { InputRange } from "./input-range";

export function TTSConfigList(props: {
  ttsConfig: TTSConfig;
  updateConfig: (updater: (config: TTSConfig) => void) => void;
}) {
  return (
    <>
      <ListItem
        title={Locale.Settings.TTS.Enable.Title}
        subTitle={Locale.Settings.TTS.Enable.SubTitle}
      >
        <input
          type="checkbox"
          checked={props.ttsConfig.enable}
          onChange={(e) =>
            props.updateConfig(
              (config) => (config.enable = e.currentTarget.checked),
            )
          }
        ></input>
      </ListItem>
      {/* <ListItem
        title={Locale.Settings.TTS.Autoplay.Title}
        subTitle={Locale.Settings.TTS.Autoplay.SubTitle}
      >
        <input
          type="checkbox"
          checked={props.ttsConfig.autoplay}
          onChange={(e) =>
            props.updateConfig(
              (config) => (config.autoplay = e.currentTarget.checked),
            )
          }
        ></input>
      </ListItem> */}
      <ListItem title={Locale.Settings.TTS.Engine}>
        <Select
          value={props.ttsConfig.engine}
          onChange={(e) => {
            props.updateConfig(
              (config) =>
                (config.engine = TTSConfigValidator.engine(
                  e.currentTarget.value,
                )),
            );
          }}
        >
          {DEFAULT_TTS_ENGINES.map((v, i) => (
            <option value={v} key={i}>
              {v}
            </option>
          ))}
        </Select>
      </ListItem>
      {props.ttsConfig.engine === DEFAULT_TTS_ENGINE && (
        <>
          <ListItem title={Locale.Settings.TTS.Model}>
            <Select
              value={props.ttsConfig.model}
              onChange={(e) => {
                props.updateConfig(
                  (config) =>
                    (config.model = TTSConfigValidator.model(
                      e.currentTarget.value,
                    )),
                );
              }}
            >
              {DEFAULT_TTS_MODELS.map((v, i) => (
                <option value={v} key={i}>
                  {v}
                </option>
              ))}
            </Select>
          </ListItem>
          <ListItem
            title={Locale.Settings.TTS.Voice.Title}
            subTitle={Locale.Settings.TTS.Voice.SubTitle}
          >
            <Select
              value={props.ttsConfig.voice}
              onChange={(e) => {
                props.updateConfig(
                  (config) =>
                    (config.voice = TTSConfigValidator.voice(
                      e.currentTarget.value,
                    )),
                );
              }}
            >
              {DEFAULT_TTS_VOICES.map((v, i) => (
                <option value={v} key={i}>
                  {v}
                </option>
              ))}
            </Select>
          </ListItem>
          <ListItem
            title={Locale.Settings.TTS.Speed.Title}
            subTitle={Locale.Settings.TTS.Speed.SubTitle}
          >
            <InputRange
              aria={Locale.Settings.TTS.Speed.Title}
              value={props.ttsConfig.speed?.toFixed(1)}
              min="0.3"
              max="4.0"
              step="0.1"
              onChange={(e) => {
                props.updateConfig(
                  (config) =>
                    (config.speed = TTSConfigValidator.speed(
                      e.currentTarget.valueAsNumber,
                    )),
                );
              }}
            ></InputRange>
          </ListItem>
        </>
      )}
    </>
  );
}

@@ -0,0 +1,119 @@
@import "../styles/animation.scss";

.plugin-page {
  height: 100%;
  display: flex;
  flex-direction: column;

  .plugin-page-body {
    padding: 20px;
    overflow-y: auto;

    .plugin-filter {
      width: 100%;
      max-width: 100%;
      margin-bottom: 20px;
      animation: slide-in ease 0.3s;
      height: 40px;
      display: flex;

      .search-bar {
        flex-grow: 1;
        max-width: 100%;
        min-width: 0;
        outline: none;
      }

      .search-bar:focus {
        border: 1px solid var(--primary);
      }

      .plugin-filter-lang {
        height: 100%;
        margin-left: 10px;
      }

      .plugin-create {
        height: 100%;
        margin-left: 10px;
        box-sizing: border-box;
        min-width: 80px;
      }
    }

    .plugin-item {
      display: flex;
      justify-content: space-between;
      padding: 20px;
      border: var(--border-in-light);
      animation: slide-in ease 0.3s;

      &:not(:last-child) {
        border-bottom: 0;
      }

      &:first-child {
        border-top-left-radius: 10px;
        border-top-right-radius: 10px;
      }

      &:last-child {
        border-bottom-left-radius: 10px;
        border-bottom-right-radius: 10px;
      }

      .plugin-header {
        display: flex;
        align-items: center;

        .plugin-icon {
          display: flex;
          align-items: center;
          justify-content: center;
          margin-right: 10px;
        }

        .plugin-title {
          .plugin-name {
            font-size: 14px;
            font-weight: bold;
          }
          .plugin-info {
            font-size: 12px;
          }
          .plugin-runtime-warning {
            font-size: 12px;
            color: #f86c6c;
          }
        }
      }

      .plugin-actions {
        display: flex;
        flex-wrap: nowrap;
        transition: all ease 0.3s;
        justify-content: center;
        align-items: center;
      }

      @media screen and (max-width: 600px) {
        display: flex;
        flex-direction: column;
        padding-bottom: 10px;
        border-radius: 10px;
        margin-bottom: 20px;
        box-shadow: var(--card-shadow);

        &:not(:last-child) {
          border-bottom: var(--border-in-light);
        }

        .plugin-actions {
          width: 100%;
          justify-content: space-between;
          padding-top: 10px;
        }
      }
    }
  }
}

@@ -154,8 +154,8 @@ export const getServerSideConfig = () => {
   //     `[Server Config] using ${randomIndex + 1} of ${apiKeys.length} api key`,
   //   );

-  const allowedWebDevEndpoints = (
-    process.env.WHITE_WEBDEV_ENDPOINTS ?? ""
+  const allowedWebDavEndpoints = (
+    process.env.WHITE_WEBDAV_ENDPOINTS ?? ""
   ).split(",");

   return {
@@ -229,6 +229,6 @@ export const getServerSideConfig = () => {
     disableFastLink: !!process.env.DISABLE_FAST_LINK,
     customModels,
     defaultModel,
-    allowedWebDevEndpoints,
+    allowedWebDavEndpoints,
   };
 };

@@ -1,5 +1,3 @@
-import path from "path";
-
 export const OWNER = "ChatGPTNextWeb";
 export const REPO = "ChatGPT-Next-Web";
 export const REPO_URL = `https://github.com/${OWNER}/${REPO}`;
@@ -152,6 +150,7 @@ export const Anthropic = {
 export const OpenaiPath = {
   ChatPath: "v1/chat/completions",
+  SpeechPath: "v1/audio/speech",
   ImagePath: "v1/images/generations",
   UsagePath: "dashboard/billing/usage",
   SubsPath: "dashboard/billing/subscription",
@@ -258,6 +257,20 @@ export const KnowledgeCutOffDate: Record<string, string> = {
   "gemini-pro-vision": "2023-12",
 };

+export const DEFAULT_TTS_ENGINE = "OpenAI-TTS";
+export const DEFAULT_TTS_ENGINES = ["OpenAI-TTS", "Edge-TTS"];
+export const DEFAULT_TTS_MODEL = "tts-1";
+export const DEFAULT_TTS_VOICE = "alloy";
+export const DEFAULT_TTS_MODELS = ["tts-1", "tts-1-hd"];
+export const DEFAULT_TTS_VOICES = [
+  "alloy",
+  "echo",
+  "fable",
+  "onyx",
+  "nova",
+  "shimmer",
+];
+
 const openaiModels = [
   "gpt-3.5-turbo",
   "gpt-3.5-turbo-1106",
@@ -279,7 +292,7 @@ const openaiModels = [
   "gpt-4-1106-preview",
   "dall-e-3",
   "o1-mini",
-  "o1-preview"
+  "o1-preview",
 ];

 const googleModels = [

app/icons/speak-stop.svg (new file)

@@ -0,0 +1 @@
<svg xmlns="http://www.w3.org/2000/svg" fill="none" width="16" height="16" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" class="w-4 h-4"><path stroke-linecap="round" stroke-linejoin="round" d="M17.25 9.75 19.5 12m0 0 2.25 2.25M19.5 12l2.25-2.25M19.5 12l-2.25 2.25m-10.5-6 4.72-4.72a.75.75 0 0 1 1.28.53v15.88a.75.75 0 0 1-1.28.53l-4.72-4.72H4.51c-.88 0-1.704-.507-1.938-1.354A9.009 9.009 0 0 1 2.25 12c0-.83.112-1.633.322-2.396C2.806 8.756 3.63 8.25 4.51 8.25H6.75Z"></path></svg>

app/icons/speak.svg (new file)

@@ -0,0 +1 @@
<svg xmlns="http://www.w3.org/2000/svg" fill="none" width="16" height="16" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" class="w-4 h-4"><path stroke-linecap="round" stroke-linejoin="round" d="M19.114 5.636a9 9 0 010 12.728M16.463 8.288a5.25 5.25 0 010 7.424M6.75 8.25l4.72-4.72a.75.75 0 011.28.53v15.88a.75.75 0 01-1.28.53l-4.72-4.72H4.51c-.88 0-1.704-.507-1.938-1.354A9.01 9.01 0 012.25 12c0-.83.112-1.633.322-2.396C2.806 8.756 3.63 8.25 4.51 8.25H6.75z"></path></svg>

app/icons/voice-white.svg (new file)

@@ -0,0 +1,16 @@
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="16" height="16" fill="none" viewBox="0 0 20 20">
  <defs>
    <rect id="path_0" width="20" height="20" x="0" y="0" />
  </defs>
  <g opacity="1" transform="translate(0 0) rotate(0 8 8)">
    <mask id="bg-mask-0" fill="#fff">
      <use xlink:href="#path_0" />
    </mask>
    <g mask="url(#bg-mask-0)">
      <path d="M7 4a3 3 0 016 0v6a3 3 0 11-6 0V4z" fill="#333333">
      </path>
      <path d="M5.5 9.643a.75.75 0 00-1.5 0V10c0 3.06 2.29 5.585 5.25 5.954V17.5h-1.5a.75.75 0 000 1.5h4.5a.75.75 0 000-1.5h-1.5v-1.546A6.001 6.001 0 0016 10v-.357a.75.75 0 00-1.5 0V10a4.5 4.5 0 01-9 0v-.357z" fill="#333333">
      </path>
    </g>
  </g>
</svg>

@@ -41,7 +41,11 @@ export default function RootLayout({
           name="viewport"
           content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no"
         />
-        <link rel="manifest" href="/site.webmanifest" crossOrigin="use-credentials"></link>
+        <link
+          rel="manifest"
+          href="/site.webmanifest"
+          crossOrigin="use-credentials"
+        ></link>
         <script src="/serviceWorkerRegister.js" defer></script>
       </head>
       <body>

@@ -1,4 +1,3 @@
-import { ShortcutKeyModal } from "../components/chat";
 import { getClientConfig } from "../config/client";
 import { SubmitKey } from "../store/config";
@@ -46,6 +45,8 @@ const cn = {
     FullScreen: "全屏",
     RefreshTitle: "刷新标题",
     RefreshToast: "已发送刷新标题请求",
+    Speech: "朗读",
+    StopSpeech: "停止",
   },
   Commands: {
     new: "新建聊天",
@@ -53,6 +54,7 @@ const cn = {
     next: "下一个聊天",
     prev: "上一个聊天",
     clear: "清除上下文",
+    fork: "复制聊天",
     del: "删除聊天",
   },
   InputActions: {
@@ -79,6 +81,8 @@ const cn = {
       return inputHints + ",/ 触发补全,: 触发命令";
     },
     Send: "发送",
+    StartSpeak: "说话",
+    StopSpeak: "停止",
     Config: {
       Reset: "清除记忆",
       SaveAs: "存为面具",
@@ -496,6 +500,26 @@ const cn = {
       Title: "频率惩罚度 (frequency_penalty)",
       SubTitle: "值越大,越有可能降低重复字词",
     },
+    TTS: {
+      Enable: {
+        Title: "启用文本转语音",
+        SubTitle: "启用文本生成语音服务",
+      },
+      Autoplay: {
+        Title: "启用自动朗读",
+        SubTitle: "自动生成语音并播放,需先开启文本转语音开关",
+      },
+      Model: "模型",
+      Engine: "转换引擎",
+      Voice: {
+        Title: "声音",
+        SubTitle: "生成语音时使用的声音",
+      },
+      Speed: {
+        Title: "速度",
+        SubTitle: "生成语音的速度",
+      },
+    },
   },
   Store: {
     DefaultTopic: "新的聊天",

@@ -47,6 +47,8 @@ const en: LocaleType = {
     FullScreen: "FullScreen",
     RefreshTitle: "Refresh Title",
     RefreshToast: "Title refresh request sent",
+    Speech: "Play",
+    StopSpeech: "Stop",
   },
   Commands: {
     new: "Start a new chat",
@@ -54,6 +56,7 @@ const en: LocaleType = {
     next: "Next Chat",
     prev: "Previous Chat",
     clear: "Clear Context",
+    fork: "Copy Chat",
     del: "Delete Chat",
   },
   InputActions: {
@@ -80,6 +83,8 @@ const en: LocaleType = {
       return inputHints + ", / to search prompts, : to use commands";
     },
     Send: "Send",
+    StartSpeak: "Start Speak",
+    StopSpeak: "Stop Speak",
     Config: {
       Reset: "Reset to Default",
       SaveAs: "Save as Mask",
@@ -502,6 +507,27 @@ const en: LocaleType = {
       SubTitle:
         "A larger value decreasing the likelihood to repeat the same line",
     },
+    TTS: {
+      Enable: {
+        Title: "Enable TTS",
+        SubTitle: "Enable text-to-speech service",
+      },
+      Autoplay: {
+        Title: "Enable Autoplay",
+        SubTitle:
+          "Automatically generate speech and play, you need to enable the text-to-speech switch first",
+      },
+      Model: "Model",
+      Voice: {
+        Title: "Voice",
+        SubTitle: "The voice to use when generating the audio",
+      },
+      Speed: {
+        Title: "Speed",
+        SubTitle: "The speed of the generated audio",
+      },
+      Engine: "TTS Engine",
+    },
   },
   Store: {
     DefaultTopic: "New Conversation",

@@ -134,3 +134,34 @@ export function getISOLang() {
   const lang = getLang();
   return isoLangString[lang] ?? lang;
 }

+const DEFAULT_STT_LANG = "zh-CN";
+export const STT_LANG_MAP: Record<Lang, string> = {
+  cn: "zh-CN",
+  en: "en-US",
+  pt: "pt-BR",
+  tw: "zh-TW",
+  jp: "ja-JP",
+  ko: "ko-KR",
+  id: "id-ID",
+  fr: "fr-FR",
+  es: "es-ES",
+  it: "it-IT",
+  tr: "tr-TR",
+  de: "de-DE",
+  vi: "vi-VN",
+  ru: "ru-RU",
+  cs: "cs-CZ",
+  no: "no-NO",
+  ar: "ar-SA",
+  bn: "bn-BD",
+  sk: "sk-SK",
+};
+
+export function getSTTLang(): string {
+  try {
+    return STT_LANG_MAP[getLang()];
+  } catch {
+    return DEFAULT_STT_LANG;
+  }
+}

@@ -1,6 +1,5 @@
 import { getClientConfig } from "../config/client";
 import { SubmitKey } from "../store/config";
-import { LocaleType } from "./index";
 import type { PartialLocaleType } from "./index";

 // if you are adding a new translation, please use PartialLocaleType instead of LocaleType

@@ -1,7 +1,4 @@
 import { Mask } from "../store/mask";
-import { CN_MASKS } from "./cn";
-import { TW_MASKS } from "./tw";
-import { EN_MASKS } from "./en";
 import { type BuiltinMask } from "./typing";

 export { type BuiltinMask } from "./typing";

@@ -120,6 +120,9 @@ const DEFAULT_ACCESS_STATE = {
   disableFastLink: false,
   customModels: "",
   defaultModel: "",
+
+  // tts config
+  edgeTTSVoiceName: "zh-CN-YunxiNeural",
 };

 export const useAccessStore = createPersistStore(
@@ -132,6 +135,12 @@ export const useAccessStore = createPersistStore(
       return get().needCode;
     },

+    edgeVoiceName() {
+      this.fetch();
+
+      return get().edgeTTSVoiceName;
+    },
+
     isValidOpenAI() {
       return ensure(get(), ["openaiApiKey"]);
     },
@@ -204,8 +213,8 @@ export const useAccessStore = createPersistStore(
         .then((res) => {
           // Set default model from env request
           let defaultModel = res.defaultModel ?? "";
-          DEFAULT_CONFIG.modelConfig.model =
-            defaultModel !== "" ? defaultModel : "gpt-3.5-turbo";
+          if (defaultModel !== "")
+            DEFAULT_CONFIG.modelConfig.model = defaultModel;

           return res;
         })
         .then((res: DangerConfig) => {

@@ -170,6 +170,28 @@ export const useChatStore = createPersistStore(
   }

   const methods = {
+    forkSession() {
+      // get the current session
+      const currentSession = get().currentSession();
+      if (!currentSession) return;
+
+      const newSession = createEmptySession();
+
+      newSession.topic = currentSession.topic;
+      newSession.messages = [...currentSession.messages];
+      newSession.mask = {
+        ...currentSession.mask,
+        modelConfig: {
+          ...currentSession.mask.modelConfig,
+        },
+      };
+
+      set((state) => ({
+        currentSessionIndex: 0,
+        sessions: [newSession, ...state.sessions],
+      }));
+    },
+
     clearSessions() {
       set(() => ({
         sessions: [createEmptySession()],

@@ -5,12 +5,21 @@ import {
   DEFAULT_INPUT_TEMPLATE,
   DEFAULT_MODELS,
   DEFAULT_SIDEBAR_WIDTH,
+  DEFAULT_TTS_ENGINE,
+  DEFAULT_TTS_ENGINES,
+  DEFAULT_TTS_MODEL,
+  DEFAULT_TTS_MODELS,
+  DEFAULT_TTS_VOICE,
+  DEFAULT_TTS_VOICES,
   StoreKey,
   ServiceProvider,
 } from "../constant";
 import { createPersistStore } from "../utils/store";

 export type ModelType = (typeof DEFAULT_MODELS)[number]["name"];
+export type TTSModelType = (typeof DEFAULT_TTS_MODELS)[number];
+export type TTSVoiceType = (typeof DEFAULT_TTS_VOICES)[number];
+export type TTSEngineType = (typeof DEFAULT_TTS_ENGINES)[number];

 export enum SubmitKey {
   Enter = "Enter",
@@ -68,11 +77,21 @@ export const DEFAULT_CONFIG = {
     quality: "standard" as DalleQuality,
     style: "vivid" as DalleStyle,
   },
+
+  ttsConfig: {
+    enable: false,
+    autoplay: false,
+    engine: DEFAULT_TTS_ENGINE,
+    model: DEFAULT_TTS_MODEL,
+    voice: DEFAULT_TTS_VOICE,
+    speed: 1.0,
+  },
 };

 export type ChatConfig = typeof DEFAULT_CONFIG;

 export type ModelConfig = ChatConfig["modelConfig"];
+export type TTSConfig = ChatConfig["ttsConfig"];

 export function limitNumber(
   x: number,
@@ -87,6 +106,21 @@ export function limitNumber(
   return Math.min(max, Math.max(min, x));
 }

+export const TTSConfigValidator = {
+  engine(x: string) {
+    return x as TTSEngineType;
+  },
+  model(x: string) {
+    return x as TTSModelType;
+  },
+  voice(x: string) {
+    return x as TTSVoiceType;
+  },
+  speed(x: number) {
+    return limitNumber(x, 0.25, 4.0, 1.0);
+  },
+};
+
 export const ModalConfigValidator = {
   model(x: string) {
     return x as ModelType;
@@ -143,6 +177,21 @@ export const useAppConfig = createPersistStore(
   {
     name: StoreKey.Config,
     version: 4,
+
+    merge(persistedState, currentState) {
+      const state = persistedState as ChatConfig | undefined;
+      if (!state) return { ...currentState };
+      const models = currentState.models.slice();
+      state.models.forEach((pModel) => {
+        const idx = models.findIndex(
+          (v) => v.name === pModel.name && v.provider === pModel.provider,
+        );
+        if (idx !== -1) models[idx] = pModel;
+        else models.push(pModel);
+      });
+      return { ...currentState, ...state, models: models };
+    },
+
     migrate(persistedState, version) {
       const state = persistedState as ChatConfig;
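To see what the new `merge` hook does, here is a small illustrative sketch (the model entries are made up, and the shape is simplified to the fields `merge` actually touches):

// Hypothetical inputs: build-time defaults vs. what a user has persisted.
const currentModels = [
  { name: "gpt-4", provider: "openai", available: true },
  { name: "gpt-3.5-turbo", provider: "openai", available: true },
];
const persistedModels = [
  { name: "gpt-4", provider: "openai", available: false }, // user disabled it
  { name: "my-custom-model", provider: "openai", available: true }, // not in the defaults
];
// merge() keeps every default, lets a persisted entry with the same
// (name, provider) pair override it, and appends unknown persisted entries:
// => [gpt-4 (available: false), gpt-3.5-turbo, my-custom-model]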

@@ -1,5 +1,4 @@
 import OpenAPIClientAxios from "openapi-client-axios";
-import { getLang, Lang } from "../locales";
 import { StoreKey } from "../constant";
 import { nanoid } from "nanoid";
 import { createPersistStore } from "../utils/store";

@@ -1,7 +1,7 @@
 import Fuse from "fuse.js";
-import { getLang } from "../locales";
-import { StoreKey } from "../constant";
 import { nanoid } from "nanoid";
+import { StoreKey } from "../constant";
+import { getLang } from "../locales";
 import { createPersistStore } from "../utils/store";

 export interface Prompt {
@@ -147,6 +147,11 @@ export const usePromptStore = createPersistStore(
     },

     onRehydrateStorage(state) {
+      // Skip store rehydration on server side
+      if (typeof window === "undefined") {
+        return;
+      }
+
       const PROMPT_URL = "./prompts.json";

       type PromptList = Array<[string, string]>;

@@ -1,5 +1,4 @@
 import { getClientConfig } from "../config/client";
-import { Updater } from "../typing";
 import { ApiPath, STORAGE_KEY, StoreKey } from "../constant";
 import { createPersistStore } from "../utils/store";
 import {
@@ -100,15 +99,17 @@ export const useSyncStore = createPersistStore(
       const remoteState = await client.get(config.username);
       if (!remoteState || remoteState === "") {
         await client.set(config.username, JSON.stringify(localState));
-        console.log("[Sync] Remote state is empty, using local state instead.");
-        return
+        console.log(
+          "[Sync] Remote state is empty, using local state instead.",
+        );
+        return;
       } else {
         const parsedRemoteState = JSON.parse(
           await client.get(config.username),
         ) as AppState;
         mergeAppState(localState, parsedRemoteState);
         setLocalAppState(localState);
       }
     } catch (e) {
       console.log("[Sync] failed to get remote state", e);
       throw e;

@@ -8,8 +8,6 @@ import { getClientConfig } from "../config/client";
 import { createPersistStore } from "../utils/store";
 import ChatGptIcon from "../icons/chatgpt.png";
 import Locale from "../locales";
-import { use } from "react";
-import { useAppConfig } from ".";
 import { ClientApi } from "../client/api";

 const ONE_MINUTE = 60 * 1000;

@@ -3,8 +3,7 @@ import { showToast } from "./components/ui-lib";
 import Locale from "./locales";
 import { RequestMessage } from "./client/api";
 import { ServiceProvider, REQUEST_TIMEOUT_MS } from "./constant";
-import isObject from "lodash-es/isObject";
-import { fetch as tauriFetch, Body, ResponseType } from "@tauri-apps/api/http";
+import { fetch as tauriFetch, ResponseType } from "@tauri-apps/api/http";

 export function trimTopic(topic: string) {
   // Fix an issue where double quotes still show in the Indonesian language

45
app/utils/audio.ts Normal file
View File

@ -0,0 +1,45 @@
type TTSPlayer = {
init: () => void;
play: (audioBuffer: ArrayBuffer, onended: () => void | null) => Promise<void>;
stop: () => void;
};
export function createTTSPlayer(): TTSPlayer {
let audioContext: AudioContext | null = null;
let audioBufferSourceNode: AudioBufferSourceNode | null = null;
const init = () => {
audioContext = new (window.AudioContext || window.webkitAudioContext)();
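// keep the context suspended until play() resumes it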
audioContext.suspend();
};
const play = async (audioBuffer: ArrayBuffer, onended: () => void | null) => {
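// stop and detach any clip that is still playing before starting a new one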
if (audioBufferSourceNode) {
audioBufferSourceNode.stop();
audioBufferSourceNode.disconnect();
}
const buffer = await audioContext!.decodeAudioData(audioBuffer);
audioBufferSourceNode = audioContext!.createBufferSource();
audioBufferSourceNode.buffer = buffer;
audioBufferSourceNode.connect(audioContext!.destination);
audioContext!.resume().then(() => {
audioBufferSourceNode!.start();
});
audioBufferSourceNode.onended = onended;
};
const stop = () => {
if (audioBufferSourceNode) {
audioBufferSourceNode.stop();
audioBufferSourceNode.disconnect();
audioBufferSourceNode = null;
}
if (audioContext) {
audioContext.close();
audioContext = null;
}
};
return { init, play, stop };
}
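A short usage sketch for the player above; the fetch URL is illustrative, and `play` accepts any buffer that `decodeAudioData` can parse, such as the MP3 output of the TTS module below:

```ts
import { createTTSPlayer } from "./audio";

const player = createTTSPlayer();
player.init(); // call from a user gesture so the AudioContext may start

async function speak(url: string) {
  const mp3 = await (await fetch(url)).arrayBuffer(); // illustrative source
  await player.play(mp3, () => {
    console.log("[TTS] playback finished");
    player.stop(); // release the AudioContext once playback ends
  });
}
```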

View File

@ -1,5 +1,5 @@
import { getClientConfig } from "../config/client";
-import { ApiPath, DEFAULT_API_HOST } from "../constant";
+import { DEFAULT_API_HOST } from "../constant";

export function corsPath(path: string) {
  const baseUrl = getClientConfig()?.isApp ? `${DEFAULT_API_HOST}` : "";

app/utils/ms_edge_tts.ts Normal file (391 lines)
View File

@ -0,0 +1,391 @@
// import axios from "axios";
import { Buffer } from "buffer";
import { randomBytes } from "crypto";
import { Readable } from "stream";
// Modified according to https://github.com/Migushthe2nd/MsEdgeTTS
/**
* https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-synthesis-markup-voice#:~:text=Optional-,volume,-Indicates%20the%20volume
*/
export enum VOLUME {
SILENT = "silent",
X_SOFT = "x-soft",
SOFT = "soft",
MEDIUM = "medium",
LOUD = "loud",
X_LOUD = "x-LOUD",
DEFAULT = "default",
}
/**
* https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-synthesis-markup-voice#:~:text=Optional-,rate,-Indicates%20the%20speaking
*/
export enum RATE {
X_SLOW = "x-slow",
SLOW = "slow",
MEDIUM = "medium",
FAST = "fast",
X_FAST = "x-fast",
DEFAULT = "default",
}
/**
* https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-synthesis-markup-voice#:~:text=Optional-,pitch,-Indicates%20the%20baseline
*/
export enum PITCH {
X_LOW = "x-low",
LOW = "low",
MEDIUM = "medium",
HIGH = "high",
X_HIGH = "x-high",
DEFAULT = "default",
}
/**
* Only a few of the [possible formats](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/rest-text-to-speech#audio-outputs) are accepted.
*/
export enum OUTPUT_FORMAT {
// Streaming =============================
// AMR_WB_16000HZ = "amr-wb-16000hz",
// AUDIO_16KHZ_16BIT_32KBPS_MONO_OPUS = "audio-16khz-16bit-32kbps-mono-opus",
// AUDIO_16KHZ_32KBITRATE_MONO_MP3 = "audio-16khz-32kbitrate-mono-mp3",
// AUDIO_16KHZ_64KBITRATE_MONO_MP3 = "audio-16khz-64kbitrate-mono-mp3",
// AUDIO_16KHZ_128KBITRATE_MONO_MP3 = "audio-16khz-128kbitrate-mono-mp3",
// AUDIO_24KHZ_16BIT_24KBPS_MONO_OPUS = "audio-24khz-16bit-24kbps-mono-opus",
// AUDIO_24KHZ_16BIT_48KBPS_MONO_OPUS = "audio-24khz-16bit-48kbps-mono-opus",
AUDIO_24KHZ_48KBITRATE_MONO_MP3 = "audio-24khz-48kbitrate-mono-mp3",
AUDIO_24KHZ_96KBITRATE_MONO_MP3 = "audio-24khz-96kbitrate-mono-mp3",
// AUDIO_24KHZ_160KBITRATE_MONO_MP3 = "audio-24khz-160kbitrate-mono-mp3",
// AUDIO_48KHZ_96KBITRATE_MONO_MP3 = "audio-48khz-96kbitrate-mono-mp3",
// AUDIO_48KHZ_192KBITRATE_MONO_MP3 = "audio-48khz-192kbitrate-mono-mp3",
// OGG_16KHZ_16BIT_MONO_OPUS = "ogg-16khz-16bit-mono-opus",
// OGG_24KHZ_16BIT_MONO_OPUS = "ogg-24khz-16bit-mono-opus",
// OGG_48KHZ_16BIT_MONO_OPUS = "ogg-48khz-16bit-mono-opus",
// RAW_8KHZ_8BIT_MONO_ALAW = "raw-8khz-8bit-mono-alaw",
// RAW_8KHZ_8BIT_MONO_MULAW = "raw-8khz-8bit-mono-mulaw",
// RAW_8KHZ_16BIT_MONO_PCM = "raw-8khz-16bit-mono-pcm",
// RAW_16KHZ_16BIT_MONO_PCM = "raw-16khz-16bit-mono-pcm",
// RAW_16KHZ_16BIT_MONO_TRUESILK = "raw-16khz-16bit-mono-truesilk",
// RAW_22050HZ_16BIT_MONO_PCM = "raw-22050hz-16bit-mono-pcm",
// RAW_24KHZ_16BIT_MONO_PCM = "raw-24khz-16bit-mono-pcm",
// RAW_24KHZ_16BIT_MONO_TRUESILK = "raw-24khz-16bit-mono-truesilk",
// RAW_44100HZ_16BIT_MONO_PCM = "raw-44100hz-16bit-mono-pcm",
// RAW_48KHZ_16BIT_MONO_PCM = "raw-48khz-16bit-mono-pcm",
// WEBM_16KHZ_16BIT_MONO_OPUS = "webm-16khz-16bit-mono-opus",
// WEBM_24KHZ_16BIT_24KBPS_MONO_OPUS = "webm-24khz-16bit-24kbps-mono-opus",
WEBM_24KHZ_16BIT_MONO_OPUS = "webm-24khz-16bit-mono-opus",
// Non-streaming =============================
// RIFF_8KHZ_8BIT_MONO_ALAW = "riff-8khz-8bit-mono-alaw",
// RIFF_8KHZ_8BIT_MONO_MULAW = "riff-8khz-8bit-mono-mulaw",
// RIFF_8KHZ_16BIT_MONO_PCM = "riff-8khz-16bit-mono-pcm",
// RIFF_22050HZ_16BIT_MONO_PCM = "riff-22050hz-16bit-mono-pcm",
// RIFF_24KHZ_16BIT_MONO_PCM = "riff-24khz-16bit-mono-pcm",
// RIFF_44100HZ_16BIT_MONO_PCM = "riff-44100hz-16bit-mono-pcm",
// RIFF_48KHZ_16BIT_MONO_PCM = "riff-48khz-16bit-mono-pcm",
}
export type Voice = {
Name: string;
ShortName: string;
Gender: string;
Locale: string;
SuggestedCodec: string;
FriendlyName: string;
Status: string;
};
export class ProsodyOptions {
/**
* The pitch to use.
* Can be any {@link PITCH}, or a relative frequency in Hz (+50Hz), a relative semitone (+2st), or a relative percentage (+50%).
* [SSML documentation](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-synthesis-markup-voice#:~:text=Optional-,pitch,-Indicates%20the%20baseline)
*/
pitch?: PITCH | string = "+0Hz";
/**
* The rate to use.
* Can be any {@link RATE}, or a relative number (0.5), or string with a relative percentage (+50%).
* [SSML documentation](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-synthesis-markup-voice#:~:text=Optional-,rate,-Indicates%20the%20speaking)
*/
rate?: RATE | string | number = 1.0;
/**
* The volume to use.
* Can be any {@link VOLUME}, or an absolute number (0, 100), a string with a relative number (+50), or a relative percentage (+50%).
* [SSML documentation](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-synthesis-markup-voice#:~:text=Optional-,volume,-Indicates%20the%20volume)
*/
volume?: VOLUME | string | number = 100.0;
}
export class MsEdgeTTS {
static OUTPUT_FORMAT = OUTPUT_FORMAT;
private static TRUSTED_CLIENT_TOKEN = "6A5AA1D4EAFF4E9FB37E23D68491D6F4";
private static VOICES_URL = `https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/voices/list?trustedclienttoken=${MsEdgeTTS.TRUSTED_CLIENT_TOKEN}`;
private static SYNTH_URL = `wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1?TrustedClientToken=${MsEdgeTTS.TRUSTED_CLIENT_TOKEN}`;
private static BINARY_DELIM = "Path:audio\r\n";
private static VOICE_LANG_REGEX = /\w{2}-\w{2}/;
private readonly _enableLogger;
private _ws: WebSocket | undefined;
private _voice: any;
private _voiceLocale: any;
private _outputFormat: any;
private _streams: { [key: string]: Readable } = {};
private _startTime = 0;
private _log(...o: any[]) {
if (this._enableLogger) {
console.log(...o);
}
}
/**
* Create a new `MsEdgeTTS` instance.
*
 * @param enableLogger=false whether to enable the built-in logger. This logs connection inits, disconnects, and incoming data to the console
*/
public constructor(enableLogger: boolean = false) {
this._enableLogger = enableLogger;
}
private async _send(message: any) {
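    // (re)connect lazily: retry up to three times until the socket is OPEN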
for (let i = 1; i <= 3 && this._ws!.readyState !== this._ws!.OPEN; i++) {
if (i == 1) {
this._startTime = Date.now();
}
this._log("connecting: ", i);
await this._initClient();
}
this._ws!.send(message);
}
private _initClient() {
this._ws = new WebSocket(MsEdgeTTS.SYNTH_URL);
this._ws.binaryType = "arraybuffer";
return new Promise((resolve, reject) => {
this._ws!.onopen = () => {
this._log(
"Connected in",
(Date.now() - this._startTime) / 1000,
"seconds",
);
this._send(
`Content-Type:application/json; charset=utf-8\r\nPath:speech.config\r\n\r\n
{
"context": {
"synthesis": {
"audio": {
"metadataoptions": {
"sentenceBoundaryEnabled": "false",
"wordBoundaryEnabled": "false"
},
"outputFormat": "${this._outputFormat}"
}
}
}
}
`,
).then(resolve);
};
this._ws!.onmessage = (m: any) => {
const buffer = Buffer.from(m.data as ArrayBuffer);
const message = buffer.toString();
const requestId = /X-RequestId:(.*?)\r\n/gm.exec(message)![1];
if (message.includes("Path:turn.start")) {
// start of turn, ignore
} else if (message.includes("Path:turn.end")) {
// end of turn, close stream
this._streams[requestId].push(null);
} else if (message.includes("Path:response")) {
// context response, ignore
} else if (
message.includes("Path:audio") &&
m.data instanceof ArrayBuffer
) {
this._pushAudioData(buffer, requestId);
} else {
this._log("UNKNOWN MESSAGE", message);
}
};
this._ws!.onclose = () => {
this._log(
"disconnected after:",
(Date.now() - this._startTime) / 1000,
"seconds",
);
for (const requestId in this._streams) {
this._streams[requestId].push(null);
}
};
this._ws!.onerror = function (error: any) {
reject("Connect Error: " + error);
};
});
}
private _pushAudioData(audioBuffer: Buffer, requestId: string) {
const audioStartIndex =
audioBuffer.indexOf(MsEdgeTTS.BINARY_DELIM) +
MsEdgeTTS.BINARY_DELIM.length;
const audioData = audioBuffer.subarray(audioStartIndex);
this._streams[requestId].push(audioData);
this._log("received audio chunk, size: ", audioData?.length);
}
private _SSMLTemplate(input: string, options: ProsodyOptions = {}): string {
// in case future updates to the edge API block these elements, we'll be concatenating strings.
options = { ...new ProsodyOptions(), ...options };
return `<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="https://www.w3.org/2001/mstts" xml:lang="${this._voiceLocale}">
<voice name="${this._voice}">
<prosody pitch="${options.pitch}" rate="${options.rate}" volume="${options.volume}">
${input}
</prosody>
</voice>
</speak>`;
}
/**
* Fetch the list of voices available in Microsoft Edge.
 * These are not all of them, however: the complete list of voices supported by this module [can be found here](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support) (neural, standard, and preview).
*/
// getVoices(): Promise<Voice[]> {
// return new Promise((resolve, reject) => {
// axios
// .get(MsEdgeTTS.VOICES_URL)
// .then((res) => resolve(res.data))
// .catch(reject);
// });
// }
getVoices(): Promise<Voice[]> {
return fetch(MsEdgeTTS.VOICES_URL)
.then((response) => {
if (!response.ok) {
throw new Error("Network response was not ok");
}
return response.json();
})
.then((data) => data as Voice[])
.catch((error) => {
throw error;
});
}
/**
* Sets the required information for the speech to be synthesised and inits a new WebSocket connection.
* Must be called at least once before text can be synthesised.
 * Saved in this instance. Can be called any number of times to update the metadata.
*
* @param voiceName a string with any `ShortName`. A list of all available neural voices can be found [here](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support#neural-voices). However, it is not limited to neural voices: standard voices can also be used. A list of standard voices can be found [here](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support#standard-voices)
* @param outputFormat any {@link OUTPUT_FORMAT}
* @param voiceLocale (optional) any voice locale that is supported by the voice. See the list of all voices for compatibility. If not provided, the locale will be inferred from the `voiceName`
*/
async setMetadata(
voiceName: string,
outputFormat: OUTPUT_FORMAT,
voiceLocale?: string,
) {
const oldVoice = this._voice;
const oldVoiceLocale = this._voiceLocale;
const oldOutputFormat = this._outputFormat;
this._voice = voiceName;
this._voiceLocale = voiceLocale;
if (!this._voiceLocale) {
const voiceLangMatch = MsEdgeTTS.VOICE_LANG_REGEX.exec(this._voice);
if (!voiceLangMatch)
throw new Error("Could not infer voiceLocale from voiceName!");
this._voiceLocale = voiceLangMatch[0];
}
this._outputFormat = outputFormat;
const changed =
oldVoice !== this._voice ||
oldVoiceLocale !== this._voiceLocale ||
oldOutputFormat !== this._outputFormat;
// create new client
if (changed || this._ws!.readyState !== this._ws!.OPEN) {
this._startTime = Date.now();
await this._initClient();
}
}
private _metadataCheck() {
if (!this._ws)
throw new Error(
"Speech synthesis not configured yet. Run setMetadata before calling toStream or toFile.",
);
}
/**
* Close the WebSocket connection.
*/
close() {
this._ws!.close();
}
/**
 * Writes raw audio synthesised from text in real-time to a {@link Readable}. Uses a basic {@link _SSMLTemplate SSML template}.
*
* @param input the text to synthesise. Can include SSML elements.
* @param options (optional) {@link ProsodyOptions}
* @returns {Readable} - a `stream.Readable` with the audio data
*/
toStream(input: string, options?: ProsodyOptions): Readable {
const { stream } = this._rawSSMLRequest(this._SSMLTemplate(input, options));
return stream;
}
toArrayBuffer(input: string, options?: ProsodyOptions): Promise<ArrayBuffer> {
return new Promise((resolve, reject) => {
let data: Uint8Array[] = [];
const readable = this.toStream(input, options);
readable.on("data", (chunk) => {
data.push(chunk);
});
readable.on("end", () => {
resolve(Buffer.concat(data).buffer);
});
readable.on("error", (err) => {
reject(err);
});
});
}
/**
* Writes raw audio synthesised from a request in real-time to a {@link Readable}. Has no SSML template. Basic SSML should be provided in the request.
*
 * @param requestSSML the SSML to send. SSML elements are required for it to work.
* @returns {Readable} - a `stream.Readable` with the audio data
*/
rawToStream(requestSSML: string): Readable {
const { stream } = this._rawSSMLRequest(requestSSML);
return stream;
}
private _rawSSMLRequest(requestSSML: string): {
stream: Readable;
requestId: string;
} {
this._metadataCheck();
const requestId = randomBytes(16).toString("hex");
const request =
`X-RequestId:${requestId}\r\nContent-Type:application/ssml+xml\r\nPath:ssml\r\n\r\n
` + requestSSML.trim();
// https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/speech-synthesis-markup
const self = this;
const stream = new Readable({
read() {},
destroy(error: Error | null, callback: (error: Error | null) => void) {
delete self._streams[requestId];
callback(error);
},
});
this._streams[requestId] = stream;
this._send(request).then();
return { stream, requestId };
}
}
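End to end, a minimal sketch of driving the class (the voice name and output format are examples; `setMetadata` must resolve before any synthesis call):

```ts
import { MsEdgeTTS, OUTPUT_FORMAT } from "./ms_edge_tts";

async function synthesize(text: string): Promise<ArrayBuffer> {
  const tts = new MsEdgeTTS();
  // the locale is inferred from the ShortName ("en-US-JennyNeural" -> "en-US")
  await tts.setMetadata(
    "en-US-JennyNeural",
    OUTPUT_FORMAT.AUDIO_24KHZ_96KBITRATE_MONO_MP3,
  );
  return tts.toArrayBuffer(text); // collect the audio stream into one buffer
}
```

The resulting buffer can be handed straight to `createTTSPlayer().play(...)` from app/utils/audio.ts.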

View File

@ -32,6 +32,7 @@
    "idb-keyval": "^6.2.1",
    "lodash-es": "^4.17.21",
    "mermaid": "^10.6.1",
+   "markdown-to-txt": "^2.0.1",
    "nanoid": "^5.0.3",
    "next": "^14.1.1",
    "node-fetch": "^3.3.1",
@ -66,6 +67,7 @@
    "eslint-config-next": "13.4.19",
    "eslint-config-prettier": "^8.8.0",
    "eslint-plugin-prettier": "^5.1.3",
+   "eslint-plugin-unused-imports": "^3.2.0",
    "husky": "^8.0.0",
    "lint-staged": "^13.2.2",
    "prettier": "^3.0.2",
@ -78,4 +80,4 @@
    "lint-staged/yaml": "^2.2.2"
  },
  "packageManager": "yarn@1.22.19"
}
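Of the two new packages, `eslint-plugin-unused-imports` supports the unused-imports lint rule, while `markdown-to-txt` plausibly flattens markdown chat replies to plain text before TTS synthesis (an assumption: this diff shows only the dependency). A sketch, with the default export assumed:

```ts
import markdownToTxt from "markdown-to-txt"; // default export assumed

// e.g. "**Hello**, _world_!" -> "Hello, world!"
const plain = markdownToTxt("**Hello**, _world_!");
```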

View File

@ -3367,6 +3367,18 @@ eslint-plugin-react@^7.31.7:
    semver "^6.3.0"
    string.prototype.matchall "^4.0.8"

+eslint-plugin-unused-imports@^3.2.0:
+  version "3.2.0"
+  resolved "https://registry.yarnpkg.com/eslint-plugin-unused-imports/-/eslint-plugin-unused-imports-3.2.0.tgz#63a98c9ad5f622cd9f830f70bc77739f25ccfe0d"
+  integrity sha512-6uXyn6xdINEpxE1MtDjxQsyXB37lfyO2yKGVVgtD7WEWQGORSOZjgrD6hBhvGv4/SO+TOlS+UnC6JppRqbuwGQ==
+  dependencies:
+    eslint-rule-composer "^0.3.0"
+
+eslint-rule-composer@^0.3.0:
+  version "0.3.0"
+  resolved "https://registry.yarnpkg.com/eslint-rule-composer/-/eslint-rule-composer-0.3.0.tgz#79320c927b0c5c0d3d3d2b76c8b4a488f25bbaf9"
+  integrity sha512-bt+Sh8CtDmn2OajxvNO+BX7Wn4CIWMpTRm3MaiKPCQcnnlm0CS2mhui6QaoeQugs+3Kj2ESKEEGJUdVafwhiCg==
+
eslint-scope@5.1.1:
  version "5.1.1"
  resolved "https://registry.npmmirror.com/eslint-scope/-/eslint-scope-5.1.1.tgz#e786e59a66cb92b3f6c1fb0d508aab174848f48c"
@ -4443,11 +4455,21 @@ lodash.debounce@^4.0.8:
  resolved "https://registry.yarnpkg.com/lodash.debounce/-/lodash.debounce-4.0.8.tgz#82d79bff30a67c4005ffd5e2515300ad9ca4d7af"
  integrity sha512-FT1yDzDYEoYWhnSGnpE/4Kj1fLZkDFyqRb7fNt6FdYOSxlUWAtp42Eh6Wb0rGIv/m9Bgo7x4GhQbm5Ys4SG5ow==

+lodash.escape@^4.0.1:
+  version "4.0.1"
+  resolved "https://registry.yarnpkg.com/lodash.escape/-/lodash.escape-4.0.1.tgz#c9044690c21e04294beaa517712fded1fa88de98"
+  integrity sha512-nXEOnb/jK9g0DYMr1/Xvq6l5xMD7GDG55+GSYIYmS0G4tBk/hURD4JR9WCavs04t33WmJx9kCyp9vJ+mr4BOUw==
+
lodash.merge@^4.6.2:
  version "4.6.2"
  resolved "https://registry.yarnpkg.com/lodash.merge/-/lodash.merge-4.6.2.tgz#558aa53b43b661e1925a0afdfa36a9a1085fe57a"
  integrity sha512-0KpjqXRVvrYyCsX1swR/XTK0va6VQkQM6MNo7PqW77ByjAhoARA8EfrP1N4+KlKj8YS0ZUCtRT/YUuhyYDujIQ==

+lodash.unescape@^4.0.1:
+  version "4.0.1"
+  resolved "https://registry.yarnpkg.com/lodash.unescape/-/lodash.unescape-4.0.1.tgz#bf2249886ce514cda112fae9218cdc065211fc9c"
+  integrity sha512-DhhGRshNS1aX6s5YdBE3njCCouPgnG29ebyHvImlZzXZf2SHgt+J08DHgytTPnpywNbO1Y8mNUFyQuIDBq2JZg==
+
lodash@^4.17.21:
  version "4.17.21"
  resolved "https://registry.npmmirror.com/lodash/-/lodash-4.17.21.tgz#679591c564c3bffaae8454cf0b3df370c3d6911c"
@ -4503,6 +4525,20 @@ markdown-table@^3.0.0:
  resolved "https://registry.yarnpkg.com/markdown-table/-/markdown-table-3.0.3.tgz#e6331d30e493127e031dd385488b5bd326e4a6bd"
  integrity sha512-Z1NL3Tb1M9wH4XESsCDEksWoKTdlUafKc4pt0GRwjUyXaCFZ+dc3g2erqB6zm3szA2IUSi7VnPI+o/9jnxh9hw==

+markdown-to-txt@^2.0.1:
+  version "2.0.1"
+  resolved "https://registry.yarnpkg.com/markdown-to-txt/-/markdown-to-txt-2.0.1.tgz#bfd6233a2635443cc24900a158b60c6af36ce9c5"
+  integrity sha512-Hsj7KTN8k1gutlLum3vosHwVZGnv8/cbYKWVkUyo/D1rzOYddbDesILebRfOsaVfjIBJank/AVOySBlHAYqfZw==
+  dependencies:
+    lodash.escape "^4.0.1"
+    lodash.unescape "^4.0.1"
+    marked "^4.0.14"
+
+marked@^4.0.14:
+  version "4.3.0"
+  resolved "https://registry.yarnpkg.com/marked/-/marked-4.3.0.tgz#796362821b019f734054582038b116481b456cf3"
+  integrity sha512-PRsaiG84bK+AMvxziE/lCFss8juXjNaWzVbN5tXAm4XjeaS9NAHhop+PjQxz2A9h8Q4M/xGmzP8vqNwy6JeK0A==
+
mdast-util-definitions@^5.0.0:
  version "5.1.2"
  resolved "https://registry.yarnpkg.com/mdast-util-definitions/-/mdast-util-definitions-5.1.2.tgz#9910abb60ac5d7115d6819b57ae0bcef07a3f7a7"