Merge pull request #52 from ChatGPTNextWeb/main

[pull] main from ChatGPTNextWeb:main
commit 39bc036e30
lyglgt, 2024-09-23 15:40:06 +08:00 (committed by GitHub)
62 changed files with 1174 additions and 120 deletions

@@ -66,4 +66,4 @@ ANTHROPIC_API_VERSION=
 ANTHROPIC_URL=

 ### (optional)
-WHITE_WEBDEV_ENDPOINTS=
+WHITE_WEBDAV_ENDPOINTS=

@@ -1,4 +1,7 @@
 {
   "extends": "next/core-web-vitals",
-  "plugins": ["prettier"]
+  "plugins": ["prettier", "unused-imports"],
+  "rules": {
+    "unused-imports/no-unused-imports": "warn"
+  }
 }

@@ -49,7 +49,7 @@ jobs:
         run: npm install --global vercel@latest
       - name: Cache dependencies
-        uses: actions/cache@v2
+        uses: actions/cache@v4
        id: cache-npm
        with:
          path: ~/.npm

@@ -340,7 +340,7 @@ For ByteDance: use `modelName@bytedance=deploymentName` to customize model name
 Change default model

-### `WHITE_WEBDEV_ENDPOINTS` (optional)
+### `WHITE_WEBDAV_ENDPOINTS` (optional)

 You can use this option if you want to increase the number of webdav service addresses you are allowed to access, as required by the format:
 - Each address must be a complete endpoint
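For illustration, the variable takes a comma-separated list of full endpoints (the server config shown later in this diff splits it on ","); the hostnames below are hypothetical:

WHITE_WEBDAV_ENDPOINTS=https://dav.example.com,https://webdav.example.org/dav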

@@ -202,7 +202,7 @@ ByteDance Api Url.
 如果你想禁用从链接解析预制设置,将此环境变量设置为 1 即可。

-### `WHITE_WEBDEV_ENDPOINTS` (可选)
+### `WHITE_WEBDAV_ENDPOINTS` (可选)

 如果你想增加允许访问的webdav服务地址,可以使用该选项,格式要求:
 - 每一个地址必须是一个完整的 endpoint

@@ -193,7 +193,7 @@ ByteDance API の URL。
 リンクからのプリセット設定解析を無効にしたい場合は、この環境変数を 1 に設定します。

-### `WHITE_WEBDEV_ENDPOINTS` (オプション)
+### `WHITE_WEBDAV_ENDPOINTS` (オプション)

 アクセス許可を与える WebDAV サービスのアドレスを追加したい場合、このオプションを使用します。フォーマット要件:
 - 各アドレスは完全なエンドポイントでなければなりません。

@@ -1,5 +1,5 @@
 import { ApiPath } from "@/app/constant";
-import { NextRequest, NextResponse } from "next/server";
+import { NextRequest } from "next/server";
 import { handle as openaiHandler } from "../../openai";
 import { handle as azureHandler } from "../../azure";
 import { handle as googleHandler } from "../../google";

@@ -1,6 +1,5 @@
 import { getServerSideConfig } from "@/app/config/server";
 import {
-  Alibaba,
   ALIBABA_BASE_URL,
   ApiPath,
   ModelProvider,
@@ -10,7 +9,6 @@ import { prettyObject } from "@/app/utils/format";
 import { NextRequest, NextResponse } from "next/server";
 import { auth } from "@/app/api/auth";
 import { isModelAvailableInServer } from "@/app/utils/model";
-import type { RequestPayload } from "@/app/client/platforms/openai";

 const serverConfig = getServerSideConfig();

@@ -3,7 +3,6 @@ import {
   ANTHROPIC_BASE_URL,
   Anthropic,
   ApiPath,
-  DEFAULT_MODELS,
   ServiceProvider,
   ModelProvider,
 } from "@/app/constant";

@@ -1,4 +1,3 @@
-import { getServerSideConfig } from "@/app/config/server";
 import { ModelProvider } from "@/app/constant";
 import { prettyObject } from "@/app/utils/format";
 import { NextRequest, NextResponse } from "next/server";

@@ -3,7 +3,6 @@ import {
   BAIDU_BASE_URL,
   ApiPath,
   ModelProvider,
-  BAIDU_OATUH_URL,
   ServiceProvider,
 } from "@/app/constant";
 import { prettyObject } from "@/app/utils/format";

@@ -1,11 +1,6 @@
 import { NextRequest, NextResponse } from "next/server";
 import { getServerSideConfig } from "../config/server";
-import {
-  DEFAULT_MODELS,
-  OPENAI_BASE_URL,
-  GEMINI_BASE_URL,
-  ServiceProvider,
-} from "../constant";
+import { OPENAI_BASE_URL, ServiceProvider } from "../constant";
 import { isModelAvailableInServer } from "../utils/model";
 import { cloudflareAIGatewayUrl } from "../utils/cloudflare";

@@ -1,12 +1,7 @@
 import { NextRequest, NextResponse } from "next/server";
 import { auth } from "./auth";
 import { getServerSideConfig } from "@/app/config/server";
-import {
-  ApiPath,
-  GEMINI_BASE_URL,
-  Google,
-  ModelProvider,
-} from "@/app/constant";
+import { ApiPath, GEMINI_BASE_URL, ModelProvider } from "@/app/constant";
 import { prettyObject } from "@/app/utils/format";

 const serverConfig = getServerSideConfig();

@@ -1,6 +1,5 @@
 import { getServerSideConfig } from "@/app/config/server";
 import {
-  Iflytek,
   IFLYTEK_BASE_URL,
   ApiPath,
   ModelProvider,
@@ -10,7 +9,6 @@ import { prettyObject } from "@/app/utils/format";
 import { NextRequest, NextResponse } from "next/server";
 import { auth } from "@/app/api/auth";
 import { isModelAvailableInServer } from "@/app/utils/model";
-import type { RequestPayload } from "@/app/client/platforms/openai";
 // iflytek

 const serverConfig = getServerSideConfig();

@@ -1,6 +1,5 @@
 import { getServerSideConfig } from "@/app/config/server";
 import {
-  Moonshot,
   MOONSHOT_BASE_URL,
   ApiPath,
   ModelProvider,
@@ -10,7 +9,6 @@ import { prettyObject } from "@/app/utils/format";
 import { NextRequest, NextResponse } from "next/server";
 import { auth } from "@/app/api/auth";
 import { isModelAvailableInServer } from "@/app/utils/model";
-import type { RequestPayload } from "@/app/client/platforms/openai";

 const serverConfig = getServerSideConfig();

@@ -1,15 +1,8 @@
 import { getServerSideConfig } from "@/app/config/server";
-import {
-  TENCENT_BASE_URL,
-  ApiPath,
-  ModelProvider,
-  ServiceProvider,
-  Tencent,
-} from "@/app/constant";
+import { TENCENT_BASE_URL, ModelProvider } from "@/app/constant";
 import { prettyObject } from "@/app/utils/format";
 import { NextRequest, NextResponse } from "next/server";
 import { auth } from "@/app/api/auth";
-import { isModelAvailableInServer } from "@/app/utils/model";
 import { getHeader } from "@/app/utils/tencent";

 const serverConfig = getServerSideConfig();

@@ -6,7 +6,7 @@ const config = getServerSideConfig();
 const mergedAllowedWebDavEndpoints = [
   ...internalAllowedWebDavEndpoints,
-  ...config.allowedWebDevEndpoints,
+  ...config.allowedWebDavEndpoints,
 ].filter((domain) => Boolean(domain.trim()));

 const normalizeUrl = (url: string) => {

@@ -1,7 +1,6 @@
 import { getClientConfig } from "../config/client";
 import {
   ACCESS_CODE_PREFIX,
-  Azure,
   ModelProvider,
   ServiceProvider,
 } from "../constant";
@@ -26,6 +25,7 @@ export const ROLES = ["system", "user", "assistant"] as const;
 export type MessageRole = (typeof ROLES)[number];

 export const Models = ["gpt-3.5-turbo", "gpt-4"] as const;
+export const TTSModels = ["tts-1", "tts-1-hd"] as const;
 export type ChatModel = ModelType;

 export interface MultimodalContent {
@@ -54,6 +54,15 @@ export interface LLMConfig {
   style?: DalleRequestPayload["style"];
 }

+export interface SpeechOptions {
+  model: string;
+  input: string;
+  voice: string;
+  response_format?: string;
+  speed?: number;
+  onController?: (controller: AbortController) => void;
+}
+
 export interface ChatOptions {
   messages: RequestMessage[];
   config: LLMConfig;
@@ -88,6 +97,7 @@ export interface LLMModelProvider {
 export abstract class LLMApi {
   abstract chat(options: ChatOptions): Promise<void>;
+  abstract speech(options: SpeechOptions): Promise<ArrayBuffer>;
   abstract usage(): Promise<LLMUsage>;
   abstract models(): Promise<LLMModel[]>;
 }
@@ -206,13 +216,16 @@ export function validString(x: string): boolean {
   return x?.length > 0;
 }

-export function getHeaders() {
+export function getHeaders(ignoreHeaders: boolean = false) {
   const accessStore = useAccessStore.getState();
   const chatStore = useChatStore.getState();
-  const headers: Record<string, string> = {
-    "Content-Type": "application/json",
-    Accept: "application/json",
-  };
+  let headers: Record<string, string> = {};
+  if (!ignoreHeaders) {
+    headers = {
+      "Content-Type": "application/json",
+      Accept: "application/json",
+    };
+  }

   const clientConfig = getClientConfig();

@@ -12,6 +12,7 @@ import {
   getHeaders,
   LLMApi,
   LLMModel,
+  SpeechOptions,
   MultimodalContent,
 } from "../api";
 import Locale from "../../locales";
@@ -83,6 +84,10 @@ export class QwenApi implements LLMApi {
     return res?.output?.choices?.at(0)?.message?.content ?? "";
   }

+  speech(options: SpeechOptions): Promise<ArrayBuffer> {
+    throw new Error("Method not implemented.");
+  }
+
   async chat(options: ChatOptions) {
     const messages = options.messages.map((v) => ({
       role: v.role,

@@ -1,5 +1,5 @@
-import { ACCESS_CODE_PREFIX, Anthropic, ApiPath } from "@/app/constant";
-import { ChatOptions, getHeaders, LLMApi, MultimodalContent } from "../api";
+import { Anthropic, ApiPath } from "@/app/constant";
+import { ChatOptions, getHeaders, LLMApi, SpeechOptions } from "../api";
 import {
   useAccessStore,
   useAppConfig,
@@ -9,13 +9,6 @@ import {
 } from "@/app/store";
 import { getClientConfig } from "@/app/config/client";
 import { DEFAULT_API_HOST } from "@/app/constant";
-import {
-  EventStreamContentType,
-  fetchEventSource,
-} from "@fortaine/fetch-event-source";
-
-import Locale from "../../locales";
-import { prettyObject } from "@/app/utils/format";
 import { getMessageTextContent, isVisionModel } from "@/app/utils";
 import { preProcessImageContent, stream } from "@/app/utils/chat";
 import { cloudflareAIGatewayUrl } from "@/app/utils/cloudflare";
@@ -80,6 +73,10 @@ const ClaudeMapper = {
 const keys = ["claude-2, claude-instant-1"];

 export class ClaudeApi implements LLMApi {
+  speech(options: SpeechOptions): Promise<ArrayBuffer> {
+    throw new Error("Method not implemented.");
+  }
+
   extractMessage(res: any) {
     console.log("[Response] claude response: ", res);

@@ -14,6 +14,7 @@ import {
   LLMApi,
   LLMModel,
   MultimodalContent,
+  SpeechOptions,
 } from "../api";
 import Locale from "../../locales";
 import {
@@ -75,6 +76,10 @@ export class ErnieApi implements LLMApi {
     return [baseUrl, path].join("/");
   }

+  speech(options: SpeechOptions): Promise<ArrayBuffer> {
+    throw new Error("Method not implemented.");
+  }
+
   async chat(options: ChatOptions) {
     const messages = options.messages.map((v) => ({
       // "error_code": 336006, "error_msg": "the role of message with even index in the messages must be user or function",

@@ -13,6 +13,7 @@ import {
   LLMApi,
   LLMModel,
   MultimodalContent,
+  SpeechOptions,
 } from "../api";
 import Locale from "../../locales";
 import {
@@ -77,6 +78,10 @@ export class DoubaoApi implements LLMApi {
     return res.choices?.at(0)?.message?.content ?? "";
   }

+  speech(options: SpeechOptions): Promise<ArrayBuffer> {
+    throw new Error("Method not implemented.");
+  }
+
   async chat(options: ChatOptions) {
     const messages = options.messages.map((v) => ({
       role: v.role,

@@ -1,5 +1,12 @@
 import { ApiPath, Google, REQUEST_TIMEOUT_MS } from "@/app/constant";
-import { ChatOptions, getHeaders, LLMApi, LLMModel, LLMUsage } from "../api";
+import {
+  ChatOptions,
+  getHeaders,
+  LLMApi,
+  LLMModel,
+  LLMUsage,
+  SpeechOptions,
+} from "../api";
 import { useAccessStore, useAppConfig, useChatStore } from "@/app/store";
 import { getClientConfig } from "@/app/config/client";
 import { DEFAULT_API_HOST } from "@/app/constant";
@@ -56,6 +63,10 @@ export class GeminiProApi implements LLMApi {
       ""
     );
   }
+  speech(options: SpeechOptions): Promise<ArrayBuffer> {
+    throw new Error("Method not implemented.");
+  }
+
   async chat(options: ChatOptions): Promise<void> {
     const apiClient = this;
     let multimodal = false;

@@ -7,7 +7,13 @@ import {
 } from "@/app/constant";
 import { useAccessStore, useAppConfig, useChatStore } from "@/app/store";
-import { ChatOptions, getHeaders, LLMApi, LLMModel } from "../api";
+import {
+  ChatOptions,
+  getHeaders,
+  LLMApi,
+  LLMModel,
+  SpeechOptions,
+} from "../api";
 import Locale from "../../locales";
 import {
   EventStreamContentType,
@@ -17,7 +23,7 @@ import { prettyObject } from "@/app/utils/format";
 import { getClientConfig } from "@/app/config/client";
 import { getMessageTextContent } from "@/app/utils";

-import { OpenAIListModelResponse, RequestPayload } from "./openai";
+import { RequestPayload } from "./openai";

 export class SparkApi implements LLMApi {
   private disableListModels = true;
@@ -53,6 +59,10 @@ export class SparkApi implements LLMApi {
     return res.choices?.at(0)?.message?.content ?? "";
   }

+  speech(options: SpeechOptions): Promise<ArrayBuffer> {
+    throw new Error("Method not implemented.");
+  }
+
   async chat(options: ChatOptions) {
     const messages: ChatOptions["messages"] = [];
     for (const v of options.messages) {

@@ -3,10 +3,8 @@
 import {
   ApiPath,
   DEFAULT_API_HOST,
-  DEFAULT_MODELS,
   Moonshot,
   REQUEST_TIMEOUT_MS,
-  ServiceProvider,
 } from "@/app/constant";
 import {
   useAccessStore,
@@ -15,28 +13,17 @@ import {
   ChatMessageTool,
   usePluginStore,
 } from "@/app/store";
-import { collectModelsWithDefaultModel } from "@/app/utils/model";
-import { preProcessImageContent, stream } from "@/app/utils/chat";
-import { cloudflareAIGatewayUrl } from "@/app/utils/cloudflare";
+import { stream } from "@/app/utils/chat";
 import {
   ChatOptions,
   getHeaders,
   LLMApi,
   LLMModel,
-  LLMUsage,
-  MultimodalContent,
+  SpeechOptions,
 } from "../api";
-import Locale from "../../locales";
-import {
-  EventStreamContentType,
-  fetchEventSource,
-} from "@fortaine/fetch-event-source";
-import { prettyObject } from "@/app/utils/format";
 import { getClientConfig } from "@/app/config/client";
 import { getMessageTextContent } from "@/app/utils";
-import { OpenAIListModelResponse, RequestPayload } from "./openai";
+import { RequestPayload } from "./openai";

 export class MoonshotApi implements LLMApi {
   private disableListModels = true;
@@ -72,6 +59,10 @@ export class MoonshotApi implements LLMApi {
     return res.choices?.at(0)?.message?.content ?? "";
   }

+  speech(options: SpeechOptions): Promise<ArrayBuffer> {
+    throw new Error("Method not implemented.");
+  }
+
   async chat(options: ChatOptions) {
     const messages: ChatOptions["messages"] = [];
     for (const v of options.messages) {

@@ -33,17 +33,12 @@ import {
   LLMModel,
   LLMUsage,
   MultimodalContent,
+  SpeechOptions,
 } from "../api";
 import Locale from "../../locales";
-import {
-  EventStreamContentType,
-  fetchEventSource,
-} from "@fortaine/fetch-event-source";
-import { prettyObject } from "@/app/utils/format";
 import { getClientConfig } from "@/app/config/client";
 import {
   getMessageTextContent,
-  getMessageImages,
   isVisionModel,
   isDalle3 as _isDalle3,
 } from "@/app/utils";
@@ -147,6 +142,44 @@ export class ChatGPTApi implements LLMApi {
     return res.choices?.at(0)?.message?.content ?? res;
   }

+  async speech(options: SpeechOptions): Promise<ArrayBuffer> {
+    const requestPayload = {
+      model: options.model,
+      input: options.input,
+      voice: options.voice,
+      response_format: options.response_format,
+      speed: options.speed,
+    };
+
+    console.log("[Request] openai speech payload: ", requestPayload);
+
+    const controller = new AbortController();
+    options.onController?.(controller);
+
+    try {
+      const speechPath = this.path(OpenaiPath.SpeechPath);
+      const speechPayload = {
+        method: "POST",
+        body: JSON.stringify(requestPayload),
+        signal: controller.signal,
+        headers: getHeaders(),
+      };
+
+      // make a fetch request
+      const requestTimeoutId = setTimeout(
+        () => controller.abort(),
+        REQUEST_TIMEOUT_MS,
+      );
+
+      const res = await fetch(speechPath, speechPayload);
+      clearTimeout(requestTimeoutId);
+      return await res.arrayBuffer();
+    } catch (e) {
+      console.log("[Request] failed to make a speech request", e);
+      throw e;
+    }
+  }
+
   async chat(options: ChatOptions) {
     const modelConfig = {
       ...useAppConfig.getState().modelConfig,
@@ -244,6 +277,7 @@ export class ChatGPTApi implements LLMApi {
       );
     }

     if (shouldStream) {
+      let index = -1;
       const [tools, funcs] = usePluginStore
         .getState()
         .getAsTools(
@@ -269,10 +303,10 @@ export class ChatGPTApi implements LLMApi {
           }>;
           const tool_calls = choices[0]?.delta?.tool_calls;
           if (tool_calls?.length > 0) {
-            const index = tool_calls[0]?.index;
             const id = tool_calls[0]?.id;
             const args = tool_calls[0]?.function?.arguments;
             if (id) {
+              index += 1;
               runTools.push({
                 id,
                 type: tool_calls[0]?.type,
@@ -294,6 +328,8 @@ export class ChatGPTApi implements LLMApi {
           toolCallMessage: any,
           toolCallResult: any[],
         ) => {
+          // reset index value
+          index = -1;
           // @ts-ignore
           requestPayload?.messages?.splice(
             // @ts-ignore

@@ -8,6 +8,7 @@ import {
   LLMApi,
   LLMModel,
   MultimodalContent,
+  SpeechOptions,
 } from "../api";
 import Locale from "../../locales";
 import {
@@ -89,6 +90,10 @@ export class HunyuanApi implements LLMApi {
     return res.Choices?.at(0)?.Message?.Content ?? "";
   }

+  speech(options: SpeechOptions): Promise<ArrayBuffer> {
+    throw new Error("Method not implemented.");
+  }
+
   async chat(options: ChatOptions) {
     const visionModel = isVisionModel(options.config.model);
     const messages = options.messages.map((v, index) => ({

@@ -38,6 +38,7 @@ interface ChatCommands {
   next?: Command;
   prev?: Command;
   clear?: Command;
+  fork?: Command;
   del?: Command;
 }

@@ -7,7 +7,6 @@ import {
   useImperativeHandle,
 } from "react";
 import { useParams } from "react-router";
-import { useWindowSize } from "@/app/utils";
 import { IconButton } from "./button";
 import { nanoid } from "nanoid";
 import ExportIcon from "../icons/share.svg";

@@ -1,5 +1,4 @@
 import DeleteIcon from "../icons/delete.svg";
-import BotIcon from "../icons/bot.svg";

 import styles from "./home.module.scss";
 import {
@@ -12,7 +11,7 @@ import {
 import { useChatStore } from "../store";

 import Locale from "../locales";
-import { Link, useLocation, useNavigate } from "react-router-dom";
+import { useLocation, useNavigate } from "react-router-dom";
 import { Path } from "../constant";
 import { MaskAvatar } from "./mask";
 import { Mask } from "../store/mask";

@@ -15,6 +15,8 @@ import RenameIcon from "../icons/rename.svg";
 import ExportIcon from "../icons/share.svg";
 import ReturnIcon from "../icons/return.svg";
 import CopyIcon from "../icons/copy.svg";
+import SpeakIcon from "../icons/speak.svg";
+import SpeakStopIcon from "../icons/speak-stop.svg";
 import LoadingIcon from "../icons/three-dots.svg";
 import LoadingButtonIcon from "../icons/loading.svg";
 import PromptIcon from "../icons/prompt.svg";
@@ -96,7 +98,8 @@ import {
 import { useNavigate } from "react-router-dom";
 import {
   CHAT_PAGE_SIZE,
-  LAST_INPUT_KEY,
+  DEFAULT_TTS_ENGINE,
+  ModelProvider,
   Path,
   REQUEST_TIMEOUT_MS,
   UNFINISHED_INPUT,
@@ -113,6 +116,11 @@ import { useAllModels } from "../utils/hooks";
 import { MultimodalContent } from "../client/api";

 const localStorage = safeLocalStorage();
+import { ClientApi } from "../client/api";
+import { createTTSPlayer } from "../utils/audio";
+import { MsEdgeTTS, OUTPUT_FORMAT } from "../utils/ms_edge_tts";
+
+const ttsPlayer = createTTSPlayer();

 const Markdown = dynamic(async () => (await import("./markdown")).Markdown, {
   loading: () => <LoadingIcon />,
@@ -443,6 +451,7 @@ export function ChatActions(props: {
   hitBottom: boolean;
   uploading: boolean;
   setShowShortcutKeyModal: React.Dispatch<React.SetStateAction<boolean>>;
+  setUserInput: (input: string) => void;
 }) {
   const config = useAppConfig();
   const navigate = useNavigate();
@@ -981,6 +990,7 @@ function _Chat() {
       chatStore.updateCurrentSession(
         (session) => (session.clearContextIndex = session.messages.length),
       ),
+    fork: () => chatStore.forkSession(),
     del: () => chatStore.deleteSession(chatStore.currentSessionIndex),
   });

@@ -1184,10 +1194,55 @@ function _Chat() {
     });
   };

+  const accessStore = useAccessStore();
+  const [speechStatus, setSpeechStatus] = useState(false);
+  const [speechLoading, setSpeechLoading] = useState(false);
+
+  async function openaiSpeech(text: string) {
+    if (speechStatus) {
+      ttsPlayer.stop();
+      setSpeechStatus(false);
+    } else {
+      var api: ClientApi;
+      api = new ClientApi(ModelProvider.GPT);
+      const config = useAppConfig.getState();
+      setSpeechLoading(true);
+      ttsPlayer.init();
+      let audioBuffer: ArrayBuffer;
+      const { markdownToTxt } = require("markdown-to-txt");
+      const textContent = markdownToTxt(text);
+      if (config.ttsConfig.engine !== DEFAULT_TTS_ENGINE) {
+        const edgeVoiceName = accessStore.edgeVoiceName();
+        const tts = new MsEdgeTTS();
+        await tts.setMetadata(
+          edgeVoiceName,
+          OUTPUT_FORMAT.AUDIO_24KHZ_96KBITRATE_MONO_MP3,
+        );
+        audioBuffer = await tts.toArrayBuffer(textContent);
+      } else {
+        audioBuffer = await api.llm.speech({
+          model: config.ttsConfig.model,
+          input: textContent,
+          voice: config.ttsConfig.voice,
+          speed: config.ttsConfig.speed,
+        });
+      }
+      setSpeechStatus(true);
+      ttsPlayer
+        .play(audioBuffer, () => {
+          setSpeechStatus(false);
+        })
+        .catch((e) => {
+          console.error("[OpenAI Speech]", e);
+          showToast(prettyObject(e));
+          setSpeechStatus(false);
+        })
+        .finally(() => setSpeechLoading(false));
+    }
+  }
+
   const context: RenderMessage[] = useMemo(() => {
     return session.mask.hideContext ? [] : session.mask.context.slice();
   }, [session.mask.context, session.mask.hideContext]);

-  const accessStore = useAccessStore();
-
   if (
     context.length === 0 &&
@@ -1724,6 +1779,25 @@ function _Chat() {
                           )
                         }
                       />
+                      {config.ttsConfig.enable && (
+                        <ChatAction
+                          text={
+                            speechStatus
+                              ? Locale.Chat.Actions.StopSpeech
+                              : Locale.Chat.Actions.Speech
+                          }
+                          icon={
+                            speechStatus ? (
+                              <SpeakStopIcon />
+                            ) : (
+                              <SpeakIcon />
+                            )
+                          }
+                          onClick={() =>
+                            openaiSpeech(getMessageTextContent(message))
+                          }
+                        />
+                      )}
                     </>
                   )}
                 </div>
@@ -1842,6 +1916,7 @@ function _Chat() {
               onSearch("");
             }}
             setShowShortcutKeyModal={setShowShortcutKeyModal}
+            setUserInput={setUserInput}
           />
           <label
             className={`${styles["chat-input-panel-inner"]} ${

@@ -1,5 +1,5 @@
 /* eslint-disable @next/next/no-img-element */
-import { ChatMessage, ModelType, useAppConfig, useChatStore } from "../store";
+import { ChatMessage, useAppConfig, useChatStore } from "../store";
 import Locale from "../locales";
 import styles from "./exporter.module.scss";
 import {

@@ -37,7 +37,7 @@ import Locale, { AllLangs, ALL_LANG_OPTIONS, Lang } from "../locales";
 import { useNavigate } from "react-router-dom";

 import chatStyle from "./chat.module.scss";
-import { useEffect, useState } from "react";
+import { useState } from "react";
 import {
   copyToClipboard,
   downloadAs,
@@ -48,7 +48,6 @@ import { Updater } from "../typing";
 import { ModelConfigList } from "./model-config";
 import { FileName, Path } from "../constant";
 import { BUILTIN_MASK_STORE } from "../masks";
-import { nanoid } from "nanoid";
 import {
   DragDropContext,
   Droppable,

@@ -28,7 +28,7 @@ import {
 } from "./ui-lib";
 import Locale from "../locales";
 import { useNavigate } from "react-router-dom";
-import { useEffect, useState } from "react";
+import { useState } from "react";
 import { getClientConfig } from "../config/client";

 export function PluginPage() {

@@ -80,6 +80,7 @@ import { useSyncStore } from "../store/sync";
 import { nanoid } from "nanoid";
 import { useMaskStore } from "../store/mask";
 import { ProviderType } from "../utils/cloud";
+import { TTSConfigList } from "./tts-config";

 function EditPromptModal(props: { id: string; onClose: () => void }) {
   const promptStore = usePromptStore();
@@ -1646,6 +1647,17 @@ export function Settings() {
         <UserPromptModal onClose={() => setShowPromptModal(false)} />
       )}

+      <List>
+        <TTSConfigList
+          ttsConfig={config.ttsConfig}
+          updateConfig={(updater) => {
+            const ttsConfig = { ...config.ttsConfig };
+            updater(ttsConfig);
+            config.update((config) => (config.ttsConfig = ttsConfig));
+          }}
+        />
+      </List>
+
       <DangerItems />
     </div>
   </ErrorBoundary>

@@ -7,7 +7,6 @@ import SettingsIcon from "../icons/settings.svg";
 import GithubIcon from "../icons/github.svg";
 import ChatGptIcon from "../icons/chatgpt.svg";
 import AddIcon from "../icons/add.svg";
-import CloseIcon from "../icons/close.svg";
 import DeleteIcon from "../icons/delete.svg";
 import MaskIcon from "../icons/mask.svg";
 import DragIcon from "../icons/drag.svg";
@@ -254,11 +253,6 @@ export function SideBar(props: { className?: string }) {
       {showPluginSelector && (
         <Selector
           items={[
-            {
-              title: "👇 Please select the plugin you need to use",
-              value: "-",
-              disable: true,
-            },
             ...PLUGINS.map((item) => {
               return {
                 title: item.name,

@@ -0,0 +1,133 @@
import { TTSConfig, TTSConfigValidator } from "../store";

import Locale from "../locales";
import { ListItem, Select } from "./ui-lib";
import {
  DEFAULT_TTS_ENGINE,
  DEFAULT_TTS_ENGINES,
  DEFAULT_TTS_MODELS,
  DEFAULT_TTS_VOICES,
} from "../constant";
import { InputRange } from "./input-range";

export function TTSConfigList(props: {
  ttsConfig: TTSConfig;
  updateConfig: (updater: (config: TTSConfig) => void) => void;
}) {
  return (
    <>
      <ListItem
        title={Locale.Settings.TTS.Enable.Title}
        subTitle={Locale.Settings.TTS.Enable.SubTitle}
      >
        <input
          type="checkbox"
          checked={props.ttsConfig.enable}
          onChange={(e) =>
            props.updateConfig(
              (config) => (config.enable = e.currentTarget.checked),
            )
          }
        ></input>
      </ListItem>
      {/* <ListItem
        title={Locale.Settings.TTS.Autoplay.Title}
        subTitle={Locale.Settings.TTS.Autoplay.SubTitle}
      >
        <input
          type="checkbox"
          checked={props.ttsConfig.autoplay}
          onChange={(e) =>
            props.updateConfig(
              (config) => (config.autoplay = e.currentTarget.checked),
            )
          }
        ></input>
      </ListItem> */}
      <ListItem title={Locale.Settings.TTS.Engine}>
        <Select
          value={props.ttsConfig.engine}
          onChange={(e) => {
            props.updateConfig(
              (config) =>
                (config.engine = TTSConfigValidator.engine(
                  e.currentTarget.value,
                )),
            );
          }}
        >
          {DEFAULT_TTS_ENGINES.map((v, i) => (
            <option value={v} key={i}>
              {v}
            </option>
          ))}
        </Select>
      </ListItem>
      {props.ttsConfig.engine === DEFAULT_TTS_ENGINE && (
        <>
          <ListItem title={Locale.Settings.TTS.Model}>
            <Select
              value={props.ttsConfig.model}
              onChange={(e) => {
                props.updateConfig(
                  (config) =>
                    (config.model = TTSConfigValidator.model(
                      e.currentTarget.value,
                    )),
                );
              }}
            >
              {DEFAULT_TTS_MODELS.map((v, i) => (
                <option value={v} key={i}>
                  {v}
                </option>
              ))}
            </Select>
          </ListItem>
          <ListItem
            title={Locale.Settings.TTS.Voice.Title}
            subTitle={Locale.Settings.TTS.Voice.SubTitle}
          >
            <Select
              value={props.ttsConfig.voice}
              onChange={(e) => {
                props.updateConfig(
                  (config) =>
                    (config.voice = TTSConfigValidator.voice(
                      e.currentTarget.value,
                    )),
                );
              }}
            >
              {DEFAULT_TTS_VOICES.map((v, i) => (
                <option value={v} key={i}>
                  {v}
                </option>
              ))}
            </Select>
          </ListItem>
          <ListItem
            title={Locale.Settings.TTS.Speed.Title}
            subTitle={Locale.Settings.TTS.Speed.SubTitle}
          >
            <InputRange
              aria={Locale.Settings.TTS.Speed.Title}
              value={props.ttsConfig.speed?.toFixed(1)}
              min="0.3"
              max="4.0"
              step="0.1"
              onChange={(e) => {
                props.updateConfig(
                  (config) =>
                    (config.speed = TTSConfigValidator.speed(
                      e.currentTarget.valueAsNumber,
                    )),
                );
              }}
            ></InputRange>
          </ListItem>
        </>
      )}
    </>
  );
}

@@ -0,0 +1,119 @@
@import "../styles/animation.scss";

.plugin-page {
  height: 100%;
  display: flex;
  flex-direction: column;

  .plugin-page-body {
    padding: 20px;
    overflow-y: auto;

    .plugin-filter {
      width: 100%;
      max-width: 100%;
      margin-bottom: 20px;
      animation: slide-in ease 0.3s;
      height: 40px;
      display: flex;

      .search-bar {
        flex-grow: 1;
        max-width: 100%;
        min-width: 0;
        outline: none;
      }

      .search-bar:focus {
        border: 1px solid var(--primary);
      }

      .plugin-filter-lang {
        height: 100%;
        margin-left: 10px;
      }

      .plugin-create {
        height: 100%;
        margin-left: 10px;
        box-sizing: border-box;
        min-width: 80px;
      }
    }

    .plugin-item {
      display: flex;
      justify-content: space-between;
      padding: 20px;
      border: var(--border-in-light);
      animation: slide-in ease 0.3s;

      &:not(:last-child) {
        border-bottom: 0;
      }

      &:first-child {
        border-top-left-radius: 10px;
        border-top-right-radius: 10px;
      }

      &:last-child {
        border-bottom-left-radius: 10px;
        border-bottom-right-radius: 10px;
      }

      .plugin-header {
        display: flex;
        align-items: center;

        .plugin-icon {
          display: flex;
          align-items: center;
          justify-content: center;
          margin-right: 10px;
        }

        .plugin-title {
          .plugin-name {
            font-size: 14px;
            font-weight: bold;
          }
          .plugin-info {
            font-size: 12px;
          }
          .plugin-runtime-warning {
            font-size: 12px;
            color: #f86c6c;
          }
        }
      }

      .plugin-actions {
        display: flex;
        flex-wrap: nowrap;
        transition: all ease 0.3s;
        justify-content: center;
        align-items: center;
      }

      @media screen and (max-width: 600px) {
        display: flex;
        flex-direction: column;
        padding-bottom: 10px;
        border-radius: 10px;
        margin-bottom: 20px;
        box-shadow: var(--card-shadow);

        &:not(:last-child) {
          border-bottom: var(--border-in-light);
        }

        .plugin-actions {
          width: 100%;
          justify-content: space-between;
          padding-top: 10px;
        }
      }
    }
  }
}

@@ -154,8 +154,8 @@ export const getServerSideConfig = () => {
   //     `[Server Config] using ${randomIndex + 1} of ${apiKeys.length} api key`,
   //   );

-  const allowedWebDevEndpoints = (
-    process.env.WHITE_WEBDEV_ENDPOINTS ?? ""
+  const allowedWebDavEndpoints = (
+    process.env.WHITE_WEBDAV_ENDPOINTS ?? ""
   ).split(",");

   return {
@@ -229,6 +229,6 @@ export const getServerSideConfig = () => {
     disableFastLink: !!process.env.DISABLE_FAST_LINK,
     customModels,
     defaultModel,
-    allowedWebDevEndpoints,
+    allowedWebDavEndpoints,
   };
 };

@@ -1,5 +1,3 @@
-import path from "path";
-
 export const OWNER = "ChatGPTNextWeb";
 export const REPO = "ChatGPT-Next-Web";
 export const REPO_URL = `https://github.com/${OWNER}/${REPO}`;
@@ -152,6 +150,7 @@ export const Anthropic = {
 export const OpenaiPath = {
   ChatPath: "v1/chat/completions",
+  SpeechPath: "v1/audio/speech",
   ImagePath: "v1/images/generations",
   UsagePath: "dashboard/billing/usage",
   SubsPath: "dashboard/billing/subscription",
@@ -258,6 +257,20 @@ export const KnowledgeCutOffDate: Record<string, string> = {
   "gemini-pro-vision": "2023-12",
 };

+export const DEFAULT_TTS_ENGINE = "OpenAI-TTS";
+export const DEFAULT_TTS_ENGINES = ["OpenAI-TTS", "Edge-TTS"];
+export const DEFAULT_TTS_MODEL = "tts-1";
+export const DEFAULT_TTS_VOICE = "alloy";
+export const DEFAULT_TTS_MODELS = ["tts-1", "tts-1-hd"];
+export const DEFAULT_TTS_VOICES = [
+  "alloy",
+  "echo",
+  "fable",
+  "onyx",
+  "nova",
+  "shimmer",
+];
+
 const openaiModels = [
   "gpt-3.5-turbo",
   "gpt-3.5-turbo-1106",
@@ -279,7 +292,7 @@ const openaiModels = [
   "gpt-4-1106-preview",
   "dall-e-3",
   "o1-mini",
-  "o1-preview"
+  "o1-preview",
 ];

 const googleModels = [

app/icons/speak-stop.svg (new file)

@@ -0,0 +1 @@
<svg xmlns="http://www.w3.org/2000/svg" fill="none" width="16" height="16" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" class="w-4 h-4"><path stroke-linecap="round" stroke-linejoin="round" d="M17.25 9.75 19.5 12m0 0 2.25 2.25M19.5 12l2.25-2.25M19.5 12l-2.25 2.25m-10.5-6 4.72-4.72a.75.75 0 0 1 1.28.53v15.88a.75.75 0 0 1-1.28.53l-4.72-4.72H4.51c-.88 0-1.704-.507-1.938-1.354A9.009 9.009 0 0 1 2.25 12c0-.83.112-1.633.322-2.396C2.806 8.756 3.63 8.25 4.51 8.25H6.75Z"></path></svg>

app/icons/speak.svg (new file)

@@ -0,0 +1 @@
<svg xmlns="http://www.w3.org/2000/svg" fill="none" width="16" height="16" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" class="w-4 h-4"><path stroke-linecap="round" stroke-linejoin="round" d="M19.114 5.636a9 9 0 010 12.728M16.463 8.288a5.25 5.25 0 010 7.424M6.75 8.25l4.72-4.72a.75.75 0 011.28.53v15.88a.75.75 0 01-1.28.53l-4.72-4.72H4.51c-.88 0-1.704-.507-1.938-1.354A9.01 9.01 0 012.25 12c0-.83.112-1.633.322-2.396C2.806 8.756 3.63 8.25 4.51 8.25H6.75z"></path></svg>

app/icons/voice-white.svg (new file)

@@ -0,0 +1,16 @@
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="16" height="16" fill="none" viewBox="0 0 20 20">
  <defs>
    <rect id="path_0" width="20" height="20" x="0" y="0" />
  </defs>
  <g opacity="1" transform="translate(0 0) rotate(0 8 8)">
    <mask id="bg-mask-0" fill="#fff">
      <use xlink:href="#path_0" />
    </mask>
    <g mask="url(#bg-mask-0)">
      <path d="M7 4a3 3 0 016 0v6a3 3 0 11-6 0V4z" fill="#333333">
      </path>
      <path d="M5.5 9.643a.75.75 0 00-1.5 0V10c0 3.06 2.29 5.585 5.25 5.954V17.5h-1.5a.75.75 0 000 1.5h4.5a.75.75 0 000-1.5h-1.5v-1.546A6.001 6.001 0 0016 10v-.357a.75.75 0 00-1.5 0V10a4.5 4.5 0 01-9 0v-.357z" fill="#333333">
      </path>
    </g>
  </g>
</svg>

@@ -41,7 +41,11 @@ export default function RootLayout({
           name="viewport"
           content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no"
         />
-        <link rel="manifest" href="/site.webmanifest" crossOrigin="use-credentials"></link>
+        <link
+          rel="manifest"
+          href="/site.webmanifest"
+          crossOrigin="use-credentials"
+        ></link>
         <script src="/serviceWorkerRegister.js" defer></script>
       </head>
       <body>

@@ -1,4 +1,3 @@
-import { ShortcutKeyModal } from "../components/chat";
 import { getClientConfig } from "../config/client";
 import { SubmitKey } from "../store/config";
@@ -46,6 +45,8 @@ const cn = {
     FullScreen: "全屏",
     RefreshTitle: "刷新标题",
     RefreshToast: "已发送刷新标题请求",
+    Speech: "朗读",
+    StopSpeech: "停止",
   },
   Commands: {
     new: "新建聊天",
@@ -53,6 +54,7 @@ const cn = {
     next: "下一个聊天",
     prev: "上一个聊天",
     clear: "清除上下文",
+    fork: "复制聊天",
     del: "删除聊天",
   },
   InputActions: {
@@ -79,6 +81,8 @@ const cn = {
       return inputHints + ",/ 触发补全,: 触发命令";
     },
     Send: "发送",
+    StartSpeak: "说话",
+    StopSpeak: "停止",
     Config: {
       Reset: "清除记忆",
       SaveAs: "存为面具",
@@ -496,6 +500,26 @@ const cn = {
       Title: "频率惩罚度 (frequency_penalty)",
       SubTitle: "值越大,越有可能降低重复字词",
     },
+    TTS: {
+      Enable: {
+        Title: "启用文本转语音",
+        SubTitle: "启用文本生成语音服务",
+      },
+      Autoplay: {
+        Title: "启用自动朗读",
+        SubTitle: "自动生成语音并播放,需先开启文本转语音开关",
+      },
+      Model: "模型",
+      Engine: "转换引擎",
+      Voice: {
+        Title: "声音",
+        SubTitle: "生成语音时使用的声音",
+      },
+      Speed: {
+        Title: "速度",
+        SubTitle: "生成语音的速度",
+      },
+    },
   },
   Store: {
     DefaultTopic: "新的聊天",

@@ -47,6 +47,8 @@ const en: LocaleType = {
     FullScreen: "FullScreen",
     RefreshTitle: "Refresh Title",
     RefreshToast: "Title refresh request sent",
+    Speech: "Play",
+    StopSpeech: "Stop",
   },
   Commands: {
     new: "Start a new chat",
@@ -54,6 +56,7 @@ const en: LocaleType = {
     next: "Next Chat",
     prev: "Previous Chat",
     clear: "Clear Context",
+    fork: "Copy Chat",
     del: "Delete Chat",
   },
   InputActions: {
@@ -80,6 +83,8 @@ const en: LocaleType = {
       return inputHints + ", / to search prompts, : to use commands";
     },
     Send: "Send",
+    StartSpeak: "Start Speak",
+    StopSpeak: "Stop Speak",
     Config: {
       Reset: "Reset to Default",
       SaveAs: "Save as Mask",
@@ -502,6 +507,27 @@ const en: LocaleType = {
       SubTitle:
         "A larger value decreasing the likelihood to repeat the same line",
     },
+    TTS: {
+      Enable: {
+        Title: "Enable TTS",
+        SubTitle: "Enable text-to-speech service",
+      },
+      Autoplay: {
+        Title: "Enable Autoplay",
+        SubTitle:
+          "Automatically generate speech and play, you need to enable the text-to-speech switch first",
+      },
+      Model: "Model",
+      Voice: {
+        Title: "Voice",
+        SubTitle: "The voice to use when generating the audio",
+      },
+      Speed: {
+        Title: "Speed",
+        SubTitle: "The speed of the generated audio",
+      },
+      Engine: "TTS Engine",
+    },
   },
   Store: {
     DefaultTopic: "New Conversation",

@@ -134,3 +134,34 @@ export function getISOLang() {
   const lang = getLang();
   return isoLangString[lang] ?? lang;
 }

+const DEFAULT_STT_LANG = "zh-CN";
+export const STT_LANG_MAP: Record<Lang, string> = {
+  cn: "zh-CN",
+  en: "en-US",
+  pt: "pt-BR",
+  tw: "zh-TW",
+  jp: "ja-JP",
+  ko: "ko-KR",
+  id: "id-ID",
+  fr: "fr-FR",
+  es: "es-ES",
+  it: "it-IT",
+  tr: "tr-TR",
+  de: "de-DE",
+  vi: "vi-VN",
+  ru: "ru-RU",
+  cs: "cs-CZ",
+  no: "no-NO",
+  ar: "ar-SA",
+  bn: "bn-BD",
+  sk: "sk-SK",
+};
+
+export function getSTTLang(): string {
+  try {
+    return STT_LANG_MAP[getLang()];
+  } catch {
+    return DEFAULT_STT_LANG;
+  }
+}

@@ -1,6 +1,5 @@
 import { getClientConfig } from "../config/client";
 import { SubmitKey } from "../store/config";
-import { LocaleType } from "./index";
 import type { PartialLocaleType } from "./index";

 // if you are adding a new translation, please use PartialLocaleType instead of LocaleType

@@ -1,7 +1,4 @@
 import { Mask } from "../store/mask";
-import { CN_MASKS } from "./cn";
-import { TW_MASKS } from "./tw";
-import { EN_MASKS } from "./en";
 import { type BuiltinMask } from "./typing";

 export { type BuiltinMask } from "./typing";

@@ -120,6 +120,9 @@ const DEFAULT_ACCESS_STATE = {
   disableFastLink: false,
   customModels: "",
   defaultModel: "",
+
+  // tts config
+  edgeTTSVoiceName: "zh-CN-YunxiNeural",
 };

 export const useAccessStore = createPersistStore(
@@ -132,6 +135,12 @@ export const useAccessStore = createPersistStore(
       return get().needCode;
     },

+    edgeVoiceName() {
+      this.fetch();
+
+      return get().edgeTTSVoiceName;
+    },
+
     isValidOpenAI() {
       return ensure(get(), ["openaiApiKey"]);
     },
@@ -204,8 +213,8 @@ export const useAccessStore = createPersistStore(
         .then((res) => {
           // Set default model from env request
           let defaultModel = res.defaultModel ?? "";
-          DEFAULT_CONFIG.modelConfig.model =
-            defaultModel !== "" ? defaultModel : "gpt-3.5-turbo";
+          if (defaultModel !== "")
+            DEFAULT_CONFIG.modelConfig.model = defaultModel;

           return res;
         })
         .then((res: DangerConfig) => {

@@ -170,6 +170,28 @@ export const useChatStore = createPersistStore(
   }

   const methods = {
+    forkSession() {
+      // get the current session
+      const currentSession = get().currentSession();
+      if (!currentSession) return;
+
+      const newSession = createEmptySession();
+
+      newSession.topic = currentSession.topic;
+      newSession.messages = [...currentSession.messages];
+      newSession.mask = {
+        ...currentSession.mask,
+        modelConfig: {
+          ...currentSession.mask.modelConfig,
+        },
+      };
+
+      set((state) => ({
+        currentSessionIndex: 0,
+        sessions: [newSession, ...state.sessions],
+      }));
+    },
+
     clearSessions() {
       set(() => ({
         sessions: [createEmptySession()],

@@ -5,12 +5,21 @@ import {
   DEFAULT_INPUT_TEMPLATE,
   DEFAULT_MODELS,
   DEFAULT_SIDEBAR_WIDTH,
+  DEFAULT_TTS_ENGINE,
+  DEFAULT_TTS_ENGINES,
+  DEFAULT_TTS_MODEL,
+  DEFAULT_TTS_MODELS,
+  DEFAULT_TTS_VOICE,
+  DEFAULT_TTS_VOICES,
   StoreKey,
   ServiceProvider,
 } from "../constant";
 import { createPersistStore } from "../utils/store";

 export type ModelType = (typeof DEFAULT_MODELS)[number]["name"];
+export type TTSModelType = (typeof DEFAULT_TTS_MODELS)[number];
+export type TTSVoiceType = (typeof DEFAULT_TTS_VOICES)[number];
+export type TTSEngineType = (typeof DEFAULT_TTS_ENGINES)[number];

 export enum SubmitKey {
   Enter = "Enter",
@@ -68,11 +77,21 @@ export const DEFAULT_CONFIG = {
     quality: "standard" as DalleQuality,
     style: "vivid" as DalleStyle,
   },
+
+  ttsConfig: {
+    enable: false,
+    autoplay: false,
+    engine: DEFAULT_TTS_ENGINE,
+    model: DEFAULT_TTS_MODEL,
+    voice: DEFAULT_TTS_VOICE,
+    speed: 1.0,
+  },
 };

 export type ChatConfig = typeof DEFAULT_CONFIG;

 export type ModelConfig = ChatConfig["modelConfig"];
+export type TTSConfig = ChatConfig["ttsConfig"];

 export function limitNumber(
   x: number,
@@ -87,6 +106,21 @@ export function limitNumber(
   return Math.min(max, Math.max(min, x));
 }

+export const TTSConfigValidator = {
+  engine(x: string) {
+    return x as TTSEngineType;
+  },
+  model(x: string) {
+    return x as TTSModelType;
+  },
+  voice(x: string) {
+    return x as TTSVoiceType;
+  },
+  speed(x: number) {
+    return limitNumber(x, 0.25, 4.0, 1.0);
+  },
+};
+
 export const ModalConfigValidator = {
   model(x: string) {
     return x as ModelType;
@@ -143,6 +177,21 @@ export const useAppConfig = createPersistStore(
   {
     name: StoreKey.Config,
     version: 4,
+
+    merge(persistedState, currentState) {
+      const state = persistedState as ChatConfig | undefined;
+      if (!state) return { ...currentState };
+      const models = currentState.models.slice();
+      state.models.forEach((pModel) => {
+        const idx = models.findIndex(
+          (v) => v.name === pModel.name && v.provider === pModel.provider,
+        );
+        if (idx !== -1) models[idx] = pModel;
+        else models.push(pModel);
+      });
+      return { ...currentState, ...state, models: models };
+    },
+
     migrate(persistedState, version) {
       const state = persistedState as ChatConfig;
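To see what the new `merge` hook does, here is a small illustrative sketch (the model entries are made up, and the shape is simplified to the fields `merge` actually touches):

// Hypothetical inputs: build-time defaults vs. what a user has persisted.
const currentModels = [
  { name: "gpt-4", provider: "openai", available: true },
  { name: "gpt-3.5-turbo", provider: "openai", available: true },
];
const persistedModels = [
  { name: "gpt-4", provider: "openai", available: false }, // user disabled it
  { name: "my-custom-model", provider: "openai", available: true }, // not in the defaults
];
// merge() keeps every default, lets a persisted entry with the same
// (name, provider) pair override it, and appends unknown persisted entries:
// => [gpt-4 (available: false), gpt-3.5-turbo, my-custom-model]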

@@ -1,5 +1,4 @@
 import OpenAPIClientAxios from "openapi-client-axios";
-import { getLang, Lang } from "../locales";
 import { StoreKey } from "../constant";
 import { nanoid } from "nanoid";
 import { createPersistStore } from "../utils/store";

@@ -1,7 +1,7 @@
 import Fuse from "fuse.js";
-import { getLang } from "../locales";
-import { StoreKey } from "../constant";
 import { nanoid } from "nanoid";
+import { StoreKey } from "../constant";
+import { getLang } from "../locales";
 import { createPersistStore } from "../utils/store";

 export interface Prompt {
@@ -147,6 +147,11 @@ export const usePromptStore = createPersistStore(
     },

     onRehydrateStorage(state) {
+      // Skip store rehydration on server side
+      if (typeof window === "undefined") {
+        return;
+      }
+
       const PROMPT_URL = "./prompts.json";

       type PromptList = Array<[string, string]>;

@@ -1,5 +1,4 @@
 import { getClientConfig } from "../config/client";
-import { Updater } from "../typing";
 import { ApiPath, STORAGE_KEY, StoreKey } from "../constant";
 import { createPersistStore } from "../utils/store";
 import {
@@ -100,15 +99,17 @@ export const useSyncStore = createPersistStore(
       const remoteState = await client.get(config.username);
       if (!remoteState || remoteState === "") {
         await client.set(config.username, JSON.stringify(localState));
-        console.log("[Sync] Remote state is empty, using local state instead.");
-        return
+        console.log(
+          "[Sync] Remote state is empty, using local state instead.",
+        );
+        return;
       } else {
         const parsedRemoteState = JSON.parse(
           await client.get(config.username),
         ) as AppState;
         mergeAppState(localState, parsedRemoteState);
         setLocalAppState(localState);
       }
     } catch (e) {
       console.log("[Sync] failed to get remote state", e);
       throw e;

@@ -8,8 +8,6 @@ import { getClientConfig } from "../config/client";
 import { createPersistStore } from "../utils/store";
 import ChatGptIcon from "../icons/chatgpt.png";
 import Locale from "../locales";
-import { use } from "react";
-import { useAppConfig } from ".";
 import { ClientApi } from "../client/api";

 const ONE_MINUTE = 60 * 1000;

@@ -3,8 +3,7 @@ import { showToast } from "./components/ui-lib";
 import Locale from "./locales";
 import { RequestMessage } from "./client/api";
 import { ServiceProvider, REQUEST_TIMEOUT_MS } from "./constant";
-import isObject from "lodash-es/isObject";
-import { fetch as tauriFetch, Body, ResponseType } from "@tauri-apps/api/http";
+import { fetch as tauriFetch, ResponseType } from "@tauri-apps/api/http";

 export function trimTopic(topic: string) {
   // Fix an issue where double quotes still show in the Indonesian language

45
app/utils/audio.ts Normal file
View File

@ -0,0 +1,45 @@
type TTSPlayer = {
init: () => void;
play: (audioBuffer: ArrayBuffer, onended: () => void | null) => Promise<void>;
stop: () => void;
};
export function createTTSPlayer(): TTSPlayer {
let audioContext: AudioContext | null = null;
let audioBufferSourceNode: AudioBufferSourceNode | null = null;
const init = () => {
audioContext = new (window.AudioContext || window.webkitAudioContext)();
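// keep the context suspended until play() resumes it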
audioContext.suspend();
};
const play = async (audioBuffer: ArrayBuffer, onended: () => void | null) => {
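// stop and detach any clip that is still playing before starting a new one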
if (audioBufferSourceNode) {
audioBufferSourceNode.stop();
audioBufferSourceNode.disconnect();
}
const buffer = await audioContext!.decodeAudioData(audioBuffer);
audioBufferSourceNode = audioContext!.createBufferSource();
audioBufferSourceNode.buffer = buffer;
audioBufferSourceNode.connect(audioContext!.destination);
audioContext!.resume().then(() => {
audioBufferSourceNode!.start();
});
audioBufferSourceNode.onended = onended;
};
const stop = () => {
if (audioBufferSourceNode) {
audioBufferSourceNode.stop();
audioBufferSourceNode.disconnect();
audioBufferSourceNode = null;
}
if (audioContext) {
audioContext.close();
audioContext = null;
}
};
return { init, play, stop };
}
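A short usage sketch for the player above; the fetch URL is illustrative, and `play` accepts any buffer that `decodeAudioData` can parse, such as the MP3 output of the TTS module below:

```ts
import { createTTSPlayer } from "./audio";

const player = createTTSPlayer();
player.init(); // call from a user gesture so the AudioContext may start

async function speak(url: string) {
  const mp3 = await (await fetch(url)).arrayBuffer(); // illustrative source
  await player.play(mp3, () => {
    console.log("[TTS] playback finished");
    player.stop(); // release the AudioContext once playback ends
  });
}
```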

View File

@ -1,5 +1,5 @@
import { getClientConfig } from "../config/client";
-import { ApiPath, DEFAULT_API_HOST } from "../constant";
+import { DEFAULT_API_HOST } from "../constant";

export function corsPath(path: string) {
  const baseUrl = getClientConfig()?.isApp ? `${DEFAULT_API_HOST}` : "";

app/utils/ms_edge_tts.ts Normal file (391 lines)
View File

@ -0,0 +1,391 @@
// import axios from "axios";
import { Buffer } from "buffer";
import { randomBytes } from "crypto";
import { Readable } from "stream";
// Modified according to https://github.com/Migushthe2nd/MsEdgeTTS
/**
* https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-synthesis-markup-voice#:~:text=Optional-,volume,-Indicates%20the%20volume
*/
export enum VOLUME {
SILENT = "silent",
X_SOFT = "x-soft",
SOFT = "soft",
MEDIUM = "medium",
LOUD = "loud",
X_LOUD = "x-LOUD",
DEFAULT = "default",
}
/**
* https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-synthesis-markup-voice#:~:text=Optional-,rate,-Indicates%20the%20speaking
*/
export enum RATE {
X_SLOW = "x-slow",
SLOW = "slow",
MEDIUM = "medium",
FAST = "fast",
X_FAST = "x-fast",
DEFAULT = "default",
}
/**
* https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-synthesis-markup-voice#:~:text=Optional-,pitch,-Indicates%20the%20baseline
*/
export enum PITCH {
X_LOW = "x-low",
LOW = "low",
MEDIUM = "medium",
HIGH = "high",
X_HIGH = "x-high",
DEFAULT = "default",
}
/**
* Only a few of the [possible formats](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/rest-text-to-speech#audio-outputs) are accepted.
*/
export enum OUTPUT_FORMAT {
// Streaming =============================
// AMR_WB_16000HZ = "amr-wb-16000hz",
// AUDIO_16KHZ_16BIT_32KBPS_MONO_OPUS = "audio-16khz-16bit-32kbps-mono-opus",
// AUDIO_16KHZ_32KBITRATE_MONO_MP3 = "audio-16khz-32kbitrate-mono-mp3",
// AUDIO_16KHZ_64KBITRATE_MONO_MP3 = "audio-16khz-64kbitrate-mono-mp3",
// AUDIO_16KHZ_128KBITRATE_MONO_MP3 = "audio-16khz-128kbitrate-mono-mp3",
// AUDIO_24KHZ_16BIT_24KBPS_MONO_OPUS = "audio-24khz-16bit-24kbps-mono-opus",
// AUDIO_24KHZ_16BIT_48KBPS_MONO_OPUS = "audio-24khz-16bit-48kbps-mono-opus",
AUDIO_24KHZ_48KBITRATE_MONO_MP3 = "audio-24khz-48kbitrate-mono-mp3",
AUDIO_24KHZ_96KBITRATE_MONO_MP3 = "audio-24khz-96kbitrate-mono-mp3",
// AUDIO_24KHZ_160KBITRATE_MONO_MP3 = "audio-24khz-160kbitrate-mono-mp3",
// AUDIO_48KHZ_96KBITRATE_MONO_MP3 = "audio-48khz-96kbitrate-mono-mp3",
// AUDIO_48KHZ_192KBITRATE_MONO_MP3 = "audio-48khz-192kbitrate-mono-mp3",
// OGG_16KHZ_16BIT_MONO_OPUS = "ogg-16khz-16bit-mono-opus",
// OGG_24KHZ_16BIT_MONO_OPUS = "ogg-24khz-16bit-mono-opus",
// OGG_48KHZ_16BIT_MONO_OPUS = "ogg-48khz-16bit-mono-opus",
// RAW_8KHZ_8BIT_MONO_ALAW = "raw-8khz-8bit-mono-alaw",
// RAW_8KHZ_8BIT_MONO_MULAW = "raw-8khz-8bit-mono-mulaw",
// RAW_8KHZ_16BIT_MONO_PCM = "raw-8khz-16bit-mono-pcm",
// RAW_16KHZ_16BIT_MONO_PCM = "raw-16khz-16bit-mono-pcm",
// RAW_16KHZ_16BIT_MONO_TRUESILK = "raw-16khz-16bit-mono-truesilk",
// RAW_22050HZ_16BIT_MONO_PCM = "raw-22050hz-16bit-mono-pcm",
// RAW_24KHZ_16BIT_MONO_PCM = "raw-24khz-16bit-mono-pcm",
// RAW_24KHZ_16BIT_MONO_TRUESILK = "raw-24khz-16bit-mono-truesilk",
// RAW_44100HZ_16BIT_MONO_PCM = "raw-44100hz-16bit-mono-pcm",
// RAW_48KHZ_16BIT_MONO_PCM = "raw-48khz-16bit-mono-pcm",
// WEBM_16KHZ_16BIT_MONO_OPUS = "webm-16khz-16bit-mono-opus",
// WEBM_24KHZ_16BIT_24KBPS_MONO_OPUS = "webm-24khz-16bit-24kbps-mono-opus",
WEBM_24KHZ_16BIT_MONO_OPUS = "webm-24khz-16bit-mono-opus",
// Non-streaming =============================
// RIFF_8KHZ_8BIT_MONO_ALAW = "riff-8khz-8bit-mono-alaw",
// RIFF_8KHZ_8BIT_MONO_MULAW = "riff-8khz-8bit-mono-mulaw",
// RIFF_8KHZ_16BIT_MONO_PCM = "riff-8khz-16bit-mono-pcm",
// RIFF_22050HZ_16BIT_MONO_PCM = "riff-22050hz-16bit-mono-pcm",
// RIFF_24KHZ_16BIT_MONO_PCM = "riff-24khz-16bit-mono-pcm",
// RIFF_44100HZ_16BIT_MONO_PCM = "riff-44100hz-16bit-mono-pcm",
// RIFF_48KHZ_16BIT_MONO_PCM = "riff-48khz-16bit-mono-pcm",
}
export type Voice = {
Name: string;
ShortName: string;
Gender: string;
Locale: string;
SuggestedCodec: string;
FriendlyName: string;
Status: string;
};
export class ProsodyOptions {
/**
* The pitch to use.
* Can be any {@link PITCH}, or a relative frequency in Hz (+50Hz), a relative semitone (+2st), or a relative percentage (+50%).
* [SSML documentation](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-synthesis-markup-voice#:~:text=Optional-,pitch,-Indicates%20the%20baseline)
*/
pitch?: PITCH | string = "+0Hz";
/**
* The rate to use.
* Can be any {@link RATE}, or a relative number (0.5), or string with a relative percentage (+50%).
* [SSML documentation](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-synthesis-markup-voice#:~:text=Optional-,rate,-Indicates%20the%20speaking)
*/
rate?: RATE | string | number = 1.0;
/**
* The volume to use.
* Can be any {@link VOLUME}, or an absolute number (0, 100), a string with a relative number (+50), or a relative percentage (+50%).
* [SSML documentation](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-synthesis-markup-voice#:~:text=Optional-,volume,-Indicates%20the%20volume)
*/
volume?: VOLUME | string | number = 100.0;
}
export class MsEdgeTTS {
static OUTPUT_FORMAT = OUTPUT_FORMAT;
private static TRUSTED_CLIENT_TOKEN = "6A5AA1D4EAFF4E9FB37E23D68491D6F4";
private static VOICES_URL = `https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/voices/list?trustedclienttoken=${MsEdgeTTS.TRUSTED_CLIENT_TOKEN}`;
private static SYNTH_URL = `wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1?TrustedClientToken=${MsEdgeTTS.TRUSTED_CLIENT_TOKEN}`;
private static BINARY_DELIM = "Path:audio\r\n";
private static VOICE_LANG_REGEX = /\w{2}-\w{2}/;
private readonly _enableLogger;
private _ws: WebSocket | undefined;
private _voice: any;
private _voiceLocale: any;
private _outputFormat: any;
private _streams: { [key: string]: Readable } = {};
private _startTime = 0;
private _log(...o: any[]) {
if (this._enableLogger) {
console.log(...o);
}
}
/**
* Create a new `MsEdgeTTS` instance.
*
 * @param enableLogger=false whether to enable the built-in logger. This logs connection inits, disconnects, and incoming data to the console
*/
public constructor(enableLogger: boolean = false) {
this._enableLogger = enableLogger;
}
private async _send(message: any) {
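    // (re)connect lazily: retry up to three times until the socket is OPEN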
for (let i = 1; i <= 3 && this._ws!.readyState !== this._ws!.OPEN; i++) {
if (i == 1) {
this._startTime = Date.now();
}
this._log("connecting: ", i);
await this._initClient();
}
this._ws!.send(message);
}
private _initClient() {
this._ws = new WebSocket(MsEdgeTTS.SYNTH_URL);
this._ws.binaryType = "arraybuffer";
return new Promise((resolve, reject) => {
this._ws!.onopen = () => {
this._log(
"Connected in",
(Date.now() - this._startTime) / 1000,
"seconds",
);
this._send(
`Content-Type:application/json; charset=utf-8\r\nPath:speech.config\r\n\r\n
{
"context": {
"synthesis": {
"audio": {
"metadataoptions": {
"sentenceBoundaryEnabled": "false",
"wordBoundaryEnabled": "false"
},
"outputFormat": "${this._outputFormat}"
}
}
}
}
`,
).then(resolve);
};
this._ws!.onmessage = (m: any) => {
const buffer = Buffer.from(m.data as ArrayBuffer);
const message = buffer.toString();
const requestId = /X-RequestId:(.*?)\r\n/gm.exec(message)![1];
if (message.includes("Path:turn.start")) {
// start of turn, ignore
} else if (message.includes("Path:turn.end")) {
// end of turn, close stream
this._streams[requestId].push(null);
} else if (message.includes("Path:response")) {
// context response, ignore
} else if (
message.includes("Path:audio") &&
m.data instanceof ArrayBuffer
) {
this._pushAudioData(buffer, requestId);
} else {
this._log("UNKNOWN MESSAGE", message);
}
};
this._ws!.onclose = () => {
this._log(
"disconnected after:",
(Date.now() - this._startTime) / 1000,
"seconds",
);
for (const requestId in this._streams) {
this._streams[requestId].push(null);
}
};
this._ws!.onerror = function (error: any) {
reject("Connect Error: " + error);
};
});
}
private _pushAudioData(audioBuffer: Buffer, requestId: string) {
const audioStartIndex =
audioBuffer.indexOf(MsEdgeTTS.BINARY_DELIM) +
MsEdgeTTS.BINARY_DELIM.length;
const audioData = audioBuffer.subarray(audioStartIndex);
this._streams[requestId].push(audioData);
this._log("received audio chunk, size: ", audioData?.length);
}
private _SSMLTemplate(input: string, options: ProsodyOptions = {}): string {
// in case future updates to the edge API block these elements, we'll be concatenating strings.
options = { ...new ProsodyOptions(), ...options };
return `<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="https://www.w3.org/2001/mstts" xml:lang="${this._voiceLocale}">
<voice name="${this._voice}">
<prosody pitch="${options.pitch}" rate="${options.rate}" volume="${options.volume}">
${input}
</prosody>
</voice>
</speak>`;
}
/**
* Fetch the list of voices available in Microsoft Edge.
 * These are not all of them, however: the complete list of voices supported by this module [can be found here](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support) (neural, standard, and preview).
*/
// getVoices(): Promise<Voice[]> {
// return new Promise((resolve, reject) => {
// axios
// .get(MsEdgeTTS.VOICES_URL)
// .then((res) => resolve(res.data))
// .catch(reject);
// });
// }
getVoices(): Promise<Voice[]> {
return fetch(MsEdgeTTS.VOICES_URL)
.then((response) => {
if (!response.ok) {
throw new Error("Network response was not ok");
}
return response.json();
})
.then((data) => data as Voice[])
.catch((error) => {
throw error;
});
}
/**
* Sets the required information for the speech to be synthesised and inits a new WebSocket connection.
* Must be called at least once before text can be synthesised.
 * Saved in this instance. Can be called any number of times to update the metadata.
*
* @param voiceName a string with any `ShortName`. A list of all available neural voices can be found [here](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support#neural-voices). However, it is not limited to neural voices: standard voices can also be used. A list of standard voices can be found [here](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support#standard-voices)
* @param outputFormat any {@link OUTPUT_FORMAT}
* @param voiceLocale (optional) any voice locale that is supported by the voice. See the list of all voices for compatibility. If not provided, the locale will be inferred from the `voiceName`
*/
async setMetadata(
voiceName: string,
outputFormat: OUTPUT_FORMAT,
voiceLocale?: string,
) {
const oldVoice = this._voice;
const oldVoiceLocale = this._voiceLocale;
const oldOutputFormat = this._outputFormat;
this._voice = voiceName;
this._voiceLocale = voiceLocale;
if (!this._voiceLocale) {
const voiceLangMatch = MsEdgeTTS.VOICE_LANG_REGEX.exec(this._voice);
if (!voiceLangMatch)
throw new Error("Could not infer voiceLocale from voiceName!");
this._voiceLocale = voiceLangMatch[0];
}
this._outputFormat = outputFormat;
const changed =
oldVoice !== this._voice ||
oldVoiceLocale !== this._voiceLocale ||
oldOutputFormat !== this._outputFormat;
// create new client
if (changed || this._ws!.readyState !== this._ws!.OPEN) {
this._startTime = Date.now();
await this._initClient();
}
}
private _metadataCheck() {
if (!this._ws)
throw new Error(
"Speech synthesis not configured yet. Run setMetadata before calling toStream or toFile.",
);
}
/**
* Close the WebSocket connection.
*/
close() {
this._ws!.close();
}
/**
 * Writes raw audio synthesised from text in real-time to a {@link Readable}. Uses a basic {@link _SSMLTemplate SSML template}.
*
* @param input the text to synthesise. Can include SSML elements.
* @param options (optional) {@link ProsodyOptions}
* @returns {Readable} - a `stream.Readable` with the audio data
*/
toStream(input: string, options?: ProsodyOptions): Readable {
const { stream } = this._rawSSMLRequest(this._SSMLTemplate(input, options));
return stream;
}
toArrayBuffer(input: string, options?: ProsodyOptions): Promise<ArrayBuffer> {
return new Promise((resolve, reject) => {
let data: Uint8Array[] = [];
const readable = this.toStream(input, options);
readable.on("data", (chunk) => {
data.push(chunk);
});
readable.on("end", () => {
resolve(Buffer.concat(data).buffer);
});
readable.on("error", (err) => {
reject(err);
});
});
}
/**
* Writes raw audio synthesised from a request in real-time to a {@link Readable}. Has no SSML template. Basic SSML should be provided in the request.
*
 * @param requestSSML the SSML to send. SSML elements are required for it to work.
* @returns {Readable} - a `stream.Readable` with the audio data
*/
rawToStream(requestSSML: string): Readable {
const { stream } = this._rawSSMLRequest(requestSSML);
return stream;
}
private _rawSSMLRequest(requestSSML: string): {
stream: Readable;
requestId: string;
} {
this._metadataCheck();
const requestId = randomBytes(16).toString("hex");
const request =
`X-RequestId:${requestId}\r\nContent-Type:application/ssml+xml\r\nPath:ssml\r\n\r\n
` + requestSSML.trim();
// https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/speech-synthesis-markup
const self = this;
const stream = new Readable({
read() {},
destroy(error: Error | null, callback: (error: Error | null) => void) {
delete self._streams[requestId];
callback(error);
},
});
this._streams[requestId] = stream;
this._send(request).then();
return { stream, requestId };
}
}
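End to end, a minimal sketch of driving the class (the voice name and output format are examples; `setMetadata` must resolve before any synthesis call):

```ts
import { MsEdgeTTS, OUTPUT_FORMAT } from "./ms_edge_tts";

async function synthesize(text: string): Promise<ArrayBuffer> {
  const tts = new MsEdgeTTS();
  // the locale is inferred from the ShortName ("en-US-JennyNeural" -> "en-US")
  await tts.setMetadata(
    "en-US-JennyNeural",
    OUTPUT_FORMAT.AUDIO_24KHZ_96KBITRATE_MONO_MP3,
  );
  return tts.toArrayBuffer(text); // collect the audio stream into one buffer
}
```

The resulting buffer can be handed straight to `createTTSPlayer().play(...)` from app/utils/audio.ts.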

View File

@ -32,6 +32,7 @@
    "idb-keyval": "^6.2.1",
    "lodash-es": "^4.17.21",
    "mermaid": "^10.6.1",
+   "markdown-to-txt": "^2.0.1",
    "nanoid": "^5.0.3",
    "next": "^14.1.1",
    "node-fetch": "^3.3.1",
@ -66,6 +67,7 @@
    "eslint-config-next": "13.4.19",
    "eslint-config-prettier": "^8.8.0",
    "eslint-plugin-prettier": "^5.1.3",
+   "eslint-plugin-unused-imports": "^3.2.0",
    "husky": "^8.0.0",
    "lint-staged": "^13.2.2",
    "prettier": "^3.0.2",
@ -78,4 +80,4 @@
    "lint-staged/yaml": "^2.2.2"
  },
  "packageManager": "yarn@1.22.19"
}
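Of the two new packages, `eslint-plugin-unused-imports` supports the unused-imports lint rule, while `markdown-to-txt` plausibly flattens markdown chat replies to plain text before TTS synthesis (an assumption: this diff shows only the dependency). A sketch, with the default export assumed:

```ts
import markdownToTxt from "markdown-to-txt"; // default export assumed

// e.g. "**Hello**, _world_!" -> "Hello, world!"
const plain = markdownToTxt("**Hello**, _world_!");
```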

View File

@ -3367,6 +3367,18 @@ eslint-plugin-react@^7.31.7:
    semver "^6.3.0"
    string.prototype.matchall "^4.0.8"

+eslint-plugin-unused-imports@^3.2.0:
+  version "3.2.0"
+  resolved "https://registry.yarnpkg.com/eslint-plugin-unused-imports/-/eslint-plugin-unused-imports-3.2.0.tgz#63a98c9ad5f622cd9f830f70bc77739f25ccfe0d"
+  integrity sha512-6uXyn6xdINEpxE1MtDjxQsyXB37lfyO2yKGVVgtD7WEWQGORSOZjgrD6hBhvGv4/SO+TOlS+UnC6JppRqbuwGQ==
+  dependencies:
+    eslint-rule-composer "^0.3.0"
+
+eslint-rule-composer@^0.3.0:
+  version "0.3.0"
+  resolved "https://registry.yarnpkg.com/eslint-rule-composer/-/eslint-rule-composer-0.3.0.tgz#79320c927b0c5c0d3d3d2b76c8b4a488f25bbaf9"
+  integrity sha512-bt+Sh8CtDmn2OajxvNO+BX7Wn4CIWMpTRm3MaiKPCQcnnlm0CS2mhui6QaoeQugs+3Kj2ESKEEGJUdVafwhiCg==
+
eslint-scope@5.1.1:
  version "5.1.1"
  resolved "https://registry.npmmirror.com/eslint-scope/-/eslint-scope-5.1.1.tgz#e786e59a66cb92b3f6c1fb0d508aab174848f48c"
@ -4443,11 +4455,21 @@ lodash.debounce@^4.0.8:
  resolved "https://registry.yarnpkg.com/lodash.debounce/-/lodash.debounce-4.0.8.tgz#82d79bff30a67c4005ffd5e2515300ad9ca4d7af"
  integrity sha512-FT1yDzDYEoYWhnSGnpE/4Kj1fLZkDFyqRb7fNt6FdYOSxlUWAtp42Eh6Wb0rGIv/m9Bgo7x4GhQbm5Ys4SG5ow==

+lodash.escape@^4.0.1:
+  version "4.0.1"
+  resolved "https://registry.yarnpkg.com/lodash.escape/-/lodash.escape-4.0.1.tgz#c9044690c21e04294beaa517712fded1fa88de98"
+  integrity sha512-nXEOnb/jK9g0DYMr1/Xvq6l5xMD7GDG55+GSYIYmS0G4tBk/hURD4JR9WCavs04t33WmJx9kCyp9vJ+mr4BOUw==
+
lodash.merge@^4.6.2:
  version "4.6.2"
  resolved "https://registry.yarnpkg.com/lodash.merge/-/lodash.merge-4.6.2.tgz#558aa53b43b661e1925a0afdfa36a9a1085fe57a"
  integrity sha512-0KpjqXRVvrYyCsX1swR/XTK0va6VQkQM6MNo7PqW77ByjAhoARA8EfrP1N4+KlKj8YS0ZUCtRT/YUuhyYDujIQ==

+lodash.unescape@^4.0.1:
+  version "4.0.1"
+  resolved "https://registry.yarnpkg.com/lodash.unescape/-/lodash.unescape-4.0.1.tgz#bf2249886ce514cda112fae9218cdc065211fc9c"
+  integrity sha512-DhhGRshNS1aX6s5YdBE3njCCouPgnG29ebyHvImlZzXZf2SHgt+J08DHgytTPnpywNbO1Y8mNUFyQuIDBq2JZg==
+
lodash@^4.17.21:
  version "4.17.21"
  resolved "https://registry.npmmirror.com/lodash/-/lodash-4.17.21.tgz#679591c564c3bffaae8454cf0b3df370c3d6911c"
@ -4503,6 +4525,20 @@ markdown-table@^3.0.0:
  resolved "https://registry.yarnpkg.com/markdown-table/-/markdown-table-3.0.3.tgz#e6331d30e493127e031dd385488b5bd326e4a6bd"
  integrity sha512-Z1NL3Tb1M9wH4XESsCDEksWoKTdlUafKc4pt0GRwjUyXaCFZ+dc3g2erqB6zm3szA2IUSi7VnPI+o/9jnxh9hw==

+markdown-to-txt@^2.0.1:
+  version "2.0.1"
+  resolved "https://registry.yarnpkg.com/markdown-to-txt/-/markdown-to-txt-2.0.1.tgz#bfd6233a2635443cc24900a158b60c6af36ce9c5"
+  integrity sha512-Hsj7KTN8k1gutlLum3vosHwVZGnv8/cbYKWVkUyo/D1rzOYddbDesILebRfOsaVfjIBJank/AVOySBlHAYqfZw==
+  dependencies:
+    lodash.escape "^4.0.1"
+    lodash.unescape "^4.0.1"
+    marked "^4.0.14"
+
+marked@^4.0.14:
+  version "4.3.0"
+  resolved "https://registry.yarnpkg.com/marked/-/marked-4.3.0.tgz#796362821b019f734054582038b116481b456cf3"
+  integrity sha512-PRsaiG84bK+AMvxziE/lCFss8juXjNaWzVbN5tXAm4XjeaS9NAHhop+PjQxz2A9h8Q4M/xGmzP8vqNwy6JeK0A==
+
mdast-util-definitions@^5.0.0:
  version "5.1.2"
  resolved "https://registry.yarnpkg.com/mdast-util-definitions/-/mdast-util-definitions-5.1.2.tgz#9910abb60ac5d7115d6819b57ae0bcef07a3f7a7"