Merge pull request #1 from ChatGPTNextWeb/main

引入新功能
This commit is contained in:
caowenhao 2024-09-19 09:57:43 +08:00 committed by GitHub
commit cf36f8452c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
56 changed files with 1162 additions and 111 deletions

View File

@ -1,4 +1,7 @@
{ {
"extends": "next/core-web-vitals", "extends": "next/core-web-vitals",
"plugins": ["prettier"] "plugins": ["prettier", "unused-imports"],
"rules": {
"unused-imports/no-unused-imports": "warn"
}
} }

View File

@ -49,7 +49,7 @@ jobs:
run: npm install --global vercel@latest run: npm install --global vercel@latest
- name: Cache dependencies - name: Cache dependencies
uses: actions/cache@v2 uses: actions/cache@v4
id: cache-npm id: cache-npm
with: with:
path: ~/.npm path: ~/.npm

View File

@ -1,5 +1,5 @@
import { ApiPath } from "@/app/constant"; import { ApiPath } from "@/app/constant";
import { NextRequest, NextResponse } from "next/server"; import { NextRequest } from "next/server";
import { handle as openaiHandler } from "../../openai"; import { handle as openaiHandler } from "../../openai";
import { handle as azureHandler } from "../../azure"; import { handle as azureHandler } from "../../azure";
import { handle as googleHandler } from "../../google"; import { handle as googleHandler } from "../../google";

View File

@ -1,6 +1,5 @@
import { getServerSideConfig } from "@/app/config/server"; import { getServerSideConfig } from "@/app/config/server";
import { import {
Alibaba,
ALIBABA_BASE_URL, ALIBABA_BASE_URL,
ApiPath, ApiPath,
ModelProvider, ModelProvider,
@ -10,7 +9,6 @@ import { prettyObject } from "@/app/utils/format";
import { NextRequest, NextResponse } from "next/server"; import { NextRequest, NextResponse } from "next/server";
import { auth } from "@/app/api/auth"; import { auth } from "@/app/api/auth";
import { isModelAvailableInServer } from "@/app/utils/model"; import { isModelAvailableInServer } from "@/app/utils/model";
import type { RequestPayload } from "@/app/client/platforms/openai";
const serverConfig = getServerSideConfig(); const serverConfig = getServerSideConfig();

View File

@ -3,7 +3,6 @@ import {
ANTHROPIC_BASE_URL, ANTHROPIC_BASE_URL,
Anthropic, Anthropic,
ApiPath, ApiPath,
DEFAULT_MODELS,
ServiceProvider, ServiceProvider,
ModelProvider, ModelProvider,
} from "@/app/constant"; } from "@/app/constant";

View File

@ -1,4 +1,3 @@
import { getServerSideConfig } from "@/app/config/server";
import { ModelProvider } from "@/app/constant"; import { ModelProvider } from "@/app/constant";
import { prettyObject } from "@/app/utils/format"; import { prettyObject } from "@/app/utils/format";
import { NextRequest, NextResponse } from "next/server"; import { NextRequest, NextResponse } from "next/server";

View File

@ -3,7 +3,6 @@ import {
BAIDU_BASE_URL, BAIDU_BASE_URL,
ApiPath, ApiPath,
ModelProvider, ModelProvider,
BAIDU_OATUH_URL,
ServiceProvider, ServiceProvider,
} from "@/app/constant"; } from "@/app/constant";
import { prettyObject } from "@/app/utils/format"; import { prettyObject } from "@/app/utils/format";

View File

@ -1,11 +1,6 @@
import { NextRequest, NextResponse } from "next/server"; import { NextRequest, NextResponse } from "next/server";
import { getServerSideConfig } from "../config/server"; import { getServerSideConfig } from "../config/server";
import { import { OPENAI_BASE_URL, ServiceProvider } from "../constant";
DEFAULT_MODELS,
OPENAI_BASE_URL,
GEMINI_BASE_URL,
ServiceProvider,
} from "../constant";
import { isModelAvailableInServer } from "../utils/model"; import { isModelAvailableInServer } from "../utils/model";
import { cloudflareAIGatewayUrl } from "../utils/cloudflare"; import { cloudflareAIGatewayUrl } from "../utils/cloudflare";

View File

@ -1,12 +1,7 @@
import { NextRequest, NextResponse } from "next/server"; import { NextRequest, NextResponse } from "next/server";
import { auth } from "./auth"; import { auth } from "./auth";
import { getServerSideConfig } from "@/app/config/server"; import { getServerSideConfig } from "@/app/config/server";
import { import { ApiPath, GEMINI_BASE_URL, ModelProvider } from "@/app/constant";
ApiPath,
GEMINI_BASE_URL,
Google,
ModelProvider,
} from "@/app/constant";
import { prettyObject } from "@/app/utils/format"; import { prettyObject } from "@/app/utils/format";
const serverConfig = getServerSideConfig(); const serverConfig = getServerSideConfig();

View File

@ -1,6 +1,5 @@
import { getServerSideConfig } from "@/app/config/server"; import { getServerSideConfig } from "@/app/config/server";
import { import {
Iflytek,
IFLYTEK_BASE_URL, IFLYTEK_BASE_URL,
ApiPath, ApiPath,
ModelProvider, ModelProvider,
@ -10,7 +9,6 @@ import { prettyObject } from "@/app/utils/format";
import { NextRequest, NextResponse } from "next/server"; import { NextRequest, NextResponse } from "next/server";
import { auth } from "@/app/api/auth"; import { auth } from "@/app/api/auth";
import { isModelAvailableInServer } from "@/app/utils/model"; import { isModelAvailableInServer } from "@/app/utils/model";
import type { RequestPayload } from "@/app/client/platforms/openai";
// iflytek // iflytek
const serverConfig = getServerSideConfig(); const serverConfig = getServerSideConfig();

View File

@ -1,6 +1,5 @@
import { getServerSideConfig } from "@/app/config/server"; import { getServerSideConfig } from "@/app/config/server";
import { import {
Moonshot,
MOONSHOT_BASE_URL, MOONSHOT_BASE_URL,
ApiPath, ApiPath,
ModelProvider, ModelProvider,
@ -10,7 +9,6 @@ import { prettyObject } from "@/app/utils/format";
import { NextRequest, NextResponse } from "next/server"; import { NextRequest, NextResponse } from "next/server";
import { auth } from "@/app/api/auth"; import { auth } from "@/app/api/auth";
import { isModelAvailableInServer } from "@/app/utils/model"; import { isModelAvailableInServer } from "@/app/utils/model";
import type { RequestPayload } from "@/app/client/platforms/openai";
const serverConfig = getServerSideConfig(); const serverConfig = getServerSideConfig();

View File

@ -1,15 +1,8 @@
import { getServerSideConfig } from "@/app/config/server"; import { getServerSideConfig } from "@/app/config/server";
import { import { TENCENT_BASE_URL, ModelProvider } from "@/app/constant";
TENCENT_BASE_URL,
ApiPath,
ModelProvider,
ServiceProvider,
Tencent,
} from "@/app/constant";
import { prettyObject } from "@/app/utils/format"; import { prettyObject } from "@/app/utils/format";
import { NextRequest, NextResponse } from "next/server"; import { NextRequest, NextResponse } from "next/server";
import { auth } from "@/app/api/auth"; import { auth } from "@/app/api/auth";
import { isModelAvailableInServer } from "@/app/utils/model";
import { getHeader } from "@/app/utils/tencent"; import { getHeader } from "@/app/utils/tencent";
const serverConfig = getServerSideConfig(); const serverConfig = getServerSideConfig();

View File

@ -1,7 +1,6 @@
import { getClientConfig } from "../config/client"; import { getClientConfig } from "../config/client";
import { import {
ACCESS_CODE_PREFIX, ACCESS_CODE_PREFIX,
Azure,
ModelProvider, ModelProvider,
ServiceProvider, ServiceProvider,
} from "../constant"; } from "../constant";
@ -26,6 +25,7 @@ export const ROLES = ["system", "user", "assistant"] as const;
export type MessageRole = (typeof ROLES)[number]; export type MessageRole = (typeof ROLES)[number];
export const Models = ["gpt-3.5-turbo", "gpt-4"] as const; export const Models = ["gpt-3.5-turbo", "gpt-4"] as const;
export const TTSModels = ["tts-1", "tts-1-hd"] as const;
export type ChatModel = ModelType; export type ChatModel = ModelType;
export interface MultimodalContent { export interface MultimodalContent {
@ -54,6 +54,15 @@ export interface LLMConfig {
style?: DalleRequestPayload["style"]; style?: DalleRequestPayload["style"];
} }
/**
 * Options accepted by `LLMApi.speech` for a text-to-speech request.
 */
export interface SpeechOptions {
// Identifier of the TTS model to use.
model: string;
// Plain text to be synthesized.
input: string;
// Name of the voice to synthesize with.
voice: string;
// Optional audio container/encoding requested from the provider.
// NOTE(review): accepted values are provider-specific — confirm against the provider API.
response_format?: string;
// Optional playback speed multiplier.
speed?: number;
// Optional hook that receives the AbortController created for the request,
// so the caller can cancel it.
onController?: (controller: AbortController) => void;
}
export interface ChatOptions { export interface ChatOptions {
messages: RequestMessage[]; messages: RequestMessage[];
config: LLMConfig; config: LLMConfig;
@ -88,6 +97,7 @@ export interface LLMModelProvider {
export abstract class LLMApi { export abstract class LLMApi {
abstract chat(options: ChatOptions): Promise<void>; abstract chat(options: ChatOptions): Promise<void>;
abstract speech(options: SpeechOptions): Promise<ArrayBuffer>;
abstract usage(): Promise<LLMUsage>; abstract usage(): Promise<LLMUsage>;
abstract models(): Promise<LLMModel[]>; abstract models(): Promise<LLMModel[]>;
} }
@ -206,13 +216,16 @@ export function validString(x: string): boolean {
return x?.length > 0; return x?.length > 0;
} }
export function getHeaders() { export function getHeaders(ignoreHeaders: boolean = false) {
const accessStore = useAccessStore.getState(); const accessStore = useAccessStore.getState();
const chatStore = useChatStore.getState(); const chatStore = useChatStore.getState();
const headers: Record<string, string> = { let headers: Record<string, string> = {};
"Content-Type": "application/json", if (!ignoreHeaders) {
Accept: "application/json", headers = {
}; "Content-Type": "application/json",
Accept: "application/json",
};
}
const clientConfig = getClientConfig(); const clientConfig = getClientConfig();

View File

@ -12,6 +12,7 @@ import {
getHeaders, getHeaders,
LLMApi, LLMApi,
LLMModel, LLMModel,
SpeechOptions,
MultimodalContent, MultimodalContent,
} from "../api"; } from "../api";
import Locale from "../../locales"; import Locale from "../../locales";
@ -83,6 +84,10 @@ export class QwenApi implements LLMApi {
return res?.output?.choices?.at(0)?.message?.content ?? ""; return res?.output?.choices?.at(0)?.message?.content ?? "";
} }
/**
 * Speech synthesis is not implemented for this client.
 *
 * Declared `async` so the failure surfaces as a rejected promise (matching the
 * `Promise<ArrayBuffer>` return type) instead of a synchronous throw that
 * would bypass a caller's `.catch()` handler.
 *
 * @throws {Error} Always rejects with "Method not implemented."
 */
async speech(options: SpeechOptions): Promise<ArrayBuffer> {
  throw new Error("Method not implemented.");
}
async chat(options: ChatOptions) { async chat(options: ChatOptions) {
const messages = options.messages.map((v) => ({ const messages = options.messages.map((v) => ({
role: v.role, role: v.role,

View File

@ -1,5 +1,5 @@
import { ACCESS_CODE_PREFIX, Anthropic, ApiPath } from "@/app/constant"; import { Anthropic, ApiPath } from "@/app/constant";
import { ChatOptions, getHeaders, LLMApi, MultimodalContent } from "../api"; import { ChatOptions, getHeaders, LLMApi, SpeechOptions } from "../api";
import { import {
useAccessStore, useAccessStore,
useAppConfig, useAppConfig,
@ -9,13 +9,6 @@ import {
} from "@/app/store"; } from "@/app/store";
import { getClientConfig } from "@/app/config/client"; import { getClientConfig } from "@/app/config/client";
import { DEFAULT_API_HOST } from "@/app/constant"; import { DEFAULT_API_HOST } from "@/app/constant";
import {
EventStreamContentType,
fetchEventSource,
} from "@fortaine/fetch-event-source";
import Locale from "../../locales";
import { prettyObject } from "@/app/utils/format";
import { getMessageTextContent, isVisionModel } from "@/app/utils"; import { getMessageTextContent, isVisionModel } from "@/app/utils";
import { preProcessImageContent, stream } from "@/app/utils/chat"; import { preProcessImageContent, stream } from "@/app/utils/chat";
import { cloudflareAIGatewayUrl } from "@/app/utils/cloudflare"; import { cloudflareAIGatewayUrl } from "@/app/utils/cloudflare";
@ -80,6 +73,10 @@ const ClaudeMapper = {
const keys = ["claude-2, claude-instant-1"]; const keys = ["claude-2, claude-instant-1"];
export class ClaudeApi implements LLMApi { export class ClaudeApi implements LLMApi {
/**
 * Speech synthesis is not implemented for this client.
 *
 * Declared `async` so the failure surfaces as a rejected promise (matching the
 * `Promise<ArrayBuffer>` return type) instead of a synchronous throw that
 * would bypass a caller's `.catch()` handler.
 *
 * @throws {Error} Always rejects with "Method not implemented."
 */
async speech(options: SpeechOptions): Promise<ArrayBuffer> {
  throw new Error("Method not implemented.");
}
extractMessage(res: any) { extractMessage(res: any) {
console.log("[Response] claude response: ", res); console.log("[Response] claude response: ", res);

View File

@ -14,6 +14,7 @@ import {
LLMApi, LLMApi,
LLMModel, LLMModel,
MultimodalContent, MultimodalContent,
SpeechOptions,
} from "../api"; } from "../api";
import Locale from "../../locales"; import Locale from "../../locales";
import { import {
@ -75,6 +76,10 @@ export class ErnieApi implements LLMApi {
return [baseUrl, path].join("/"); return [baseUrl, path].join("/");
} }
/**
 * Speech synthesis is not implemented for this client.
 *
 * Declared `async` so the failure surfaces as a rejected promise (matching the
 * `Promise<ArrayBuffer>` return type) instead of a synchronous throw that
 * would bypass a caller's `.catch()` handler.
 *
 * @throws {Error} Always rejects with "Method not implemented."
 */
async speech(options: SpeechOptions): Promise<ArrayBuffer> {
  throw new Error("Method not implemented.");
}
async chat(options: ChatOptions) { async chat(options: ChatOptions) {
const messages = options.messages.map((v) => ({ const messages = options.messages.map((v) => ({
// "error_code": 336006, "error_msg": "the role of message with even index in the messages must be user or function", // "error_code": 336006, "error_msg": "the role of message with even index in the messages must be user or function",

View File

@ -13,6 +13,7 @@ import {
LLMApi, LLMApi,
LLMModel, LLMModel,
MultimodalContent, MultimodalContent,
SpeechOptions,
} from "../api"; } from "../api";
import Locale from "../../locales"; import Locale from "../../locales";
import { import {
@ -77,6 +78,10 @@ export class DoubaoApi implements LLMApi {
return res.choices?.at(0)?.message?.content ?? ""; return res.choices?.at(0)?.message?.content ?? "";
} }
/**
 * Speech synthesis is not implemented for this client.
 *
 * Declared `async` so the failure surfaces as a rejected promise (matching the
 * `Promise<ArrayBuffer>` return type) instead of a synchronous throw that
 * would bypass a caller's `.catch()` handler.
 *
 * @throws {Error} Always rejects with "Method not implemented."
 */
async speech(options: SpeechOptions): Promise<ArrayBuffer> {
  throw new Error("Method not implemented.");
}
async chat(options: ChatOptions) { async chat(options: ChatOptions) {
const messages = options.messages.map((v) => ({ const messages = options.messages.map((v) => ({
role: v.role, role: v.role,

View File

@ -1,5 +1,12 @@
import { ApiPath, Google, REQUEST_TIMEOUT_MS } from "@/app/constant"; import { ApiPath, Google, REQUEST_TIMEOUT_MS } from "@/app/constant";
import { ChatOptions, getHeaders, LLMApi, LLMModel, LLMUsage } from "../api"; import {
ChatOptions,
getHeaders,
LLMApi,
LLMModel,
LLMUsage,
SpeechOptions,
} from "../api";
import { useAccessStore, useAppConfig, useChatStore } from "@/app/store"; import { useAccessStore, useAppConfig, useChatStore } from "@/app/store";
import { getClientConfig } from "@/app/config/client"; import { getClientConfig } from "@/app/config/client";
import { DEFAULT_API_HOST } from "@/app/constant"; import { DEFAULT_API_HOST } from "@/app/constant";
@ -56,6 +63,10 @@ export class GeminiProApi implements LLMApi {
"" ""
); );
} }
/**
 * Speech synthesis is not implemented for this client.
 *
 * Declared `async` so the failure surfaces as a rejected promise (matching the
 * `Promise<ArrayBuffer>` return type) instead of a synchronous throw that
 * would bypass a caller's `.catch()` handler.
 *
 * @throws {Error} Always rejects with "Method not implemented."
 */
async speech(options: SpeechOptions): Promise<ArrayBuffer> {
  throw new Error("Method not implemented.");
}
async chat(options: ChatOptions): Promise<void> { async chat(options: ChatOptions): Promise<void> {
const apiClient = this; const apiClient = this;
let multimodal = false; let multimodal = false;

View File

@ -7,7 +7,13 @@ import {
} from "@/app/constant"; } from "@/app/constant";
import { useAccessStore, useAppConfig, useChatStore } from "@/app/store"; import { useAccessStore, useAppConfig, useChatStore } from "@/app/store";
import { ChatOptions, getHeaders, LLMApi, LLMModel } from "../api"; import {
ChatOptions,
getHeaders,
LLMApi,
LLMModel,
SpeechOptions,
} from "../api";
import Locale from "../../locales"; import Locale from "../../locales";
import { import {
EventStreamContentType, EventStreamContentType,
@ -17,7 +23,7 @@ import { prettyObject } from "@/app/utils/format";
import { getClientConfig } from "@/app/config/client"; import { getClientConfig } from "@/app/config/client";
import { getMessageTextContent } from "@/app/utils"; import { getMessageTextContent } from "@/app/utils";
import { OpenAIListModelResponse, RequestPayload } from "./openai"; import { RequestPayload } from "./openai";
export class SparkApi implements LLMApi { export class SparkApi implements LLMApi {
private disableListModels = true; private disableListModels = true;
@ -53,6 +59,10 @@ export class SparkApi implements LLMApi {
return res.choices?.at(0)?.message?.content ?? ""; return res.choices?.at(0)?.message?.content ?? "";
} }
/**
 * Speech synthesis is not implemented for this client.
 *
 * Declared `async` so the failure surfaces as a rejected promise (matching the
 * `Promise<ArrayBuffer>` return type) instead of a synchronous throw that
 * would bypass a caller's `.catch()` handler.
 *
 * @throws {Error} Always rejects with "Method not implemented."
 */
async speech(options: SpeechOptions): Promise<ArrayBuffer> {
  throw new Error("Method not implemented.");
}
async chat(options: ChatOptions) { async chat(options: ChatOptions) {
const messages: ChatOptions["messages"] = []; const messages: ChatOptions["messages"] = [];
for (const v of options.messages) { for (const v of options.messages) {

View File

@ -3,10 +3,8 @@
import { import {
ApiPath, ApiPath,
DEFAULT_API_HOST, DEFAULT_API_HOST,
DEFAULT_MODELS,
Moonshot, Moonshot,
REQUEST_TIMEOUT_MS, REQUEST_TIMEOUT_MS,
ServiceProvider,
} from "@/app/constant"; } from "@/app/constant";
import { import {
useAccessStore, useAccessStore,
@ -15,28 +13,17 @@ import {
ChatMessageTool, ChatMessageTool,
usePluginStore, usePluginStore,
} from "@/app/store"; } from "@/app/store";
import { collectModelsWithDefaultModel } from "@/app/utils/model"; import { stream } from "@/app/utils/chat";
import { preProcessImageContent, stream } from "@/app/utils/chat";
import { cloudflareAIGatewayUrl } from "@/app/utils/cloudflare";
import { import {
ChatOptions, ChatOptions,
getHeaders, getHeaders,
LLMApi, LLMApi,
LLMModel, LLMModel,
LLMUsage, SpeechOptions,
MultimodalContent,
} from "../api"; } from "../api";
import Locale from "../../locales";
import {
EventStreamContentType,
fetchEventSource,
} from "@fortaine/fetch-event-source";
import { prettyObject } from "@/app/utils/format";
import { getClientConfig } from "@/app/config/client"; import { getClientConfig } from "@/app/config/client";
import { getMessageTextContent } from "@/app/utils"; import { getMessageTextContent } from "@/app/utils";
import { RequestPayload } from "./openai";
import { OpenAIListModelResponse, RequestPayload } from "./openai";
export class MoonshotApi implements LLMApi { export class MoonshotApi implements LLMApi {
private disableListModels = true; private disableListModels = true;
@ -72,6 +59,10 @@ export class MoonshotApi implements LLMApi {
return res.choices?.at(0)?.message?.content ?? ""; return res.choices?.at(0)?.message?.content ?? "";
} }
/**
 * Speech synthesis is not implemented for this client.
 *
 * Declared `async` so the failure surfaces as a rejected promise (matching the
 * `Promise<ArrayBuffer>` return type) instead of a synchronous throw that
 * would bypass a caller's `.catch()` handler.
 *
 * @throws {Error} Always rejects with "Method not implemented."
 */
async speech(options: SpeechOptions): Promise<ArrayBuffer> {
  throw new Error("Method not implemented.");
}
async chat(options: ChatOptions) { async chat(options: ChatOptions) {
const messages: ChatOptions["messages"] = []; const messages: ChatOptions["messages"] = [];
for (const v of options.messages) { for (const v of options.messages) {

View File

@ -33,17 +33,12 @@ import {
LLMModel, LLMModel,
LLMUsage, LLMUsage,
MultimodalContent, MultimodalContent,
SpeechOptions,
} from "../api"; } from "../api";
import Locale from "../../locales"; import Locale from "../../locales";
import {
EventStreamContentType,
fetchEventSource,
} from "@fortaine/fetch-event-source";
import { prettyObject } from "@/app/utils/format";
import { getClientConfig } from "@/app/config/client"; import { getClientConfig } from "@/app/config/client";
import { import {
getMessageTextContent, getMessageTextContent,
getMessageImages,
isVisionModel, isVisionModel,
isDalle3 as _isDalle3, isDalle3 as _isDalle3,
} from "@/app/utils"; } from "@/app/utils";
@ -147,6 +142,44 @@ export class ChatGPTApi implements LLMApi {
return res.choices?.at(0)?.message?.content ?? res; return res.choices?.at(0)?.message?.content ?? res;
} }
/**
 * Requests synthesized speech for `options.input` and resolves with the raw
 * response body.
 *
 * The request is a JSON POST to `this.path(OpenaiPath.SpeechPath)` using the
 * shared headers from `getHeaders()`. An AbortController is created for the
 * request and handed to `options.onController` (when provided) so the caller
 * can cancel it; the request is also aborted automatically if `fetch` has not
 * settled within `REQUEST_TIMEOUT_MS`.
 *
 * NOTE(review): the HTTP status is not checked here, so an error response
 * body is returned to the caller as-is.
 *
 * @param options model, input text, voice and optional format/speed settings
 * @returns the response body as an ArrayBuffer
 * @throws rethrows any error raised while sending the request or reading the body
 */
async speech(options: SpeechOptions): Promise<ArrayBuffer> {
  const requestPayload = {
    model: options.model,
    input: options.input,
    voice: options.voice,
    response_format: options.response_format,
    speed: options.speed,
  };

  console.log("[Request] openai speech payload: ", requestPayload);

  const controller = new AbortController();
  options.onController?.(controller);

  try {
    const speechPath = this.path(OpenaiPath.SpeechPath);
    const speechPayload = {
      method: "POST",
      body: JSON.stringify(requestPayload),
      signal: controller.signal,
      headers: getHeaders(),
    };

    // Abort the request if it has not settled within the timeout.
    const requestTimeoutId = setTimeout(
      () => controller.abort(),
      REQUEST_TIMEOUT_MS,
    );

    // Clear the timer whether fetch resolves or rejects, so a failed request
    // does not leave a pending timer that later aborts a finished controller.
    const res = await fetch(speechPath, speechPayload).finally(() =>
      clearTimeout(requestTimeoutId),
    );

    return await res.arrayBuffer();
  } catch (e) {
    console.log("[Request] failed to make a speech request", e);
    throw e;
  }
}
async chat(options: ChatOptions) { async chat(options: ChatOptions) {
const modelConfig = { const modelConfig = {
...useAppConfig.getState().modelConfig, ...useAppConfig.getState().modelConfig,

View File

@ -8,6 +8,7 @@ import {
LLMApi, LLMApi,
LLMModel, LLMModel,
MultimodalContent, MultimodalContent,
SpeechOptions,
} from "../api"; } from "../api";
import Locale from "../../locales"; import Locale from "../../locales";
import { import {
@ -89,6 +90,10 @@ export class HunyuanApi implements LLMApi {
return res.Choices?.at(0)?.Message?.Content ?? ""; return res.Choices?.at(0)?.Message?.Content ?? "";
} }
/**
 * Speech synthesis is not implemented for this client.
 *
 * Declared `async` so the failure surfaces as a rejected promise (matching the
 * `Promise<ArrayBuffer>` return type) instead of a synchronous throw that
 * would bypass a caller's `.catch()` handler.
 *
 * @throws {Error} Always rejects with "Method not implemented."
 */
async speech(options: SpeechOptions): Promise<ArrayBuffer> {
  throw new Error("Method not implemented.");
}
async chat(options: ChatOptions) { async chat(options: ChatOptions) {
const visionModel = isVisionModel(options.config.model); const visionModel = isVisionModel(options.config.model);
const messages = options.messages.map((v, index) => ({ const messages = options.messages.map((v, index) => ({

View File

@ -38,6 +38,7 @@ interface ChatCommands {
next?: Command; next?: Command;
prev?: Command; prev?: Command;
clear?: Command; clear?: Command;
fork?: Command;
del?: Command; del?: Command;
} }

View File

@ -7,7 +7,6 @@ import {
useImperativeHandle, useImperativeHandle,
} from "react"; } from "react";
import { useParams } from "react-router"; import { useParams } from "react-router";
import { useWindowSize } from "@/app/utils";
import { IconButton } from "./button"; import { IconButton } from "./button";
import { nanoid } from "nanoid"; import { nanoid } from "nanoid";
import ExportIcon from "../icons/share.svg"; import ExportIcon from "../icons/share.svg";

View File

@ -1,5 +1,4 @@
import DeleteIcon from "../icons/delete.svg"; import DeleteIcon from "../icons/delete.svg";
import BotIcon from "../icons/bot.svg";
import styles from "./home.module.scss"; import styles from "./home.module.scss";
import { import {
@ -12,7 +11,7 @@ import {
import { useChatStore } from "../store"; import { useChatStore } from "../store";
import Locale from "../locales"; import Locale from "../locales";
import { Link, useLocation, useNavigate } from "react-router-dom"; import { useLocation, useNavigate } from "react-router-dom";
import { Path } from "../constant"; import { Path } from "../constant";
import { MaskAvatar } from "./mask"; import { MaskAvatar } from "./mask";
import { Mask } from "../store/mask"; import { Mask } from "../store/mask";

View File

@ -15,6 +15,8 @@ import RenameIcon from "../icons/rename.svg";
import ExportIcon from "../icons/share.svg"; import ExportIcon from "../icons/share.svg";
import ReturnIcon from "../icons/return.svg"; import ReturnIcon from "../icons/return.svg";
import CopyIcon from "../icons/copy.svg"; import CopyIcon from "../icons/copy.svg";
import SpeakIcon from "../icons/speak.svg";
import SpeakStopIcon from "../icons/speak-stop.svg";
import LoadingIcon from "../icons/three-dots.svg"; import LoadingIcon from "../icons/three-dots.svg";
import LoadingButtonIcon from "../icons/loading.svg"; import LoadingButtonIcon from "../icons/loading.svg";
import PromptIcon from "../icons/prompt.svg"; import PromptIcon from "../icons/prompt.svg";
@ -96,7 +98,8 @@ import {
import { useNavigate } from "react-router-dom"; import { useNavigate } from "react-router-dom";
import { import {
CHAT_PAGE_SIZE, CHAT_PAGE_SIZE,
LAST_INPUT_KEY, DEFAULT_TTS_ENGINE,
ModelProvider,
Path, Path,
REQUEST_TIMEOUT_MS, REQUEST_TIMEOUT_MS,
UNFINISHED_INPUT, UNFINISHED_INPUT,
@ -113,6 +116,11 @@ import { useAllModels } from "../utils/hooks";
import { MultimodalContent } from "../client/api"; import { MultimodalContent } from "../client/api";
const localStorage = safeLocalStorage(); const localStorage = safeLocalStorage();
import { ClientApi } from "../client/api";
import { createTTSPlayer } from "../utils/audio";
import { MsEdgeTTS, OUTPUT_FORMAT } from "../utils/ms_edge_tts";
const ttsPlayer = createTTSPlayer();
const Markdown = dynamic(async () => (await import("./markdown")).Markdown, { const Markdown = dynamic(async () => (await import("./markdown")).Markdown, {
loading: () => <LoadingIcon />, loading: () => <LoadingIcon />,
@ -443,6 +451,7 @@ export function ChatActions(props: {
hitBottom: boolean; hitBottom: boolean;
uploading: boolean; uploading: boolean;
setShowShortcutKeyModal: React.Dispatch<React.SetStateAction<boolean>>; setShowShortcutKeyModal: React.Dispatch<React.SetStateAction<boolean>>;
setUserInput: (input: string) => void;
}) { }) {
const config = useAppConfig(); const config = useAppConfig();
const navigate = useNavigate(); const navigate = useNavigate();
@ -981,6 +990,7 @@ function _Chat() {
chatStore.updateCurrentSession( chatStore.updateCurrentSession(
(session) => (session.clearContextIndex = session.messages.length), (session) => (session.clearContextIndex = session.messages.length),
), ),
fork: () => chatStore.forkSession(),
del: () => chatStore.deleteSession(chatStore.currentSessionIndex), del: () => chatStore.deleteSession(chatStore.currentSessionIndex),
}); });
@ -1184,10 +1194,55 @@ function _Chat() {
}); });
}; };
const accessStore = useAccessStore();
const [speechStatus, setSpeechStatus] = useState(false);
const [speechLoading, setSpeechLoading] = useState(false);
/**
 * Toggles text-to-speech playback for a message.
 *
 * When speech is currently marked as playing, stops the shared player.
 * Otherwise converts the markdown `text` to plain text, synthesizes audio
 * (MsEdgeTTS when the configured engine is not DEFAULT_TTS_ENGINE, the
 * `speech` API of the GPT client otherwise) and plays it.
 *
 * Any failure during synthesis or playback is logged, shown as a toast, and
 * resets the playing flag; the loading flag is always cleared afterwards so
 * the UI cannot get stuck in the loading state.
 *
 * @param text markdown content of the message to read aloud
 */
async function openaiSpeech(text: string) {
  // Second click while playing acts as "stop".
  if (speechStatus) {
    ttsPlayer.stop();
    setSpeechStatus(false);
    return;
  }

  const api = new ClientApi(ModelProvider.GPT);
  const config = useAppConfig.getState();
  setSpeechLoading(true);

  try {
    ttsPlayer.init();

    // Loaded lazily, only when speech is actually requested.
    const { markdownToTxt } = require("markdown-to-txt");
    const textContent = markdownToTxt(text);

    let audioBuffer: ArrayBuffer;
    if (config.ttsConfig.engine !== DEFAULT_TTS_ENGINE) {
      const edgeVoiceName = accessStore.edgeVoiceName();
      const tts = new MsEdgeTTS();
      await tts.setMetadata(
        edgeVoiceName,
        OUTPUT_FORMAT.AUDIO_24KHZ_96KBITRATE_MONO_MP3,
      );
      audioBuffer = await tts.toArrayBuffer(textContent);
    } else {
      audioBuffer = await api.llm.speech({
        model: config.ttsConfig.model,
        input: textContent,
        voice: config.ttsConfig.voice,
        speed: config.ttsConfig.speed,
      });
    }

    setSpeechStatus(true);
    await ttsPlayer.play(audioBuffer, () => {
      setSpeechStatus(false);
    });
  } catch (e) {
    // Covers synthesis failures as well as playback failures.
    console.error("[OpenAI Speech]", e);
    showToast(prettyObject(e));
    setSpeechStatus(false);
  } finally {
    setSpeechLoading(false);
  }
}
const context: RenderMessage[] = useMemo(() => { const context: RenderMessage[] = useMemo(() => {
return session.mask.hideContext ? [] : session.mask.context.slice(); return session.mask.hideContext ? [] : session.mask.context.slice();
}, [session.mask.context, session.mask.hideContext]); }, [session.mask.context, session.mask.hideContext]);
const accessStore = useAccessStore();
if ( if (
context.length === 0 && context.length === 0 &&
@ -1724,6 +1779,25 @@ function _Chat() {
) )
} }
/> />
{config.ttsConfig.enable && (
<ChatAction
text={
speechStatus
? Locale.Chat.Actions.StopSpeech
: Locale.Chat.Actions.Speech
}
icon={
speechStatus ? (
<SpeakStopIcon />
) : (
<SpeakIcon />
)
}
onClick={() =>
openaiSpeech(getMessageTextContent(message))
}
/>
)}
</> </>
)} )}
</div> </div>
@ -1842,6 +1916,7 @@ function _Chat() {
onSearch(""); onSearch("");
}} }}
setShowShortcutKeyModal={setShowShortcutKeyModal} setShowShortcutKeyModal={setShowShortcutKeyModal}
setUserInput={setUserInput}
/> />
<label <label
className={`${styles["chat-input-panel-inner"]} ${ className={`${styles["chat-input-panel-inner"]} ${

View File

@ -1,5 +1,5 @@
/* eslint-disable @next/next/no-img-element */ /* eslint-disable @next/next/no-img-element */
import { ChatMessage, ModelType, useAppConfig, useChatStore } from "../store"; import { ChatMessage, useAppConfig, useChatStore } from "../store";
import Locale from "../locales"; import Locale from "../locales";
import styles from "./exporter.module.scss"; import styles from "./exporter.module.scss";
import { import {

View File

@ -37,7 +37,7 @@ import Locale, { AllLangs, ALL_LANG_OPTIONS, Lang } from "../locales";
import { useNavigate } from "react-router-dom"; import { useNavigate } from "react-router-dom";
import chatStyle from "./chat.module.scss"; import chatStyle from "./chat.module.scss";
import { useEffect, useState } from "react"; import { useState } from "react";
import { import {
copyToClipboard, copyToClipboard,
downloadAs, downloadAs,
@ -48,7 +48,6 @@ import { Updater } from "../typing";
import { ModelConfigList } from "./model-config"; import { ModelConfigList } from "./model-config";
import { FileName, Path } from "../constant"; import { FileName, Path } from "../constant";
import { BUILTIN_MASK_STORE } from "../masks"; import { BUILTIN_MASK_STORE } from "../masks";
import { nanoid } from "nanoid";
import { import {
DragDropContext, DragDropContext,
Droppable, Droppable,

View File

@ -28,7 +28,7 @@ import {
} from "./ui-lib"; } from "./ui-lib";
import Locale from "../locales"; import Locale from "../locales";
import { useNavigate } from "react-router-dom"; import { useNavigate } from "react-router-dom";
import { useEffect, useState } from "react"; import { useState } from "react";
import { getClientConfig } from "../config/client"; import { getClientConfig } from "../config/client";
export function PluginPage() { export function PluginPage() {

View File

@ -80,6 +80,7 @@ import { useSyncStore } from "../store/sync";
import { nanoid } from "nanoid"; import { nanoid } from "nanoid";
import { useMaskStore } from "../store/mask"; import { useMaskStore } from "../store/mask";
import { ProviderType } from "../utils/cloud"; import { ProviderType } from "../utils/cloud";
import { TTSConfigList } from "./tts-config";
function EditPromptModal(props: { id: string; onClose: () => void }) { function EditPromptModal(props: { id: string; onClose: () => void }) {
const promptStore = usePromptStore(); const promptStore = usePromptStore();
@ -1646,6 +1647,17 @@ export function Settings() {
<UserPromptModal onClose={() => setShowPromptModal(false)} /> <UserPromptModal onClose={() => setShowPromptModal(false)} />
)} )}
<List>
<TTSConfigList
ttsConfig={config.ttsConfig}
updateConfig={(updater) => {
const ttsConfig = { ...config.ttsConfig };
updater(ttsConfig);
config.update((config) => (config.ttsConfig = ttsConfig));
}}
/>
</List>
<DangerItems /> <DangerItems />
</div> </div>
</ErrorBoundary> </ErrorBoundary>

View File

@ -7,7 +7,6 @@ import SettingsIcon from "../icons/settings.svg";
import GithubIcon from "../icons/github.svg"; import GithubIcon from "../icons/github.svg";
import ChatGptIcon from "../icons/chatgpt.svg"; import ChatGptIcon from "../icons/chatgpt.svg";
import AddIcon from "../icons/add.svg"; import AddIcon from "../icons/add.svg";
import CloseIcon from "../icons/close.svg";
import DeleteIcon from "../icons/delete.svg"; import DeleteIcon from "../icons/delete.svg";
import MaskIcon from "../icons/mask.svg"; import MaskIcon from "../icons/mask.svg";
import DragIcon from "../icons/drag.svg"; import DragIcon from "../icons/drag.svg";
@ -254,11 +253,6 @@ export function SideBar(props: { className?: string }) {
{showPluginSelector && ( {showPluginSelector && (
<Selector <Selector
items={[ items={[
{
title: "👇 Please select the plugin you need to use",
value: "-",
disable: true,
},
...PLUGINS.map((item) => { ...PLUGINS.map((item) => {
return { return {
title: item.name, title: item.name,

View File

@ -0,0 +1,133 @@
import { TTSConfig, TTSConfigValidator } from "../store";
import Locale from "../locales";
import { ListItem, Select } from "./ui-lib";
import {
DEFAULT_TTS_ENGINE,
DEFAULT_TTS_ENGINES,
DEFAULT_TTS_MODELS,
DEFAULT_TTS_VOICES,
} from "../constant";
import { InputRange } from "./input-range";
/**
 * Settings rows for the text-to-speech configuration.
 *
 * Always renders the enable checkbox and the engine selector. The model,
 * voice and speed rows are rendered only while the selected engine equals
 * `DEFAULT_TTS_ENGINE`.
 *
 * @param props.ttsConfig    the TTS configuration to display
 * @param props.updateConfig receives an updater that mutates the config
 *                           object it is handed
 */
export function TTSConfigList(props: {
  ttsConfig: TTSConfig;
  updateConfig: (updater: (config: TTSConfig) => void) => void;
}) {
  const { ttsConfig, updateConfig } = props;

  // Shared <option> renderer for the engine / model / voice selectors.
  const renderOptions = (values: string[]) =>
    values.map((v, i) => (
      <option value={v} key={i}>
        {v}
      </option>
    ));

  const showEngineSettings = ttsConfig.engine === DEFAULT_TTS_ENGINE;

  return (
    <>
      <ListItem
        title={Locale.Settings.TTS.Enable.Title}
        subTitle={Locale.Settings.TTS.Enable.SubTitle}
      >
        <input
          type="checkbox"
          checked={ttsConfig.enable}
          onChange={(e) =>
            updateConfig((config) => {
              config.enable = e.currentTarget.checked;
            })
          }
        ></input>
      </ListItem>
      {/* <ListItem
        title={Locale.Settings.TTS.Autoplay.Title}
        subTitle={Locale.Settings.TTS.Autoplay.SubTitle}
      >
        <input
          type="checkbox"
          checked={props.ttsConfig.autoplay}
          onChange={(e) =>
            props.updateConfig(
              (config) => (config.autoplay = e.currentTarget.checked),
            )
          }
        ></input>
      </ListItem> */}
      <ListItem title={Locale.Settings.TTS.Engine}>
        <Select
          value={ttsConfig.engine}
          onChange={(e) => {
            updateConfig((config) => {
              config.engine = TTSConfigValidator.engine(e.currentTarget.value);
            });
          }}
        >
          {renderOptions(DEFAULT_TTS_ENGINES)}
        </Select>
      </ListItem>
      {showEngineSettings && (
        <>
          <ListItem title={Locale.Settings.TTS.Model}>
            <Select
              value={ttsConfig.model}
              onChange={(e) => {
                updateConfig((config) => {
                  config.model = TTSConfigValidator.model(
                    e.currentTarget.value,
                  );
                });
              }}
            >
              {renderOptions(DEFAULT_TTS_MODELS)}
            </Select>
          </ListItem>
          <ListItem
            title={Locale.Settings.TTS.Voice.Title}
            subTitle={Locale.Settings.TTS.Voice.SubTitle}
          >
            <Select
              value={ttsConfig.voice}
              onChange={(e) => {
                updateConfig((config) => {
                  config.voice = TTSConfigValidator.voice(
                    e.currentTarget.value,
                  );
                });
              }}
            >
              {renderOptions(DEFAULT_TTS_VOICES)}
            </Select>
          </ListItem>
          <ListItem
            title={Locale.Settings.TTS.Speed.Title}
            subTitle={Locale.Settings.TTS.Speed.SubTitle}
          >
            <InputRange
              aria={Locale.Settings.TTS.Speed.Title}
              value={ttsConfig.speed?.toFixed(1)}
              min="0.3"
              max="4.0"
              step="0.1"
              onChange={(e) => {
                updateConfig((config) => {
                  config.speed = TTSConfigValidator.speed(
                    e.currentTarget.valueAsNumber,
                  );
                });
              }}
            ></InputRange>
          </ListItem>
        </>
      )}
    </>
  );
}

View File

@ -0,0 +1,119 @@
@import "../styles/animation.scss";
// Plugin page: full-height flex column whose body scrolls vertically.
.plugin-page {
height: 100%;
display: flex;
flex-direction: column;
.plugin-page-body {
padding: 20px;
overflow-y: auto;
// Filter row: search bar plus language and create controls, laid out as a flex row.
.plugin-filter {
width: 100%;
max-width: 100%;
margin-bottom: 20px;
animation: slide-in ease 0.3s;
height: 40px;
display: flex;
.search-bar {
// Takes the remaining width; min-width: 0 lets it shrink inside the flex row.
flex-grow: 1;
max-width: 100%;
min-width: 0;
outline: none;
}
.search-bar:focus {
border: 1px solid var(--primary);
}
.plugin-filter-lang {
height: 100%;
margin-left: 10px;
}
.plugin-create {
height: 100%;
margin-left: 10px;
box-sizing: border-box;
min-width: 80px;
}
}
// One plugin row. Adjacent rows share a border: every row except the last
// drops its bottom border, and only the first/last rows get rounded corners.
.plugin-item {
display: flex;
justify-content: space-between;
padding: 20px;
border: var(--border-in-light);
animation: slide-in ease 0.3s;
&:not(:last-child) {
border-bottom: 0;
}
&:first-child {
border-top-left-radius: 10px;
border-top-right-radius: 10px;
}
&:last-child {
border-bottom-left-radius: 10px;
border-bottom-right-radius: 10px;
}
.plugin-header {
display: flex;
align-items: center;
.plugin-icon {
display: flex;
align-items: center;
justify-content: center;
margin-right: 10px;
}
.plugin-title {
.plugin-name {
font-size: 14px;
font-weight: bold;
}
.plugin-info {
font-size: 12px;
}
.plugin-runtime-warning {
font-size: 12px;
color: #f86c6c;
}
}
}
.plugin-actions {
display: flex;
flex-wrap: nowrap;
transition: all ease 0.3s;
justify-content: center;
align-items: center;
}
// At widths up to 600px each row becomes a separate card: stacked vertically,
// fully rounded, with its own bottom border restored and a shadow.
@media screen and (max-width: 600px) {
display: flex;
flex-direction: column;
padding-bottom: 10px;
border-radius: 10px;
margin-bottom: 20px;
box-shadow: var(--card-shadow);
&:not(:last-child) {
border-bottom: var(--border-in-light);
}
.plugin-actions {
width: 100%;
justify-content: space-between;
padding-top: 10px;
}
}
}
}
}

View File

@ -1,5 +1,3 @@
import path from "path";
export const OWNER = "ChatGPTNextWeb"; export const OWNER = "ChatGPTNextWeb";
export const REPO = "ChatGPT-Next-Web"; export const REPO = "ChatGPT-Next-Web";
export const REPO_URL = `https://github.com/${OWNER}/${REPO}`; export const REPO_URL = `https://github.com/${OWNER}/${REPO}`;
@ -152,6 +150,7 @@ export const Anthropic = {
export const OpenaiPath = { export const OpenaiPath = {
ChatPath: "v1/chat/completions", ChatPath: "v1/chat/completions",
SpeechPath: "v1/audio/speech",
ImagePath: "v1/images/generations", ImagePath: "v1/images/generations",
UsagePath: "dashboard/billing/usage", UsagePath: "dashboard/billing/usage",
SubsPath: "dashboard/billing/subscription", SubsPath: "dashboard/billing/subscription",
@ -258,6 +257,20 @@ export const KnowledgeCutOffDate: Record<string, string> = {
"gemini-pro-vision": "2023-12", "gemini-pro-vision": "2023-12",
}; };
// Text-to-speech defaults and the option lists offered for each setting.
// Each DEFAULT_TTS_<X> value below is also the first entry of its DEFAULT_TTS_<X>S list.
export const DEFAULT_TTS_ENGINE = "OpenAI-TTS";
export const DEFAULT_TTS_ENGINES = ["OpenAI-TTS", "Edge-TTS"];
export const DEFAULT_TTS_MODEL = "tts-1";
export const DEFAULT_TTS_VOICE = "alloy";
export const DEFAULT_TTS_MODELS = ["tts-1", "tts-1-hd"];
// NOTE(review): model and voice names look OpenAI-specific — confirm against the speech API.
export const DEFAULT_TTS_VOICES = [
"alloy",
"echo",
"fable",
"onyx",
"nova",
"shimmer",
];
const openaiModels = [ const openaiModels = [
"gpt-3.5-turbo", "gpt-3.5-turbo",
"gpt-3.5-turbo-1106", "gpt-3.5-turbo-1106",
@ -279,7 +292,7 @@ const openaiModels = [
"gpt-4-1106-preview", "gpt-4-1106-preview",
"dall-e-3", "dall-e-3",
"o1-mini", "o1-mini",
"o1-preview" "o1-preview",
]; ];
const googleModels = [ const googleModels = [

1
app/icons/speak-stop.svg Normal file
View File

@ -0,0 +1 @@
<svg xmlns="http://www.w3.org/2000/svg" fill="none" width="16" height="16" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" class="w-4 h-4"><path stroke-linecap="round" stroke-linejoin="round" d="M17.25 9.75 19.5 12m0 0 2.25 2.25M19.5 12l2.25-2.25M19.5 12l-2.25 2.25m-10.5-6 4.72-4.72a.75.75 0 0 1 1.28.53v15.88a.75.75 0 0 1-1.28.53l-4.72-4.72H4.51c-.88 0-1.704-.507-1.938-1.354A9.009 9.009 0 0 1 2.25 12c0-.83.112-1.633.322-2.396C2.806 8.756 3.63 8.25 4.51 8.25H6.75Z"></path></svg>

After

Width:  |  Height:  |  Size: 495 B

1
app/icons/speak.svg Normal file
View File

@ -0,0 +1 @@
<svg xmlns="http://www.w3.org/2000/svg" fill="none" width="16" height="16" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" class="w-4 h-4"><path stroke-linecap="round" stroke-linejoin="round" d="M19.114 5.636a9 9 0 010 12.728M16.463 8.288a5.25 5.25 0 010 7.424M6.75 8.25l4.72-4.72a.75.75 0 011.28.53v15.88a.75.75 0 01-1.28.53l-4.72-4.72H4.51c-.88 0-1.704-.507-1.938-1.354A9.01 9.01 0 012.25 12c0-.83.112-1.633.322-2.396C2.806 8.756 3.63 8.25 4.51 8.25H6.75z"></path></svg>

After

Width:  |  Height:  |  Size: 485 B

16
app/icons/voice-white.svg Normal file
View File

@ -0,0 +1,16 @@
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="16" height="16" fill="none" viewBox="0 0 20 20">
<defs>
<rect id="path_0" width="20" height="20" x="0" y="0" />
</defs>
<g opacity="1" transform="translate(0 0) rotate(0 8 8)">
<mask id="bg-mask-0" fill="#fff">
<use xlink:href="#path_0" />
</mask>
<g mask="url(#bg-mask-0)">
<path d="M7 4a3 3 0 016 0v6a3 3 0 11-6 0V4z" fill="#333333">
</path>
<path d="M5.5 9.643a.75.75 0 00-1.5 0V10c0 3.06 2.29 5.585 5.25 5.954V17.5h-1.5a.75.75 0 000 1.5h4.5a.75.75 0 000-1.5h-1.5v-1.546A6.001 6.001 0 0016 10v-.357a.75.75 0 00-1.5 0V10a4.5 4.5 0 01-9 0v-.357z" fill="#333333">
</path>
</g>
</g>
</svg>

After

Width:  |  Height:  |  Size: 708 B

View File

@ -41,7 +41,11 @@ export default function RootLayout({
name="viewport" name="viewport"
content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no"
/> />
<link rel="manifest" href="/site.webmanifest" crossOrigin="use-credentials"></link> <link
rel="manifest"
href="/site.webmanifest"
crossOrigin="use-credentials"
></link>
<script src="/serviceWorkerRegister.js" defer></script> <script src="/serviceWorkerRegister.js" defer></script>
</head> </head>
<body> <body>

View File

@ -1,4 +1,3 @@
import { ShortcutKeyModal } from "../components/chat";
import { getClientConfig } from "../config/client"; import { getClientConfig } from "../config/client";
import { SubmitKey } from "../store/config"; import { SubmitKey } from "../store/config";
@ -46,6 +45,8 @@ const cn = {
FullScreen: "全屏", FullScreen: "全屏",
RefreshTitle: "刷新标题", RefreshTitle: "刷新标题",
RefreshToast: "已发送刷新标题请求", RefreshToast: "已发送刷新标题请求",
Speech: "朗读",
StopSpeech: "停止",
}, },
Commands: { Commands: {
new: "新建聊天", new: "新建聊天",
@ -53,6 +54,7 @@ const cn = {
next: "下一个聊天", next: "下一个聊天",
prev: "上一个聊天", prev: "上一个聊天",
clear: "清除上下文", clear: "清除上下文",
fork: "复制聊天",
del: "删除聊天", del: "删除聊天",
}, },
InputActions: { InputActions: {
@ -79,6 +81,8 @@ const cn = {
return inputHints + "/ 触发补全,: 触发命令"; return inputHints + "/ 触发补全,: 触发命令";
}, },
Send: "发送", Send: "发送",
StartSpeak: "说话",
StopSpeak: "停止",
Config: { Config: {
Reset: "清除记忆", Reset: "清除记忆",
SaveAs: "存为面具", SaveAs: "存为面具",
@ -496,6 +500,26 @@ const cn = {
Title: "频率惩罚度 (frequency_penalty)", Title: "频率惩罚度 (frequency_penalty)",
SubTitle: "值越大,越有可能降低重复字词", SubTitle: "值越大,越有可能降低重复字词",
}, },
TTS: {
Enable: {
Title: "启用文本转语音",
SubTitle: "启用文本生成语音服务",
},
Autoplay: {
Title: "启用自动朗读",
SubTitle: "自动生成语音并播放,需先开启文本转语音开关",
},
Model: "模型",
Engine: "转换引擎",
Voice: {
Title: "声音",
SubTitle: "生成语音时使用的声音",
},
Speed: {
Title: "速度",
SubTitle: "生成语音的速度",
},
},
}, },
Store: { Store: {
DefaultTopic: "新的聊天", DefaultTopic: "新的聊天",

View File

@ -47,6 +47,8 @@ const en: LocaleType = {
FullScreen: "FullScreen", FullScreen: "FullScreen",
RefreshTitle: "Refresh Title", RefreshTitle: "Refresh Title",
RefreshToast: "Title refresh request sent", RefreshToast: "Title refresh request sent",
Speech: "Play",
StopSpeech: "Stop",
}, },
Commands: { Commands: {
new: "Start a new chat", new: "Start a new chat",
@ -54,6 +56,7 @@ const en: LocaleType = {
next: "Next Chat", next: "Next Chat",
prev: "Previous Chat", prev: "Previous Chat",
clear: "Clear Context", clear: "Clear Context",
fork: "Copy Chat",
del: "Delete Chat", del: "Delete Chat",
}, },
InputActions: { InputActions: {
@ -80,6 +83,8 @@ const en: LocaleType = {
return inputHints + ", / to search prompts, : to use commands"; return inputHints + ", / to search prompts, : to use commands";
}, },
Send: "Send", Send: "Send",
StartSpeak: "Start Speak",
StopSpeak: "Stop Speak",
Config: { Config: {
Reset: "Reset to Default", Reset: "Reset to Default",
SaveAs: "Save as Mask", SaveAs: "Save as Mask",
@ -502,6 +507,27 @@ const en: LocaleType = {
SubTitle: SubTitle:
"A larger value decreasing the likelihood to repeat the same line", "A larger value decreasing the likelihood to repeat the same line",
}, },
TTS: {
Enable: {
Title: "Enable TTS",
SubTitle: "Enable text-to-speech service",
},
Autoplay: {
Title: "Enable Autoplay",
SubTitle:
"Automatically generate speech and play, you need to enable the text-to-speech switch first",
},
Model: "Model",
Voice: {
Title: "Voice",
SubTitle: "The voice to use when generating the audio",
},
Speed: {
Title: "Speed",
SubTitle: "The speed of the generated audio",
},
Engine: "TTS Engine",
},
}, },
Store: { Store: {
DefaultTopic: "New Conversation", DefaultTopic: "New Conversation",

View File

@ -134,3 +134,34 @@ export function getISOLang() {
const lang = getLang(); const lang = getLang();
return isoLangString[lang] ?? lang; return isoLangString[lang] ?? lang;
} }
// Language tag returned when the UI language has no entry in STT_LANG_MAP
// or when resolving the UI language fails.
const DEFAULT_STT_LANG = "zh-CN";

// Maps each UI language code to the language tag returned by getSTTLang().
export const STT_LANG_MAP: Record<Lang, string> = {
  cn: "zh-CN",
  en: "en-US",
  pt: "pt-BR",
  tw: "zh-TW",
  jp: "ja-JP",
  ko: "ko-KR",
  id: "id-ID",
  fr: "fr-FR",
  es: "es-ES",
  it: "it-IT",
  tr: "tr-TR",
  de: "de-DE",
  vi: "vi-VN",
  ru: "ru-RU",
  cs: "cs-CZ",
  no: "no-NO",
  ar: "ar-SA",
  bn: "bn-BD",
  sk: "sk-SK",
};

/**
 * Returns the language tag for the current UI language.
 *
 * @returns the mapped tag, or `DEFAULT_STT_LANG` when the current language
 *          has no mapping or `getLang()` throws
 */
export function getSTTLang(): string {
  try {
    // A missing key yields `undefined` rather than throwing, so the fallback
    // has to be applied here and not only in the catch branch.
    return STT_LANG_MAP[getLang()] ?? DEFAULT_STT_LANG;
  } catch {
    return DEFAULT_STT_LANG;
  }
}

View File

@ -1,6 +1,5 @@
import { getClientConfig } from "../config/client"; import { getClientConfig } from "../config/client";
import { SubmitKey } from "../store/config"; import { SubmitKey } from "../store/config";
import { LocaleType } from "./index";
import type { PartialLocaleType } from "./index"; import type { PartialLocaleType } from "./index";
// if you are adding a new translation, please use PartialLocaleType instead of LocaleType // if you are adding a new translation, please use PartialLocaleType instead of LocaleType

View File

@ -1,7 +1,4 @@
import { Mask } from "../store/mask"; import { Mask } from "../store/mask";
import { CN_MASKS } from "./cn";
import { TW_MASKS } from "./tw";
import { EN_MASKS } from "./en";
import { type BuiltinMask } from "./typing"; import { type BuiltinMask } from "./typing";
export { type BuiltinMask } from "./typing"; export { type BuiltinMask } from "./typing";

View File

@ -120,6 +120,9 @@ const DEFAULT_ACCESS_STATE = {
disableFastLink: false, disableFastLink: false,
customModels: "", customModels: "",
defaultModel: "", defaultModel: "",
// tts config
edgeTTSVoiceName: "zh-CN-YunxiNeural",
}; };
export const useAccessStore = createPersistStore( export const useAccessStore = createPersistStore(
@ -132,6 +135,12 @@ export const useAccessStore = createPersistStore(
return get().needCode; return get().needCode;
}, },
// Returns the stored Edge TTS voice name (`edgeTTSVoiceName`).
// Calls this.fetch() first, but does not wait for it: the value returned is
// whatever the store holds at call time.
// NOTE(review): fetch() appears to be asynchronous — confirm that a stale value on the first call is acceptable.
edgeVoiceName() {
this.fetch();
return get().edgeTTSVoiceName;
},
isValidOpenAI() { isValidOpenAI() {
return ensure(get(), ["openaiApiKey"]); return ensure(get(), ["openaiApiKey"]);
}, },
@ -204,8 +213,8 @@ export const useAccessStore = createPersistStore(
.then((res) => { .then((res) => {
// Set default model from env request // Set default model from env request
let defaultModel = res.defaultModel ?? ""; let defaultModel = res.defaultModel ?? "";
DEFAULT_CONFIG.modelConfig.model = if (defaultModel !== "")
defaultModel !== "" ? defaultModel : "gpt-3.5-turbo"; DEFAULT_CONFIG.modelConfig.model = defaultModel;
return res; return res;
}) })
.then((res: DangerConfig) => { .then((res: DangerConfig) => {

View File

@ -170,6 +170,28 @@ export const useChatStore = createPersistStore(
} }
const methods = { const methods = {
/**
 * Duplicates the current session and selects the copy.
 *
 * The copy gets its own session object from `createEmptySession()`, the
 * same topic, copies of the messages, and a copy of the mask with a copied
 * `modelConfig`. It is inserted at the front of `sessions` and becomes the
 * current session. Does nothing when there is no current session.
 */
forkSession() {
  // Get the current session.
  const currentSession = get().currentSession();
  if (!currentSession) return;

  const newSession = createEmptySession();

  newSession.topic = currentSession.topic;
  // Copy each message object, not only the array: with shared objects an
  // in-place edit of a message in the fork would also change the original.
  newSession.messages = currentSession.messages.map((message) => ({
    ...message,
  }));
  newSession.mask = {
    ...currentSession.mask,
    modelConfig: {
      ...currentSession.mask.modelConfig,
    },
  };

  set((state) => ({
    currentSessionIndex: 0,
    sessions: [newSession, ...state.sessions],
  }));
},
clearSessions() { clearSessions() {
set(() => ({ set(() => ({
sessions: [createEmptySession()], sessions: [createEmptySession()],

View File

@ -5,12 +5,21 @@ import {
DEFAULT_INPUT_TEMPLATE, DEFAULT_INPUT_TEMPLATE,
DEFAULT_MODELS, DEFAULT_MODELS,
DEFAULT_SIDEBAR_WIDTH, DEFAULT_SIDEBAR_WIDTH,
DEFAULT_TTS_ENGINE,
DEFAULT_TTS_ENGINES,
DEFAULT_TTS_MODEL,
DEFAULT_TTS_MODELS,
DEFAULT_TTS_VOICE,
DEFAULT_TTS_VOICES,
StoreKey, StoreKey,
ServiceProvider, ServiceProvider,
} from "../constant"; } from "../constant";
import { createPersistStore } from "../utils/store"; import { createPersistStore } from "../utils/store";
export type ModelType = (typeof DEFAULT_MODELS)[number]["name"]; export type ModelType = (typeof DEFAULT_MODELS)[number]["name"];
// Element types of the TTS option lists imported from ../constant.
// NOTE(review): those lists are declared without `as const` in the constants
// shown in this change, so these aliases presumably resolve to `string`
// rather than a union of literals — confirm.
export type TTSModelType = (typeof DEFAULT_TTS_MODELS)[number];
export type TTSVoiceType = (typeof DEFAULT_TTS_VOICES)[number];
export type TTSEngineType = (typeof DEFAULT_TTS_ENGINES)[number];
export enum SubmitKey { export enum SubmitKey {
Enter = "Enter", Enter = "Enter",
@ -68,11 +77,21 @@ export const DEFAULT_CONFIG = {
quality: "standard" as DalleQuality, quality: "standard" as DalleQuality,
style: "vivid" as DalleStyle, style: "vivid" as DalleStyle,
}, },
ttsConfig: {
enable: false,
autoplay: false,
engine: DEFAULT_TTS_ENGINE,
model: DEFAULT_TTS_MODEL,
voice: DEFAULT_TTS_VOICE,
speed: 1.0,
},
}; };
export type ChatConfig = typeof DEFAULT_CONFIG; export type ChatConfig = typeof DEFAULT_CONFIG;
export type ModelConfig = ChatConfig["modelConfig"]; export type ModelConfig = ChatConfig["modelConfig"];
export type TTSConfig = ChatConfig["ttsConfig"];
export function limitNumber( export function limitNumber(
x: number, x: number,
@ -87,6 +106,21 @@ export function limitNumber(
return Math.min(max, Math.max(min, x)); return Math.min(max, Math.max(min, x));
} }
/**
 * Converters applied to raw form values before they are stored in the TTS
 * config. `engine`, `model` and `voice` only cast the string to the matching
 * type; `speed` passes the number through `limitNumber` with bounds
 * 0.25 and 4.0 and a default of 1.0.
 */
export const TTSConfigValidator = {
  engine: (x: string) => x as TTSEngineType,
  model: (x: string) => x as TTSModelType,
  voice: (x: string) => x as TTSVoiceType,
  speed: (x: number) => limitNumber(x, 0.25, 4.0, 1.0),
};
export const ModalConfigValidator = { export const ModalConfigValidator = {
model(x: string) { model(x: string) {
return x as ModelType; return x as ModelType;
@ -143,6 +177,21 @@ export const useAppConfig = createPersistStore(
{ {
name: StoreKey.Config, name: StoreKey.Config,
version: 4, version: 4,
/**
 * Combines the persisted config with the current (default) state.
 *
 * Persisted top-level keys override current ones. `models` is merged by
 * `name` + `provider`: a persisted model replaces the matching current
 * entry, and persisted models with no match are appended.
 *
 * @param persistedState state read from storage, possibly undefined
 * @param currentState   the in-memory state
 * @returns the merged state
 */
merge(persistedState, currentState) {
  const state = persistedState as ChatConfig | undefined;
  if (!state) return { ...currentState };

  const models = currentState.models.slice();
  // A persisted state without a `models` array must not abort the merge;
  // in that case the current model list is kept as is.
  const persistedModels = Array.isArray(state.models) ? state.models : [];
  persistedModels.forEach((pModel) => {
    const idx = models.findIndex(
      (v) => v.name === pModel.name && v.provider === pModel.provider,
    );
    if (idx !== -1) models[idx] = pModel;
    else models.push(pModel);
  });
  return { ...currentState, ...state, models: models };
},
migrate(persistedState, version) { migrate(persistedState, version) {
const state = persistedState as ChatConfig; const state = persistedState as ChatConfig;

View File

@ -1,5 +1,4 @@
import OpenAPIClientAxios from "openapi-client-axios"; import OpenAPIClientAxios from "openapi-client-axios";
import { getLang, Lang } from "../locales";
import { StoreKey } from "../constant"; import { StoreKey } from "../constant";
import { nanoid } from "nanoid"; import { nanoid } from "nanoid";
import { createPersistStore } from "../utils/store"; import { createPersistStore } from "../utils/store";

View File

@ -1,7 +1,7 @@
import Fuse from "fuse.js"; import Fuse from "fuse.js";
import { getLang } from "../locales";
import { StoreKey } from "../constant";
import { nanoid } from "nanoid"; import { nanoid } from "nanoid";
import { StoreKey } from "../constant";
import { getLang } from "../locales";
import { createPersistStore } from "../utils/store"; import { createPersistStore } from "../utils/store";
export interface Prompt { export interface Prompt {
@ -147,6 +147,11 @@ export const usePromptStore = createPersistStore(
}, },
onRehydrateStorage(state) { onRehydrateStorage(state) {
// Skip store rehydration on server side
if (typeof window === "undefined") {
return;
}
const PROMPT_URL = "./prompts.json"; const PROMPT_URL = "./prompts.json";
type PromptList = Array<[string, string]>; type PromptList = Array<[string, string]>;

View File

@ -1,5 +1,4 @@
import { getClientConfig } from "../config/client"; import { getClientConfig } from "../config/client";
import { Updater } from "../typing";
import { ApiPath, STORAGE_KEY, StoreKey } from "../constant"; import { ApiPath, STORAGE_KEY, StoreKey } from "../constant";
import { createPersistStore } from "../utils/store"; import { createPersistStore } from "../utils/store";
import { import {
@ -100,15 +99,17 @@ export const useSyncStore = createPersistStore(
const remoteState = await client.get(config.username); const remoteState = await client.get(config.username);
if (!remoteState || remoteState === "") { if (!remoteState || remoteState === "") {
await client.set(config.username, JSON.stringify(localState)); await client.set(config.username, JSON.stringify(localState));
console.log("[Sync] Remote state is empty, using local state instead."); console.log(
return "[Sync] Remote state is empty, using local state instead.",
);
return;
} else { } else {
const parsedRemoteState = JSON.parse( const parsedRemoteState = JSON.parse(
await client.get(config.username), await client.get(config.username),
) as AppState; ) as AppState;
mergeAppState(localState, parsedRemoteState); mergeAppState(localState, parsedRemoteState);
setLocalAppState(localState); setLocalAppState(localState);
} }
} catch (e) { } catch (e) {
console.log("[Sync] failed to get remote state", e); console.log("[Sync] failed to get remote state", e);
throw e; throw e;

View File

@ -8,8 +8,6 @@ import { getClientConfig } from "../config/client";
import { createPersistStore } from "../utils/store"; import { createPersistStore } from "../utils/store";
import ChatGptIcon from "../icons/chatgpt.png"; import ChatGptIcon from "../icons/chatgpt.png";
import Locale from "../locales"; import Locale from "../locales";
import { use } from "react";
import { useAppConfig } from ".";
import { ClientApi } from "../client/api"; import { ClientApi } from "../client/api";
const ONE_MINUTE = 60 * 1000; const ONE_MINUTE = 60 * 1000;

View File

@ -3,8 +3,7 @@ import { showToast } from "./components/ui-lib";
import Locale from "./locales"; import Locale from "./locales";
import { RequestMessage } from "./client/api"; import { RequestMessage } from "./client/api";
import { ServiceProvider, REQUEST_TIMEOUT_MS } from "./constant"; import { ServiceProvider, REQUEST_TIMEOUT_MS } from "./constant";
import isObject from "lodash-es/isObject"; import { fetch as tauriFetch, ResponseType } from "@tauri-apps/api/http";
import { fetch as tauriFetch, Body, ResponseType } from "@tauri-apps/api/http";
export function trimTopic(topic: string) { export function trimTopic(topic: string) {
// Fix an issue where double quotes still show in the Indonesian language // Fix an issue where double quotes still show in the Indonesian language

45
app/utils/audio.ts Normal file
View File

@ -0,0 +1,45 @@
// Handle returned by createTTSPlayer().
//  - init: creates the audio context
//  - play: decodes `audioBuffer` and plays it; `onended` is assigned to the
//    source node's `onended` handler
//  - stop: stops playback and closes the audio context
// NOTE(review): `() => void | null` parses as "a function returning void | null",
// not "a function or null" — confirm which was intended.
type TTSPlayer = {
init: () => void;
play: (audioBuffer: ArrayBuffer, onended: () => void | null) => Promise<void>;
stop: () => void;
};
/**
 * Creates a player that decodes audio data and plays it through the Web
 * Audio API. At most one source node is active at a time.
 *
 * @returns an object with:
 *   - `init()`  creates a new, suspended audio context
 *   - `play()`  stops the current playback (if any), decodes the given data
 *               and starts it once the context has resumed; creates the
 *               context on demand when `init()` was not called or `stop()`
 *               has closed it
 *   - `stop()`  stops playback and closes the audio context
 */
export function createTTSPlayer(): TTSPlayer {
  let audioContext: AudioContext | null = null;
  let audioBufferSourceNode: AudioBufferSourceNode | null = null;

  const createContext = () => {
    const context = new (window.AudioContext || window.webkitAudioContext)();
    context.suspend();
    return context;
  };

  const init = () => {
    audioContext = createContext();
  };

  // Stops and detaches the active source node, if there is one.
  const releaseSourceNode = () => {
    const node = audioBufferSourceNode;
    if (!node) return;
    audioBufferSourceNode = null;
    try {
      node.stop();
    } catch (e) {
      // stop() throws InvalidStateError when start() was never called on the
      // node (e.g. the context had not resumed yet); nothing to stop then.
    }
    node.disconnect();
  };

  const play = async (audioBuffer: ArrayBuffer, onended: () => void | null) => {
    releaseSourceNode();

    // Tolerate play() without init(), or after stop() closed the context.
    const context = audioContext ?? (audioContext = createContext());
    const buffer = await context.decodeAudioData(audioBuffer);

    // stop() may have closed the context while the data was being decoded.
    if (audioContext !== context) return;
    // Another play() may have installed a node in the meantime.
    releaseSourceNode();

    const sourceNode = context.createBufferSource();
    sourceNode.buffer = buffer;
    sourceNode.connect(context.destination);
    sourceNode.onended = onended;
    audioBufferSourceNode = sourceNode;

    context.resume().then(() => {
      // Start only if this node is still the active one; stop() or a newer
      // play() may have replaced it before the context resumed.
      if (audioBufferSourceNode === sourceNode) {
        sourceNode.start();
      }
    });
  };

  const stop = () => {
    releaseSourceNode();
    if (audioContext) {
      audioContext.close();
      audioContext = null;
    }
  };

  return { init, play, stop };
}

View File

@ -1,5 +1,5 @@
import { getClientConfig } from "../config/client"; import { getClientConfig } from "../config/client";
import { ApiPath, DEFAULT_API_HOST } from "../constant"; import { DEFAULT_API_HOST } from "../constant";
export function corsPath(path: string) { export function corsPath(path: string) {
const baseUrl = getClientConfig()?.isApp ? `${DEFAULT_API_HOST}` : ""; const baseUrl = getClientConfig()?.isApp ? `${DEFAULT_API_HOST}` : "";

391
app/utils/ms_edge_tts.ts Normal file
View File

@ -0,0 +1,391 @@
// import axios from "axios";
import { Buffer } from "buffer";
import { randomBytes } from "crypto";
import { Readable } from "stream";
// Modified according to https://github.com/Migushthe2nd/MsEdgeTTS
/**
 * Named volume levels accepted by the SSML `<prosody volume="...">` attribute.
 * The values are lowercase; `X_LOUD` previously had the mixed-case value
 * "x-LOUD", which did not match the casing of the other members.
 *
 * https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-synthesis-markup-voice#:~:text=Optional-,volume,-Indicates%20the%20volume
 */
export enum VOLUME {
  SILENT = "silent",
  X_SOFT = "x-soft",
  SOFT = "soft",
  MEDIUM = "medium",
  LOUD = "loud",
  X_LOUD = "x-loud",
  DEFAULT = "default",
}
/**
* Named speaking-rate values for the `rate` attribute of the SSML `<prosody>` element.
*
* https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-synthesis-markup-voice#:~:text=Optional-,rate,-Indicates%20the%20speaking
*/
export enum RATE {
X_SLOW = "x-slow",
SLOW = "slow",
MEDIUM = "medium",
FAST = "fast",
X_FAST = "x-fast",
DEFAULT = "default",
}
/**
* Named pitch values for the `pitch` attribute of the SSML `<prosody>` element.
*
* https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-synthesis-markup-voice#:~:text=Optional-,pitch,-Indicates%20the%20baseline
*/
export enum PITCH {
X_LOW = "x-low",
LOW = "low",
MEDIUM = "medium",
HIGH = "high",
X_HIGH = "x-high",
DEFAULT = "default",
}
/**
* Audio output formats that can be requested from the service.
* Only a few of the [possible formats](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/rest-text-to-speech#audio-outputs) are accepted.
* The three enabled members are the usable ones; the commented-out members are kept for reference only.
*/
export enum OUTPUT_FORMAT {
// Streaming =============================
// AMR_WB_16000HZ = "amr-wb-16000hz",
// AUDIO_16KHZ_16BIT_32KBPS_MONO_OPUS = "audio-16khz-16bit-32kbps-mono-opus",
// AUDIO_16KHZ_32KBITRATE_MONO_MP3 = "audio-16khz-32kbitrate-mono-mp3",
// AUDIO_16KHZ_64KBITRATE_MONO_MP3 = "audio-16khz-64kbitrate-mono-mp3",
// AUDIO_16KHZ_128KBITRATE_MONO_MP3 = "audio-16khz-128kbitrate-mono-mp3",
// AUDIO_24KHZ_16BIT_24KBPS_MONO_OPUS = "audio-24khz-16bit-24kbps-mono-opus",
// AUDIO_24KHZ_16BIT_48KBPS_MONO_OPUS = "audio-24khz-16bit-48kbps-mono-opus",
AUDIO_24KHZ_48KBITRATE_MONO_MP3 = "audio-24khz-48kbitrate-mono-mp3",
AUDIO_24KHZ_96KBITRATE_MONO_MP3 = "audio-24khz-96kbitrate-mono-mp3",
// AUDIO_24KHZ_160KBITRATE_MONO_MP3 = "audio-24khz-160kbitrate-mono-mp3",
// AUDIO_48KHZ_96KBITRATE_MONO_MP3 = "audio-48khz-96kbitrate-mono-mp3",
// AUDIO_48KHZ_192KBITRATE_MONO_MP3 = "audio-48khz-192kbitrate-mono-mp3",
// OGG_16KHZ_16BIT_MONO_OPUS = "ogg-16khz-16bit-mono-opus",
// OGG_24KHZ_16BIT_MONO_OPUS = "ogg-24khz-16bit-mono-opus",
// OGG_48KHZ_16BIT_MONO_OPUS = "ogg-48khz-16bit-mono-opus",
// RAW_8KHZ_8BIT_MONO_ALAW = "raw-8khz-8bit-mono-alaw",
// RAW_8KHZ_8BIT_MONO_MULAW = "raw-8khz-8bit-mono-mulaw",
// RAW_8KHZ_16BIT_MONO_PCM = "raw-8khz-16bit-mono-pcm",
// RAW_16KHZ_16BIT_MONO_PCM = "raw-16khz-16bit-mono-pcm",
// RAW_16KHZ_16BIT_MONO_TRUESILK = "raw-16khz-16bit-mono-truesilk",
// RAW_22050HZ_16BIT_MONO_PCM = "raw-22050hz-16bit-mono-pcm",
// RAW_24KHZ_16BIT_MONO_PCM = "raw-24khz-16bit-mono-pcm",
// RAW_24KHZ_16BIT_MONO_TRUESILK = "raw-24khz-16bit-mono-truesilk",
// RAW_44100HZ_16BIT_MONO_PCM = "raw-44100hz-16bit-mono-pcm",
// RAW_48KHZ_16BIT_MONO_PCM = "raw-48khz-16bit-mono-pcm",
// WEBM_16KHZ_16BIT_MONO_OPUS = "webm-16khz-16bit-mono-opus",
// WEBM_24KHZ_16BIT_24KBPS_MONO_OPUS = "webm-24khz-16bit-24kbps-mono-opus",
WEBM_24KHZ_16BIT_MONO_OPUS = "webm-24khz-16bit-mono-opus",
// Non-streaming =============================
// RIFF_8KHZ_8BIT_MONO_ALAW = "riff-8khz-8bit-mono-alaw",
// RIFF_8KHZ_8BIT_MONO_MULAW = "riff-8khz-8bit-mono-mulaw",
// RIFF_8KHZ_16BIT_MONO_PCM = "riff-8khz-16bit-mono-pcm",
// RIFF_22050HZ_16BIT_MONO_PCM = "riff-22050hz-16bit-mono-pcm",
// RIFF_24KHZ_16BIT_MONO_PCM = "riff-24khz-16bit-mono-pcm",
// RIFF_44100HZ_16BIT_MONO_PCM = "riff-44100hz-16bit-mono-pcm",
// RIFF_48KHZ_16BIT_MONO_PCM = "riff-48khz-16bit-mono-pcm",
}
// One entry of the voice list; getVoices() casts the JSON it fetches to Voice[].
// NOTE(review): field names presumably mirror the service's JSON response;
// the type is asserted, not validated — confirm against a live response.
export type Voice = {
Name: string;
ShortName: string;
Gender: string;
Locale: string;
SuggestedCodec: string;
FriendlyName: string;
Status: string;
};
/**
* Pitch, rate and volume settings for one synthesis request.
* The field initializers are the defaults: `_SSMLTemplate` spreads a fresh
* instance first and the caller's options over it, then writes the three
* values into the `<prosody>` element.
*/
export class ProsodyOptions {
/**
* The pitch to use.
* Can be any {@link PITCH}, or a relative frequency in Hz (+50Hz), a relative semitone (+2st), or a relative percentage (+50%).
* [SSML documentation](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-synthesis-markup-voice#:~:text=Optional-,pitch,-Indicates%20the%20baseline)
*/
pitch?: PITCH | string = "+0Hz";
/**
* The rate to use.
* Can be any {@link RATE}, or a relative number (0.5), or string with a relative percentage (+50%).
* [SSML documentation](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-synthesis-markup-voice#:~:text=Optional-,rate,-Indicates%20the%20speaking)
*/
rate?: RATE | string | number = 1.0;
/**
* The volume to use.
* Can be any {@link VOLUME}, or an absolute number (0, 100), a string with a relative number (+50), or a relative percentage (+50%).
* [SSML documentation](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-synthesis-markup-voice#:~:text=Optional-,volume,-Indicates%20the%20volume)
*/
volume?: VOLUME | string | number = 100.0;
}
export class MsEdgeTTS {
static OUTPUT_FORMAT = OUTPUT_FORMAT;
private static TRUSTED_CLIENT_TOKEN = "6A5AA1D4EAFF4E9FB37E23D68491D6F4";
private static VOICES_URL = `https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/voices/list?trustedclienttoken=${MsEdgeTTS.TRUSTED_CLIENT_TOKEN}`;
private static SYNTH_URL = `wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1?TrustedClientToken=${MsEdgeTTS.TRUSTED_CLIENT_TOKEN}`;
private static BINARY_DELIM = "Path:audio\r\n";
private static VOICE_LANG_REGEX = /\w{2}-\w{2}/;
private readonly _enableLogger;
private _ws: WebSocket | undefined;
private _voice: any;
private _voiceLocale: any;
private _outputFormat: any;
private _streams: { [key: string]: Readable } = {};
private _startTime = 0;
/**
 * Forwards its arguments to `console.log`, but only when the instance was
 * created with the logger enabled.
 */
private _log(...o: any[]) {
  if (!this._enableLogger) {
    return;
  }
  console.log(...o);
}
/**
* Create a new `MsEdgeTTS` instance.
*
* The constructor only stores the logger flag; it takes no `agent` argument
* and does not open a connection.
*
* @param enableLogger=false whether to enable the built-in logger. This logs connections inits, disconnects, and incoming data to the console
*/
public constructor(enableLogger: boolean = false) {
this._enableLogger = enableLogger;
}
/**
 * Sends a message over the WebSocket, reconnecting first if the socket is
 * not open (up to three attempts).
 *
 * @param message the payload passed to `WebSocket.send`
 * @throws Error when no socket has been created yet, or when the socket is
 *         still not open after the reconnect attempts
 */
private async _send(message: any) {
  if (!this._ws) {
    // Without a socket there is no output format / voice configured either,
    // so fail with a clear message instead of an opaque TypeError.
    throw new Error(
      "MsEdgeTTS: no connection has been initialised; set the metadata before sending",
    );
  }
  for (
    let attempt = 1;
    attempt <= 3 && this._ws.readyState !== WebSocket.OPEN;
    attempt++
  ) {
    if (attempt === 1) {
      this._startTime = Date.now();
    }
    this._log("connecting: ", attempt);
    await this._initClient();
  }
  if (this._ws.readyState !== WebSocket.OPEN) {
    // Sending on a closed socket would drop the message silently.
    throw new Error("MsEdgeTTS: WebSocket is not open, message not sent");
  }
  this._ws.send(message);
}
/**
 * Opens a new WebSocket to the synthesis endpoint and installs its handlers.
 *
 * On open, the speech configuration (including the current output format)
 * is sent. Incoming messages are routed by their `Path:` header: audio
 * frames are pushed to the stream registered under the message's request
 * id, `turn.end` ends that stream, and `turn.start` / `response` are
 * ignored. On close, every registered stream is ended.
 *
 * @returns a promise that resolves once the configuration message has been
 *          sent, and rejects when the socket reports an error or sending
 *          the configuration fails
 */
private _initClient() {
  const ws = new WebSocket(MsEdgeTTS.SYNTH_URL);
  this._ws = ws;

  ws.binaryType = "arraybuffer";
  return new Promise((resolve, reject) => {
    ws.onopen = () => {
      this._log(
        "Connected in",
        (Date.now() - this._startTime) / 1000,
        "seconds",
      );
      this._send(
        `Content-Type:application/json; charset=utf-8\r\nPath:speech.config\r\n\r\n
{
"context": {
"synthesis": {
"audio": {
"metadataoptions": {
"sentenceBoundaryEnabled": "false",
"wordBoundaryEnabled": "false"
},
"outputFormat": "${this._outputFormat}"
}
}
}
}
`,
        // Forward a failed send as well, otherwise this promise never settles.
      ).then(resolve, reject);
    };
    ws.onmessage = (m: any) => {
      const buffer = Buffer.from(m.data as ArrayBuffer);
      const message = buffer.toString();
      // Not every message is guaranteed to carry a request id; a missing
      // header must not throw inside the event handler.
      const requestId = /X-RequestId:(.*?)\r\n/gm.exec(message)?.[1];
      const stream =
        requestId === undefined ? undefined : this._streams[requestId];
      if (message.includes("Path:turn.start")) {
        // start of turn, ignore
      } else if (message.includes("Path:turn.end")) {
        // end of turn, close stream
        if (stream) {
          stream.push(null);
        } else {
          this._log("turn.end for unknown request", requestId);
        }
      } else if (message.includes("Path:response")) {
        // context response, ignore
      } else if (
        message.includes("Path:audio") &&
        m.data instanceof ArrayBuffer
      ) {
        if (stream && requestId !== undefined) {
          this._pushAudioData(buffer, requestId);
        } else {
          this._log("audio for unknown request", requestId);
        }
      } else {
        this._log("UNKNOWN MESSAGE", message);
      }
    };
    ws.onclose = () => {
      this._log(
        "disconnected after:",
        (Date.now() - this._startTime) / 1000,
        "seconds",
      );
      for (const requestId in this._streams) {
        this._streams[requestId].push(null);
      }
    };
    ws.onerror = function (error: any) {
      reject("Connect Error: " + error);
    };
  });
}
/**
 * Strips the header from a binary audio frame and pushes the remaining
 * bytes to the stream registered for `requestId`.
 *
 * The audio payload starts right after the `BINARY_DELIM` marker. Frames
 * without the marker, or for a request id with no registered stream, are
 * logged and dropped.
 *
 * @param audioBuffer the complete frame as received (header + audio)
 * @param requestId   id of the request the frame belongs to
 */
private _pushAudioData(audioBuffer: Buffer, requestId: string) {
  const delimIndex = audioBuffer.indexOf(MsEdgeTTS.BINARY_DELIM);
  if (delimIndex === -1) {
    // Without this check the slice would start at an arbitrary offset
    // (-1 + delimiter length) and header bytes would be emitted as audio.
    this._log("audio frame without delimiter, dropped");
    return;
  }
  const stream = this._streams[requestId];
  if (!stream) {
    this._log("audio frame for unknown request, dropped: ", requestId);
    return;
  }
  const audioData = audioBuffer.subarray(
    delimIndex + MsEdgeTTS.BINARY_DELIM.length,
  );
  stream.push(audioData);
  this._log("received audio chunk, size: ", audioData.length);
}
/**
 * Builds the SSML document for one synthesis request.
 *
 * @param input   the text placed inside the `<prosody>` element
 * @param options prosody overrides; unset fields take the `ProsodyOptions` defaults
 * @returns the SSML string using the instance's current voice and voice locale
 */
private _SSMLTemplate(input: string, options: ProsodyOptions = {}): string {
  // in case future updates to the edge API block these elements, we'll be concatenating strings.
  // NOTE(review): `input` is interpolated as-is; characters such as `<` or `&`
  // are not XML-escaped here — confirm whether callers sanitise the text.
  const { pitch, rate, volume } = { ...new ProsodyOptions(), ...options };
  return `<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="https://www.w3.org/2001/mstts" xml:lang="${this._voiceLocale}">
<voice name="${this._voice}">
<prosody pitch="${pitch}" rate="${rate}" volume="${volume}">
${input}
</prosody>
</voice>
</speak>`;
}
/**
 * Fetch the list of voices available in Microsoft Edge.
 * These, however, are not all. The complete list of voices supported by this module [can be found here](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support) (neural, standard, and preview).
 *
 * @returns a promise for the parsed JSON body of `MsEdgeTTS.VOICES_URL`.
 * It rejects with an `Error` when the response status is not OK, and with
 * whatever `fetch` or JSON parsing rejects with otherwise.
 */
getVoices(): Promise<Voice[]> {
  const parseVoices = (response: Response): Promise<Voice[]> => {
    if (response.ok) {
      return response.json();
    }
    throw new Error("Network response was not ok");
  };
  return fetch(MsEdgeTTS.VOICES_URL).then(parseVoices);
}
/**
 * Sets the required information for the speech to be synthesised and inits a new WebSocket connection.
 * Must be called at least once before text can be synthesised.
 * Saved in this instance. Can be called any number of times to update the metadata.
 *
 * A new connection is opened when any of the three values changed, or when
 * there is no open WebSocket. If the locale cannot be inferred, the method
 * throws before any previously stored metadata is modified.
 *
 * @param voiceName a string with any `ShortName`. A list of all available neural voices can be found [here](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support#neural-voices). However, it is not limited to neural voices: standard voices can also be used. A list of standard voices can be found [here](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support#standard-voices)
 * @param outputFormat any {@link OUTPUT_FORMAT}
 * @param voiceLocale (optional) any voice locale that is supported by the voice. See the list of all voices for compatibility. If not provided, the locale will be inferred from the `voiceName`
 * @throws Error when `voiceLocale` is not given and cannot be inferred from `voiceName`
 */
async setMetadata(
  voiceName: string,
  outputFormat: OUTPUT_FORMAT,
  voiceLocale?: string,
) {
  // Resolve the locale first: a failed inference must not leave the
  // instance with a new voice but no locale.
  let resolvedLocale = voiceLocale;
  if (!resolvedLocale) {
    const voiceLangMatch = MsEdgeTTS.VOICE_LANG_REGEX.exec(voiceName);
    if (!voiceLangMatch) {
      throw new Error("Could not infer voiceLocale from voiceName!");
    }
    resolvedLocale = voiceLangMatch[0];
  }
  const changed =
    this._voice !== voiceName ||
    this._voiceLocale !== resolvedLocale ||
    this._outputFormat !== outputFormat;
  this._voice = voiceName;
  this._voiceLocale = resolvedLocale;
  this._outputFormat = outputFormat;
  // create new client
  if (changed || !this._ws || this._ws.readyState !== this._ws.OPEN) {
    this._startTime = Date.now();
    await this._initClient();
  }
}
/**
 * Guard used before a synthesis request is issued.
 *
 * @throws Error when no WebSocket has been created on this instance yet
 */
private _metadataCheck() {
  const hasSocket = Boolean(this._ws);
  if (hasSocket) {
    return;
  }
  throw new Error(
    "Speech synthesis not configured yet. Run setMetadata before calling toStream or toFile.",
  );
}
/**
 * Close the WebSocket connection.
 * Does nothing when no connection has been created yet.
 */
close() {
  this._ws?.close();
}
/**
 * Writes raw audio synthesised from text in real-time to a {@link Readable}. Uses a basic {@link _SSMLTemplate SSML template}.
 *
 * @param input the text to synthesise. Can include SSML elements.
 * @param options (optional) {@link ProsodyOptions}
 * @returns {Readable} - a `stream.Readable` with the audio data
 */
toStream(input: string, options?: ProsodyOptions): Readable {
  const ssml = this._SSMLTemplate(input, options);
  return this._rawSSMLRequest(ssml).stream;
}
/**
 * Synthesises `input` and collects the whole audio stream into a single
 * `ArrayBuffer`.
 *
 * @param input the text to synthesise. Can include SSML elements.
 * @param options (optional) {@link ProsodyOptions}
 * @returns a promise that resolves with exactly the received audio bytes
 * once the stream ends, and rejects when the stream emits an error or the
 * request cannot be started.
 */
toArrayBuffer(input: string, options?: ProsodyOptions): Promise<ArrayBuffer> {
  return new Promise((resolve, reject) => {
    const chunks: Uint8Array[] = [];
    const readable = this.toStream(input, options);
    readable.on("data", (chunk) => {
      chunks.push(chunk);
    });
    readable.on("end", () => {
      const merged = Buffer.concat(chunks);
      // A Buffer can be a view into a larger, shared ArrayBuffer, so
      // `merged.buffer` alone may contain unrelated bytes. Copy out only
      // the window this Buffer actually covers.
      const exact = merged.buffer.slice(
        merged.byteOffset,
        merged.byteOffset + merged.byteLength,
      );
      resolve(exact as ArrayBuffer);
    });
    readable.on("error", (err) => {
      reject(err);
    });
  });
}
/**
 * Writes raw audio synthesised from a request in real-time to a {@link Readable}.
 * No SSML template is applied: the caller must supply the SSML itself.
 *
 * @param requestSSML the SSML to send. SSML elements required in order to work.
 * @returns {Readable} - a `stream.Readable` with the audio data
 */
rawToStream(requestSSML: string): Readable {
  return this._rawSSMLRequest(requestSSML).stream;
}
/**
 * Sends an SSML request over the WebSocket and returns the stream that
 * will receive its audio.
 *
 * A random request id is generated, a `Readable` is registered under that
 * id in `_streams`, and the request is sent asynchronously. Destroying the
 * stream removes its registration. If sending fails, the stream is
 * destroyed with that error so consumers see an `error` event.
 *
 * @param requestSSML the SSML document to send; surrounding whitespace is trimmed
 * @returns the audio stream and the id of the request
 * @throws Error when no WebSocket has been created yet (see `_metadataCheck`)
 */
private _rawSSMLRequest(requestSSML: string): {
  stream: Readable;
  requestId: string;
} {
  this._metadataCheck();
  const requestId = randomBytes(16).toString("hex");
  const request =
    `X-RequestId:${requestId}\r\nContent-Type:application/ssml+xml\r\nPath:ssml\r\n\r\n
` + requestSSML.trim();
  // https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/speech-synthesis-markup
  const stream = new Readable({
    // Data is pushed from the WebSocket handlers; nothing to pull here.
    read() {},
    destroy: (error: Error | null, callback: (error: Error | null) => void) => {
      delete this._streams[requestId];
      callback(error);
    },
  });
  this._streams[requestId] = stream;
  // Do not leave the send promise floating: a failed send would otherwise
  // be an unhandled rejection and the stream would never end or error.
  this._send(request).catch((err) => {
    stream.destroy(err instanceof Error ? err : new Error(String(err)));
  });
  return { stream, requestId };
}
}

View File

@ -32,6 +32,7 @@
"idb-keyval": "^6.2.1", "idb-keyval": "^6.2.1",
"lodash-es": "^4.17.21", "lodash-es": "^4.17.21",
"mermaid": "^10.6.1", "mermaid": "^10.6.1",
"markdown-to-txt": "^2.0.1",
"nanoid": "^5.0.3", "nanoid": "^5.0.3",
"next": "^14.1.1", "next": "^14.1.1",
"node-fetch": "^3.3.1", "node-fetch": "^3.3.1",
@ -66,6 +67,7 @@
"eslint-config-next": "13.4.19", "eslint-config-next": "13.4.19",
"eslint-config-prettier": "^8.8.0", "eslint-config-prettier": "^8.8.0",
"eslint-plugin-prettier": "^5.1.3", "eslint-plugin-prettier": "^5.1.3",
"eslint-plugin-unused-imports": "^3.2.0",
"husky": "^8.0.0", "husky": "^8.0.0",
"lint-staged": "^13.2.2", "lint-staged": "^13.2.2",
"prettier": "^3.0.2", "prettier": "^3.0.2",
@ -78,4 +80,4 @@
"lint-staged/yaml": "^2.2.2" "lint-staged/yaml": "^2.2.2"
}, },
"packageManager": "yarn@1.22.19" "packageManager": "yarn@1.22.19"
} }

View File

@ -3367,6 +3367,18 @@ eslint-plugin-react@^7.31.7:
semver "^6.3.0" semver "^6.3.0"
string.prototype.matchall "^4.0.8" string.prototype.matchall "^4.0.8"
eslint-plugin-unused-imports@^3.2.0:
version "3.2.0"
resolved "https://registry.yarnpkg.com/eslint-plugin-unused-imports/-/eslint-plugin-unused-imports-3.2.0.tgz#63a98c9ad5f622cd9f830f70bc77739f25ccfe0d"
integrity sha512-6uXyn6xdINEpxE1MtDjxQsyXB37lfyO2yKGVVgtD7WEWQGORSOZjgrD6hBhvGv4/SO+TOlS+UnC6JppRqbuwGQ==
dependencies:
eslint-rule-composer "^0.3.0"
eslint-rule-composer@^0.3.0:
version "0.3.0"
resolved "https://registry.yarnpkg.com/eslint-rule-composer/-/eslint-rule-composer-0.3.0.tgz#79320c927b0c5c0d3d3d2b76c8b4a488f25bbaf9"
integrity sha512-bt+Sh8CtDmn2OajxvNO+BX7Wn4CIWMpTRm3MaiKPCQcnnlm0CS2mhui6QaoeQugs+3Kj2ESKEEGJUdVafwhiCg==
eslint-scope@5.1.1: eslint-scope@5.1.1:
version "5.1.1" version "5.1.1"
resolved "https://registry.npmmirror.com/eslint-scope/-/eslint-scope-5.1.1.tgz#e786e59a66cb92b3f6c1fb0d508aab174848f48c" resolved "https://registry.npmmirror.com/eslint-scope/-/eslint-scope-5.1.1.tgz#e786e59a66cb92b3f6c1fb0d508aab174848f48c"
@ -4443,11 +4455,21 @@ lodash.debounce@^4.0.8:
resolved "https://registry.yarnpkg.com/lodash.debounce/-/lodash.debounce-4.0.8.tgz#82d79bff30a67c4005ffd5e2515300ad9ca4d7af" resolved "https://registry.yarnpkg.com/lodash.debounce/-/lodash.debounce-4.0.8.tgz#82d79bff30a67c4005ffd5e2515300ad9ca4d7af"
integrity sha512-FT1yDzDYEoYWhnSGnpE/4Kj1fLZkDFyqRb7fNt6FdYOSxlUWAtp42Eh6Wb0rGIv/m9Bgo7x4GhQbm5Ys4SG5ow== integrity sha512-FT1yDzDYEoYWhnSGnpE/4Kj1fLZkDFyqRb7fNt6FdYOSxlUWAtp42Eh6Wb0rGIv/m9Bgo7x4GhQbm5Ys4SG5ow==
lodash.escape@^4.0.1:
version "4.0.1"
resolved "https://registry.yarnpkg.com/lodash.escape/-/lodash.escape-4.0.1.tgz#c9044690c21e04294beaa517712fded1fa88de98"
integrity sha512-nXEOnb/jK9g0DYMr1/Xvq6l5xMD7GDG55+GSYIYmS0G4tBk/hURD4JR9WCavs04t33WmJx9kCyp9vJ+mr4BOUw==
lodash.merge@^4.6.2: lodash.merge@^4.6.2:
version "4.6.2" version "4.6.2"
resolved "https://registry.yarnpkg.com/lodash.merge/-/lodash.merge-4.6.2.tgz#558aa53b43b661e1925a0afdfa36a9a1085fe57a" resolved "https://registry.yarnpkg.com/lodash.merge/-/lodash.merge-4.6.2.tgz#558aa53b43b661e1925a0afdfa36a9a1085fe57a"
integrity sha512-0KpjqXRVvrYyCsX1swR/XTK0va6VQkQM6MNo7PqW77ByjAhoARA8EfrP1N4+KlKj8YS0ZUCtRT/YUuhyYDujIQ== integrity sha512-0KpjqXRVvrYyCsX1swR/XTK0va6VQkQM6MNo7PqW77ByjAhoARA8EfrP1N4+KlKj8YS0ZUCtRT/YUuhyYDujIQ==
lodash.unescape@^4.0.1:
version "4.0.1"
resolved "https://registry.yarnpkg.com/lodash.unescape/-/lodash.unescape-4.0.1.tgz#bf2249886ce514cda112fae9218cdc065211fc9c"
integrity sha512-DhhGRshNS1aX6s5YdBE3njCCouPgnG29ebyHvImlZzXZf2SHgt+J08DHgytTPnpywNbO1Y8mNUFyQuIDBq2JZg==
lodash@^4.17.21: lodash@^4.17.21:
version "4.17.21" version "4.17.21"
resolved "https://registry.npmmirror.com/lodash/-/lodash-4.17.21.tgz#679591c564c3bffaae8454cf0b3df370c3d6911c" resolved "https://registry.npmmirror.com/lodash/-/lodash-4.17.21.tgz#679591c564c3bffaae8454cf0b3df370c3d6911c"
@ -4503,6 +4525,20 @@ markdown-table@^3.0.0:
resolved "https://registry.yarnpkg.com/markdown-table/-/markdown-table-3.0.3.tgz#e6331d30e493127e031dd385488b5bd326e4a6bd" resolved "https://registry.yarnpkg.com/markdown-table/-/markdown-table-3.0.3.tgz#e6331d30e493127e031dd385488b5bd326e4a6bd"
integrity sha512-Z1NL3Tb1M9wH4XESsCDEksWoKTdlUafKc4pt0GRwjUyXaCFZ+dc3g2erqB6zm3szA2IUSi7VnPI+o/9jnxh9hw== integrity sha512-Z1NL3Tb1M9wH4XESsCDEksWoKTdlUafKc4pt0GRwjUyXaCFZ+dc3g2erqB6zm3szA2IUSi7VnPI+o/9jnxh9hw==
markdown-to-txt@^2.0.1:
version "2.0.1"
resolved "https://registry.yarnpkg.com/markdown-to-txt/-/markdown-to-txt-2.0.1.tgz#bfd6233a2635443cc24900a158b60c6af36ce9c5"
integrity sha512-Hsj7KTN8k1gutlLum3vosHwVZGnv8/cbYKWVkUyo/D1rzOYddbDesILebRfOsaVfjIBJank/AVOySBlHAYqfZw==
dependencies:
lodash.escape "^4.0.1"
lodash.unescape "^4.0.1"
marked "^4.0.14"
marked@^4.0.14:
version "4.3.0"
resolved "https://registry.yarnpkg.com/marked/-/marked-4.3.0.tgz#796362821b019f734054582038b116481b456cf3"
integrity sha512-PRsaiG84bK+AMvxziE/lCFss8juXjNaWzVbN5tXAm4XjeaS9NAHhop+PjQxz2A9h8Q4M/xGmzP8vqNwy6JeK0A==
mdast-util-definitions@^5.0.0: mdast-util-definitions@^5.0.0:
version "5.1.2" version "5.1.2"
resolved "https://registry.yarnpkg.com/mdast-util-definitions/-/mdast-util-definitions-5.1.2.tgz#9910abb60ac5d7115d6819b57ae0bcef07a3f7a7" resolved "https://registry.yarnpkg.com/mdast-util-definitions/-/mdast-util-definitions-5.1.2.tgz#9910abb60ac5d7115d6819b57ae0bcef07a3f7a7"