mirror of
https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web.git
synced 2025-10-01 07:36:39 +08:00
Merge fe484fd38a
into 557a2cce35
This commit is contained in:
commit
d6b770cb9b
1
.yarnrc.yml
Normal file
1
.yarnrc.yml
Normal file
@ -0,0 +1 @@
|
|||||||
|
nodeLinker: node-modules
|
@ -107,7 +107,8 @@ export interface LLMModelProvider {
|
|||||||
|
|
||||||
export abstract class LLMApi {
|
export abstract class LLMApi {
|
||||||
abstract chat(options: ChatOptions): Promise<void>;
|
abstract chat(options: ChatOptions): Promise<void>;
|
||||||
abstract speech(options: SpeechOptions): Promise<ArrayBuffer>;
|
abstract speech(options: SpeechOptions): Promise<ArrayBuffer | AudioBuffer>;
|
||||||
|
abstract streamSpeech?(options: SpeechOptions): AsyncGenerator<AudioBuffer>;
|
||||||
abstract usage(): Promise<LLMUsage>;
|
abstract usage(): Promise<LLMUsage>;
|
||||||
abstract models(): Promise<LLMModel[]>;
|
abstract models(): Promise<LLMModel[]>;
|
||||||
}
|
}
|
||||||
|
@ -1,5 +1,10 @@
|
|||||||
"use client";
|
"use client";
|
||||||
import { ApiPath, Alibaba, ALIBABA_BASE_URL } from "@/app/constant";
|
import {
|
||||||
|
ApiPath,
|
||||||
|
Alibaba,
|
||||||
|
ALIBABA_BASE_URL,
|
||||||
|
REQUEST_TIMEOUT_MS,
|
||||||
|
} from "@/app/constant";
|
||||||
import {
|
import {
|
||||||
useAccessStore,
|
useAccessStore,
|
||||||
useAppConfig,
|
useAppConfig,
|
||||||
@ -59,6 +64,7 @@ interface RequestPayload {
|
|||||||
}
|
}
|
||||||
|
|
||||||
export class QwenApi implements LLMApi {
|
export class QwenApi implements LLMApi {
|
||||||
|
private audioContext?: AudioContext;
|
||||||
path(path: string): string {
|
path(path: string): string {
|
||||||
const accessStore = useAccessStore.getState();
|
const accessStore = useAccessStore.getState();
|
||||||
|
|
||||||
@ -89,10 +95,72 @@ export class QwenApi implements LLMApi {
|
|||||||
return res?.output?.choices?.at(0)?.message?.content ?? "";
|
return res?.output?.choices?.at(0)?.message?.content ?? "";
|
||||||
}
|
}
|
||||||
|
|
||||||
speech(options: SpeechOptions): Promise<ArrayBuffer> {
|
async speech(options: SpeechOptions): Promise<ArrayBuffer> {
|
||||||
throw new Error("Method not implemented.");
|
throw new Error("Method not implemented.");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Requests speech synthesis with server-sent events enabled and yields one
 * AudioBuffer per event that carries audio data.
 *
 * The request is aborted through the controller handed to
 * `options.onController`, or automatically once REQUEST_TIMEOUT_MS elapses.
 *
 * @param options model, input text, voice, speed and response format to send.
 * @returns an async generator of decoded audio chunks, in arrival order.
 * @throws when the request fails, the server answers with a non-2xx status,
 *         the response has no body, or a chunk cannot be converted.
 */
async *streamSpeech(options: SpeechOptions): AsyncGenerator<AudioBuffer> {
  const requestPayload = {
    model: options.model,
    input: {
      text: options.input,
      voice: options.voice,
    },
    speed: options.speed,
    response_format: options.response_format,
  };
  const controller = new AbortController();
  options.onController?.(controller);

  // Extracts the base64 audio payload from one SSE line. Returns undefined
  // for non-data lines, events without audio, and malformed JSON, so a single
  // odd event does not terminate the whole stream.
  const extractAudioData = (line: string): string | undefined => {
    if (!line.startsWith("data:")) {
      return undefined;
    }
    const payload = line.slice(5).trim();
    if (!payload) {
      return undefined;
    }
    try {
      const audio = JSON.parse(payload)?.output?.audio?.data;
      return typeof audio === "string" && audio ? audio : undefined;
    } catch (e) {
      console.warn("[Request] skipped malformed speech event", e);
      return undefined;
    }
  };

  const requestTimeoutId = setTimeout(
    () => controller.abort(),
    REQUEST_TIMEOUT_MS,
  );
  let reader: ReadableStreamDefaultReader<Uint8Array> | undefined;
  let streamEnded = false;

  try {
    const res = await fetch(this.path(Alibaba.SpeechPath), {
      method: "POST",
      body: JSON.stringify(requestPayload),
      signal: controller.signal,
      headers: {
        ...getHeaders(),
        "X-DashScope-SSE": "enable",
      },
    });

    // An error response is not an event stream; surface it instead of
    // silently producing no audio.
    if (!res.ok) {
      const detail = await res.text().catch(() => "");
      throw new Error(
        `speech request failed with status ${res.status}` +
          (detail ? `: ${detail}` : ""),
      );
    }
    if (!res.body) {
      throw new Error("speech response has no body");
    }

    reader = res.body.getReader();
    const decoder = new TextDecoder();
    let buffer = "";

    while (!streamEnded) {
      const { done, value } = await reader.read();
      streamEnded = done;
      // On the final read, flush the decoder so buffered bytes are not lost.
      buffer += done ? decoder.decode() : decoder.decode(value, { stream: true });

      const lines = buffer.split("\n");
      // Keep the trailing partial line for the next read; once the stream has
      // ended there is no next read, so every remaining line is processed.
      buffer = done ? "" : lines.pop() ?? "";

      for (const line of lines) {
        const audioData = extractAudioData(line);
        if (audioData) {
          yield await this.PCMBase64ToAudioBuffer(audioData);
        }
      }
    }
  } catch (e) {
    console.log("[Request] failed to make a speech request", e);
    throw e;
  } finally {
    // Runs on success, on failure, and when the consumer stops iterating early.
    clearTimeout(requestTimeoutId);
    if (reader) {
      if (!streamEnded) {
        await reader.cancel().catch(() => undefined);
      }
      reader.releaseLock();
    }
  }
}
|
||||||
|
|
||||||
async chat(options: ChatOptions) {
|
async chat(options: ChatOptions) {
|
||||||
const modelConfig = {
|
const modelConfig = {
|
||||||
...useAppConfig.getState().modelConfig,
|
...useAppConfig.getState().modelConfig,
|
||||||
@ -273,5 +341,75 @@ export class QwenApi implements LLMApi {
|
|||||||
async models(): Promise<LLMModel[]> {
|
async models(): Promise<LLMModel[]> {
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Decode a base64-encoded PCM chunk and convert it into an AudioBuffer.
// Failures are logged and re-thrown to the caller.
private async PCMBase64ToAudioBuffer(base64Data: string) {
  try {
    // base64 -> binary string; atob yields one character per decoded byte
    const decoded = atob(base64Data);
    const pcmBytes = Uint8Array.from(decoded, (ch) => ch.charCodeAt(0));

    // raw bytes -> AudioBuffer
    return await this.convertToAudioBuffer(pcmBytes);
  } catch (error) {
    console.error("播放 PCM 数据失败:", error);
    throw error;
  }
}
|
||||||
|
|
||||||
|
// Wrap PCM bytes in a single-channel AudioBuffer created at a 24000 Hz sample
// rate. The result is delivered through a promise; conversion errors reject it.
private convertToAudioBuffer(pcmData: Uint8Array) {
  // The AudioContext is created on first use and reused afterwards.
  if (!this.audioContext) {
    const ContextCtor = window.AudioContext || window.webkitAudioContext;
    this.audioContext = new ContextCtor();
  }
  const context = this.audioContext;
  const channelCount = 1;
  const sampleRate = 24000;

  try {
    // 16-bit PCM -> 32-bit float samples
    const samples = this.pcm16ToFloat32(pcmData);

    // Allocate the destination buffer
    const result = context.createBuffer(
      channelCount,
      samples.length / channelCount,
      sampleRate,
    );

    // De-interleave the samples into each channel of the buffer
    for (let ch = 0; ch < channelCount; ch += 1) {
      const target = result.getChannelData(ch);
      let sourceIndex = ch;
      for (let frame = 0; frame < target.length; frame += 1) {
        target[frame] = samples[sourceIndex];
        sourceIndex += channelCount;
      }
    }

    return Promise.resolve(result);
  } catch (error) {
    return Promise.reject<AudioBuffer>(error);
  }
}
|
||||||
|
// Convert little-endian signed 16-bit PCM bytes into 32-bit float samples
// (each sample is the signed 16-bit value divided by 32768).
private pcm16ToFloat32(pcmData: Uint8Array) {
  // View over exactly the bytes of pcmData, honouring its offset in the buffer
  const view = new DataView(
    pcmData.buffer,
    pcmData.byteOffset,
    pcmData.byteLength,
  );
  // Two bytes per sample; a trailing odd byte does not form a sample
  const samples = new Float32Array(Math.floor(pcmData.length / 2));

  for (let index = 0; index < samples.length; index += 1) {
    samples[index] = view.getInt16(index * 2, true) / 32768;
  }

  return samples;
}
|
||||||
}
|
}
|
||||||
export { Alibaba };
|
export { Alibaba };
|
||||||
|
@ -101,8 +101,6 @@ import {
|
|||||||
import { useNavigate } from "react-router-dom";
|
import { useNavigate } from "react-router-dom";
|
||||||
import {
|
import {
|
||||||
CHAT_PAGE_SIZE,
|
CHAT_PAGE_SIZE,
|
||||||
DEFAULT_TTS_ENGINE,
|
|
||||||
ModelProvider,
|
|
||||||
Path,
|
Path,
|
||||||
REQUEST_TIMEOUT_MS,
|
REQUEST_TIMEOUT_MS,
|
||||||
ServiceProvider,
|
ServiceProvider,
|
||||||
@ -1286,6 +1284,7 @@ function _Chat() {
|
|||||||
const accessStore = useAccessStore();
|
const accessStore = useAccessStore();
|
||||||
const [speechStatus, setSpeechStatus] = useState(false);
|
const [speechStatus, setSpeechStatus] = useState(false);
|
||||||
const [speechLoading, setSpeechLoading] = useState(false);
|
const [speechLoading, setSpeechLoading] = useState(false);
|
||||||
|
const [speechCooldown, setSpeechCooldown] = useState(false);
|
||||||
|
|
||||||
async function openaiSpeech(text: string) {
|
async function openaiSpeech(text: string) {
|
||||||
if (speechStatus) {
|
if (speechStatus) {
|
||||||
@ -1293,14 +1292,14 @@ function _Chat() {
|
|||||||
setSpeechStatus(false);
|
setSpeechStatus(false);
|
||||||
} else {
|
} else {
|
||||||
var api: ClientApi;
|
var api: ClientApi;
|
||||||
api = new ClientApi(ModelProvider.GPT);
|
|
||||||
const config = useAppConfig.getState();
|
const config = useAppConfig.getState();
|
||||||
|
api = new ClientApi(config.ttsConfig.modelProvider);
|
||||||
setSpeechLoading(true);
|
setSpeechLoading(true);
|
||||||
ttsPlayer.init();
|
ttsPlayer.init();
|
||||||
let audioBuffer: ArrayBuffer;
|
let audioBuffer: ArrayBuffer | AudioBuffer;
|
||||||
const { markdownToTxt } = require("markdown-to-txt");
|
const { markdownToTxt } = require("markdown-to-txt");
|
||||||
const textContent = markdownToTxt(text);
|
const textContent = markdownToTxt(text);
|
||||||
if (config.ttsConfig.engine !== DEFAULT_TTS_ENGINE) {
|
if (config.ttsConfig.engine === "Edge") {
|
||||||
const edgeVoiceName = accessStore.edgeVoiceName();
|
const edgeVoiceName = accessStore.edgeVoiceName();
|
||||||
const tts = new MsEdgeTTS();
|
const tts = new MsEdgeTTS();
|
||||||
await tts.setMetadata(
|
await tts.setMetadata(
|
||||||
@ -1308,6 +1307,33 @@ function _Chat() {
|
|||||||
OUTPUT_FORMAT.AUDIO_24KHZ_96KBITRATE_MONO_MP3,
|
OUTPUT_FORMAT.AUDIO_24KHZ_96KBITRATE_MONO_MP3,
|
||||||
);
|
);
|
||||||
audioBuffer = await tts.toArrayBuffer(textContent);
|
audioBuffer = await tts.toArrayBuffer(textContent);
|
||||||
|
playSpeech(audioBuffer);
|
||||||
|
} else {
|
||||||
|
if (api.llm.streamSpeech) {
|
||||||
|
// 使用流式播放,边接收边播放
|
||||||
|
setSpeechStatus(true);
|
||||||
|
ttsPlayer.startStreamPlay(() => {
|
||||||
|
setSpeechStatus(false);
|
||||||
|
});
|
||||||
|
|
||||||
|
try {
|
||||||
|
for await (const chunk of api.llm.streamSpeech({
|
||||||
|
model: config.ttsConfig.model,
|
||||||
|
input: textContent,
|
||||||
|
voice: config.ttsConfig.voice,
|
||||||
|
speed: config.ttsConfig.speed,
|
||||||
|
})) {
|
||||||
|
ttsPlayer.addToQueue(chunk);
|
||||||
|
}
|
||||||
|
ttsPlayer.finishStreamPlay();
|
||||||
|
} catch (e) {
|
||||||
|
console.error("[Stream Speech]", e);
|
||||||
|
showToast(prettyObject(e));
|
||||||
|
setSpeechStatus(false);
|
||||||
|
ttsPlayer.stop();
|
||||||
|
} finally {
|
||||||
|
setSpeechLoading(false);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
audioBuffer = await api.llm.speech({
|
audioBuffer = await api.llm.speech({
|
||||||
model: config.ttsConfig.model,
|
model: config.ttsConfig.model,
|
||||||
@ -1315,7 +1341,13 @@ function _Chat() {
|
|||||||
voice: config.ttsConfig.voice,
|
voice: config.ttsConfig.voice,
|
||||||
speed: config.ttsConfig.speed,
|
speed: config.ttsConfig.speed,
|
||||||
});
|
});
|
||||||
|
playSpeech(audioBuffer);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function playSpeech(audioBuffer: ArrayBuffer | AudioBuffer) {
|
||||||
setSpeechStatus(true);
|
setSpeechStatus(true);
|
||||||
ttsPlayer
|
ttsPlayer
|
||||||
.play(audioBuffer, () => {
|
.play(audioBuffer, () => {
|
||||||
@ -1328,7 +1360,6 @@ function _Chat() {
|
|||||||
})
|
})
|
||||||
.finally(() => setSpeechLoading(false));
|
.finally(() => setSpeechLoading(false));
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
const context: RenderMessage[] = useMemo(() => {
|
const context: RenderMessage[] = useMemo(() => {
|
||||||
return session.mask.hideContext ? [] : session.mask.context.slice();
|
return session.mask.hideContext ? [] : session.mask.context.slice();
|
||||||
|
@ -3,10 +3,9 @@ import { TTSConfig, TTSConfigValidator } from "../store";
|
|||||||
import Locale from "../locales";
|
import Locale from "../locales";
|
||||||
import { ListItem, Select } from "./ui-lib";
|
import { ListItem, Select } from "./ui-lib";
|
||||||
import {
|
import {
|
||||||
DEFAULT_TTS_ENGINE,
|
ServiceProvider,
|
||||||
DEFAULT_TTS_ENGINES,
|
TTS_CONFIGS,
|
||||||
DEFAULT_TTS_MODELS,
|
TTSEngineType
|
||||||
DEFAULT_TTS_VOICES,
|
|
||||||
} from "../constant";
|
} from "../constant";
|
||||||
import { InputRange } from "./input-range";
|
import { InputRange } from "./input-range";
|
||||||
|
|
||||||
@ -48,22 +47,33 @@ export function TTSConfigList(props: {
|
|||||||
<Select
|
<Select
|
||||||
value={props.ttsConfig.engine}
|
value={props.ttsConfig.engine}
|
||||||
onChange={(e) => {
|
onChange={(e) => {
|
||||||
|
const newEngine = e.currentTarget.value as TTSEngineType;
|
||||||
props.updateConfig(
|
props.updateConfig(
|
||||||
(config) =>
|
(config) => {
|
||||||
(config.engine = TTSConfigValidator.engine(
|
config.engine = TTSConfigValidator.engine(newEngine);
|
||||||
e.currentTarget.value,
|
const engineConfig = TTS_CONFIGS[newEngine];
|
||||||
)),
|
config.model = TTSConfigValidator.model(
|
||||||
|
engineConfig.Model[0] || ""
|
||||||
|
);
|
||||||
|
config.voice = TTSConfigValidator.voice(
|
||||||
|
engineConfig.Voices[0] || ""
|
||||||
|
);
|
||||||
|
config.modelProvider = TTSConfigValidator.modelProvider(
|
||||||
|
engineConfig.ModelProvider
|
||||||
|
);
|
||||||
|
}
|
||||||
);
|
);
|
||||||
}}
|
}}
|
||||||
>
|
>
|
||||||
{DEFAULT_TTS_ENGINES.map((v, i) => (
|
{Object.keys(TTS_CONFIGS).map((v, i) => (
|
||||||
<option value={v} key={i}>
|
<option value={v} key={i}>
|
||||||
{v}
|
{v}-TTS
|
||||||
</option>
|
</option>
|
||||||
))}
|
))}
|
||||||
</Select>
|
</Select>
|
||||||
</ListItem>
|
</ListItem>
|
||||||
{props.ttsConfig.engine === DEFAULT_TTS_ENGINE && (
|
{(props.ttsConfig.engine === ServiceProvider.OpenAI ||
|
||||||
|
props.ttsConfig.engine === ServiceProvider.Alibaba) && (
|
||||||
<>
|
<>
|
||||||
<ListItem title={Locale.Settings.TTS.Model}>
|
<ListItem title={Locale.Settings.TTS.Model}>
|
||||||
<Select
|
<Select
|
||||||
@ -77,7 +87,7 @@ export function TTSConfigList(props: {
|
|||||||
);
|
);
|
||||||
}}
|
}}
|
||||||
>
|
>
|
||||||
{DEFAULT_TTS_MODELS.map((v, i) => (
|
{TTS_CONFIGS[props.ttsConfig.engine]!.Model.map((v, i) => (
|
||||||
<option value={v} key={i}>
|
<option value={v} key={i}>
|
||||||
{v}
|
{v}
|
||||||
</option>
|
</option>
|
||||||
@ -99,7 +109,7 @@ export function TTSConfigList(props: {
|
|||||||
);
|
);
|
||||||
}}
|
}}
|
||||||
>
|
>
|
||||||
{DEFAULT_TTS_VOICES.map((v, i) => (
|
{TTS_CONFIGS[props.ttsConfig.engine]!.Voices.map((v, i) => (
|
||||||
<option value={v} key={i}>
|
<option value={v} key={i}>
|
||||||
{v}
|
{v}
|
||||||
</option>
|
</option>
|
||||||
|
@ -232,6 +232,7 @@ export const Alibaba = {
|
|||||||
}
|
}
|
||||||
return `v1/services/aigc/text-generation/generation`;
|
return `v1/services/aigc/text-generation/generation`;
|
||||||
},
|
},
|
||||||
|
SpeechPath: "v1/services/aigc/multimodal-generation/generation",
|
||||||
};
|
};
|
||||||
|
|
||||||
export const Tencent = {
|
export const Tencent = {
|
||||||
@ -461,19 +462,49 @@ export const KnowledgeCutOffDate: Record<string, string> = {
|
|||||||
"deepseek-coder": "2024-07",
|
"deepseek-coder": "2024-07",
|
||||||
};
|
};
|
||||||
|
|
||||||
export const DEFAULT_TTS_ENGINE = "OpenAI-TTS";
|
export const DEFAULT_TTS_ENGINE = ServiceProvider.OpenAI;
|
||||||
export const DEFAULT_TTS_ENGINES = ["OpenAI-TTS", "Edge-TTS"];
|
|
||||||
export const DEFAULT_TTS_MODEL = "tts-1";
|
export const DEFAULT_TTS_MODEL = "tts-1";
|
||||||
export const DEFAULT_TTS_VOICE = "alloy";
|
export const DEFAULT_TTS_VOICE = "alloy";
|
||||||
export const DEFAULT_TTS_MODELS = ["tts-1", "tts-1-hd"];
|
|
||||||
export const DEFAULT_TTS_VOICES = [
|
export const OPENAI_TTS = {
|
||||||
"alloy",
|
Provider: ServiceProvider.OpenAI,
|
||||||
"echo",
|
ModelProvider: ModelProvider.GPT,
|
||||||
"fable",
|
Model: ["tts-1", "tts-1-hd"],
|
||||||
"onyx",
|
Voices: ["alloy", "echo", "fable", "onyx", "nova", "shimmer"],
|
||||||
"nova",
|
} as const;
|
||||||
"shimmer",
|
|
||||||
];
|
/** Alibaba TTS engine: service provider, client model provider, models and voices. */
export const ALIBABA_TTS = {
  Provider: ServiceProvider.Alibaba,
  ModelProvider: ModelProvider.Qwen,
  Model: ["qwen-tts", "qwen-tts-latest"],
  Voices: ["Chelsie", "Cherry", "Ethan", "Serena", "Dylan", "Jada", "Sunny"],
} as const;

/** Edge TTS engine: declares no selectable models or voices. */
export const EDGE_TTS = {
  Provider: "Edge" as const,
  ModelProvider: ModelProvider.GPT,
  Model: [] as string[],
  Voices: [] as string[],
} as const;

/** Identifier of a selectable TTS engine. */
export type TTSEngineType =
  | ServiceProvider.OpenAI
  | ServiceProvider.Alibaba
  | "Edge";

/** All selectable TTS engines. */
export const DEFAULT_TTS_ENGINES = [
  ServiceProvider.OpenAI,
  ServiceProvider.Alibaba,
  "Edge",
] as const;

/** Union of the models offered by the OpenAI and Alibaba engines. */
export const DEFAULT_TTS_MODELS = [
  ...OPENAI_TTS.Model,
  ...ALIBABA_TTS.Model,
] as const;

/** Union of the voices offered by the OpenAI and Alibaba engines. */
export const DEFAULT_TTS_VOICES = [
  ...OPENAI_TTS.Voices,
  ...ALIBABA_TTS.Voices,
] as const;

/** Shape shared by every per-engine TTS description. */
interface TTSConfigItem {
  Provider: ServiceProvider | "Edge";
  Model: readonly string[];
  Voices: readonly string[];
  ModelProvider: ModelProvider;
}

/** Lookup table from engine identifier to that engine's description. */
export const TTS_CONFIGS: Record<TTSEngineType, TTSConfigItem> = {
  [ServiceProvider.OpenAI]: OPENAI_TTS,
  [ServiceProvider.Alibaba]: ALIBABA_TTS,
  Edge: EDGE_TTS,
} as const;
|
||||||
|
|
||||||
export const VISION_MODEL_REGEXES = [
|
export const VISION_MODEL_REGEXES = [
|
||||||
/vision/,
|
/vision/,
|
||||||
@ -920,3 +951,4 @@ export const DEFAULT_GA_ID = "G-89WN60ZK2E";
|
|||||||
|
|
||||||
export const SAAS_CHAT_URL = "https://nextchat.club";
|
export const SAAS_CHAT_URL = "https://nextchat.club";
|
||||||
export const SAAS_CHAT_UTM_URL = "https://nextchat.club?utm=github";
|
export const SAAS_CHAT_UTM_URL = "https://nextchat.club?utm=github";
|
||||||
|
|
||||||
|
@ -13,6 +13,8 @@ import {
|
|||||||
DEFAULT_TTS_VOICES,
|
DEFAULT_TTS_VOICES,
|
||||||
StoreKey,
|
StoreKey,
|
||||||
ServiceProvider,
|
ServiceProvider,
|
||||||
|
TTSEngineType,
|
||||||
|
ModelProvider,
|
||||||
} from "../constant";
|
} from "../constant";
|
||||||
import { createPersistStore } from "../utils/store";
|
import { createPersistStore } from "../utils/store";
|
||||||
import type { Voice } from "rt-client";
|
import type { Voice } from "rt-client";
|
||||||
@ -20,7 +22,6 @@ import type { Voice } from "rt-client";
|
|||||||
export type ModelType = (typeof DEFAULT_MODELS)[number]["name"];
|
export type ModelType = (typeof DEFAULT_MODELS)[number]["name"];
|
||||||
export type TTSModelType = (typeof DEFAULT_TTS_MODELS)[number];
|
export type TTSModelType = (typeof DEFAULT_TTS_MODELS)[number];
|
||||||
export type TTSVoiceType = (typeof DEFAULT_TTS_VOICES)[number];
|
export type TTSVoiceType = (typeof DEFAULT_TTS_VOICES)[number];
|
||||||
export type TTSEngineType = (typeof DEFAULT_TTS_ENGINES)[number];
|
|
||||||
|
|
||||||
export enum SubmitKey {
|
export enum SubmitKey {
|
||||||
Enter = "Enter",
|
Enter = "Enter",
|
||||||
@ -86,7 +87,8 @@ export const DEFAULT_CONFIG = {
|
|||||||
ttsConfig: {
|
ttsConfig: {
|
||||||
enable: false,
|
enable: false,
|
||||||
autoplay: false,
|
autoplay: false,
|
||||||
engine: DEFAULT_TTS_ENGINE,
|
modelProvider: ModelProvider.GPT,
|
||||||
|
engine: DEFAULT_TTS_ENGINE as TTSEngineType,
|
||||||
model: DEFAULT_TTS_MODEL,
|
model: DEFAULT_TTS_MODEL,
|
||||||
voice: DEFAULT_TTS_VOICE,
|
voice: DEFAULT_TTS_VOICE,
|
||||||
speed: 1.0,
|
speed: 1.0,
|
||||||
@ -126,18 +128,21 @@ export function limitNumber(
|
|||||||
}
|
}
|
||||||
|
|
||||||
export const TTSConfigValidator = {
|
export const TTSConfigValidator = {
|
||||||
engine(x: string) {
|
engine(x: string | TTSEngineType): TTSEngineType {
|
||||||
return x as TTSEngineType;
|
return x as TTSEngineType;
|
||||||
},
|
},
|
||||||
model(x: string) {
|
model(x: string): TTSModelType {
|
||||||
return x as TTSModelType;
|
return x as TTSModelType;
|
||||||
},
|
},
|
||||||
voice(x: string) {
|
voice(x: string): TTSVoiceType {
|
||||||
return x as TTSVoiceType;
|
return x as TTSVoiceType;
|
||||||
},
|
},
|
||||||
speed(x: number) {
|
speed(x: number): number {
|
||||||
return limitNumber(x, 0.25, 4.0, 1.0);
|
return limitNumber(x, 0.25, 4.0, 1.0);
|
||||||
},
|
},
|
||||||
|
modelProvider(x: string): ModelProvider {
|
||||||
|
return x as ModelProvider;
|
||||||
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
export const ModalConfigValidator = {
|
export const ModalConfigValidator = {
|
||||||
|
@ -1,25 +1,48 @@
|
|||||||
type TTSPlayer = {
|
type TTSPlayer = {
|
||||||
init: () => void;
|
init: () => void;
|
||||||
play: (audioBuffer: ArrayBuffer, onended: () => void | null) => Promise<void>;
|
play: (
|
||||||
|
audioBuffer: ArrayBuffer | AudioBuffer,
|
||||||
|
onended: () => void | null,
|
||||||
|
) => Promise<void>;
|
||||||
|
playQueue: (
|
||||||
|
audioBuffers: (ArrayBuffer | AudioBuffer)[],
|
||||||
|
onended: () => void | null,
|
||||||
|
) => Promise<void>;
|
||||||
|
addToQueue: (audioBuffer: ArrayBuffer | AudioBuffer) => void;
|
||||||
|
startStreamPlay: (onended: () => void | null) => void;
|
||||||
|
finishStreamPlay: () => void;
|
||||||
stop: () => void;
|
stop: () => void;
|
||||||
};
|
};
|
||||||
|
|
||||||
export function createTTSPlayer(): TTSPlayer {
|
export function createTTSPlayer(): TTSPlayer {
|
||||||
let audioContext: AudioContext | null = null;
|
let audioContext: AudioContext | null = null;
|
||||||
let audioBufferSourceNode: AudioBufferSourceNode | null = null;
|
let audioBufferSourceNode: AudioBufferSourceNode | null = null;
|
||||||
|
let isPlaying = false;
|
||||||
|
let playQueue: (ArrayBuffer | AudioBuffer)[] = [];
|
||||||
|
let currentOnended: (() => void | null) | null = null;
|
||||||
|
let isStreamMode = false;
|
||||||
|
let streamFinished = false;
|
||||||
|
|
||||||
const init = () => {
|
const init = () => {
|
||||||
|
console.log("[TTSPlayer] init");
|
||||||
audioContext = new (window.AudioContext || window.webkitAudioContext)();
|
audioContext = new (window.AudioContext || window.webkitAudioContext)();
|
||||||
audioContext.suspend();
|
audioContext.suspend();
|
||||||
};
|
};
|
||||||
|
|
||||||
const play = async (audioBuffer: ArrayBuffer, onended: () => void | null) => {
|
const play = async (
|
||||||
|
audioBuffer: ArrayBuffer | AudioBuffer,
|
||||||
|
onended: () => void | null,
|
||||||
|
) => {
|
||||||
if (audioBufferSourceNode) {
|
if (audioBufferSourceNode) {
|
||||||
audioBufferSourceNode.stop();
|
audioBufferSourceNode.stop();
|
||||||
audioBufferSourceNode.disconnect();
|
audioBufferSourceNode.disconnect();
|
||||||
}
|
}
|
||||||
|
let buffer: AudioBuffer;
|
||||||
const buffer = await audioContext!.decodeAudioData(audioBuffer);
|
if (audioBuffer instanceof AudioBuffer) {
|
||||||
|
buffer = audioBuffer;
|
||||||
|
} else {
|
||||||
|
buffer = await audioContext!.decodeAudioData(audioBuffer);
|
||||||
|
}
|
||||||
audioBufferSourceNode = audioContext!.createBufferSource();
|
audioBufferSourceNode = audioContext!.createBufferSource();
|
||||||
audioBufferSourceNode.buffer = buffer;
|
audioBufferSourceNode.buffer = buffer;
|
||||||
audioBufferSourceNode.connect(audioContext!.destination);
|
audioBufferSourceNode.connect(audioContext!.destination);
|
||||||
@ -29,17 +52,109 @@ export function createTTSPlayer(): TTSPlayer {
|
|||||||
audioBufferSourceNode.onended = onended;
|
audioBufferSourceNode.onended = onended;
|
||||||
};
|
};
|
||||||
|
|
||||||
const stop = () => {
|
/**
 * Plays the next queued chunk and chains itself through the source node's
 * `onended` event until the queue is drained.
 *
 * In stream mode an empty queue is polled every 100 ms until the stream is
 * marked finished. When playback completes, the player state is reset and the
 * registered completion callback is invoked once.
 *
 * @throws when a chunk is queued but no audio context exists.
 */
const playNext = async () => {
  // stop() clears isPlaying; timers and onended events still pending from
  // before the stop must not touch the player state again.
  if (!isPlaying) {
    return;
  }

  // Re-enters playNext from a timer or event, where nobody awaits the result.
  const continuePlayback = () => {
    playNext().catch((e) => console.error("[TTSPlayer] playback failed", e));
  };

  const nextBuffer = playQueue.shift();
  if (nextBuffer === undefined) {
    // In stream mode an empty queue only means "no chunk yet": keep polling.
    if (isStreamMode && !streamFinished) {
      setTimeout(continuePlayback, 100);
      return;
    }

    isPlaying = false;
    isStreamMode = false;
    streamFinished = false;
    // Clear before invoking so a callback that starts a new playback does not
    // have its freshly registered callback wiped out.
    const finished = currentOnended;
    currentOnended = null;
    if (finished) {
      finished();
    }
    return;
  }

  const context = audioContext;
  if (!context) {
    isPlaying = false;
    throw new Error("[TTSPlayer] audio context is not initialized");
  }
  // True once stop() ran, or the context was replaced, while we were awaiting.
  const isTornDown = () => !isPlaying || audioContext !== context;

  const buffer =
    nextBuffer instanceof AudioBuffer
      ? nextBuffer
      : await context.decodeAudioData(nextBuffer);
  if (isTornDown()) {
    return;
  }

  await context.resume();
  if (isTornDown()) {
    return;
  }

  if (audioBufferSourceNode) {
    // Detach first so stopping the old node cannot re-enter the chain.
    audioBufferSourceNode.onended = null;
    audioBufferSourceNode.stop();
    audioBufferSourceNode.disconnect();
  }

  // The node is published and started without an await in between, so stop()
  // never sees a node that has not been started yet.
  const source = context.createBufferSource();
  source.buffer = buffer;
  source.connect(context.destination);
  source.onended = continuePlayback;
  audioBufferSourceNode = source;
  source.start();
};
|
||||||
|
|
||||||
|
/**
 * Replaces the pending queue with a copy of `audioBuffers`, registers the
 * completion callback, and starts playback unless it is already running.
 */
const playQueueMethod = async (
  audioBuffers: (ArrayBuffer | AudioBuffer)[],
  onended: () => void | null,
) => {
  playQueue = audioBuffers.slice();
  currentOnended = onended;

  // A running chain picks up the new queue by itself.
  if (isPlaying) {
    return;
  }
  isPlaying = true;
  await playNext();
};
|
||||||
|
|
||||||
|
/** Appends a chunk to the playback queue; ignored once the stream is marked finished. */
const addToQueue = (audioBuffer: ArrayBuffer | AudioBuffer) => {
  if (!streamFinished) {
    playQueue.push(audioBuffer);
  }
};
|
||||||
|
|
||||||
|
/**
 * Switches the player into stream mode with an empty queue, registers the
 * completion callback, and starts the playback loop if it is not running.
 */
const startStreamPlay = (onended: () => void | null) => {
  playQueue = [];
  currentOnended = onended;
  streamFinished = false;
  isStreamMode = true;

  if (isPlaying) {
    return;
  }
  isPlaying = true;
  // Fire and forget: the loop keeps itself going.
  void playNext();
};
|
||||||
|
|
||||||
|
/** Marks the stream as finished: no further chunks are accepted. */
const finishStreamPlay = () => {
  streamFinished = true;
};
|
||||||
|
|
||||||
|
const stop = async () => {
|
||||||
|
console.log("[TTSPlayer] stop");
|
||||||
|
playQueue = [];
|
||||||
|
isPlaying = false;
|
||||||
|
isStreamMode = false;
|
||||||
|
streamFinished = true;
|
||||||
|
currentOnended = null;
|
||||||
|
|
||||||
if (audioBufferSourceNode) {
|
if (audioBufferSourceNode) {
|
||||||
audioBufferSourceNode.stop();
|
audioBufferSourceNode.stop();
|
||||||
audioBufferSourceNode.disconnect();
|
audioBufferSourceNode.disconnect();
|
||||||
audioBufferSourceNode = null;
|
audioBufferSourceNode = null;
|
||||||
}
|
}
|
||||||
if (audioContext) {
|
if (audioContext) {
|
||||||
audioContext.close();
|
await audioContext.close();
|
||||||
audioContext = null;
|
audioContext = null;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
return { init, play, stop };
|
return {
|
||||||
|
init,
|
||||||
|
play,
|
||||||
|
playQueue: playQueueMethod,
|
||||||
|
addToQueue,
|
||||||
|
startStreamPlay,
|
||||||
|
finishStreamPlay,
|
||||||
|
stop,
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
@ -93,5 +93,9 @@
|
|||||||
"resolutions": {
|
"resolutions": {
|
||||||
"lint-staged/yaml": "^2.2.2"
|
"lint-staged/yaml": "^2.2.2"
|
||||||
},
|
},
|
||||||
"packageManager": "yarn@1.22.19"
|
"packageManager": "yarn@1.22.19",
|
||||||
|
"volta": {
|
||||||
|
"node": "20.19.4",
|
||||||
|
"yarn": "1.22.22"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user