mirror of https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web.git (synced 2025-09-20 10:16:38 +08:00)

Compare commits: 462d68750b...33f8cac264 (8 commits)
Commits (newest first):
  33f8cac264
  bf999b91a5
  995bef73de
  38ac502d80
  0511808900
  42eff644b4
  8ae6883784
  c0f2ab6de3
@@ -25,6 +25,7 @@ import { XAIApi } from "./platforms/xai";
 import { ChatGLMApi } from "./platforms/glm";
 import { SiliconflowApi } from "./platforms/siliconflow";
 import { Ai302Api } from "./platforms/ai302";
+import type { TTSPlayManager } from "../utils/audio";
 
 export const ROLES = ["system", "user", "assistant"] as const;
 export type MessageRole = (typeof ROLES)[number];
@@ -108,7 +109,10 @@ export interface LLMModelProvider {
 export abstract class LLMApi {
   abstract chat(options: ChatOptions): Promise<void>;
   abstract speech(options: SpeechOptions): Promise<ArrayBuffer | AudioBuffer>;
-  abstract streamSpeech?(options: SpeechOptions): AsyncGenerator<AudioBuffer>;
+  abstract streamSpeech?(
+    options: SpeechOptions,
+    audioManager?: TTSPlayManager,
+  ): AsyncGenerator<AudioBuffer>;
   abstract usage(): Promise<LLMUsage>;
   abstract models(): Promise<LLMModel[]>;
 }
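Note that streamSpeech stays optional on LLMApi, so providers without streaming TTS compile unchanged and callers must feature-detect before iterating. A minimal caller-side sketch of that guard (the speak helper and its option values are illustrative, not part of the diff; it assumes SpeechOptions carries the model/input/voice/speed fields seen in the chat component hunk below):

// Hypothetical helper: prefer streaming TTS when the provider implements it,
// otherwise fall back to the one-shot speech() call.
async function speak(llm: LLMApi, player: TTSPlayManager, text: string) {
  const options = { model: "tts-1", input: text, voice: "alloy", speed: 1.0 };
  if (llm.streamSpeech) {
    player.startStreamPlay(() => console.log("[TTS] playback finished"));
    for await (const chunk of llm.streamSpeech(options as SpeechOptions, player)) {
      player.addToQueue(chunk); // chunks arrive as already-decoded AudioBuffers
    }
    player.finishStreamPlay();
  } else {
    const audio = await llm.speech(options as SpeechOptions); // ArrayBuffer | AudioBuffer
    await player.play(audio, () => console.log("[TTS] playback finished"));
  }
}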
@@ -8,6 +8,7 @@ import {
   usePluginStore,
   FunctionToolItem,
 } from "@/app/store";
+import { TTSPlayManager } from "@/app/utils/audio";
 import {
   preProcessImageContentForAlibabaDashScope,
   streamWithThink,
@@ -62,7 +63,6 @@ interface RequestPayload {
 }
 
 export class QwenApi implements LLMApi {
-  private static audioContext: AudioContext | null = null;
   path(path: string): string {
     const accessStore = useAccessStore.getState();
@@ -97,7 +97,10 @@ export class QwenApi implements LLMApi {
     throw new Error("Method not implemented.");
   }
 
-  async *streamSpeech(options: SpeechOptions): AsyncGenerator<AudioBuffer> {
+  async *streamSpeech(
+    options: SpeechOptions,
+    audioManager?: TTSPlayManager,
+  ): AsyncGenerator<AudioBuffer> {
     if (!options.input || !options.model) {
       throw new Error("Missing required parameters: input and model");
     }
@@ -112,6 +115,10 @@ export class QwenApi implements LLMApi {
     };
     const controller = new AbortController();
     options.onController?.(controller);
+
+    if (audioManager) {
+      audioManager.setStreamController(controller);
+    }
     try {
       const speechPath = this.path(Alibaba.SpeechPath);
       const speechPayload = {
@@ -151,7 +158,10 @@ export class QwenApi implements LLMApi {
           if (line.startsWith("data:")) {
             const json = JSON.parse(data);
             if (json.output?.audio?.data) {
-              yield this.PCMBase64ToAudioBuffer(json.output.audio.data);
+              yield await audioManager!.pcmBase64ToAudioBuffer(
+                json.output.audio.data,
+                { channels: 1, sampleRate: 24000, bitDepth: 16 },
+              );
             }
           }
         } catch (parseError) {
@@ -165,8 +175,17 @@ export class QwenApi implements LLMApi {
       }
       reader.releaseLock();
     } catch (e) {
+      // if the user cancelled the request (AbortError), don't treat it as an error
+      if (e instanceof Error && e.name === "AbortError") {
+        console.log("[Request] Stream speech was aborted by user");
+        return; // exit normally without throwing
+      }
       console.log("[Request] failed to make a speech request", e);
       throw e;
+    } finally {
+      if (audioManager) {
+        audioManager.clearStreamController();
+      }
     }
   }
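This hunk is one half of a controller handshake: the provider registers its AbortController with the player, the player's stop() aborts it, and the generator exits cleanly through the AbortError branch while the finally block always detaches the controller. The same pattern in isolation, as a minimal sketch with hypothetical names (StreamOwner and streamChunks are not from the diff):

// Minimal sketch of the abort handshake between a player and a streaming request.
class StreamOwner {
  private controller: AbortController | null = null;
  setStreamController(c: AbortController) { this.controller = c; }
  clearStreamController() { this.controller = null; }
  stop() { this.controller?.abort(); } // user-initiated cancel
}

async function* streamChunks(owner: StreamOwner, url: string) {
  const controller = new AbortController();
  owner.setStreamController(controller);
  try {
    const res = await fetch(url, { signal: controller.signal });
    // ... read res.body and yield decoded chunks ...
  } catch (e) {
    if (e instanceof Error && e.name === "AbortError") return; // clean exit on cancel
    throw e;
  } finally {
    owner.clearStreamController(); // always detach, success or failure
  }
}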
@@ -356,79 +375,5 @@ export class QwenApi implements LLMApi {
   async models(): Promise<LLMModel[]> {
     return [];
   }
-
-  // play PCM base64 data
-  private async PCMBase64ToAudioBuffer(base64Data: string) {
-    try {
-      // decode base64
-      const binaryString = atob(base64Data);
-      const bytes = new Uint8Array(binaryString.length);
-      for (let i = 0; i < binaryString.length; i++) {
-        bytes[i] = binaryString.charCodeAt(i);
-      }
-
-      // convert to an AudioBuffer
-      const audioBuffer = await this.convertToAudioBuffer(bytes);
-
-      return audioBuffer;
-    } catch (error) {
-      console.error("Failed to play PCM data:", error);
-      throw error;
-    }
-  }
-
-  private static getAudioContext(): AudioContext {
-    if (!QwenApi.audioContext) {
-      QwenApi.audioContext = new (window.AudioContext ||
-        window.webkitAudioContext)();
-    }
-    return QwenApi.audioContext;
-  }
-
-  // convert raw PCM bytes to an AudioBuffer
-  private convertToAudioBuffer(pcmData: Uint8Array) {
-    const audioContext = QwenApi.getAudioContext();
-    const channels = 1;
-    const sampleRate = 24000;
-    return new Promise<AudioBuffer>((resolve, reject) => {
-      try {
-        let float32Array;
-        // convert 16-bit PCM to 32-bit float
-        float32Array = this.pcm16ToFloat32(pcmData);
-
-        // create the AudioBuffer
-        const audioBuffer = audioContext.createBuffer(
-          channels,
-          float32Array.length / channels,
-          sampleRate,
-        );
-
-        // copy the samples into the AudioBuffer
-        for (let channel = 0; channel < channels; channel++) {
-          const channelData = audioBuffer.getChannelData(channel);
-          for (let i = 0; i < channelData.length; i++) {
-            channelData[i] = float32Array[i * channels + channel];
-          }
-        }
-
-        resolve(audioBuffer);
-      } catch (error) {
-        reject(error);
-      }
-    });
-  }
-
-  // 16-bit PCM to 32-bit float
-  private pcm16ToFloat32(pcmData: Uint8Array) {
-    const length = pcmData.length / 2;
-    const float32Array = new Float32Array(length);
-
-    for (let i = 0; i < length; i++) {
-      const int16 = (pcmData[i * 2 + 1] << 8) | pcmData[i * 2];
-      const int16Signed = int16 > 32767 ? int16 - 65536 : int16;
-      float32Array[i] = int16Signed / 32768;
-    }
-
-    return float32Array;
-  }
 }
 export { Alibaba };
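These deleted helpers are not lost: judging by the new import path, equivalent PCM conversion logic reappears as shared methods on TTSPlayManager in app/utils/audio (see the final hunk below), so any provider that streams raw PCM can reuse one AudioContext and one decoder instead of keeping its own.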
@@ -200,6 +200,7 @@ export class ChatGPTApi implements LLMApi {
       options.config.model.startsWith("o1") ||
       options.config.model.startsWith("o3") ||
       options.config.model.startsWith("o4-mini");
+    const isGpt5 = options.config.model.startsWith("gpt-5");
     if (isDalle3) {
       const prompt = getMessageTextContent(
         options.messages.slice(-1)?.pop() as any,
@@ -230,7 +231,7 @@ export class ChatGPTApi implements LLMApi {
       messages,
       stream: options.config.stream,
       model: modelConfig.model,
-      temperature: !isO1OrO3 ? modelConfig.temperature : 1,
+      temperature: (!isO1OrO3 && !isGpt5) ? modelConfig.temperature : 1,
       presence_penalty: !isO1OrO3 ? modelConfig.presence_penalty : 0,
       frequency_penalty: !isO1OrO3 ? modelConfig.frequency_penalty : 0,
       top_p: !isO1OrO3 ? modelConfig.top_p : 1,
@@ -238,7 +239,13 @@ export class ChatGPTApi implements LLMApi {
       // Please do not ask me why not send max_tokens, no reason, this param is just shit, I dont want to explain anymore.
     };
 
-    if (isO1OrO3) {
+    if (isGpt5) {
+      // remove max_tokens if present
+      delete requestPayload.max_tokens;
+      // gpt-5 expects max_completion_tokens instead
+      requestPayload["max_completion_tokens"] = modelConfig.max_tokens;
+
+    } else if (isO1OrO3) {
       // by default the o1/o3 models will not attempt to produce output that includes markdown formatting
       // manually add "Formatting re-enabled" developer message to encourage markdown inclusion in model responses
       // (https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/reasoning?tabs=python-secure#markdown-output)
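Read together with the temperature change above, gpt-5 requests end up shaped like the o-series ones: sampling pinned, token budget sent as max_completion_tokens. An illustrative before/after of the payload under that reading (field values made up):

// Illustrative payloads only; values are not from the diff.
const gpt4oPayload = {
  model: "gpt-4o",
  temperature: 0.7, // user-configured sampling survives
  max_tokens: 4000, // vision-model cap applied in the next hunk
};

const gpt5Payload = {
  model: "gpt-5",
  temperature: 1, // pinned, since (!isO1OrO3 && !isGpt5) is false
  max_completion_tokens: 4000, // replaces max_tokens for gpt-5
};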
@@ -251,8 +258,9 @@ export class ChatGPTApi implements LLMApi {
       requestPayload["max_completion_tokens"] = modelConfig.max_tokens;
     }
 
+
     // add max_tokens to vision model
-    if (visionModel && !isO1OrO3) {
+    if (visionModel && !isO1OrO3 && !isGpt5) {
       requestPayload["max_tokens"] = Math.max(modelConfig.max_tokens, 4000);
     }
   }
@@ -1340,12 +1340,15 @@ function _Chat() {
       });
 
       try {
-        for await (const chunk of api.llm.streamSpeech({
-          model: config.ttsConfig.model,
-          input: textContent,
-          voice: config.ttsConfig.voice,
-          speed: config.ttsConfig.speed,
-        })) {
+        for await (const chunk of api.llm.streamSpeech(
+          {
+            model: config.ttsConfig.model,
+            input: textContent,
+            voice: config.ttsConfig.voice,
+            speed: config.ttsConfig.speed,
+          },
+          ttsPlayer,
+        )) {
           ttsPlayer.addToQueue(chunk);
         }
         ttsPlayer.finishStreamPlay();
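Passing ttsPlayer as the second argument is what closes the loop: the provider registers its AbortController with the player (setStreamController above), so when the user stops playback, TTSPlayManager.stop() can abort the in-flight request instead of letting audio chunks keep arriving.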
@@ -524,6 +524,7 @@ export const VISION_MODEL_REGEXES = [
   /o3/,
   /o4-mini/,
   /grok-4/i,
+  /gpt-5/,
 ];
 
 export const EXCLUDE_VISION_MODEL_REGEXES = [/claude-3-5-haiku-20241022/];
@@ -548,6 +549,11 @@ const openaiModels = [
   "gpt-4.1-nano-2025-04-14",
   "gpt-4.5-preview",
   "gpt-4.5-preview-2025-02-27",
+  "gpt-5-chat",
+  "gpt-5-mini",
+  "gpt-5-nano",
+  "gpt-5",
+  "gpt-5-chat-2025-01-01-preview",
   "gpt-4o",
   "gpt-4o-2024-05-13",
   "gpt-4o-2024-08-06",
@@ -4,157 +4,287 @@ type TTSPlayer = {
     audioBuffer: ArrayBuffer | AudioBuffer,
     onended: () => void | null,
   ) => Promise<void>;
-  playQueue: (
+  playQueueMethod: (
     audioBuffers: (ArrayBuffer | AudioBuffer)[],
     onended: () => void | null,
   ) => Promise<void>;
   addToQueue: (audioBuffer: ArrayBuffer | AudioBuffer) => void;
   startStreamPlay: (onended: () => void | null) => void;
   finishStreamPlay: () => void;
+  setStreamController: (controller: AbortController) => void;
+  clearStreamController: () => void;
   stop: () => void;
+  pcmBase64ToAudioBuffer: (
+    base64Data: string,
+    config?: PCMConfig,
+  ) => Promise<AudioBuffer>;
+  pcmDataToAudioBuffer: (
+    pcmData: Uint8Array,
+    config?: PCMConfig,
+  ) => Promise<AudioBuffer>;
 };
 
-export function createTTSPlayer(): TTSPlayer {
-  let audioContext: AudioContext | null = null;
-  let audioBufferSourceNode: AudioBufferSourceNode | null = null;
-  let isPlaying = false;
-  let playQueue: (ArrayBuffer | AudioBuffer)[] = [];
-  let currentOnended: (() => void | null) | null = null;
-  let isStreamMode = false;
-  let streamFinished = false;
-
-  const init = () => {
-    console.log("[TTSPlayer] init");
-    audioContext = new (window.AudioContext || window.webkitAudioContext)();
-    audioContext.suspend();
-  };
-
-  const play = async (
-    audioBuffer: ArrayBuffer | AudioBuffer,
-    onended: () => void | null,
-  ) => {
-    if (audioBufferSourceNode) {
-      audioBufferSourceNode.stop();
-      audioBufferSourceNode.disconnect();
-    }
-    let buffer: AudioBuffer;
-    if (audioBuffer instanceof AudioBuffer) {
-      buffer = audioBuffer;
-    } else {
-      buffer = await audioContext!.decodeAudioData(audioBuffer);
-    }
-    audioBufferSourceNode = audioContext!.createBufferSource();
-    audioBufferSourceNode.buffer = buffer;
-    audioBufferSourceNode.connect(audioContext!.destination);
-    audioContext!.resume().then(() => {
-      audioBufferSourceNode!.start();
-    });
-    audioBufferSourceNode.onended = onended;
-  };
-
-  const playNext = async () => {
-    if (playQueue.length === 0) {
-      // in stream mode, if the queue is empty but the stream has not finished yet, wait
-      if (isStreamMode && !streamFinished) {
-        setTimeout(() => playNext(), 100);
-        return;
-      }
-      isPlaying = false;
-      isStreamMode = false;
-      streamFinished = false;
-      if (currentOnended) {
-        currentOnended();
-        currentOnended = null;
-      }
-      return;
-    }
-
-    const nextBuffer = playQueue.shift()!;
-    let buffer: AudioBuffer;
-    if (nextBuffer instanceof AudioBuffer) {
-      buffer = nextBuffer;
-    } else {
-      buffer = await audioContext!.decodeAudioData(nextBuffer);
-    }
-
-    if (audioBufferSourceNode) {
-      audioBufferSourceNode.stop();
-      audioBufferSourceNode.disconnect();
-    }
-
-    audioBufferSourceNode = audioContext!.createBufferSource();
-    audioBufferSourceNode.buffer = buffer;
-    audioBufferSourceNode.connect(audioContext!.destination);
-    audioBufferSourceNode.onended = () => {
-      playNext();
-    };
-
-    await audioContext!.resume();
-    audioBufferSourceNode.start();
-  };
-
-  const playQueueMethod = async (
-    audioBuffers: (ArrayBuffer | AudioBuffer)[],
-    onended: () => void | null,
-  ) => {
-    playQueue = [...audioBuffers];
-    currentOnended = onended;
-    if (!isPlaying) {
-      isPlaying = true;
-      await playNext();
-    }
-  };
-
-  const addToQueue = (audioBuffer: ArrayBuffer | AudioBuffer) => {
-    if (streamFinished) {
-      return;
-    }
-    playQueue.push(audioBuffer);
-  };
-
-  const startStreamPlay = (onended: () => void | null) => {
-    isStreamMode = true;
-    streamFinished = false;
-    playQueue = [];
-    currentOnended = onended;
-
-    if (!isPlaying) {
-      isPlaying = true;
-      playNext();
-    }
-  };
-
-  const finishStreamPlay = () => {
-    streamFinished = true;
-  };
-
-  const stop = async () => {
-    console.log("[TTSPlayer] stop");
-    playQueue = [];
-    isPlaying = false;
-    isStreamMode = false;
-    streamFinished = true;
-    currentOnended = null;
-
-    if (audioBufferSourceNode) {
-      audioBufferSourceNode.stop();
-      audioBufferSourceNode.disconnect();
-      audioBufferSourceNode = null;
-    }
-    if (audioContext) {
-      await audioContext.close();
-      audioContext = null;
-    }
-  };
-
-  return {
-    init,
-    play,
-    playQueue: playQueueMethod,
-    addToQueue,
-    startStreamPlay,
-    finishStreamPlay,
-    stop,
-  };
-}
+// Audio processing utilities
+export interface PCMConfig {
+  channels?: number;
+  sampleRate?: number;
+  bitDepth?: 16 | 24 | 32;
+}
+
+export class TTSPlayManager implements TTSPlayer {
+  private static audioContext: AudioContext | null = null;
+  private audioBufferSourceNode: AudioBufferSourceNode | null = null;
+  private isPlaying = false;
+  private playQueue: (ArrayBuffer | AudioBuffer)[] = [];
+  private currentOnended: (() => void | null) | null = null;
+  private isStreamMode = false;
+  private streamFinished = false;
+  private streamController: AbortController | null = null;
+
+  get getAudioContext() {
+    if (!TTSPlayManager.audioContext) {
+      TTSPlayManager.audioContext = new (window.AudioContext ||
+        window.webkitAudioContext)();
+    }
+    return TTSPlayManager.audioContext;
+  }
+
+  init() {
+    console.log("[TTSPlayManager] init");
+    if (TTSPlayManager.audioContext) {
+      return;
+    }
+    this.getAudioContext.suspend();
+  }
+
+  async play(
+    audioBuffer: ArrayBuffer | AudioBuffer,
+    onended: () => void | null,
+  ) {
+    if (this.audioBufferSourceNode) {
+      this.audioBufferSourceNode.stop();
+      this.audioBufferSourceNode.disconnect();
+    }
+    let buffer: AudioBuffer;
+    if (audioBuffer instanceof AudioBuffer) {
+      buffer = audioBuffer;
+    } else {
+      buffer = await TTSPlayManager.audioContext!.decodeAudioData(audioBuffer);
+    }
+    this.audioBufferSourceNode =
+      TTSPlayManager.audioContext!.createBufferSource();
+    this.audioBufferSourceNode.buffer = buffer;
+    this.audioBufferSourceNode.connect(
+      TTSPlayManager.audioContext!.destination,
+    );
+    this.getAudioContext.resume().then(() => {
+      this.audioBufferSourceNode!.start();
+    });
+    this.audioBufferSourceNode.onended = onended;
+  }
+
+  async stop() {
+    console.log("[TTSPlayer] stop");
+
+    // abort the streaming request first
+    try {
+      if (this.streamController && !this.streamController.signal.aborted) {
+        console.log("[TTSPlayer] Aborting stream request");
+        this.streamController.abort();
+      }
+    } catch (e) {
+      // ignore errors raised while aborting the request
+      console.log("[TTSPlayer] Error while aborting stream:", e);
+    }
+    this.clearStreamController();
+
+    // reset playback state
+    this.playQueue = [];
+    this.isPlaying = false;
+    this.isStreamMode = false;
+    this.streamFinished = true;
+    this.currentOnended = null;
+
+    // stop audio playback
+    if (this.audioBufferSourceNode) {
+      this.audioBufferSourceNode.stop();
+      this.audioBufferSourceNode.disconnect();
+      this.audioBufferSourceNode = null;
+    }
+
+    // close the audio context
+    if (TTSPlayManager.audioContext) {
+      await TTSPlayManager.audioContext.close();
+      TTSPlayManager.audioContext = null;
+    }
+  }
+
+  async playNext() {
+    if (this.playQueue.length === 0) {
+      // in stream mode, if the queue is empty but the stream has not finished yet, wait
+      if (this.isStreamMode && !this.streamFinished) {
+        setTimeout(() => this.playNext(), 100);
+        return;
+      }
+      this.isPlaying = false;
+      this.isStreamMode = false;
+      this.streamFinished = false;
+      if (this.currentOnended) {
+        this.currentOnended();
+        this.currentOnended = null;
+      }
+      return;
+    }
+
+    const nextBuffer = this.playQueue.shift()!;
+    let buffer: AudioBuffer;
+    if (nextBuffer instanceof AudioBuffer) {
+      buffer = nextBuffer;
+    } else {
+      buffer = await this.getAudioContext.decodeAudioData(nextBuffer);
+    }
+
+    if (this.audioBufferSourceNode) {
+      this.audioBufferSourceNode.stop();
+      this.audioBufferSourceNode.disconnect();
+    }
+
+    this.audioBufferSourceNode = this.getAudioContext.createBufferSource();
+    this.audioBufferSourceNode.buffer = buffer;
+    this.audioBufferSourceNode.connect(this.getAudioContext.destination);
+    this.audioBufferSourceNode.onended = () => {
+      this.playNext();
+    };
+
+    await this.getAudioContext.resume();
+    this.audioBufferSourceNode.start();
+  }
+
+  async playQueueMethod(
+    audioBuffers: (ArrayBuffer | AudioBuffer)[],
+    onended: () => void | null,
+  ) {
+    this.playQueue = [...audioBuffers];
+    this.currentOnended = onended;
+    if (!this.isPlaying) {
+      this.isPlaying = true;
+      await this.playNext();
+    }
+  }
+
+  addToQueue(audioBuffer: ArrayBuffer | AudioBuffer) {
+    if (this.streamFinished) {
+      return;
+    }
+    this.playQueue.push(audioBuffer);
+  }
+
+  startStreamPlay(onended: () => void | null) {
+    this.isStreamMode = true;
+    this.streamFinished = false;
+    this.playQueue = [];
+    this.currentOnended = onended;
+    if (!this.isPlaying) {
+      this.isPlaying = true;
+      this.playNext();
+    }
+  }
+
+  finishStreamPlay() {
+    this.streamFinished = true;
+  }
+
+  // register the stream request controller so stop() can abort the request
+  setStreamController(controller: AbortController) {
+    this.streamController = controller;
+  }
+
+  // clear the stream request controller
+  clearStreamController() {
+    this.streamController = null;
+  }
+
+  // convert base64-encoded PCM data to an AudioBuffer
+  async pcmBase64ToAudioBuffer(
+    base64Data: string,
+    config: PCMConfig = {},
+  ): Promise<AudioBuffer> {
+    try {
+      // decode base64
+      const binaryString = atob(base64Data);
+      const bytes = new Uint8Array(binaryString.length);
+      for (let i = 0; i < binaryString.length; i++) {
+        bytes[i] = binaryString.charCodeAt(i);
+      }
+      // convert to an AudioBuffer
+      return await this.pcmDataToAudioBuffer(bytes, config);
+    } catch (error) {
+      console.error("Failed to convert PCM base64 to AudioBuffer:", error);
+      throw error;
+    }
+  }
+
+  // convert raw PCM bytes to an AudioBuffer
+  async pcmDataToAudioBuffer(
+    pcmData: Uint8Array,
+    config: PCMConfig = {},
+  ): Promise<AudioBuffer> {
+    const { channels = 1, sampleRate = 24000, bitDepth = 16 } = config;
+
+    const audioContext = this.getAudioContext;
+
+    return new Promise<AudioBuffer>((resolve, reject) => {
+      try {
+        let float32Array: Float32Array;
+
+        // choose the conversion routine by bit depth
+        switch (bitDepth) {
+          case 16:
+            float32Array = this.pcm16ToFloat32(pcmData);
+            break;
+          default:
+            throw new Error(`Unsupported bit depth: ${bitDepth}`);
+        }
+
+        // create the AudioBuffer
+        const audioBuffer = audioContext.createBuffer(
+          channels,
+          float32Array.length / channels,
+          sampleRate,
+        );
+
+        // copy the samples into the AudioBuffer
+        for (let channel = 0; channel < channels; channel++) {
+          const channelData = audioBuffer.getChannelData(channel);
+          for (let i = 0; i < channelData.length; i++) {
+            channelData[i] = float32Array[i * channels + channel];
+          }
+        }
+
+        resolve(audioBuffer);
+      } catch (error) {
+        reject(error);
+      }
+    });
+  }
+
+  // 16-bit PCM to 32-bit float
+  pcm16ToFloat32(pcmData: Uint8Array): Float32Array {
+    const length = pcmData.length / 2;
+    const float32Array = new Float32Array(length);
+
+    for (let i = 0; i < length; i++) {
+      const int16 = (pcmData[i * 2 + 1] << 8) | pcmData[i * 2];
+      const int16Signed = int16 > 32767 ? int16 - 65536 : int16;
+      float32Array[i] = int16Signed / 32768;
+    }
+
+    return float32Array;
+  }
+}
+
+export function createTTSPlayer(): TTSPlayManager {
+  return new TTSPlayManager();
+}
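As a sanity check on pcm16ToFloat32's little-endian decoding, a small worked example (the sample bytes are made up; the format itself matches the { channels: 1, sampleRate: 24000, bitDepth: 16 } config the Alibaba provider passes above):

// Signed 16-bit little-endian mono PCM: each sample is two bytes, low byte first.
const pcm = new Uint8Array([0x00, 0x40, 0xff, 0xbf]); // two samples

// Sample 0: (0x40 << 8) | 0x00 = 16384; <= 32767, so already positive:
//   16384 / 32768 = 0.5
// Sample 1: (0xbf << 8) | 0xff = 49151; > 32767, so 49151 - 65536 = -16385:
//   -16385 / 32768 ≈ -0.50003
// new TTSPlayManager().pcm16ToFloat32(pcm) -> Float32Array [0.5, -0.50003...]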