mirror of
https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web.git
synced 2025-11-12 20:23:45 +08:00
feat: 增强音频播放管理,新增 TTSPlayManager 类,优化流式语音合成逻辑,支持 PCM 数据和 base64 转换
This commit is contained in:
@@ -4,157 +4,287 @@ type TTSPlayer = {
|
||||
audioBuffer: ArrayBuffer | AudioBuffer,
|
||||
onended: () => void | null,
|
||||
) => Promise<void>;
|
||||
playQueue: (
|
||||
playQueueMethod: (
|
||||
audioBuffers: (ArrayBuffer | AudioBuffer)[],
|
||||
onended: () => void | null,
|
||||
) => Promise<void>;
|
||||
addToQueue: (audioBuffer: ArrayBuffer | AudioBuffer) => void;
|
||||
startStreamPlay: (onended: () => void | null) => void;
|
||||
finishStreamPlay: () => void;
|
||||
setStreamController: (controller: AbortController) => void;
|
||||
clearStreamController: () => void;
|
||||
stop: () => void;
|
||||
pcmBase64ToAudioBuffer: (
|
||||
base64Data: string,
|
||||
config?: PCMConfig,
|
||||
) => Promise<AudioBuffer>;
|
||||
pcmDataToAudioBuffer: (
|
||||
pcmData: Uint8Array,
|
||||
config?: PCMConfig,
|
||||
) => Promise<AudioBuffer>;
|
||||
};
|
||||
|
||||
export function createTTSPlayer(): TTSPlayer {
|
||||
let audioContext: AudioContext | null = null;
|
||||
let audioBufferSourceNode: AudioBufferSourceNode | null = null;
|
||||
let isPlaying = false;
|
||||
let playQueue: (ArrayBuffer | AudioBuffer)[] = [];
|
||||
let currentOnended: (() => void | null) | null = null;
|
||||
let isStreamMode = false;
|
||||
let streamFinished = false;
|
||||
// Audio processing utilities
|
||||
/**
 * Describes the layout of a raw PCM byte stream handed to the player.
 *
 * Every field is optional. `pcmDataToAudioBuffer` in this file falls back to
 * 1 channel, 24000 Hz and 16-bit samples when a field is omitted.
 */
export interface PCMConfig {
  /** Number of interleaved channels in the PCM data. */
  channels?: number;
  /** Sample rate in Hz passed to `AudioContext.createBuffer`. */
  sampleRate?: number;
  /**
   * Bits per sample. The type admits 24 and 32, but the converter visible in
   * this file only implements 16 and throws "Unsupported bit depth" otherwise.
   */
  bitDepth?: 16 | 24 | 32;
}
|
||||
|
||||
const init = () => {
|
||||
console.log("[TTSPlayer] init");
|
||||
audioContext = new (window.AudioContext || window.webkitAudioContext)();
|
||||
audioContext.suspend();
|
||||
};
|
||||
export class TTSPlayManager implements TTSPlayer {
|
||||
private static audioContext: AudioContext | null = null;
|
||||
private audioBufferSourceNode: AudioBufferSourceNode | null = null;
|
||||
private isPlaying = false;
|
||||
private playQueue: (ArrayBuffer | AudioBuffer)[] = [];
|
||||
private currentOnended: (() => void | null) | null = null;
|
||||
private isStreamMode = false;
|
||||
private streamFinished = false;
|
||||
private streamController: AbortController | null = null;
|
||||
|
||||
const play = async (
|
||||
/**
 * Returns the AudioContext stored on the class, creating it on first access.
 *
 * The context lives in a static field, so every TTSPlayManager instance
 * reads and writes the same one.
 */
get getAudioContext() {
  const existing = TTSPlayManager.audioContext;
  if (existing) {
    return existing;
  }

  // Fall back to the prefixed constructor when the standard one is missing.
  const AudioContextCtor = window.AudioContext || window.webkitAudioContext;
  const created = new AudioContextCtor();
  TTSPlayManager.audioContext = created;
  return created;
}
|
||||
|
||||
/**
 * Creates the shared AudioContext if none exists yet and asks it to suspend.
 *
 * Does nothing beyond logging when a context is already present. The
 * playback methods in this file call `resume()` before starting a source.
 */
init() {
  console.log("[TTSPlayManager] init");
  if (TTSPlayManager.audioContext) {
    return;
  }

  // suspend() returns a promise; detach it explicitly and log a rejection
  // instead of leaving it unhandled.
  void this.getAudioContext.suspend().catch((e: unknown) => {
    console.log("[TTSPlayManager] Failed to suspend audio context:", e);
  });
}
|
||||
|
||||
async play(
|
||||
audioBuffer: ArrayBuffer | AudioBuffer,
|
||||
onended: () => void | null,
|
||||
) => {
|
||||
if (audioBufferSourceNode) {
|
||||
audioBufferSourceNode.stop();
|
||||
audioBufferSourceNode.disconnect();
|
||||
) {
|
||||
if (this.audioBufferSourceNode) {
|
||||
this.audioBufferSourceNode.stop();
|
||||
this.audioBufferSourceNode.disconnect();
|
||||
}
|
||||
let buffer: AudioBuffer;
|
||||
if (audioBuffer instanceof AudioBuffer) {
|
||||
buffer = audioBuffer;
|
||||
} else {
|
||||
buffer = await audioContext!.decodeAudioData(audioBuffer);
|
||||
buffer = await TTSPlayManager.audioContext!.decodeAudioData(audioBuffer);
|
||||
}
|
||||
audioBufferSourceNode = audioContext!.createBufferSource();
|
||||
audioBufferSourceNode.buffer = buffer;
|
||||
audioBufferSourceNode.connect(audioContext!.destination);
|
||||
audioContext!.resume().then(() => {
|
||||
audioBufferSourceNode!.start();
|
||||
this.audioBufferSourceNode =
|
||||
TTSPlayManager.audioContext!.createBufferSource();
|
||||
this.audioBufferSourceNode.buffer = buffer;
|
||||
this.audioBufferSourceNode.connect(
|
||||
TTSPlayManager.audioContext!.destination,
|
||||
);
|
||||
this.getAudioContext.resume().then(() => {
|
||||
this.audioBufferSourceNode!.start();
|
||||
});
|
||||
audioBufferSourceNode.onended = onended;
|
||||
};
|
||||
this.audioBufferSourceNode.onended = onended;
|
||||
}
|
||||
|
||||
const playNext = async () => {
|
||||
if (playQueue.length === 0) {
|
||||
/**
 * Stops playback and releases what this player holds.
 *
 * In order: aborts the registered stream request (if any), resets the queue
 * and stream flags, stops and disconnects the current source node, and
 * closes the shared AudioContext.
 *
 * @returns A promise that settles once the AudioContext has been closed.
 */
async stop() {
  console.log("[TTSPlayer] stop");

  // Abort the streaming request first.
  try {
    if (this.streamController && !this.streamController.signal.aborted) {
      console.log("[TTSPlayer] Aborting stream request");
      this.streamController.abort();
    }
  } catch (e) {
    // Errors raised while aborting are ignored on purpose (best effort).
    console.log("[TTSPlayer] Error while aborting stream:", e);
  }
  this.clearStreamController();

  // Reset playback state.
  this.playQueue = [];
  this.isPlaying = false;
  this.isStreamMode = false;
  this.streamFinished = true;
  this.currentOnended = null;

  // Stop audio playback. The field is cleared before touching the node so
  // the instance is left consistent even if the node misbehaves.
  const sourceNode = this.audioBufferSourceNode;
  this.audioBufferSourceNode = null;
  if (sourceNode) {
    try {
      sourceNode.stop();
    } catch (e) {
      // stop() throws InvalidStateError when start() has not been called
      // yet; that must not prevent the context below from being closed.
      console.log("[TTSPlayer] Error while stopping source node:", e);
    }
    sourceNode.disconnect();
  }

  // Close the audio context. The static reference is dropped before the
  // await so an overlapping stop() call does not close the same context twice.
  const context = TTSPlayManager.audioContext;
  TTSPlayManager.audioContext = null;
  if (context && context.state !== "closed") {
    await context.close();
  }
}
|
||||
|
||||
async playNext() {
|
||||
if (this.playQueue.length === 0) {
|
||||
// 在流模式下,如果队列为空但流还没结束,等待
|
||||
if (isStreamMode && !streamFinished) {
|
||||
setTimeout(() => playNext(), 100);
|
||||
if (this.isStreamMode && !this.streamFinished) {
|
||||
setTimeout(() => this.playNext(), 100);
|
||||
return;
|
||||
}
|
||||
|
||||
isPlaying = false;
|
||||
isStreamMode = false;
|
||||
streamFinished = false;
|
||||
if (currentOnended) {
|
||||
currentOnended();
|
||||
currentOnended = null;
|
||||
this.isPlaying = false;
|
||||
this.isStreamMode = false;
|
||||
this.streamFinished = false;
|
||||
if (this.currentOnended) {
|
||||
this.currentOnended();
|
||||
this.currentOnended = null;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
const nextBuffer = playQueue.shift()!;
|
||||
const nextBuffer = this.playQueue.shift()!;
|
||||
let buffer: AudioBuffer;
|
||||
if (nextBuffer instanceof AudioBuffer) {
|
||||
buffer = nextBuffer;
|
||||
} else {
|
||||
buffer = await audioContext!.decodeAudioData(nextBuffer);
|
||||
buffer = await this.getAudioContext.decodeAudioData(nextBuffer);
|
||||
}
|
||||
|
||||
if (audioBufferSourceNode) {
|
||||
audioBufferSourceNode.stop();
|
||||
audioBufferSourceNode.disconnect();
|
||||
if (this.audioBufferSourceNode) {
|
||||
this.audioBufferSourceNode.stop();
|
||||
this.audioBufferSourceNode.disconnect();
|
||||
}
|
||||
|
||||
audioBufferSourceNode = audioContext!.createBufferSource();
|
||||
audioBufferSourceNode.buffer = buffer;
|
||||
audioBufferSourceNode.connect(audioContext!.destination);
|
||||
audioBufferSourceNode.onended = () => {
|
||||
playNext();
|
||||
this.audioBufferSourceNode = this.getAudioContext.createBufferSource();
|
||||
this.audioBufferSourceNode.buffer = buffer;
|
||||
this.audioBufferSourceNode.connect(this.getAudioContext.destination);
|
||||
this.audioBufferSourceNode.onended = () => {
|
||||
this.playNext();
|
||||
};
|
||||
|
||||
await audioContext!.resume();
|
||||
audioBufferSourceNode.start();
|
||||
};
|
||||
await this.getAudioContext.resume();
|
||||
this.audioBufferSourceNode.start();
|
||||
}
|
||||
|
||||
const playQueueMethod = async (
|
||||
async playQueueMethod(
|
||||
audioBuffers: (ArrayBuffer | AudioBuffer)[],
|
||||
onended: () => void | null,
|
||||
) => {
|
||||
playQueue = [...audioBuffers];
|
||||
currentOnended = onended;
|
||||
if (!isPlaying) {
|
||||
isPlaying = true;
|
||||
await playNext();
|
||||
) {
|
||||
this.playQueue = [...audioBuffers];
|
||||
this.currentOnended = onended;
|
||||
if (!this.isPlaying) {
|
||||
this.isPlaying = true;
|
||||
await this.playNext();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
const addToQueue = (audioBuffer: ArrayBuffer | AudioBuffer) => {
|
||||
if (streamFinished) {
|
||||
addToQueue(audioBuffer: ArrayBuffer | AudioBuffer) {
|
||||
if (this.streamFinished) {
|
||||
return;
|
||||
}
|
||||
playQueue.push(audioBuffer);
|
||||
};
|
||||
this.playQueue.push(audioBuffer);
|
||||
}
|
||||
|
||||
const startStreamPlay = (onended: () => void | null) => {
|
||||
isStreamMode = true;
|
||||
streamFinished = false;
|
||||
playQueue = [];
|
||||
currentOnended = onended;
|
||||
|
||||
if (!isPlaying) {
|
||||
isPlaying = true;
|
||||
playNext();
|
||||
startStreamPlay(onended: () => void | null) {
|
||||
this.isStreamMode = true;
|
||||
this.streamFinished = false;
|
||||
this.playQueue = [];
|
||||
this.currentOnended = onended;
|
||||
if (!this.isPlaying) {
|
||||
this.isPlaying = true;
|
||||
this.playNext();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
const finishStreamPlay = () => {
|
||||
streamFinished = true;
|
||||
};
|
||||
/**
 * Marks the stream as finished. Once set, `playNext` stops polling an empty
 * queue and `addToQueue` ignores further buffers.
 */
finishStreamPlay() {
  this.streamFinished = true;
}
|
||||
|
||||
const stop = async () => {
|
||||
console.log("[TTSPlayer] stop");
|
||||
playQueue = [];
|
||||
isPlaying = false;
|
||||
isStreamMode = false;
|
||||
streamFinished = true;
|
||||
currentOnended = null;
|
||||
/**
 * Registers the AbortController of the streaming request so that `stop()`
 * can abort the request.
 */
setStreamController(controller: AbortController) {
  this.streamController = controller;
}
|
||||
|
||||
if (audioBufferSourceNode) {
|
||||
audioBufferSourceNode.stop();
|
||||
audioBufferSourceNode.disconnect();
|
||||
audioBufferSourceNode = null;
|
||||
/**
 * Forgets the registered streaming-request controller without aborting it.
 */
clearStreamController() {
  this.streamController = null;
}
|
||||
|
||||
// 将 base64 PCM 数据转换为 AudioBuffer
|
||||
async pcmBase64ToAudioBuffer(
|
||||
base64Data: string,
|
||||
config: PCMConfig = {},
|
||||
): Promise<AudioBuffer> {
|
||||
try {
|
||||
// 解码 base64
|
||||
const binaryString = atob(base64Data);
|
||||
const bytes = new Uint8Array(binaryString.length);
|
||||
for (let i = 0; i < binaryString.length; i++) {
|
||||
bytes[i] = binaryString.charCodeAt(i);
|
||||
}
|
||||
|
||||
// 转换为 AudioBuffer
|
||||
return await this.pcmDataToAudioBuffer(bytes, config);
|
||||
} catch (error) {
|
||||
console.error("Failed to convert PCM base64 to AudioBuffer:", error);
|
||||
throw error;
|
||||
}
|
||||
if (audioContext) {
|
||||
await audioContext.close();
|
||||
audioContext = null;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
return {
|
||||
init,
|
||||
play,
|
||||
playQueue: playQueueMethod,
|
||||
addToQueue,
|
||||
startStreamPlay,
|
||||
finishStreamPlay,
|
||||
stop,
|
||||
};
|
||||
/**
 * Converts raw PCM bytes into an AudioBuffer.
 *
 * @param pcmData Interleaved PCM sample bytes.
 * @param config  Stream layout; defaults to 1 channel, 24000 Hz, 16-bit.
 * @returns An AudioBuffer created on the shared AudioContext, with the
 *          interleaved samples split into one array per channel.
 * @throws Error when `bitDepth` is anything other than 16. Errors thrown by
 *         `createBuffer` propagate as a rejection.
 */
async pcmDataToAudioBuffer(
  pcmData: Uint8Array,
  config: PCMConfig = {},
): Promise<AudioBuffer> {
  const { channels = 1, sampleRate = 24000, bitDepth = 16 } = config;

  const audioContext = this.getAudioContext;

  // Only 16-bit samples have a converter; reject everything else.
  if (bitDepth !== 16) {
    throw new Error(`Unsupported bit depth: ${bitDepth}`);
  }
  const samples = this.pcm16ToFloat32(pcmData);

  // A trailing partial frame (sample count not divisible by the channel
  // count) is dropped explicitly rather than through implicit truncation.
  const frameCount = Math.floor(samples.length / channels);
  const audioBuffer = audioContext.createBuffer(
    channels,
    frameCount,
    sampleRate,
  );

  // De-interleave: sample `frame * channels + channel` belongs to `channel`.
  for (let channel = 0; channel < channels; channel++) {
    const channelData = audioBuffer.getChannelData(channel);
    for (let frame = 0; frame < frameCount; frame++) {
      channelData[frame] = samples[frame * channels + channel];
    }
  }

  return audioBuffer;
}
|
||||
|
||||
/**
 * Converts 16-bit little-endian signed PCM bytes into floating-point samples.
 *
 * @param pcmData Raw bytes, two per sample, low byte first.
 * @returns One float per complete sample, each equal to the signed 16-bit
 *          value divided by 32768. A trailing odd byte is not converted.
 */
pcm16ToFloat32(pcmData: Uint8Array): Float32Array {
  const sampleCount = Math.floor(pcmData.length / 2);
  const samples = new Float32Array(sampleCount);

  // Respect the view's offset: pcmData may be a window into a larger buffer.
  const view = new DataView(
    pcmData.buffer,
    pcmData.byteOffset,
    pcmData.byteLength,
  );

  for (let index = 0; index < sampleCount; index++) {
    // `true` selects little-endian, matching low byte first.
    samples[index] = view.getInt16(index * 2, true) / 32768;
  }

  return samples;
}
|
||||
}
|
||||
|
||||
/**
 * Factory for the TTS player.
 *
 * @returns A newly constructed TTSPlayManager.
 */
export function createTTSPlayer(): TTSPlayManager {
  const player = new TTSPlayManager();
  return player;
}
|
||||
|
||||
Reference in New Issue
Block a user