feat: 增强音频播放管理，新增 TTSPlayManager 类，优化流式语音合成逻辑，支持将 PCM 数据及其 base64 编码转换为 AudioBuffer

2026-02-11 08:54:29 +08:00 · 2025-08-21 13:47:58 +08:00
parent 16c3255e99
commit bf999b91a5
4 changed files with 266 additions and 184 deletions
--- a/app/utils/audio.ts
+++ b/app/utils/audio.ts
@@ -4,157 +4,287 @@ type TTSPlayer = {
    audioBuffer: ArrayBuffer | AudioBuffer,
    onended: () => void | null,
  ) => Promise<void>;
-  playQueue: (
+  playQueueMethod: (
    audioBuffers: (ArrayBuffer | AudioBuffer)[],
    onended: () => void | null,
  ) => Promise<void>;
  addToQueue: (audioBuffer: ArrayBuffer | AudioBuffer) => void;
  startStreamPlay: (onended: () => void | null) => void;
  finishStreamPlay: () => void;
+  setStreamController: (controller: AbortController) => void;
+  clearStreamController: () => void;
  stop: () => void;
+  pcmBase64ToAudioBuffer: (
+    base64Data: string,
+    config?: PCMConfig,
+  ) => Promise<AudioBuffer>;
+  pcmDataToAudioBuffer: (
+    pcmData: Uint8Array,
+    config?: PCMConfig,
+  ) => Promise<AudioBuffer>;
 };

-export function createTTSPlayer(): TTSPlayer {
-  let audioContext: AudioContext | null = null;
-  let audioBufferSourceNode: AudioBufferSourceNode | null = null;
-  let isPlaying = false;
-  let playQueue: (ArrayBuffer | AudioBuffer)[] = [];
-  let currentOnended: (() => void | null) | null = null;
-  let isStreamMode = false;
-  let streamFinished = false;
+// Audio processing utilities
+export interface PCMConfig {
+  channels?: number;
+  sampleRate?: number;
+  bitDepth?: 16 | 24 | 32;
+}

-  const init = () => {
-    console.log("[TTSPlayer] init");
-    audioContext = new (window.AudioContext || window.webkitAudioContext)();
-    audioContext.suspend();
-  };
+export class TTSPlayManager implements TTSPlayer {
+  private static audioContext: AudioContext | null = null;
+  private audioBufferSourceNode: AudioBufferSourceNode | null = null;
+  private isPlaying = false;
+  private playQueue: (ArrayBuffer | AudioBuffer)[] = [];
+  private currentOnended: (() => void | null) | null = null;
+  private isStreamMode = false;
+  private streamFinished = false;
+  private streamController: AbortController | null = null;

-  const play = async (
+  get getAudioContext() {
+    if (!TTSPlayManager.audioContext) {
+      TTSPlayManager.audioContext = new (window.AudioContext ||
+        window.webkitAudioContext)();
+    }
+    return TTSPlayManager.audioContext;
+  }
+
+  init() {
+    console.log("[TTSPlayManager] init");
+    if (TTSPlayManager.audioContext) {
+      return;
+    }
+    this.getAudioContext.suspend();
+  }
+
+  async play(
    audioBuffer: ArrayBuffer | AudioBuffer,
    onended: () => void | null,
-  ) => {
-    if (audioBufferSourceNode) {
-      audioBufferSourceNode.stop();
-      audioBufferSourceNode.disconnect();
+  ) {
+    if (this.audioBufferSourceNode) {
+      this.audioBufferSourceNode.stop();
+      this.audioBufferSourceNode.disconnect();
    }
    let buffer: AudioBuffer;
    if (audioBuffer instanceof AudioBuffer) {
      buffer = audioBuffer;
    } else {
-      buffer = await audioContext!.decodeAudioData(audioBuffer);
+      buffer = await TTSPlayManager.audioContext!.decodeAudioData(audioBuffer);
    }
-    audioBufferSourceNode = audioContext!.createBufferSource();
-    audioBufferSourceNode.buffer = buffer;
-    audioBufferSourceNode.connect(audioContext!.destination);
-    audioContext!.resume().then(() => {
-      audioBufferSourceNode!.start();
+    this.audioBufferSourceNode =
+      TTSPlayManager.audioContext!.createBufferSource();
+    this.audioBufferSourceNode.buffer = buffer;
+    this.audioBufferSourceNode.connect(
+      TTSPlayManager.audioContext!.destination,
+    );
+    this.getAudioContext.resume().then(() => {
+      this.audioBufferSourceNode!.start();
    });
-    audioBufferSourceNode.onended = onended;
-  };
+    this.audioBufferSourceNode.onended = onended;
+  }

-  const playNext = async () => {
-    if (playQueue.length === 0) {
+  async stop() {
+    console.log("[TTSPlayer] stop");
+
+    // 首先中断流式请求
+    try {
+      if (this.streamController && !this.streamController.signal.aborted) {
+        console.log("[TTSPlayer] Aborting stream request");
+        this.streamController.abort();
+      }
+    } catch (e) {
+      // 忽略中断请求时的错误
+      console.log("[TTSPlayer] Error while aborting stream:", e);
+    }
+    this.clearStreamController();
+
+    // 清理播放状态
+    this.playQueue = [];
+    this.isPlaying = false;
+    this.isStreamMode = false;
+    this.streamFinished = true;
+    this.currentOnended = null;
+
+    // 停止音频播放
+    if (this.audioBufferSourceNode) {
+      this.audioBufferSourceNode.stop();
+      this.audioBufferSourceNode.disconnect();
+      this.audioBufferSourceNode = null;
+    }
+
+    // 关闭音频上下文
+    if (TTSPlayManager.audioContext) {
+      await TTSPlayManager.audioContext.close();
+      TTSPlayManager.audioContext = null;
+    }
+  }
+
+  async playNext() {
+    if (this.playQueue.length === 0) {
      // 在流模式下，如果队列为空但流还没结束，等待
-      if (isStreamMode && !streamFinished) {
-        setTimeout(() => playNext(), 100);
+      if (this.isStreamMode && !this.streamFinished) {
+        setTimeout(() => this.playNext(), 100);
        return;
      }

-      isPlaying = false;
-      isStreamMode = false;
-      streamFinished = false;
-      if (currentOnended) {
-        currentOnended();
-        currentOnended = null;
+      this.isPlaying = false;
+      this.isStreamMode = false;
+      this.streamFinished = false;
+      if (this.currentOnended) {
+        this.currentOnended();
+        this.currentOnended = null;
      }
      return;
    }

-    const nextBuffer = playQueue.shift()!;
+    const nextBuffer = this.playQueue.shift()!;
    let buffer: AudioBuffer;
    if (nextBuffer instanceof AudioBuffer) {
      buffer = nextBuffer;
    } else {
-      buffer = await audioContext!.decodeAudioData(nextBuffer);
+      buffer = await this.getAudioContext.decodeAudioData(nextBuffer);
    }

-    if (audioBufferSourceNode) {
-      audioBufferSourceNode.stop();
-      audioBufferSourceNode.disconnect();
+    if (this.audioBufferSourceNode) {
+      this.audioBufferSourceNode.stop();
+      this.audioBufferSourceNode.disconnect();
    }

-    audioBufferSourceNode = audioContext!.createBufferSource();
-    audioBufferSourceNode.buffer = buffer;
-    audioBufferSourceNode.connect(audioContext!.destination);
-    audioBufferSourceNode.onended = () => {
-      playNext();
+    this.audioBufferSourceNode = this.getAudioContext.createBufferSource();
+    this.audioBufferSourceNode.buffer = buffer;
+    this.audioBufferSourceNode.connect(this.getAudioContext.destination);
+    this.audioBufferSourceNode.onended = () => {
+      this.playNext();
    };

-    await audioContext!.resume();
-    audioBufferSourceNode.start();
-  };
+    await this.getAudioContext.resume();
+    this.audioBufferSourceNode.start();
+  }

-  const playQueueMethod = async (
+  async playQueueMethod(
    audioBuffers: (ArrayBuffer | AudioBuffer)[],
    onended: () => void | null,
-  ) => {
-    playQueue = [...audioBuffers];
-    currentOnended = onended;
-    if (!isPlaying) {
-      isPlaying = true;
-      await playNext();
+  ) {
+    this.playQueue = [...audioBuffers];
+    this.currentOnended = onended;
+    if (!this.isPlaying) {
+      this.isPlaying = true;
+      await this.playNext();
    }
-  };
+  }

-  const addToQueue = (audioBuffer: ArrayBuffer | AudioBuffer) => {
-    if (streamFinished) {
+  addToQueue(audioBuffer: ArrayBuffer | AudioBuffer) {
+    if (this.streamFinished) {
      return;
    }
-    playQueue.push(audioBuffer);
-  };
+    this.playQueue.push(audioBuffer);
+  }

-  const startStreamPlay = (onended: () => void | null) => {
-    isStreamMode = true;
-    streamFinished = false;
-    playQueue = [];
-    currentOnended = onended;
-
-    if (!isPlaying) {
-      isPlaying = true;
-      playNext();
+  startStreamPlay(onended: () => void | null) {
+    this.isStreamMode = true;
+    this.streamFinished = false;
+    this.playQueue = [];
+    this.currentOnended = onended;
+    if (!this.isPlaying) {
+      this.isPlaying = true;
+      this.playNext();
    }
-  };
+  }

-  const finishStreamPlay = () => {
-    streamFinished = true;
-  };
+  finishStreamPlay() {
+    this.streamFinished = true;
+  }

-  const stop = async () => {
-    console.log("[TTSPlayer] stop");
-    playQueue = [];
-    isPlaying = false;
-    isStreamMode = false;
-    streamFinished = true;
-    currentOnended = null;
+  // 设置流式请求控制器，用于在 stop 时中断请求
+  setStreamController(controller: AbortController) {
+    this.streamController = controller;
+  }

-    if (audioBufferSourceNode) {
-      audioBufferSourceNode.stop();
-      audioBufferSourceNode.disconnect();
-      audioBufferSourceNode = null;
+  // 清除流式请求控制器
+  clearStreamController() {
+    this.streamController = null;
+  }
+
+  // 将 base64 PCM 数据转换为 AudioBuffer
+  async pcmBase64ToAudioBuffer(
+    base64Data: string,
+    config: PCMConfig = {},
+  ): Promise<AudioBuffer> {
+    try {
+      // 解码 base64
+      const binaryString = atob(base64Data);
+      const bytes = new Uint8Array(binaryString.length);
+      for (let i = 0; i < binaryString.length; i++) {
+        bytes[i] = binaryString.charCodeAt(i);
+      }
+
+      // 转换为 AudioBuffer
+      return await this.pcmDataToAudioBuffer(bytes, config);
+    } catch (error) {
+      console.error("Failed to convert PCM base64 to AudioBuffer:", error);
+      throw error;
    }
-    if (audioContext) {
-      await audioContext.close();
-      audioContext = null;
-    }
-  };
+  }

-  return {
-    init,
-    play,
-    playQueue: playQueueMethod,
-    addToQueue,
-    startStreamPlay,
-    finishStreamPlay,
-    stop,
-  };
+  // 将 PCM 字节数据转换为 AudioBuffer
+  async pcmDataToAudioBuffer(
+    pcmData: Uint8Array,
+    config: PCMConfig = {},
+  ): Promise<AudioBuffer> {
+    const { channels = 1, sampleRate = 24000, bitDepth = 16 } = config;
+
+    const audioContext = this.getAudioContext;
+
+    return new Promise<AudioBuffer>((resolve, reject) => {
+      try {
+        let float32Array: Float32Array;
+
+        // 根据位深度选择转换方法
+        switch (bitDepth) {
+          case 16:
+            float32Array = this.pcm16ToFloat32(pcmData);
+            break;
+          default:
+            throw new Error(`Unsupported bit depth: ${bitDepth}`);
+        }
+
+        // 创建 AudioBuffer
+        const audioBuffer = audioContext.createBuffer(
+          channels,
+          float32Array.length / channels,
+          sampleRate,
+        );
+
+        // 复制数据到 AudioBuffer
+        for (let channel = 0; channel < channels; channel++) {
+          const channelData = audioBuffer.getChannelData(channel);
+          for (let i = 0; i < channelData.length; i++) {
+            channelData[i] = float32Array[i * channels + channel];
+          }
+        }
+
+        resolve(audioBuffer);
+      } catch (error) {
+        reject(error);
+      }
+    });
+  }
+
+  // 16位 PCM 转 32位浮点数
+  pcm16ToFloat32(pcmData: Uint8Array): Float32Array {
+    const length = pcmData.length / 2;
+    const float32Array = new Float32Array(length);
+
+    for (let i = 0; i < length; i++) {
+      const int16 = (pcmData[i * 2 + 1] << 8) | pcmData[i * 2];
+      const int16Signed = int16 > 32767 ? int16 - 65536 : int16;
+      float32Array[i] = int16Signed / 32768;
+    }
+
+    return float32Array;
+  }
+}
+
+export function createTTSPlayer(): TTSPlayManager {
+  return new TTSPlayManager();
 }