feat: add TTS engine config, update the Alibaba speech API, and support realtime speech synthesis

EvanWu
2025-07-30 21:30:49 +08:00
parent 557a2cce35
commit 9990a89698
9 changed files with 241 additions and 45 deletions
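For context: the streaming parser added below collects base64-encoded audio from DashScope SSE events. A minimal sketch of the event shape it expects, inferred from the parsing code in this commit (any field beyond output.audio.data is an assumption):

interface DashScopeTTSChunk {
  output: {
    audio: {
      // base64-encoded 16-bit little-endian PCM, mono, 24 kHz
      data: string;
    };
  };
}

Each data: line of the stream carries one such JSON object; the data chunks are concatenated and decoded once the stream ends.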


@@ -1,5 +1,5 @@
"use client";
-import { ApiPath, Alibaba, ALIBABA_BASE_URL } from "@/app/constant";
+import { ApiPath, Alibaba, ALIBABA_BASE_URL, REQUEST_TIMEOUT_MS } from "@/app/constant";
import {
useAccessStore,
useAppConfig,
@@ -89,8 +89,66 @@ export class QwenApi implements LLMApi {
return res?.output?.choices?.at(0)?.message?.content ?? "";
}
-speech(options: SpeechOptions): Promise<ArrayBuffer> {
-  throw new Error("Method not implemented.");
+async speech(options: SpeechOptions): Promise<AudioBuffer> {
+  const requestPayload = {
+    model: options.model,
+    input: {
+      text: options.input,
+      voice: options.voice,
+    },
+    speed: options.speed,
+    response_format: options.response_format,
+  };
+  console.log("[Request] alibaba speech payload: ", requestPayload);
+  const controller = new AbortController();
+  options.onController?.(controller);
+  try {
+    const speechPath = this.path(Alibaba.SpeechPath);
+    const speechPayload = {
+      method: "POST",
+      body: JSON.stringify(requestPayload),
+      signal: controller.signal,
+      headers: {
+        ...getHeaders(),
+        "X-DashScope-SSE": "enable",
+      },
+    };
+    // abort the request once it exceeds the configured timeout
+    const requestTimeoutId = setTimeout(
+      () => controller.abort(),
+      REQUEST_TIMEOUT_MS,
+    );
+    const res = await fetch(speechPath, speechPayload);
+    const reader = res.body!.getReader();
+    const decoder = new TextDecoder();
+    let buffer = "";
+    let base64 = "";
+    // read the SSE stream line by line, collecting the base64 audio chunks
+    while (true) {
+      const { done, value } = await reader.read();
+      if (done) break;
+      buffer += decoder.decode(value, { stream: true });
+      const lines = buffer.split("\n");
+      buffer = lines.pop() || "";
+      for (const line of lines) {
+        if (line.startsWith("data:")) {
+          const data = line.slice(5).trim();
+          if (!data) continue;
+          const json = JSON.parse(data);
+          base64 += json.output?.audio?.data ?? "";
+        }
+      }
+    }
+    // the network part is done; stop the timeout before local decoding
+    clearTimeout(requestTimeoutId);
+    reader.releaseLock();
+    return await this.PCMBase64ToAudioBuffer(base64);
+  } catch (e) {
+    console.error("[Request] failed to make a speech request", e);
+    throw e;
+  }
}
async chat(options: ChatOptions) {
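Because speech() hands its AbortController back through options.onController, a caller can cancel synthesis mid-stream. A minimal caller-side sketch, where api is a QwenApi instance and the model and voice values are hypothetical:

let ttsController: AbortController | null = null;
const audio = await api.speech({
  model: "qwen-tts", // assumed model name
  input: "Hello, world",
  voice: "Cherry", // assumed voice id
  speed: 1,
  response_format: "pcm",
  onController: (c) => (ttsController = c),
});
// later, e.g. when the user presses stop:
ttsController?.abort();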
@@ -273,5 +331,71 @@ export class QwenApi implements LLMApi {
async models(): Promise<LLMModel[]> {
return [];
}
+// decode a base64-encoded PCM payload into an AudioBuffer
+private async PCMBase64ToAudioBuffer(base64Data: string) {
+  try {
+    // decode base64 into raw bytes
+    const binaryString = atob(base64Data);
+    const bytes = new Uint8Array(binaryString.length);
+    for (let i = 0; i < binaryString.length; i++) {
+      bytes[i] = binaryString.charCodeAt(i);
+    }
+    // convert the raw PCM bytes into an AudioBuffer
+    return await this.convertToAudioBuffer(bytes);
+  } catch (error) {
+    console.error("Failed to decode PCM data:", error);
+    throw error;
+  }
+}
+// convert raw 16-bit PCM bytes (mono, 24 kHz) into an AudioBuffer
+private convertToAudioBuffer(pcmData: Uint8Array) {
+  const audioContext = new (window.AudioContext ||
+    (window as any).webkitAudioContext)();
+  const channels = 1;
+  const sampleRate = 24000;
+  return new Promise<AudioBuffer>((resolve, reject) => {
+    try {
+      // 16-bit PCM samples to 32-bit floats
+      const float32Array = this.pcm16ToFloat32(pcmData);
+      // create the AudioBuffer and copy the samples into it
+      const audioBuffer = audioContext.createBuffer(
+        channels,
+        float32Array.length / channels,
+        sampleRate,
+      );
+      for (let channel = 0; channel < channels; channel++) {
+        const channelData = audioBuffer.getChannelData(channel);
+        for (let i = 0; i < channelData.length; i++) {
+          channelData[i] = float32Array[i * channels + channel];
+        }
+      }
+      resolve(audioBuffer);
+    } catch (error) {
+      reject(error);
+    }
+  });
+}
+// convert 16-bit little-endian PCM samples to floats in [-1, 1)
+private pcm16ToFloat32(pcmData: Uint8Array) {
+  const length = pcmData.length / 2;
+  const float32Array = new Float32Array(length);
+  for (let i = 0; i < length; i++) {
+    // assemble the little-endian sample, then sign-extend it
+    const int16 = (pcmData[i * 2 + 1] << 8) | pcmData[i * 2];
+    const int16Signed = int16 > 32767 ? int16 - 65536 : int16;
+    float32Array[i] = int16Signed / 32768;
+  }
+  return float32Array;
+}
}
export { Alibaba };
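The AudioBuffer that speech() resolves to can be played directly with the Web Audio API. A minimal sketch, not part of this commit (api and speechOptions are assumed):

const ctx = new AudioContext();
const source = ctx.createBufferSource();
source.buffer = await api.speech(speechOptions);
source.connect(ctx.destination);
source.start();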