feat: support openai tts

2025-11-13 12:43:42 +08:00 · 2024-03-03 15:15:23 +08:00
parent d2733a9128
commit c3656609ab
17 changed files with 475 additions and 20 deletions
--- a/app/client/api.ts
+++ b/app/client/api.ts
@@ -13,6 +13,7 @@ export const ROLES = ["system", "user", "assistant"] as const;
 export type MessageRole = (typeof ROLES)[number];

 export const Models = ["gpt-3.5-turbo", "gpt-4"] as const;
+// Text-to-speech model identifiers.
+export const TTSModels = ["tts-1", "tts-1-hd"] as const;
 export type ChatModel = ModelType;

 export interface RequestMessage {
@@ -36,6 +37,15 @@ export interface LLMAgentConfig {
  useTools?: (string | undefined)[];
 }

+/**
+ * Options for a text-to-speech request made through `LLMApi.speech`.
+ *
+ * In the OpenAI client, `model`, `input`, `voice`, `response_format` and
+ * `speed` are copied unchanged into the JSON request body.
+ */
+export interface SpeechOptions {
+  model: string;
+  input: string;
+  voice: string;
+  response_format?: string;
+  speed?: number;
+  /**
+   * Receives the AbortController created for the request (the OpenAI client
+   * calls it before issuing the request), so the caller can cancel it.
+   */
+  onController?: (controller: AbortController) => void;
+}
+
 export interface ChatOptions {
  messages: RequestMessage[];
  config: LLMConfig;
@@ -76,6 +86,7 @@ export interface LLMModelProvider {

 export abstract class LLMApi {
  abstract chat(options: ChatOptions): Promise<void>;
+  abstract speech(options: SpeechOptions): Promise<ArrayBuffer>;
  abstract toolAgentChat(options: AgentChatOptions): Promise<void>;
  abstract usage(): Promise<LLMUsage>;
  abstract models(): Promise<LLMModel[]>;
--- a/app/client/platforms/google.ts
+++ b/app/client/platforms/google.ts
@@ -11,6 +11,7 @@ import {
  LLMApi,
  LLMModel,
  LLMUsage,
+  SpeechOptions,
 } from "../api";
 import { useAccessStore, useAppConfig, useChatStore } from "@/app/store";
 import axios from "axios";
@@ -22,6 +23,9 @@ const getImageBase64Data = async (url: string) => {
 };

 export class GeminiProApi implements LLMApi {
+  /**
+   * Text-to-speech is not implemented for this provider; always throws.
+   *
+   * NOTE(review): the method is not `async`, so the error is thrown
+   * synchronously instead of being delivered as a rejected promise. A bare
+   * `.catch()` chained on the call will not observe it.
+   */
+  speech(options: SpeechOptions): Promise<ArrayBuffer> {
+    throw new Error("Method not implemented.");
+  }
  toolAgentChat(options: AgentChatOptions): Promise<void> {
    throw new Error("Method not implemented.");
  }
--- a/app/client/platforms/openai.ts
+++ b/app/client/platforms/openai.ts
@@ -16,6 +16,7 @@ import {
  LLMApi,
  LLMModel,
  LLMUsage,
+  SpeechOptions,
 } from "../api";
 import Locale from "../../locales";
 import {
@@ -80,6 +81,44 @@ export class ChatGPTApi implements LLMApi {
    return res.choices?.at(0)?.message?.content ?? "";
  }

+  /**
+   * Request synthesized speech for `options.input` from the speech endpoint.
+   *
+   * The request is aborted if no response arrives within
+   * `REQUEST_TIMEOUT_MS`; the caller may also cancel it through the
+   * controller passed to `options.onController`.
+   *
+   * @param options - model, input text, voice and optional format/speed.
+   * @returns the raw bytes of the response body (the audio data).
+   * @throws if the request fails or is aborted, or if the server answers
+   *   with a non-2xx status (the body is then an error payload, not audio).
+   */
+  async speech(options: SpeechOptions): Promise<ArrayBuffer> {
+    const requestPayload = {
+      model: options.model,
+      input: options.input,
+      voice: options.voice,
+      response_format: options.response_format,
+      speed: options.speed,
+    };
+
+    console.log("[Request] openai speech payload: ", requestPayload);
+
+    const controller = new AbortController();
+    options.onController?.(controller);
+
+    let requestTimeoutId: ReturnType<typeof setTimeout> | undefined;
+
+    try {
+      const speechPath = this.path(OpenaiPath.SpeechPath, options.model);
+      const speechPayload = {
+        method: "POST",
+        body: JSON.stringify(requestPayload),
+        signal: controller.signal,
+        headers: getHeaders(),
+      };
+
+      // abort the request if the server does not answer in time
+      requestTimeoutId = setTimeout(
+        () => controller.abort(),
+        REQUEST_TIMEOUT_MS,
+      );
+
+      const res = await fetch(speechPath, speechPayload);
+      // Response headers arrived: the timeout only guards the wait for them.
+      clearTimeout(requestTimeoutId);
+
+      // An error response carries an error payload, not audio; never hand
+      // it back to the caller as if it were playable bytes.
+      if (!res.ok) {
+        throw new Error(
+          `speech request failed: ${res.status} ${res.statusText}`,
+        );
+      }
+
+      return await res.arrayBuffer();
+    } catch (e) {
+      console.log("[Request] failed to make a speech request", e);
+      throw e;
+    } finally {
+      // Also clears the timer when fetch itself rejected; harmless if the
+      // timer was already cleared above.
+      clearTimeout(requestTimeoutId);
+    }
+  }
+
  async chat(options: ChatOptions) {
    const messages: any[] = [];