feat: support whisper

This commit is contained in:
Hk-Gosuto
2024-03-18 16:02:39 +08:00
parent f10fa91432
commit bab838b9c6
6 changed files with 216 additions and 28 deletions

View File

@@ -7,6 +7,7 @@ import {
LLMModel,
LLMUsage,
SpeechOptions,
TranscriptionOptions,
} from "../api";
import { useAccessStore, useAppConfig, useChatStore } from "@/app/store";
import { getClientConfig } from "@/app/config/client";
@@ -18,6 +19,9 @@ import {
} from "@/app/utils";
export class GeminiProApi implements LLMApi {
/**
 * Placeholder for audio transcription.
 *
 * Always throws synchronously; no promise is ever returned.
 *
 * @param options - Transcription request options (unused by this stub).
 * @throws Error with the message "Method not implemented." on every call.
 */
transcription(options: TranscriptionOptions): Promise<string> {
  const notImplemented = new Error("Method not implemented.");
  throw notImplemented;
}
/**
 * Placeholder for speech synthesis.
 *
 * Always throws synchronously; no promise is ever returned.
 *
 * @param options - Speech request options (unused by this stub).
 * @throws Error with the message "Method not implemented." on every call.
 */
speech(options: SpeechOptions): Promise<ArrayBuffer> {
  const notImplemented = new Error("Method not implemented.");
  throw notImplemented;
}

View File

@@ -18,6 +18,7 @@ import {
LLMUsage,
MultimodalContent,
SpeechOptions,
TranscriptionOptions,
} from "../api";
import Locale from "../../locales";
import {
@@ -124,6 +125,47 @@ export class ChatGPTApi implements LLMApi {
}
}
/**
 * Uploads an audio file as multipart form data to the transcription path and
 * resolves with the `text` field of the JSON response.
 *
 * The form always carries `file` (sent under the filename "audio.wav") and
 * `model` (defaulting to "whisper-1"); `language`, `prompt`,
 * `response_format` and `temperature` are appended only when provided.
 *
 * @param options - Transcription request options. `options.onController`, if
 *   present, receives the AbortController so the caller can cancel the request.
 * @returns The `text` property of the parsed JSON response body.
 * @throws Error when the server answers with a non-2xx status; also rethrows
 *   any failure from `fetch` (including an abort triggered by the caller or
 *   by the REQUEST_TIMEOUT_MS timer) or from parsing the response as JSON.
 */
async transcription(options: TranscriptionOptions): Promise<string> {
  const formData = new FormData();
  formData.append("file", options.file, "audio.wav");
  formData.append("model", options.model ?? "whisper-1");
  if (options.language) formData.append("language", options.language);
  if (options.prompt) formData.append("prompt", options.prompt);
  if (options.response_format)
    formData.append("response_format", options.response_format);
  // Compare against null/undefined rather than truthiness so that an
  // explicitly requested temperature of 0 is still sent.
  if (options.temperature != null)
    formData.append("temperature", options.temperature.toString());

  console.log("[Request] openai audio transcriptions payload: ", options);

  const controller = new AbortController();
  options.onController?.(controller);

  try {
    const path = this.path(OpenaiPath.TranscriptionPath, options.model);
    // NOTE(review): if getHeaders() sets a JSON Content-Type it would override
    // the multipart boundary that fetch derives from the FormData body —
    // confirm against getHeaders' implementation.
    const payload = {
      method: "POST",
      body: formData,
      signal: controller.signal,
      headers: getHeaders(),
    };

    // Abort the request if no response arrives within REQUEST_TIMEOUT_MS.
    const requestTimeoutId = setTimeout(
      () => controller.abort(),
      REQUEST_TIMEOUT_MS,
    );

    let res: Response;
    try {
      res = await fetch(path, payload);
    } finally {
      // Always release the timer, even when fetch rejects; otherwise it stays
      // pending and later aborts a controller nobody is using anymore.
      clearTimeout(requestTimeoutId);
    }

    // Surface HTTP errors explicitly instead of resolving with an undefined
    // `text` taken from an error payload.
    if (!res.ok) {
      throw new Error(
        `[Request] audio transcriptions request failed with status ${res.status} ${res.statusText}`,
      );
    }

    const json = await res.json();
    return json.text;
  } catch (e) {
    console.log("[Request] failed to make a audio transcriptions request", e);
    throw e;
  }
}
async chat(options: ChatOptions) {
const visionModel = isVisionModel(options.config.model);
const messages = options.messages.map((v) => ({