mirror of
https://github.com/songquanpeng/one-api.git
synced 2025-11-14 12:23:41 +08:00
✨ add transcriptions api
This commit is contained in:
@@ -151,6 +151,8 @@ func (p *AliProvider) streamResponseAli2OpenAI(aliResponse *AliChatResponse) *ty
|
||||
|
||||
// 发送流请求
|
||||
func (p *AliProvider) sendStreamRequest(req *http.Request) (usage *types.Usage, errWithCode *types.OpenAIErrorWithStatusCode) {
|
||||
defer req.Body.Close()
|
||||
|
||||
usage = &types.Usage{}
|
||||
// 发送请求
|
||||
resp, err := common.HttpClient.Do(req)
|
||||
|
||||
@@ -124,6 +124,8 @@ func (p *BaiduProvider) streamResponseBaidu2OpenAI(baiduResponse *BaiduChatStrea
|
||||
}
|
||||
|
||||
func (p *BaiduProvider) sendStreamRequest(req *http.Request) (usage *types.Usage, errWithCode *types.OpenAIErrorWithStatusCode) {
|
||||
defer req.Body.Close()
|
||||
|
||||
usage = &types.Usage{}
|
||||
// 发送请求
|
||||
resp, err := common.HttpClient.Do(req)
|
||||
|
||||
@@ -54,6 +54,7 @@ func (p *BaseProvider) CommonRequestHeaders(headers map[string]string) {
|
||||
|
||||
// 发送请求
|
||||
func (p *BaseProvider) SendRequest(req *http.Request, response ProviderResponseHandler, rawOutput bool) (openAIErrorWithStatusCode *types.OpenAIErrorWithStatusCode) {
|
||||
defer req.Body.Close()
|
||||
|
||||
resp, openAIErrorWithStatusCode := common.SendRequest(req, response, true)
|
||||
if openAIErrorWithStatusCode != nil {
|
||||
@@ -95,6 +96,7 @@ func (p *BaseProvider) SendRequest(req *http.Request, response ProviderResponseH
|
||||
}
|
||||
|
||||
func (p *BaseProvider) SendRequestRaw(req *http.Request) (openAIErrorWithStatusCode *types.OpenAIErrorWithStatusCode) {
|
||||
defer req.Body.Close()
|
||||
|
||||
// 发送请求
|
||||
resp, err := common.HttpClient.Do(req)
|
||||
|
||||
@@ -44,6 +44,12 @@ type SpeechInterface interface {
|
||||
SpeechAction(request *types.SpeechAudioRequest, isModelMapped bool, promptTokens int) (usage *types.Usage, errWithCode *types.OpenAIErrorWithStatusCode)
|
||||
}
|
||||
|
||||
// 语音转文字接口
|
||||
type TranscriptionsInterface interface {
|
||||
ProviderInterface
|
||||
TranscriptionsAction(request *types.AudioRequest, isModelMapped bool, promptTokens int) (usage *types.Usage, errWithCode *types.OpenAIErrorWithStatusCode)
|
||||
}
|
||||
|
||||
// 余额接口
|
||||
type BalanceInterface interface {
|
||||
BalanceAction(channel *model.Channel) (float64, error)
|
||||
|
||||
@@ -134,6 +134,8 @@ func (p *ClaudeProvider) streamResponseClaude2OpenAI(claudeResponse *ClaudeRespo
|
||||
}
|
||||
|
||||
func (p *ClaudeProvider) sendStreamRequest(req *http.Request) (*types.OpenAIErrorWithStatusCode, string) {
|
||||
defer req.Body.Close()
|
||||
|
||||
// 发送请求
|
||||
resp, err := common.HttpClient.Do(req)
|
||||
if err != nil {
|
||||
|
||||
@@ -93,6 +93,7 @@ func (p *OpenAIProvider) getRequestBody(request any, isModelMapped bool) (reques
|
||||
|
||||
// 发送流式请求
|
||||
func (p *OpenAIProvider) sendStreamRequest(req *http.Request, response OpenAIProviderStreamResponseHandler) (openAIErrorWithStatusCode *types.OpenAIErrorWithStatusCode, responseText string) {
|
||||
defer req.Body.Close()
|
||||
|
||||
resp, err := common.HttpClient.Do(req)
|
||||
if err != nil {
|
||||
|
||||
@@ -1,15 +1,5 @@
|
||||
package openai
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"one-api/types"
|
||||
)
|
||||
|
||||
type OpenAIProviderResponseHandler interface {
|
||||
// 请求处理函数
|
||||
responseHandler(resp *http.Response) (errWithCode *types.OpenAIErrorWithStatusCode)
|
||||
}
|
||||
|
||||
type OpenAIProviderStreamResponseHandler interface {
|
||||
// 请求流处理函数
|
||||
responseStreamHandler() (responseText string)
|
||||
|
||||
181
providers/openai/transcriptions.go
Normal file
181
providers/openai/transcriptions.go
Normal file
@@ -0,0 +1,181 @@
|
||||
package openai
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"one-api/common"
|
||||
"one-api/types"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
func (c *OpenAIProviderTranscriptionsResponse) ResponseHandler(resp *http.Response) (OpenAIResponse any, errWithCode *types.OpenAIErrorWithStatusCode) {
|
||||
if c.Error.Type != "" {
|
||||
errWithCode = &types.OpenAIErrorWithStatusCode{
|
||||
OpenAIError: c.Error,
|
||||
StatusCode: resp.StatusCode,
|
||||
}
|
||||
return
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (c *OpenAIProviderTranscriptionsTextResponse) ResponseHandler(resp *http.Response) (OpenAIResponse any, errWithCode *types.OpenAIErrorWithStatusCode) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (p *OpenAIProvider) TranscriptionsAction(request *types.AudioRequest, isModelMapped bool, promptTokens int) (usage *types.Usage, errWithCode *types.OpenAIErrorWithStatusCode) {
|
||||
fullRequestURL := p.GetFullRequestURL(p.AudioTranscriptions, request.Model)
|
||||
headers := p.GetRequestHeaders()
|
||||
|
||||
client := common.NewClient()
|
||||
|
||||
var formBody bytes.Buffer
|
||||
var req *http.Request
|
||||
var err error
|
||||
if isModelMapped {
|
||||
builder := client.CreateFormBuilder(&formBody)
|
||||
if err := audioMultipartForm(request, builder); err != nil {
|
||||
return nil, types.ErrorWrapper(err, "create_form_builder_failed", http.StatusInternalServerError)
|
||||
}
|
||||
req, err = client.NewRequest(p.Context.Request.Method, fullRequestURL, common.WithBody(&formBody), common.WithHeader(headers), common.WithContentType(builder.FormDataContentType()))
|
||||
|
||||
} else {
|
||||
req, err = client.NewRequest(p.Context.Request.Method, fullRequestURL, common.WithBody(p.Context.Request.Body), common.WithHeader(headers), common.WithContentType(p.Context.Request.Header.Get("Content-Type")))
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return nil, types.ErrorWrapper(err, "new_request_failed", http.StatusInternalServerError)
|
||||
}
|
||||
|
||||
var textResponse string
|
||||
if hasJSONResponse(request) {
|
||||
openAIProviderTranscriptionsResponse := &OpenAIProviderTranscriptionsResponse{}
|
||||
errWithCode = p.SendRequest(req, openAIProviderTranscriptionsResponse, true)
|
||||
if errWithCode != nil {
|
||||
return
|
||||
}
|
||||
textResponse = openAIProviderTranscriptionsResponse.Text
|
||||
} else {
|
||||
openAIProviderTranscriptionsTextResponse := new(OpenAIProviderTranscriptionsTextResponse)
|
||||
errWithCode = p.SendRequest(req, openAIProviderTranscriptionsTextResponse, true)
|
||||
if errWithCode != nil {
|
||||
return
|
||||
}
|
||||
textResponse = getTextContent(*openAIProviderTranscriptionsTextResponse.GetString(), request.ResponseFormat)
|
||||
}
|
||||
|
||||
completionTokens := common.CountTokenText(textResponse, request.Model)
|
||||
usage = &types.Usage{
|
||||
PromptTokens: promptTokens,
|
||||
CompletionTokens: completionTokens,
|
||||
TotalTokens: promptTokens + completionTokens,
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func hasJSONResponse(request *types.AudioRequest) bool {
|
||||
return request.ResponseFormat == "" || request.ResponseFormat == "json" || request.ResponseFormat == "verbose_json"
|
||||
}
|
||||
|
||||
func audioMultipartForm(request *types.AudioRequest, b common.FormBuilder) error {
|
||||
|
||||
err := b.CreateFormFile("file", request.File)
|
||||
if err != nil {
|
||||
return fmt.Errorf("creating form file: %w", err)
|
||||
}
|
||||
|
||||
err = b.WriteField("model", request.Model)
|
||||
if err != nil {
|
||||
return fmt.Errorf("writing model name: %w", err)
|
||||
}
|
||||
|
||||
if request.Prompt != "" {
|
||||
err = b.WriteField("prompt", request.Prompt)
|
||||
if err != nil {
|
||||
return fmt.Errorf("writing prompt: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
if request.ResponseFormat != "" {
|
||||
err = b.WriteField("response_format", request.ResponseFormat)
|
||||
if err != nil {
|
||||
return fmt.Errorf("writing format: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
if request.Temperature != 0 {
|
||||
err = b.WriteField("temperature", fmt.Sprintf("%.2f", request.Temperature))
|
||||
if err != nil {
|
||||
return fmt.Errorf("writing temperature: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
if request.Language != "" {
|
||||
err = b.WriteField("language", request.Language)
|
||||
if err != nil {
|
||||
return fmt.Errorf("writing language: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
return b.Close()
|
||||
}
|
||||
|
||||
func getTextContent(text, format string) string {
|
||||
switch format {
|
||||
case "srt":
|
||||
return extractTextFromSRT(text)
|
||||
case "vtt":
|
||||
return extractTextFromVTT(text)
|
||||
default:
|
||||
return text
|
||||
}
|
||||
}
|
||||
|
||||
// vttCueTimingRe matches a WebVTT cue timing such as
// "00:00:01.000 --> 00:00:04.000". The hours component is optional because
// WebVTT also allows the shorter "00:01.000 --> 00:04.000" form. The pattern
// is intentionally unanchored so trailing cue settings do not prevent a match.
var vttCueTimingRe = regexp.MustCompile(`(?:\d{2,}:)?\d{2}:\d{2}\.\d{3} --> (?:\d{2,}:)?\d{2}:\d{2}\.\d{3}`)

// extractTextFromVTT returns the cue text of a WebVTT document as a single
// string, with the text lines joined by one space.
//
// The following lines are dropped: the first line starting with "WEBVTT",
// cue timing lines, empty lines, and lines that consist solely of an integer
// (treated as numeric cue identifiers). An empty document yields "".
func extractTextFromVTT(vttContent string) string {
	scanner := bufio.NewScanner(strings.NewReader(vttContent))
	// Raise the token limit to the input size so that a very long line cannot
	// make the scanner stop early and silently truncate the result.
	scanner.Buffer(nil, len(vttContent)+1)

	var text []string
	headerPending := true

	for scanner.Scan() {
		line := scanner.Text()

		if headerPending && strings.HasPrefix(line, "WEBVTT") {
			headerPending = false
			continue
		}
		if line == "" || vttCueTimingRe.MatchString(line) {
			continue
		}
		// A purely numeric line is taken to be a cue identifier, not speech.
		if _, err := strconv.Atoi(line); err == nil {
			continue
		}

		text = append(text, line)
	}

	return strings.Join(text, " ")
}
|
||||
|
||||
// srtCueTimingRe matches an SRT cue timing such as
// "00:00:01,000 --> 00:00:04,000".
var srtCueTimingRe = regexp.MustCompile(`\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3}`)

// extractTextFromSRT returns the cue text of an SRT document as a single
// string, with the text lines joined by one space.
//
// Only lines between a cue timing line and the next empty line are kept, so
// sequence numbers (which precede the timing line) are dropped. An empty
// document yields "".
func extractTextFromSRT(srtContent string) string {
	scanner := bufio.NewScanner(strings.NewReader(srtContent))
	// Raise the token limit to the input size so that a very long line cannot
	// make the scanner stop early and silently truncate the result.
	scanner.Buffer(nil, len(srtContent)+1)

	var text []string
	inCue := false

	for scanner.Scan() {
		line := scanner.Text()

		switch {
		case srtCueTimingRe.MatchString(line):
			// Everything up to the next blank line is cue text.
			inCue = true
		case line == "":
			inCue = false
		case inCue:
			text = append(text, line)
		}
	}

	return strings.Join(text, " ")
}
|
||||
|
||||
// isNumber reports whether s parses as a base-10 integer via strconv.Atoi.
// An optional leading sign is accepted; surrounding whitespace and the empty
// string are not.
func isNumber(s string) bool {
	if _, err := strconv.Atoi(s); err != nil {
		return false
	}
	return true
}
|
||||
@@ -26,3 +26,14 @@ type OpenAIProviderModerationResponse struct {
|
||||
types.ModerationResponse
|
||||
types.OpenAIErrorResponse
|
||||
}
|
||||
|
||||
type OpenAIProviderTranscriptionsResponse struct {
|
||||
types.AudioResponse
|
||||
types.OpenAIErrorResponse
|
||||
}
|
||||
|
||||
// OpenAIProviderTranscriptionsTextResponse holds a transcription response
// that is plain text rather than JSON.
type OpenAIProviderTranscriptionsTextResponse string

// GetString returns a pointer to the underlying string. The pointer aliases
// the receiver, so writes through it change the receiver's value.
func (a *OpenAIProviderTranscriptionsTextResponse) GetString() *string {
	underlying := (*string)(a)
	return underlying
}
|
||||
|
||||
@@ -128,6 +128,8 @@ func (p *PalmProvider) streamResponsePaLM2OpenAI(palmResponse *PaLMChatResponse)
|
||||
}
|
||||
|
||||
func (p *PalmProvider) sendStreamRequest(req *http.Request) (*types.OpenAIErrorWithStatusCode, string) {
|
||||
defer req.Body.Close()
|
||||
|
||||
// 发送请求
|
||||
resp, err := common.HttpClient.Do(req)
|
||||
if err != nil {
|
||||
|
||||
@@ -140,6 +140,7 @@ func (p *TencentProvider) streamResponseTencent2OpenAI(TencentResponse *TencentC
|
||||
}
|
||||
|
||||
func (p *TencentProvider) sendStreamRequest(req *http.Request) (*types.OpenAIErrorWithStatusCode, string) {
|
||||
defer req.Body.Close()
|
||||
// 发送请求
|
||||
resp, err := common.HttpClient.Do(req)
|
||||
if err != nil {
|
||||
@@ -208,6 +209,5 @@ func (p *TencentProvider) sendStreamRequest(req *http.Request) (*types.OpenAIErr
|
||||
return false
|
||||
}
|
||||
})
|
||||
|
||||
return nil, responseText
|
||||
}
|
||||
|
||||
@@ -139,6 +139,8 @@ func (p *ZhipuProvider) streamMetaResponseZhipu2OpenAI(zhipuResponse *ZhipuStrea
|
||||
}
|
||||
|
||||
func (p *ZhipuProvider) sendStreamRequest(req *http.Request) (*types.OpenAIErrorWithStatusCode, *types.Usage) {
|
||||
defer req.Body.Close()
|
||||
|
||||
// 发送请求
|
||||
resp, err := common.HttpClient.Do(req)
|
||||
if err != nil {
|
||||
@@ -221,6 +223,5 @@ func (p *ZhipuProvider) sendStreamRequest(req *http.Request) (*types.OpenAIError
|
||||
return false
|
||||
}
|
||||
})
|
||||
|
||||
return nil, usage
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user