fix: implement audio duration retrieval and add tests

This commit is contained in:
Laisky.Cai
2025-01-08 03:55:02 +00:00
parent c1a0471e73
commit e17017eb4a
8 changed files with 162 additions and 75 deletions

View File

@@ -1,6 +1,10 @@
package openai
import "github.com/songquanpeng/one-api/relay/model"
import (
"mime/multipart"
"github.com/songquanpeng/one-api/relay/model"
)
type TextContent struct {
Type string `json:"type,omitempty"`
@@ -71,6 +75,24 @@ type TextToSpeechRequest struct {
ResponseFormat string `json:"response_format"`
}
// AudioTranscriptionRequest is the multipart form payload of an audio
// transcription request. Fields are populated from form values named by the
// `form` tags and validated according to the `binding` tags.
type AudioTranscriptionRequest struct {
	// File is the uploaded audio file (form field "file"); it is required.
	File *multipart.FileHeader `form:"file" binding:"required"`
	// Model is the model identifier (form field "model"); it is required.
	Model string `form:"model" binding:"required"`
	// Language is the optional form field "language".
	Language string `form:"language"`
	// Prompt is the optional form field "prompt".
	Prompt string `form:"prompt"`
	// ReponseFormat is the optional form field "response_format". When it is
	// supplied it must be one of json, text, srt, verbose_json or vtt.
	// `omitempty` is needed so that an omitted value skips the `oneof` check;
	// without it the empty string fails validation and the field becomes
	// effectively mandatory.
	//
	// NOTE(review): the Go field name is misspelled ("Reponse"); it is kept
	// unchanged here so existing callers continue to compile.
	ReponseFormat string `form:"response_format" binding:"omitempty,oneof=json text srt verbose_json vtt"`
	// Temperature is the optional form field "temperature".
	Temperature float64 `form:"temperature"`
	// TimestampGranularity collects the values of the form field
	// "timestamp_granularity".
	//
	// NOTE(review): the upstream API appears to name this parameter
	// "timestamp_granularities[]" — confirm the form key against clients.
	TimestampGranularity []string `form:"timestamp_granularity"`
}
// AudioTranslationRequest is the multipart form payload of an audio
// translation request. Fields are populated from form values named by the
// `form` tags and validated according to the `binding` tags.
type AudioTranslationRequest struct {
	// File is the uploaded audio file (form field "file"); it is required.
	File *multipart.FileHeader `form:"file" binding:"required"`
	// Model is the model identifier (form field "model"); it is required.
	Model string `form:"model" binding:"required"`
	// Prompt is the optional form field "prompt".
	Prompt string `form:"prompt"`
	// ResponseFormat is the optional form field "response_format". When it is
	// supplied it must be one of json, text, srt, verbose_json or vtt.
	// `omitempty` is needed so that an omitted value skips the `oneof` check;
	// without it the empty string fails validation and the field becomes
	// effectively mandatory.
	ResponseFormat string `form:"response_format" binding:"omitempty,oneof=json text srt verbose_json vtt"`
	// Temperature is the optional form field "temperature".
	Temperature float64 `form:"temperature"`
}
type UsageOrResponseText struct {
*model.Usage
ResponseText string

View File

@@ -8,6 +8,7 @@ import (
"fmt"
"io"
"math"
"mime/multipart"
"net/http"
"os"
"strings"
@@ -33,22 +34,40 @@ const (
TokensPerSecond = 1000 / 20 // $0.006 / minute -> $0.002 / 20 seconds -> $0.002 / 1K tokens
)
// commonAudioRequest is the minimal form payload bound by countAudioTokens:
// it carries only the uploaded audio file, which is all that is needed to
// measure the audio.
type commonAudioRequest struct {
// File is the uploaded file from the form field "file"; binding fails if it
// is absent.
File *multipart.FileHeader `form:"file" binding:"required"`
}
func countAudioTokens(c *gin.Context) (int, error) {
body, err := common.GetRequestBody(c)
if err != nil {
return 0, errors.WithStack(err)
}
reqBody := new(commonAudioRequest)
c.Request.Body = io.NopCloser(bytes.NewReader(body))
if err = c.ShouldBind(reqBody); err != nil {
return 0, errors.WithStack(err)
}
reqFp, err := reqBody.File.Open()
if err != nil {
return 0, errors.WithStack(err)
}
fp, err := os.CreateTemp("", "audio-*")
if err != nil {
return 0, errors.WithStack(err)
}
defer os.Remove(fp.Name())
_, err = io.Copy(fp, bytes.NewReader(body))
_, err = io.Copy(fp, reqFp)
if err != nil {
return 0, errors.WithStack(err)
}
if err = fp.Close(); err != nil {
return 0, errors.WithStack(err)
}
duration, err := helper.GetAudioDuration(c.Request.Context(), fp.Name())
if err != nil {