feat: support gpt-4o-audio

This commit is contained in:
Laisky.Cai
2025-01-14 06:38:07 +00:00
parent b5c55a03cf
commit 9730ac3bb5
14 changed files with 378 additions and 189 deletions

View File

@@ -99,6 +99,12 @@ func (a *Adaptor) ConvertRequest(c *gin.Context, relayMode int, request *model.G
}(request.Messages)
}
if request.Stream && strings.HasPrefix(request.Model, "gpt-4o-audio") {
// TODO: billing for gpt-4o-audio in stream mode is not implemented yet,
// so streaming requests for these models are rejected for now.
return nil, errors.New("stream mode is not supported for gpt-4o-audio")
}
return request, nil
}

View File

@@ -9,6 +9,7 @@ var ModelList = []string{
"gpt-4-turbo-preview", "gpt-4-turbo", "gpt-4-turbo-2024-04-09",
"gpt-4o", "gpt-4o-2024-05-13", "gpt-4o-2024-08-06", "gpt-4o-2024-11-20", "chatgpt-4o-latest",
"gpt-4o-mini", "gpt-4o-mini-2024-07-18",
"gpt-4o-audio-preview", "gpt-4o-audio-preview-2024-12-17", "gpt-4o-audio-preview-2024-10-01",
"gpt-4-vision-preview",
"text-embedding-ada-002", "text-embedding-3-small", "text-embedding-3-large",
"text-curie-001", "text-babbage-001", "text-ada-001", "text-davinci-002", "text-davinci-003",

View File

@@ -5,6 +5,7 @@ import (
"bytes"
"encoding/json"
"io"
"math"
"net/http"
"strings"
@@ -13,6 +14,7 @@ import (
"github.com/songquanpeng/one-api/common/conv"
"github.com/songquanpeng/one-api/common/logger"
"github.com/songquanpeng/one-api/common/render"
"github.com/songquanpeng/one-api/relay/billing/ratio"
"github.com/songquanpeng/one-api/relay/model"
"github.com/songquanpeng/one-api/relay/relaymode"
)
@@ -95,6 +97,7 @@ func StreamHandler(c *gin.Context, resp *http.Response, relayMode int) (*model.E
return nil, responseText, usage
}
// Handler handles the non-stream response from OpenAI API
func Handler(c *gin.Context, resp *http.Response, promptTokens int, modelName string) (*model.ErrorWithStatusCode, *model.Usage) {
var textResponse SlimTextResponse
responseBody, err := io.ReadAll(resp.Body)
@@ -145,6 +148,22 @@ func Handler(c *gin.Context, resp *http.Response, promptTokens int, modelName st
CompletionTokens: completionTokens,
TotalTokens: promptTokens + completionTokens,
}
} else {
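// Convert the more expensive audio tokens to uniformly priced text tokens.
// NOTE(review): the prompt-side count below is derived from
// CompletionTokensDetails, the same source as the completion-side count;
// confirm whether the prompt token details were intended here.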
textResponse.Usage.PromptTokens = textResponse.CompletionTokensDetails.TextTokens +
int(math.Ceil(
float64(textResponse.CompletionTokensDetails.AudioTokens)*
ratio.GetAudioPromptRatio(modelName),
))
textResponse.Usage.CompletionTokens = textResponse.CompletionTokensDetails.TextTokens +
int(math.Ceil(
float64(textResponse.CompletionTokensDetails.AudioTokens)*
ratio.GetAudioPromptRatio(modelName)*
ratio.GetAudioCompletionRatio(modelName),
))
textResponse.Usage.TotalTokens = textResponse.Usage.PromptTokens +
textResponse.Usage.CompletionTokens
}
return nil, &textResponse.Usage
}

View File

@@ -1,16 +1,22 @@
package openai
import (
"bytes"
"context"
"encoding/base64"
"fmt"
"math"
"strings"
"github.com/pkg/errors"
"github.com/pkoukk/tiktoken-go"
"github.com/songquanpeng/one-api/common/config"
"github.com/songquanpeng/one-api/common/helper"
"github.com/songquanpeng/one-api/common/image"
"github.com/songquanpeng/one-api/common/logger"
"github.com/songquanpeng/one-api/relay/billing/ratio"
billingratio "github.com/songquanpeng/one-api/relay/billing/ratio"
"github.com/songquanpeng/one-api/relay/model"
"math"
"strings"
)
// tokenEncoderMap won't grow after initialization
@@ -70,8 +76,9 @@ func getTokenNum(tokenEncoder *tiktoken.Tiktoken, text string) int {
return len(tokenEncoder.Encode(text, nil, nil))
}
func CountTokenMessages(messages []model.Message, model string) int {
tokenEncoder := getTokenEncoder(model)
func CountTokenMessages(ctx context.Context,
messages []model.Message, actualModel string) int {
tokenEncoder := getTokenEncoder(actualModel)
// Reference:
// https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
// https://github.com/pkoukk/tiktoken-go/issues/6
@@ -79,7 +86,7 @@ func CountTokenMessages(messages []model.Message, model string) int {
// Every message follows <|start|>{role/name}\n{content}<|end|>\n
var tokensPerMessage int
var tokensPerName int
if model == "gpt-3.5-turbo-0301" {
if actualModel == "gpt-3.5-turbo-0301" {
tokensPerMessage = 4
tokensPerName = -1 // If there's a name, the role is omitted
} else {
@@ -89,37 +96,38 @@ func CountTokenMessages(messages []model.Message, model string) int {
tokenNum := 0
for _, message := range messages {
tokenNum += tokensPerMessage
switch v := message.Content.(type) {
case string:
tokenNum += getTokenNum(tokenEncoder, v)
case []any:
for _, it := range v {
m := it.(map[string]any)
switch m["type"] {
case "text":
if textValue, ok := m["text"]; ok {
if textString, ok := textValue.(string); ok {
tokenNum += getTokenNum(tokenEncoder, textString)
}
}
case "image_url":
imageUrl, ok := m["image_url"].(map[string]any)
if ok {
url := imageUrl["url"].(string)
detail := ""
if imageUrl["detail"] != nil {
detail = imageUrl["detail"].(string)
}
imageTokens, err := countImageTokens(url, detail, model)
if err != nil {
logger.SysError("error counting image tokens: " + err.Error())
} else {
tokenNum += imageTokens
}
}
contents := message.ParseContent()
for _, content := range contents {
switch content.Type {
case model.ContentTypeText:
tokenNum += getTokenNum(tokenEncoder, content.Text)
case model.ContentTypeImageURL:
imageTokens, err := countImageTokens(
content.ImageURL.Url,
content.ImageURL.Detail,
actualModel)
if err != nil {
logger.SysError("error counting image tokens: " + err.Error())
} else {
tokenNum += imageTokens
}
case model.ContentTypeInputAudio:
audioData, err := base64.StdEncoding.DecodeString(content.InputAudio.Data)
if err != nil {
logger.SysError("error decoding audio data: " + err.Error())
}
tokens, err := helper.GetAudioTokens(ctx,
bytes.NewReader(audioData),
ratio.GetAudioPromptTokensPerSecond(actualModel))
if err != nil {
logger.SysError("error counting audio tokens: " + err.Error())
} else {
tokenNum += tokens
}
}
}
tokenNum += getTokenNum(tokenEncoder, message.Role)
if message.Name != nil {
tokenNum += tokensPerName