mirror of https://github.com/songquanpeng/one-api.git
feat: support gpt-4o-audio
@@ -99,6 +99,12 @@ func (a *Adaptor) ConvertRequest(c *gin.Context, relayMode int, request *model.G
 		}(request.Messages)
 	}
 
+	if request.Stream && strings.HasPrefix(request.Model, "gpt-4o-audio") {
+		// TODO: Since it is not clear how to implement billing in stream mode,
+		// it is temporarily not supported
+		return nil, errors.New("stream mode is not supported for gpt-4o-audio")
+	}
+
 	return request, nil
 }
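The guard added above fails fast instead of emitting a stream whose audio usage could not be billed. A minimal standalone sketch of the same check, with a stripped-down request struct standing in for the upstream model.GeneralOpenAIRequest, might look like this:

    package main

    import (
    	"errors"
    	"fmt"
    	"strings"
    )

    // request mirrors only the two fields the guard inspects; the real code
    // works on model.GeneralOpenAIRequest inside the OpenAI adaptor.
    type request struct {
    	Model  string
    	Stream bool
    }

    // rejectAudioStream reproduces the guard: gpt-4o-audio models are accepted
    // only in non-stream mode until stream-mode billing is sorted out.
    func rejectAudioStream(r *request) error {
    	if r.Stream && strings.HasPrefix(r.Model, "gpt-4o-audio") {
    		return errors.New("stream mode is not supported for gpt-4o-audio")
    	}
    	return nil
    }

    func main() {
    	fmt.Println(rejectAudioStream(&request{Model: "gpt-4o-audio-preview", Stream: true}))
    	fmt.Println(rejectAudioStream(&request{Model: "gpt-4o-audio-preview", Stream: false}))
    }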
@@ -9,6 +9,7 @@ var ModelList = []string{
 	"gpt-4-turbo-preview", "gpt-4-turbo", "gpt-4-turbo-2024-04-09",
 	"gpt-4o", "gpt-4o-2024-05-13", "gpt-4o-2024-08-06", "gpt-4o-2024-11-20", "chatgpt-4o-latest",
 	"gpt-4o-mini", "gpt-4o-mini-2024-07-18",
+	"gpt-4o-audio-preview", "gpt-4o-audio-preview-2024-12-17", "gpt-4o-audio-preview-2024-10-01",
 	"gpt-4-vision-preview",
 	"text-embedding-ada-002", "text-embedding-3-small", "text-embedding-3-large",
 	"text-curie-001", "text-babbage-001", "text-ada-001", "text-davinci-002", "text-davinci-003",
@@ -5,6 +5,7 @@ import (
 	"bytes"
 	"encoding/json"
 	"io"
+	"math"
 	"net/http"
 	"strings"
@@ -13,6 +14,7 @@ import (
 	"github.com/songquanpeng/one-api/common/conv"
 	"github.com/songquanpeng/one-api/common/logger"
 	"github.com/songquanpeng/one-api/common/render"
+	"github.com/songquanpeng/one-api/relay/billing/ratio"
 	"github.com/songquanpeng/one-api/relay/model"
 	"github.com/songquanpeng/one-api/relay/relaymode"
 )
@@ -95,6 +97,7 @@ func StreamHandler(c *gin.Context, resp *http.Response, relayMode int) (*model.E
 	return nil, responseText, usage
 }
 
+// Handler handles the non-stream response from OpenAI API
 func Handler(c *gin.Context, resp *http.Response, promptTokens int, modelName string) (*model.ErrorWithStatusCode, *model.Usage) {
 	var textResponse SlimTextResponse
 	responseBody, err := io.ReadAll(resp.Body)
@@ -145,6 +148,22 @@ func Handler(c *gin.Context, resp *http.Response, promptTokens int, modelName st
 			CompletionTokens: completionTokens,
 			TotalTokens:      promptTokens + completionTokens,
 		}
+	} else {
+		// Convert the more expensive audio tokens to uniformly priced text tokens
+		textResponse.Usage.PromptTokens = textResponse.CompletionTokensDetails.TextTokens +
+			int(math.Ceil(
+				float64(textResponse.CompletionTokensDetails.AudioTokens)*
+					ratio.GetAudioPromptRatio(modelName),
+			))
+		textResponse.Usage.CompletionTokens = textResponse.CompletionTokensDetails.TextTokens +
+			int(math.Ceil(
+				float64(textResponse.CompletionTokensDetails.AudioTokens)*
+					ratio.GetAudioPromptRatio(modelName)*
+					ratio.GetAudioCompletionRatio(modelName),
+			))
+		textResponse.Usage.TotalTokens = textResponse.Usage.PromptTokens +
+			textResponse.Usage.CompletionTokens
+	}
 
 	return nil, &textResponse.Usage
 }
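The else branch above folds audio tokens into the ordinary text-token accounting by scaling them with per-model ratios before quota is deducted. A worked sketch of that arithmetic, using made-up token counts and made-up ratio values rather than the ones shipped in relay/billing/ratio:

    package main

    import (
    	"fmt"
    	"math"
    )

    // textEquivalent converts a mixed text+audio token count into text-token
    // units, mirroring the math.Ceil conversion in the handler.
    func textEquivalent(textTokens, audioTokens int, audioRatio float64) int {
    	return textTokens + int(math.Ceil(float64(audioTokens)*audioRatio))
    }

    func main() {
    	// Illustrative values only: assume an audio prompt ratio of 8 and an
    	// audio completion ratio of 2 for the model in question.
    	promptRatio, completionRatio := 8.0, 2.0

    	// Prompt side: 200 text tokens plus 100 audio tokens.
    	prompt := textEquivalent(200, 100, promptRatio) // 200 + ceil(100*8) = 1000

    	// Completion side: 50 text tokens plus 40 audio tokens; the completion
    	// ratio is applied on top of the prompt ratio, as in the diff.
    	completion := textEquivalent(50, 40, promptRatio*completionRatio) // 50 + ceil(40*16) = 690

    	fmt.Println(prompt, completion, prompt+completion) // 1000 690 1690
    }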
@@ -1,16 +1,22 @@
 package openai
 
 import (
+	"bytes"
+	"context"
+	"encoding/base64"
 	"fmt"
+	"math"
+	"strings"
+
 	"github.com/pkg/errors"
 	"github.com/pkoukk/tiktoken-go"
 	"github.com/songquanpeng/one-api/common/config"
+	"github.com/songquanpeng/one-api/common/helper"
 	"github.com/songquanpeng/one-api/common/image"
 	"github.com/songquanpeng/one-api/common/logger"
+	"github.com/songquanpeng/one-api/relay/billing/ratio"
 	billingratio "github.com/songquanpeng/one-api/relay/billing/ratio"
 	"github.com/songquanpeng/one-api/relay/model"
-	"math"
-	"strings"
 )
 
 // tokenEncoderMap won't grow after initialization
@@ -70,8 +76,9 @@ func getTokenNum(tokenEncoder *tiktoken.Tiktoken, text string) int {
 	return len(tokenEncoder.Encode(text, nil, nil))
 }
 
-func CountTokenMessages(messages []model.Message, model string) int {
-	tokenEncoder := getTokenEncoder(model)
+func CountTokenMessages(ctx context.Context,
+	messages []model.Message, actualModel string) int {
+	tokenEncoder := getTokenEncoder(actualModel)
 	// Reference:
 	// https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
 	// https://github.com/pkoukk/tiktoken-go/issues/6
@@ -79,7 +86,7 @@ func CountTokenMessages(messages []model.Message, model string) int {
 	// Every message follows <|start|>{role/name}\n{content}<|end|>\n
 	var tokensPerMessage int
 	var tokensPerName int
-	if model == "gpt-3.5-turbo-0301" {
+	if actualModel == "gpt-3.5-turbo-0301" {
 		tokensPerMessage = 4
 		tokensPerName = -1 // If there's a name, the role is omitted
 	} else {
@@ -89,37 +96,38 @@ func CountTokenMessages(messages []model.Message, model string) int {
 	tokenNum := 0
 	for _, message := range messages {
 		tokenNum += tokensPerMessage
-		switch v := message.Content.(type) {
-		case string:
-			tokenNum += getTokenNum(tokenEncoder, v)
-		case []any:
-			for _, it := range v {
-				m := it.(map[string]any)
-				switch m["type"] {
-				case "text":
-					if textValue, ok := m["text"]; ok {
-						if textString, ok := textValue.(string); ok {
-							tokenNum += getTokenNum(tokenEncoder, textString)
-						}
-					}
-				case "image_url":
-					imageUrl, ok := m["image_url"].(map[string]any)
-					if ok {
-						url := imageUrl["url"].(string)
-						detail := ""
-						if imageUrl["detail"] != nil {
-							detail = imageUrl["detail"].(string)
-						}
-						imageTokens, err := countImageTokens(url, detail, model)
-						if err != nil {
-							logger.SysError("error counting image tokens: " + err.Error())
-						} else {
-							tokenNum += imageTokens
-						}
-					}
-				}
-			}
-		}
+		contents := message.ParseContent()
+		for _, content := range contents {
+			switch content.Type {
+			case model.ContentTypeText:
+				tokenNum += getTokenNum(tokenEncoder, content.Text)
+			case model.ContentTypeImageURL:
+				imageTokens, err := countImageTokens(
+					content.ImageURL.Url,
+					content.ImageURL.Detail,
+					actualModel)
+				if err != nil {
+					logger.SysError("error counting image tokens: " + err.Error())
+				} else {
+					tokenNum += imageTokens
+				}
+			case model.ContentTypeInputAudio:
+				audioData, err := base64.StdEncoding.DecodeString(content.InputAudio.Data)
+				if err != nil {
+					logger.SysError("error decoding audio data: " + err.Error())
+				}
+
+				tokens, err := helper.GetAudioTokens(ctx,
+					bytes.NewReader(audioData),
+					ratio.GetAudioPromptTokensPerSecond(actualModel))
+				if err != nil {
+					logger.SysError("error counting audio tokens: " + err.Error())
+				} else {
+					tokenNum += tokens
+				}
+			}
+		}
+
 		tokenNum += getTokenNum(tokenEncoder, message.Role)
 		if message.Name != nil {
 			tokenNum += tokensPerName
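In the new ContentTypeInputAudio branch, prompt tokens are estimated from the audio itself: the base64 payload is decoded and handed to helper.GetAudioTokens together with a per-model tokens-per-second rate from ratio.GetAudioPromptTokensPerSecond. A rough sketch of that estimate; the duration probe and the 10 tokens/second figure below are assumptions for illustration, not the upstream implementation:

    package main

    import (
    	"fmt"
    	"math"
    )

    // audioPromptTokens estimates prompt tokens for an audio clip from its
    // duration and a per-model tokens-per-second rate, which is the shape of
    // the calculation helper.GetAudioTokens is used for in the diff.
    func audioPromptTokens(durationSeconds, tokensPerSecond float64) int {
    	return int(math.Ceil(durationSeconds * tokensPerSecond))
    }

    func main() {
    	// Hypothetical 30-second input clip at an assumed 10 tokens/second.
    	fmt.Println(audioPromptTokens(30, 10)) // 300
    }

As in the image branch, errors are logged with logger.SysError and the rest of the message is still counted.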