This commit is contained in:
Laisky.Cai
2025-01-31 19:25:30 +08:00
committed by GitHub
25 changed files with 670 additions and 273 deletions

View File

@@ -8,11 +8,14 @@ import (
"strings"
"github.com/gin-gonic/gin"
"github.com/songquanpeng/one-api/common/config"
"github.com/songquanpeng/one-api/relay/adaptor"
"github.com/songquanpeng/one-api/relay/adaptor/doubao"
"github.com/songquanpeng/one-api/relay/adaptor/minimax"
"github.com/songquanpeng/one-api/relay/adaptor/novita"
"github.com/songquanpeng/one-api/relay/channeltype"
"github.com/songquanpeng/one-api/relay/constant/role"
"github.com/songquanpeng/one-api/relay/meta"
"github.com/songquanpeng/one-api/relay/model"
"github.com/songquanpeng/one-api/relay/relaymode"
@@ -82,6 +85,28 @@ func (a *Adaptor) ConvertRequest(c *gin.Context, relayMode int, request *model.G
}
request.StreamOptions.IncludeUsage = true
}
// o1/o1-mini/o1-preview do not support system prompt and max_tokens
// refer: https://platform.openai.com/docs/api-reference/chat/create#chat-create-messages
if strings.HasPrefix(request.Model, "o1") {
request.MaxTokens = 0
request.Messages = func(raw []model.Message) (filtered []model.Message) {
for i := range raw {
if raw[i].Role != role.System {
filtered = append(filtered, raw[i])
}
}
return
}(request.Messages)
}
if request.Stream && strings.HasPrefix(request.Model, "gpt-4o-audio") && !config.EnforceIncludeUsage {
// TODO: Since it is not clear how to implement billing in stream mode,
// it is temporarily not supported
return nil, errors.New("set ENFORCE_INCLUDE_USAGE=true to enable stream mode for gpt-4o-audio")
}
return request, nil
}

View File

@@ -12,6 +12,7 @@ var ModelList = []string{
"gpt-4o-2024-11-20",
"chatgpt-4o-latest",
"gpt-4o-mini", "gpt-4o-mini-2024-07-18",
"gpt-4o-audio-preview", "gpt-4o-audio-preview-2024-12-17", "gpt-4o-audio-preview-2024-10-01",
"gpt-4-vision-preview",
"text-embedding-ada-002", "text-embedding-3-small", "text-embedding-3-large",
"text-curie-001", "text-babbage-001", "text-ada-001", "text-davinci-002", "text-davinci-003",

View File

@@ -5,15 +5,16 @@ import (
"bytes"
"encoding/json"
"io"
"math"
"net/http"
"strings"
"github.com/songquanpeng/one-api/common/render"
"github.com/gin-gonic/gin"
"github.com/songquanpeng/one-api/common"
"github.com/songquanpeng/one-api/common/conv"
"github.com/songquanpeng/one-api/common/logger"
"github.com/songquanpeng/one-api/common/render"
"github.com/songquanpeng/one-api/relay/billing/ratio"
"github.com/songquanpeng/one-api/relay/model"
"github.com/songquanpeng/one-api/relay/relaymode"
)
@@ -96,6 +97,7 @@ func StreamHandler(c *gin.Context, resp *http.Response, relayMode int) (*model.E
return nil, responseText, usage
}
// Handler handles the non-stream response from OpenAI API
func Handler(c *gin.Context, resp *http.Response, promptTokens int, modelName string) (*model.ErrorWithStatusCode, *model.Usage) {
var textResponse SlimTextResponse
responseBody, err := io.ReadAll(resp.Body)
@@ -116,8 +118,10 @@ func Handler(c *gin.Context, resp *http.Response, promptTokens int, modelName st
StatusCode: resp.StatusCode,
}, nil
}
// Reset response body
resp.Body = io.NopCloser(bytes.NewBuffer(responseBody))
logger.Debugf(c.Request.Context(), "handler response: %s", string(responseBody))
// We shouldn't set the header before we parse the response body, because the parse part may fail.
// And then we will have to send an error response, but in this case, the header has already been set.
@@ -146,6 +150,24 @@ func Handler(c *gin.Context, resp *http.Response, promptTokens int, modelName st
CompletionTokens: completionTokens,
TotalTokens: promptTokens + completionTokens,
}
} else if textResponse.PromptTokensDetails.AudioTokens+textResponse.CompletionTokensDetails.AudioTokens > 0 {
// Convert the more expensive audio tokens to uniformly priced text tokens.
// Note that when there are no audio tokens in prompt and completion,
// OpenAI will return empty PromptTokensDetails and CompletionTokensDetails, which can be misleading.
textResponse.Usage.PromptTokens = textResponse.PromptTokensDetails.TextTokens +
int(math.Ceil(
float64(textResponse.PromptTokensDetails.AudioTokens)*
ratio.GetAudioPromptRatio(modelName),
))
textResponse.Usage.CompletionTokens = textResponse.CompletionTokensDetails.TextTokens +
int(math.Ceil(
float64(textResponse.CompletionTokensDetails.AudioTokens)*
ratio.GetAudioPromptRatio(modelName)*ratio.GetAudioCompletionRatio(modelName),
))
textResponse.Usage.TotalTokens = textResponse.Usage.PromptTokens +
textResponse.Usage.CompletionTokens
}
return nil, &textResponse.Usage
}

View File

@@ -1,6 +1,10 @@
package openai
import "github.com/songquanpeng/one-api/relay/model"
import (
"mime/multipart"
"github.com/songquanpeng/one-api/relay/model"
)
type TextContent struct {
Type string `json:"type,omitempty"`
@@ -71,6 +75,24 @@ type TextToSpeechRequest struct {
ResponseFormat string `json:"response_format"`
}
type AudioTranscriptionRequest struct {
File *multipart.FileHeader `form:"file" binding:"required"`
Model string `form:"model" binding:"required"`
Language string `form:"language"`
Prompt string `form:"prompt"`
ReponseFormat string `form:"response_format" binding:"oneof=json text srt verbose_json vtt"`
Temperature float64 `form:"temperature"`
TimestampGranularity []string `form:"timestamp_granularity"`
}
type AudioTranslationRequest struct {
File *multipart.FileHeader `form:"file" binding:"required"`
Model string `form:"model" binding:"required"`
Prompt string `form:"prompt"`
ResponseFormat string `form:"response_format" binding:"oneof=json text srt verbose_json vtt"`
Temperature float64 `form:"temperature"`
}
type UsageOrResponseText struct {
*model.Usage
ResponseText string

View File

@@ -1,16 +1,22 @@
package openai
import (
"errors"
"bytes"
"context"
"encoding/base64"
"fmt"
"github.com/pkoukk/tiktoken-go"
"github.com/songquanpeng/one-api/common/config"
"github.com/songquanpeng/one-api/common/image"
"github.com/songquanpeng/one-api/common/logger"
billingratio "github.com/songquanpeng/one-api/relay/billing/ratio"
"github.com/songquanpeng/one-api/relay/model"
"math"
"strings"
"github.com/pkg/errors"
"github.com/pkoukk/tiktoken-go"
"github.com/songquanpeng/one-api/common/config"
"github.com/songquanpeng/one-api/common/helper"
"github.com/songquanpeng/one-api/common/image"
"github.com/songquanpeng/one-api/common/logger"
"github.com/songquanpeng/one-api/relay/billing/ratio"
billingratio "github.com/songquanpeng/one-api/relay/billing/ratio"
"github.com/songquanpeng/one-api/relay/model"
)
// tokenEncoderMap won't grow after initialization
@@ -70,8 +76,9 @@ func getTokenNum(tokenEncoder *tiktoken.Tiktoken, text string) int {
return len(tokenEncoder.Encode(text, nil, nil))
}
func CountTokenMessages(messages []model.Message, model string) int {
tokenEncoder := getTokenEncoder(model)
func CountTokenMessages(ctx context.Context,
messages []model.Message, actualModel string) int {
tokenEncoder := getTokenEncoder(actualModel)
// Reference:
// https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
// https://github.com/pkoukk/tiktoken-go/issues/6
@@ -79,47 +86,52 @@ func CountTokenMessages(messages []model.Message, model string) int {
// Every message follows <|start|>{role/name}\n{content}<|end|>\n
var tokensPerMessage int
var tokensPerName int
if model == "gpt-3.5-turbo-0301" {
if actualModel == "gpt-3.5-turbo-0301" {
tokensPerMessage = 4
tokensPerName = -1 // If there's a name, the role is omitted
} else {
tokensPerMessage = 3
tokensPerName = 1
}
tokenNum := 0
var totalAudioTokens float64
for _, message := range messages {
tokenNum += tokensPerMessage
switch v := message.Content.(type) {
case string:
tokenNum += getTokenNum(tokenEncoder, v)
case []any:
for _, it := range v {
m := it.(map[string]any)
switch m["type"] {
case "text":
if textValue, ok := m["text"]; ok {
if textString, ok := textValue.(string); ok {
tokenNum += getTokenNum(tokenEncoder, textString)
}
}
case "image_url":
imageUrl, ok := m["image_url"].(map[string]any)
if ok {
url := imageUrl["url"].(string)
detail := ""
if imageUrl["detail"] != nil {
detail = imageUrl["detail"].(string)
}
imageTokens, err := countImageTokens(url, detail, model)
if err != nil {
logger.SysError("error counting image tokens: " + err.Error())
} else {
tokenNum += imageTokens
}
}
contents := message.ParseContent()
for _, content := range contents {
switch content.Type {
case model.ContentTypeText:
tokenNum += getTokenNum(tokenEncoder, content.Text)
case model.ContentTypeImageURL:
imageTokens, err := countImageTokens(
content.ImageURL.Url,
content.ImageURL.Detail,
actualModel)
if err != nil {
logger.SysError("error counting image tokens: " + err.Error())
} else {
tokenNum += imageTokens
}
case model.ContentTypeInputAudio:
audioData, err := base64.StdEncoding.DecodeString(content.InputAudio.Data)
if err != nil {
logger.SysError("error decoding audio data: " + err.Error())
}
audioTokens, err := helper.GetAudioTokens(ctx,
bytes.NewReader(audioData),
ratio.GetAudioPromptTokensPerSecond(actualModel))
if err != nil {
logger.SysError("error counting audio tokens: " + err.Error())
} else {
totalAudioTokens += audioTokens
}
}
}
tokenNum += int(math.Ceil(totalAudioTokens))
tokenNum += getTokenNum(tokenEncoder, message.Role)
if message.Name != nil {
tokenNum += tokensPerName

View File

@@ -60,7 +60,6 @@ func (a *Adaptor) GetChannelName() string {
func (a *Adaptor) GetRequestURL(meta *meta.Meta) (string, error) {
prefix := fmt.Sprintf("/v1/oneapi/proxy/%d", meta.ChannelId)
return meta.BaseURL + strings.TrimPrefix(meta.RequestURLPath, prefix), nil
}
func (a *Adaptor) SetupRequestHeader(c *gin.Context, req *http.Request, meta *meta.Meta) error {

View File

@@ -23,65 +23,71 @@ const (
// 1 === ¥0.014 / 1k tokens
var ModelRatio = map[string]float64{
// https://openai.com/pricing
"gpt-4": 15,
"gpt-4-0314": 15,
"gpt-4-0613": 15,
"gpt-4-32k": 30,
"gpt-4-32k-0314": 30,
"gpt-4-32k-0613": 30,
"gpt-4-1106-preview": 5, // $0.01 / 1K tokens
"gpt-4-0125-preview": 5, // $0.01 / 1K tokens
"gpt-4-turbo-preview": 5, // $0.01 / 1K tokens
"gpt-4-turbo": 5, // $0.01 / 1K tokens
"gpt-4-turbo-2024-04-09": 5, // $0.01 / 1K tokens
"gpt-4o": 2.5, // $0.005 / 1K tokens
"chatgpt-4o-latest": 2.5, // $0.005 / 1K tokens
"gpt-4o-2024-05-13": 2.5, // $0.005 / 1K tokens
"gpt-4o-2024-08-06": 1.25, // $0.0025 / 1K tokens
"gpt-4o-2024-11-20": 1.25, // $0.0025 / 1K tokens
"gpt-4o-mini": 0.075, // $0.00015 / 1K tokens
"gpt-4o-mini-2024-07-18": 0.075, // $0.00015 / 1K tokens
"gpt-4-vision-preview": 5, // $0.01 / 1K tokens
"gpt-3.5-turbo": 0.25, // $0.0005 / 1K tokens
"gpt-3.5-turbo-0301": 0.75,
"gpt-3.5-turbo-0613": 0.75,
"gpt-3.5-turbo-16k": 1.5, // $0.003 / 1K tokens
"gpt-3.5-turbo-16k-0613": 1.5,
"gpt-3.5-turbo-instruct": 0.75, // $0.0015 / 1K tokens
"gpt-3.5-turbo-1106": 0.5, // $0.001 / 1K tokens
"gpt-3.5-turbo-0125": 0.25, // $0.0005 / 1K tokens
"o1": 7.5, // $15.00 / 1M input tokens
"o1-2024-12-17": 7.5,
"o1-preview": 7.5, // $15.00 / 1M input tokens
"o1-preview-2024-09-12": 7.5,
"o1-mini": 1.5, // $3.00 / 1M input tokens
"o1-mini-2024-09-12": 1.5,
"davinci-002": 1, // $0.002 / 1K tokens
"babbage-002": 0.2, // $0.0004 / 1K tokens
"text-ada-001": 0.2,
"text-babbage-001": 0.25,
"text-curie-001": 1,
"text-davinci-002": 10,
"text-davinci-003": 10,
"text-davinci-edit-001": 10,
"code-davinci-edit-001": 10,
"whisper-1": 15, // $0.006 / minute -> $0.006 / 150 words -> $0.006 / 200 tokens -> $0.03 / 1k tokens
"tts-1": 7.5, // $0.015 / 1K characters
"tts-1-1106": 7.5,
"tts-1-hd": 15, // $0.030 / 1K characters
"tts-1-hd-1106": 15,
"davinci": 10,
"curie": 10,
"babbage": 10,
"ada": 10,
"text-embedding-ada-002": 0.05,
"text-embedding-3-small": 0.01,
"text-embedding-3-large": 0.065,
"text-search-ada-doc-001": 10,
"text-moderation-stable": 0.1,
"text-moderation-latest": 0.1,
"dall-e-2": 0.02 * USD, // $0.016 - $0.020 / image
"dall-e-3": 0.04 * USD, // $0.040 - $0.120 / image
"gpt-4": 15,
"gpt-4-0314": 15,
"gpt-4-0613": 15,
"gpt-4-32k": 30,
"gpt-4-32k-0314": 30,
"gpt-4-32k-0613": 30,
"gpt-4-1106-preview": 5, // $0.01 / 1K tokens
"gpt-4-0125-preview": 5, // $0.01 / 1K tokens
"gpt-4-turbo-preview": 5, // $0.01 / 1K tokens
"gpt-4-turbo": 5, // $0.01 / 1K tokens
"gpt-4-turbo-2024-04-09": 5, // $0.01 / 1K tokens
"gpt-4o": 2.5, // $0.005 / 1K tokens
"chatgpt-4o-latest": 2.5, // $0.005 / 1K tokens
"gpt-4o-2024-05-13": 2.5, // $0.005 / 1K tokens
"gpt-4o-2024-08-06": 1.25, // $0.0025 / 1K tokens
"gpt-4o-2024-11-20": 1.25, // $0.0025 / 1K tokens
"gpt-4o-mini": 0.075, // $0.00015 / 1K tokens
"gpt-4o-mini-2024-07-18": 0.075, // $0.00015 / 1K tokens
"gpt-4-vision-preview": 5, // $0.01 / 1K tokens
// Audio billing will mix text and audio tokens, the unit price is different.
// Here records the cost of text, the cost multiplier of audio
// relative to text is in AudioRatio
"gpt-4o-audio-preview": 1.25, // $0.0025 / 1K tokens
"gpt-4o-audio-preview-2024-12-17": 1.25, // $0.0025 / 1K tokens
"gpt-4o-audio-preview-2024-10-01": 1.25, // $0.0025 / 1K tokens
"gpt-3.5-turbo": 0.25, // $0.0005 / 1K tokens
"gpt-3.5-turbo-0301": 0.75,
"gpt-3.5-turbo-0613": 0.75,
"gpt-3.5-turbo-16k": 1.5, // $0.003 / 1K tokens
"gpt-3.5-turbo-16k-0613": 1.5,
"gpt-3.5-turbo-instruct": 0.75, // $0.0015 / 1K tokens
"gpt-3.5-turbo-1106": 0.5, // $0.001 / 1K tokens
"gpt-3.5-turbo-0125": 0.25, // $0.0005 / 1K tokens
"o1": 7.5, // $15.00 / 1M input tokens
"o1-2024-12-17": 7.5,
"o1-preview": 7.5, // $15.00 / 1M input tokens
"o1-preview-2024-09-12": 7.5,
"o1-mini": 1.5, // $3.00 / 1M input tokens
"o1-mini-2024-09-12": 1.5,
"davinci-002": 1, // $0.002 / 1K tokens
"babbage-002": 0.2, // $0.0004 / 1K tokens
"text-ada-001": 0.2,
"text-babbage-001": 0.25,
"text-curie-001": 1,
"text-davinci-002": 10,
"text-davinci-003": 10,
"text-davinci-edit-001": 10,
"code-davinci-edit-001": 10,
"whisper-1": 15,
"tts-1": 7.5, // $0.015 / 1K characters
"tts-1-1106": 7.5,
"tts-1-hd": 15, // $0.030 / 1K characters
"tts-1-hd-1106": 15,
"davinci": 10,
"curie": 10,
"babbage": 10,
"ada": 10,
"text-embedding-ada-002": 0.05,
"text-embedding-3-small": 0.01,
"text-embedding-3-large": 0.065,
"text-search-ada-doc-001": 10,
"text-moderation-stable": 0.1,
"text-moderation-latest": 0.1,
"dall-e-2": 0.02 * USD, // $0.016 - $0.020 / image
"dall-e-3": 0.04 * USD, // $0.040 - $0.120 / image
// https://www.anthropic.com/api#pricing
"claude-instant-1.2": 0.8 / 1000 * USD,
"claude-2.0": 8.0 / 1000 * USD,
@@ -256,7 +262,6 @@ var ModelRatio = map[string]float64{
"llama3-groq-70b-8192-tool-use-preview": 0.89 / 1000000 * USD,
"llama3-groq-8b-8192-tool-use-preview": 0.19 / 1000000 * USD,
"mixtral-8x7b-32768": 0.24 / 1000000 * USD,
// https://platform.lingyiwanwu.com/docs#-计费单元
"yi-34b-chat-0205": 2.5 / 1000 * RMB,
"yi-34b-chat-200k": 12.0 / 1000 * RMB,
@@ -335,6 +340,69 @@ var ModelRatio = map[string]float64{
"mistralai/mixtral-8x7b-instruct-v0.1": 0.300 * USD,
}
// AudioRatio represents the price ratio between audio tokens and text tokens
var AudioRatio = map[string]float64{
"gpt-4o-audio-preview": 16,
"gpt-4o-audio-preview-2024-12-17": 16,
"gpt-4o-audio-preview-2024-10-01": 40,
}
// GetAudioPromptRatio returns the audio prompt ratio for the given model.
func GetAudioPromptRatio(actualModelName string) float64 {
var v float64
if ratio, ok := AudioRatio[actualModelName]; ok {
v = ratio
} else {
v = 16
}
return v
}
// AudioCompletionRatio is the completion ratio for audio models.
var AudioCompletionRatio = map[string]float64{
"whisper-1": 0,
"gpt-4o-audio-preview": 2,
"gpt-4o-audio-preview-2024-12-17": 2,
"gpt-4o-audio-preview-2024-10-01": 2,
}
// GetAudioCompletionRatio returns the completion ratio for audio models.
func GetAudioCompletionRatio(actualModelName string) float64 {
var v float64
if ratio, ok := AudioCompletionRatio[actualModelName]; ok {
v = ratio
} else {
v = 2
}
return v
}
// AudioTokensPerSecond is the number of audio tokens per second for each model.
var AudioPromptTokensPerSecond = map[string]float64{
// whisper 的 API 价格是 $0.0001/sec。one-api 的历史倍率为 15对应 $0.03/kilo_tokens。
// 那么换算后可得,每秒的 tokens 应该为 0.0001/0.03*1000 = 3.3333
"whisper-1": 0.0001 / 0.03 * 1000,
// gpt-4o-audio series processes 10 tokens per second
"gpt-4o-audio-preview": 10,
"gpt-4o-audio-preview-2024-12-17": 10,
"gpt-4o-audio-preview-2024-10-01": 10,
}
// GetAudioPromptTokensPerSecond returns the number of audio tokens per second
// for the given model.
func GetAudioPromptTokensPerSecond(actualModelName string) float64 {
var v float64
if tokensPerSecond, ok := AudioPromptTokensPerSecond[actualModelName]; ok {
v = tokensPerSecond
} else {
v = 10
}
return v
}
var CompletionRatio = map[string]float64{
// aws llama3
"llama3-8b-8192(33)": 0.0006 / 0.0003,
@@ -402,19 +470,21 @@ func GetModelRatio(name string, channelType int) float64 {
if strings.HasPrefix(name, "command-") && strings.HasSuffix(name, "-internet") {
name = strings.TrimSuffix(name, "-internet")
}
model := fmt.Sprintf("%s(%d)", name, channelType)
if ratio, ok := ModelRatio[model]; ok {
return ratio
}
if ratio, ok := DefaultModelRatio[model]; ok {
return ratio
}
if ratio, ok := ModelRatio[name]; ok {
return ratio
}
if ratio, ok := DefaultModelRatio[name]; ok {
return ratio
for _, targetName := range []string{model, name} {
for _, ratioMap := range []map[string]float64{
ModelRatio,
DefaultModelRatio,
AudioRatio,
} {
if ratio, ok := ratioMap[targetName]; ok {
return ratio
}
}
}
logger.SysError("model ratio not found: " + name)
return 30
}
@@ -437,18 +507,19 @@ func GetCompletionRatio(name string, channelType int) float64 {
name = strings.TrimSuffix(name, "-internet")
}
model := fmt.Sprintf("%s(%d)", name, channelType)
if ratio, ok := CompletionRatio[model]; ok {
return ratio
}
if ratio, ok := DefaultCompletionRatio[model]; ok {
return ratio
}
if ratio, ok := CompletionRatio[name]; ok {
return ratio
}
if ratio, ok := DefaultCompletionRatio[name]; ok {
return ratio
for _, targetName := range []string{model, name} {
for _, ratioMap := range []map[string]float64{
CompletionRatio,
DefaultCompletionRatio,
AudioCompletionRatio,
} {
if ratio, ok := ratioMap[targetName]; ok {
return ratio
}
}
}
if strings.HasPrefix(name, "gpt-3.5") {
if name == "gpt-3.5-turbo" || strings.HasSuffix(name, "0125") {
// https://openai.com/blog/new-embedding-models-and-api-updates

View File

@@ -3,4 +3,5 @@ package role
const (
System = "system"
Assistant = "assistant"
Developer = "developer"
)

View File

@@ -5,21 +5,24 @@ import (
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"io"
"math"
"mime/multipart"
"net/http"
"strings"
"github.com/gin-gonic/gin"
"github.com/pkg/errors"
"github.com/songquanpeng/one-api/common"
"github.com/songquanpeng/one-api/common/client"
"github.com/songquanpeng/one-api/common/config"
"github.com/songquanpeng/one-api/common/ctxkey"
"github.com/songquanpeng/one-api/common/helper"
"github.com/songquanpeng/one-api/common/logger"
"github.com/songquanpeng/one-api/model"
"github.com/songquanpeng/one-api/relay/adaptor/openai"
"github.com/songquanpeng/one-api/relay/billing"
"github.com/songquanpeng/one-api/relay/billing/ratio"
billingratio "github.com/songquanpeng/one-api/relay/billing/ratio"
"github.com/songquanpeng/one-api/relay/channeltype"
"github.com/songquanpeng/one-api/relay/meta"
@@ -27,6 +30,35 @@ import (
"github.com/songquanpeng/one-api/relay/relaymode"
)
type commonAudioRequest struct {
File *multipart.FileHeader `form:"file" binding:"required"`
}
func countAudioTokens(c *gin.Context) (float64, error) {
body, err := common.GetRequestBody(c)
if err != nil {
return 0, errors.WithStack(err)
}
reqBody := new(commonAudioRequest)
c.Request.Body = io.NopCloser(bytes.NewReader(body))
if err = c.ShouldBind(reqBody); err != nil {
return 0, errors.WithStack(err)
}
reqFp, err := reqBody.File.Open()
if err != nil {
return 0, errors.WithStack(err)
}
defer reqFp.Close()
ctxMeta := meta.GetByContext(c)
return helper.GetAudioTokens(c.Request.Context(),
reqFp,
ratio.GetAudioPromptTokensPerSecond(ctxMeta.ActualModelName))
}
func RelayAudioHelper(c *gin.Context, relayMode int) *relaymodel.ErrorWithStatusCode {
ctx := c.Request.Context()
meta := meta.GetByContext(c)
@@ -63,9 +95,19 @@ func RelayAudioHelper(c *gin.Context, relayMode int) *relaymodel.ErrorWithStatus
case relaymode.AudioSpeech:
preConsumedQuota = int64(float64(len(ttsRequest.Input)) * ratio)
quota = preConsumedQuota
case relaymode.AudioTranscription,
relaymode.AudioTranslation:
audioTokens, err := countAudioTokens(c)
if err != nil {
return openai.ErrorWrapper(err, "count_audio_tokens_failed", http.StatusInternalServerError)
}
preConsumedQuota = int64(math.Ceil(audioTokens * ratio))
quota = preConsumedQuota
default:
preConsumedQuota = int64(float64(config.PreConsumedQuota) * ratio)
return openai.ErrorWrapper(errors.New("unexpected_relay_mode"), "unexpected_relay_mode", http.StatusInternalServerError)
}
userQuota, err := model.CacheGetUserQuota(ctx, userId)
if err != nil {
return openai.ErrorWrapper(err, "get_user_quota_failed", http.StatusInternalServerError)
@@ -139,7 +181,7 @@ func RelayAudioHelper(c *gin.Context, relayMode int) *relaymodel.ErrorWithStatus
return openai.ErrorWrapper(err, "new_request_body_failed", http.StatusInternalServerError)
}
c.Request.Body = io.NopCloser(bytes.NewBuffer(requestBody.Bytes()))
responseFormat := c.DefaultPostForm("response_format", "json")
// responseFormat := c.DefaultPostForm("response_format", "json")
req, err := http.NewRequest(c.Request.Method, fullRequestURL, requestBody)
if err != nil {
@@ -172,47 +214,53 @@ func RelayAudioHelper(c *gin.Context, relayMode int) *relaymodel.ErrorWithStatus
return openai.ErrorWrapper(err, "close_request_body_failed", http.StatusInternalServerError)
}
if relayMode != relaymode.AudioSpeech {
responseBody, err := io.ReadAll(resp.Body)
if err != nil {
return openai.ErrorWrapper(err, "read_response_body_failed", http.StatusInternalServerError)
}
err = resp.Body.Close()
if err != nil {
return openai.ErrorWrapper(err, "close_response_body_failed", http.StatusInternalServerError)
}
// https://github.com/Laisky/one-api/pull/21
// Commenting out the following code because Whisper's transcription
// only charges for the length of the input audio, not for the output.
// -------------------------------------
// if relayMode != relaymode.AudioSpeech {
// responseBody, err := io.ReadAll(resp.Body)
// if err != nil {
// return openai.ErrorWrapper(err, "read_response_body_failed", http.StatusInternalServerError)
// }
// err = resp.Body.Close()
// if err != nil {
// return openai.ErrorWrapper(err, "close_response_body_failed", http.StatusInternalServerError)
// }
var openAIErr openai.SlimTextResponse
if err = json.Unmarshal(responseBody, &openAIErr); err == nil {
if openAIErr.Error.Message != "" {
return openai.ErrorWrapper(fmt.Errorf("type %s, code %v, message %s", openAIErr.Error.Type, openAIErr.Error.Code, openAIErr.Error.Message), "request_error", http.StatusInternalServerError)
}
}
// var openAIErr openai.SlimTextResponse
// if err = json.Unmarshal(responseBody, &openAIErr); err == nil {
// if openAIErr.Error.Message != "" {
// return openai.ErrorWrapper(errors.Errorf("type %s, code %v, message %s", openAIErr.Error.Type, openAIErr.Error.Code, openAIErr.Error.Message), "request_error", http.StatusInternalServerError)
// }
// }
// var text string
// switch responseFormat {
// case "json":
// text, err = getTextFromJSON(responseBody)
// case "text":
// text, err = getTextFromText(responseBody)
// case "srt":
// text, err = getTextFromSRT(responseBody)
// case "verbose_json":
// text, err = getTextFromVerboseJSON(responseBody)
// case "vtt":
// text, err = getTextFromVTT(responseBody)
// default:
// return openai.ErrorWrapper(errors.New("unexpected_response_format"), "unexpected_response_format", http.StatusInternalServerError)
// }
// if err != nil {
// return openai.ErrorWrapper(err, "get_text_from_body_err", http.StatusInternalServerError)
// }
// quota = int64(openai.CountTokenText(text, audioModel))
// resp.Body = io.NopCloser(bytes.NewBuffer(responseBody))
// }
var text string
switch responseFormat {
case "json":
text, err = getTextFromJSON(responseBody)
case "text":
text, err = getTextFromText(responseBody)
case "srt":
text, err = getTextFromSRT(responseBody)
case "verbose_json":
text, err = getTextFromVerboseJSON(responseBody)
case "vtt":
text, err = getTextFromVTT(responseBody)
default:
return openai.ErrorWrapper(errors.New("unexpected_response_format"), "unexpected_response_format", http.StatusInternalServerError)
}
if err != nil {
return openai.ErrorWrapper(err, "get_text_from_body_err", http.StatusInternalServerError)
}
quota = int64(openai.CountTokenText(text, audioModel))
resp.Body = io.NopCloser(bytes.NewBuffer(responseBody))
}
if resp.StatusCode != http.StatusOK {
return RelayErrorHandler(resp)
}
succeed = true
quotaDelta := quota - preConsumedQuota
defer func(ctx context.Context) {

View File

@@ -19,6 +19,7 @@ import (
"github.com/songquanpeng/one-api/relay/adaptor/openai"
billingratio "github.com/songquanpeng/one-api/relay/billing/ratio"
"github.com/songquanpeng/one-api/relay/channeltype"
"github.com/songquanpeng/one-api/relay/constant/role"
"github.com/songquanpeng/one-api/relay/controller/validator"
"github.com/songquanpeng/one-api/relay/meta"
relaymodel "github.com/songquanpeng/one-api/relay/model"
@@ -44,10 +45,10 @@ func getAndValidateTextRequest(c *gin.Context, relayMode int) (*relaymodel.Gener
return textRequest, nil
}
func getPromptTokens(textRequest *relaymodel.GeneralOpenAIRequest, relayMode int) int {
func getPromptTokens(ctx context.Context, textRequest *relaymodel.GeneralOpenAIRequest, relayMode int) int {
switch relayMode {
case relaymode.ChatCompletions:
return openai.CountTokenMessages(textRequest.Messages, textRequest.Model)
return openai.CountTokenMessages(ctx, textRequest.Messages, textRequest.Model)
case relaymode.Completions:
return openai.CountTokenInput(textRequest.Prompt, textRequest.Model)
case relaymode.Moderations:
@@ -131,17 +132,6 @@ func postConsumeQuota(ctx context.Context, usage *relaymodel.Usage, meta *meta.M
model.UpdateChannelUsedQuota(meta.ChannelId, quota)
}
func getMappedModelName(modelName string, mapping map[string]string) (string, bool) {
if mapping == nil {
return modelName, false
}
mappedModelName := mapping[modelName]
if mappedModelName != "" {
return mappedModelName, true
}
return modelName, false
}
func isErrorHappened(meta *meta.Meta, resp *http.Response) bool {
if resp == nil {
if meta.ChannelType == channeltype.AwsClaude {

View File

@@ -19,7 +19,7 @@ import (
"github.com/songquanpeng/one-api/relay/adaptor/openai"
billingratio "github.com/songquanpeng/one-api/relay/billing/ratio"
"github.com/songquanpeng/one-api/relay/channeltype"
"github.com/songquanpeng/one-api/relay/meta"
relaymeta "github.com/songquanpeng/one-api/relay/meta"
relaymodel "github.com/songquanpeng/one-api/relay/model"
)
@@ -66,7 +66,7 @@ func getImageSizeRatio(model string, size string) float64 {
return 1
}
func validateImageRequest(imageRequest *relaymodel.ImageRequest, _ *meta.Meta) *relaymodel.ErrorWithStatusCode {
func validateImageRequest(imageRequest *relaymodel.ImageRequest, _ *relaymeta.Meta) *relaymodel.ErrorWithStatusCode {
// check prompt length
if imageRequest.Prompt == "" {
return openai.ErrorWrapper(errors.New("prompt is required"), "prompt_missing", http.StatusBadRequest)
@@ -105,7 +105,7 @@ func getImageCostRatio(imageRequest *relaymodel.ImageRequest) (float64, error) {
func RelayImageHelper(c *gin.Context, relayMode int) *relaymodel.ErrorWithStatusCode {
ctx := c.Request.Context()
meta := meta.GetByContext(c)
meta := relaymeta.GetByContext(c)
imageRequest, err := getImageRequest(c, meta.Mode)
if err != nil {
logger.Errorf(ctx, "getImageRequest failed: %s", err.Error())
@@ -115,7 +115,8 @@ func RelayImageHelper(c *gin.Context, relayMode int) *relaymodel.ErrorWithStatus
// map model name
var isModelMapped bool
meta.OriginModelName = imageRequest.Model
imageRequest.Model, isModelMapped = getMappedModelName(imageRequest.Model, meta.ModelMapping)
imageRequest.Model = meta.ActualModelName
isModelMapped = meta.OriginModelName != meta.ActualModelName
meta.ActualModelName = imageRequest.Model
// model validation
@@ -131,7 +132,7 @@ func RelayImageHelper(c *gin.Context, relayMode int) *relaymodel.ErrorWithStatus
imageModel := imageRequest.Model
// Convert the original image model
imageRequest.Model, _ = getMappedModelName(imageRequest.Model, billingratio.ImageOriginModelName)
imageRequest.Model = relaymeta.GetMappedModelName(imageRequest.Model, billingratio.ImageOriginModelName)
c.Set("response_format", imageRequest.ResponseFormat)
var requestBody io.Reader

View File

@@ -4,11 +4,12 @@ import (
"bytes"
"encoding/json"
"fmt"
"github.com/songquanpeng/one-api/common/config"
"io"
"net/http"
"github.com/gin-gonic/gin"
"github.com/songquanpeng/one-api/common/config"
"github.com/songquanpeng/one-api/common/logger"
"github.com/songquanpeng/one-api/relay"
"github.com/songquanpeng/one-api/relay/adaptor"
@@ -17,13 +18,13 @@ import (
"github.com/songquanpeng/one-api/relay/billing"
billingratio "github.com/songquanpeng/one-api/relay/billing/ratio"
"github.com/songquanpeng/one-api/relay/channeltype"
"github.com/songquanpeng/one-api/relay/meta"
"github.com/songquanpeng/one-api/relay/model"
relaymeta "github.com/songquanpeng/one-api/relay/meta"
relaymodel "github.com/songquanpeng/one-api/relay/model"
)
func RelayTextHelper(c *gin.Context) *model.ErrorWithStatusCode {
func RelayTextHelper(c *gin.Context) *relaymodel.ErrorWithStatusCode {
ctx := c.Request.Context()
meta := meta.GetByContext(c)
meta := relaymeta.GetByContext(c)
// get & validate textRequest
textRequest, err := getAndValidateTextRequest(c, meta.Mode)
if err != nil {
@@ -34,7 +35,7 @@ func RelayTextHelper(c *gin.Context) *model.ErrorWithStatusCode {
// map model name
meta.OriginModelName = textRequest.Model
textRequest.Model, _ = getMappedModelName(textRequest.Model, meta.ModelMapping)
textRequest.Model = meta.ActualModelName
meta.ActualModelName = textRequest.Model
// set system prompt if not empty
systemPromptReset := setSystemPrompt(ctx, textRequest, meta.SystemPrompt)
@@ -43,7 +44,7 @@ func RelayTextHelper(c *gin.Context) *model.ErrorWithStatusCode {
groupRatio := billingratio.GetGroupRatio(meta.Group)
ratio := modelRatio * groupRatio
// pre-consume quota
promptTokens := getPromptTokens(textRequest, meta.Mode)
promptTokens := getPromptTokens(c.Request.Context(), textRequest, meta.Mode)
meta.PromptTokens = promptTokens
preConsumedQuota, bizErr := preConsumeQuota(ctx, textRequest, promptTokens, ratio, meta)
if bizErr != nil {
@@ -86,9 +87,12 @@ func RelayTextHelper(c *gin.Context) *model.ErrorWithStatusCode {
return nil
}
func getRequestBody(c *gin.Context, meta *meta.Meta, textRequest *model.GeneralOpenAIRequest, adaptor adaptor.Adaptor) (io.Reader, error) {
if !config.EnforceIncludeUsage && meta.APIType == apitype.OpenAI && meta.OriginModelName == meta.ActualModelName && meta.ChannelType != channeltype.Baichuan {
// no need to convert request for openai
func getRequestBody(c *gin.Context, meta *relaymeta.Meta, textRequest *relaymodel.GeneralOpenAIRequest, adaptor adaptor.Adaptor) (io.Reader, error) {
if !config.EnforceIncludeUsage &&
meta.APIType == apitype.OpenAI &&
meta.OriginModelName == meta.ActualModelName &&
meta.ChannelType != channeltype.OpenAI && // openai also need to convert request
meta.ChannelType != channeltype.Baichuan {
return c.Request.Body, nil
}

View File

@@ -1,12 +1,13 @@
package meta
import (
"strings"
"github.com/gin-gonic/gin"
"github.com/songquanpeng/one-api/common/ctxkey"
"github.com/songquanpeng/one-api/model"
"github.com/songquanpeng/one-api/relay/channeltype"
"github.com/songquanpeng/one-api/relay/relaymode"
"strings"
)
type Meta struct {
@@ -33,6 +34,20 @@ type Meta struct {
SystemPrompt string
}
// GetMappedModelName returns the mapped model name and a bool indicating if the model name is mapped
func GetMappedModelName(modelName string, mapping map[string]string) string {
if mapping == nil {
return modelName
}
mappedModelName := mapping[modelName]
if mappedModelName != "" {
return mappedModelName
}
return modelName
}
func GetByContext(c *gin.Context) *Meta {
meta := Meta{
Mode: relaymode.GetByPath(c.Request.URL.Path),
@@ -44,6 +59,7 @@ func GetByContext(c *gin.Context) *Meta {
Group: c.GetString(ctxkey.Group),
ModelMapping: c.GetStringMapString(ctxkey.ModelMapping),
OriginModelName: c.GetString(ctxkey.RequestModel),
ActualModelName: c.GetString(ctxkey.RequestModel),
BaseURL: c.GetString(ctxkey.BaseURL),
APIKey: strings.TrimPrefix(c.Request.Header.Get("Authorization"), "Bearer "),
RequestURLPath: c.Request.URL.String(),
@@ -57,5 +73,13 @@ func GetByContext(c *gin.Context) *Meta {
meta.BaseURL = channeltype.ChannelBaseURLs[meta.ChannelType]
}
meta.APIType = channeltype.ToAPIType(meta.ChannelType)
meta.ActualModelName = GetMappedModelName(meta.OriginModelName, meta.ModelMapping)
Set2Context(c, &meta)
return &meta
}
func Set2Context(c *gin.Context, meta *Meta) {
c.Set(ctxkey.Meta, meta)
}

View File

@@ -23,36 +23,37 @@ type StreamOptions struct {
type GeneralOpenAIRequest struct {
// https://platform.openai.com/docs/api-reference/chat/create
Messages []Message `json:"messages,omitempty"`
Model string `json:"model,omitempty"`
Store *bool `json:"store,omitempty"`
Metadata any `json:"metadata,omitempty"`
FrequencyPenalty *float64 `json:"frequency_penalty,omitempty"`
LogitBias any `json:"logit_bias,omitempty"`
Logprobs *bool `json:"logprobs,omitempty"`
TopLogprobs *int `json:"top_logprobs,omitempty"`
MaxTokens int `json:"max_tokens,omitempty"`
MaxCompletionTokens *int `json:"max_completion_tokens,omitempty"`
N int `json:"n,omitempty"`
Modalities []string `json:"modalities,omitempty"`
Prediction any `json:"prediction,omitempty"`
Audio *Audio `json:"audio,omitempty"`
PresencePenalty *float64 `json:"presence_penalty,omitempty"`
ResponseFormat *ResponseFormat `json:"response_format,omitempty"`
Seed float64 `json:"seed,omitempty"`
ServiceTier *string `json:"service_tier,omitempty"`
Stop any `json:"stop,omitempty"`
Stream bool `json:"stream,omitempty"`
StreamOptions *StreamOptions `json:"stream_options,omitempty"`
Temperature *float64 `json:"temperature,omitempty"`
TopP *float64 `json:"top_p,omitempty"`
TopK int `json:"top_k,omitempty"`
Tools []Tool `json:"tools,omitempty"`
ToolChoice any `json:"tool_choice,omitempty"`
ParallelTooCalls *bool `json:"parallel_tool_calls,omitempty"`
User string `json:"user,omitempty"`
FunctionCall any `json:"function_call,omitempty"`
Functions any `json:"functions,omitempty"`
Messages []Message `json:"messages,omitempty"`
Model string `json:"model,omitempty"`
Store *bool `json:"store,omitempty"`
Metadata any `json:"metadata,omitempty"`
FrequencyPenalty *float64 `json:"frequency_penalty,omitempty"`
LogitBias any `json:"logit_bias,omitempty"`
Logprobs *bool `json:"logprobs,omitempty"`
TopLogprobs *int `json:"top_logprobs,omitempty"`
MaxTokens int `json:"max_tokens,omitempty"`
MaxCompletionTokens *int `json:"max_completion_tokens,omitempty"`
N int `json:"n,omitempty"`
// Modalities currently the model only programmatically allows modalities = [“text”, “audio”]
Modalities []string `json:"modalities,omitempty"`
Prediction any `json:"prediction,omitempty"`
Audio *Audio `json:"audio,omitempty"`
PresencePenalty *float64 `json:"presence_penalty,omitempty"`
ResponseFormat *ResponseFormat `json:"response_format,omitempty"`
Seed float64 `json:"seed,omitempty"`
ServiceTier *string `json:"service_tier,omitempty"`
Stop any `json:"stop,omitempty"`
Stream bool `json:"stream,omitempty"`
StreamOptions *StreamOptions `json:"stream_options,omitempty"`
Temperature *float64 `json:"temperature,omitempty"`
TopP *float64 `json:"top_p,omitempty"`
TopK int `json:"top_k,omitempty"`
Tools []Tool `json:"tools,omitempty"`
ToolChoice any `json:"tool_choice,omitempty"`
ParallelTooCalls *bool `json:"parallel_tool_calls,omitempty"`
User string `json:"user,omitempty"`
FunctionCall any `json:"function_call,omitempty"`
Functions any `json:"functions,omitempty"`
// https://platform.openai.com/docs/api-reference/embeddings/create
Input any `json:"input,omitempty"`
EncodingFormat string `json:"encoding_format,omitempty"`

View File

@@ -1,11 +1,26 @@
package model
import (
"context"
"github.com/songquanpeng/one-api/common/logger"
)
type Message struct {
Role string `json:"role,omitempty"`
Content any `json:"content,omitempty"`
Name *string `json:"name,omitempty"`
ToolCalls []Tool `json:"tool_calls,omitempty"`
ToolCallId string `json:"tool_call_id,omitempty"`
Role string `json:"role,omitempty"`
// Content is a string or a list of objects
Content any `json:"content,omitempty"`
Name *string `json:"name,omitempty"`
ToolCalls []Tool `json:"tool_calls,omitempty"`
ToolCallId string `json:"tool_call_id,omitempty"`
Audio *messageAudio `json:"audio,omitempty"`
}
type messageAudio struct {
Id string `json:"id"`
Data string `json:"data,omitempty"`
ExpiredAt int `json:"expired_at,omitempty"`
Transcript string `json:"transcript,omitempty"`
}
func (m Message) IsStringContent() bool {
@@ -26,6 +41,7 @@ func (m Message) StringContent() string {
if !ok {
continue
}
if contentMap["type"] == ContentTypeText {
if subStr, ok := contentMap["text"].(string); ok {
contentStr += subStr
@@ -34,6 +50,7 @@ func (m Message) StringContent() string {
}
return contentStr
}
return ""
}
@@ -47,6 +64,7 @@ func (m Message) ParseContent() []MessageContent {
})
return contentList
}
anyList, ok := m.Content.([]any)
if ok {
for _, contentItem := range anyList {
@@ -71,8 +89,21 @@ func (m Message) ParseContent() []MessageContent {
},
})
}
case ContentTypeInputAudio:
if subObj, ok := contentMap["input_audio"].(map[string]any); ok {
contentList = append(contentList, MessageContent{
Type: ContentTypeInputAudio,
InputAudio: &InputAudio{
Data: subObj["data"].(string),
Format: subObj["format"].(string),
},
})
}
default:
logger.Warnf(context.TODO(), "unknown content type: %s", contentMap["type"])
}
}
return contentList
}
return nil
@@ -84,7 +115,18 @@ type ImageURL struct {
}
type MessageContent struct {
Type string `json:"type,omitempty"`
Text string `json:"text"`
ImageURL *ImageURL `json:"image_url,omitempty"`
// Type should be one of the following: text/input_audio
Type string `json:"type,omitempty"`
Text string `json:"text"`
ImageURL *ImageURL `json:"image_url,omitempty"`
InputAudio *InputAudio `json:"input_audio,omitempty"`
}
type InputAudio struct {
// Data is the base64 encoded audio data
Data string `json:"data" binding:"required"`
// Format is the audio format, should be one of the
// following: mp3/mp4/mpeg/mpga/m4a/wav/webm/pcm16.
// When stream=true, format should be pcm16
Format string `json:"format"`
}

View File

@@ -4,6 +4,12 @@ type Usage struct {
PromptTokens int `json:"prompt_tokens"`
CompletionTokens int `json:"completion_tokens"`
TotalTokens int `json:"total_tokens"`
// PromptTokensDetails may be empty for some models
PromptTokensDetails usagePromptTokensDetails `gorm:"-" json:"prompt_tokens_details,omitempty"`
// CompletionTokensDetails may be empty for some models
CompletionTokensDetails usageCompletionTokensDetails `gorm:"-" json:"completion_tokens_details,omitempty"`
ServiceTier string `gorm:"-" json:"service_tier,omitempty"`
SystemFingerprint string `gorm:"-" json:"system_fingerprint,omitempty"`
}
type Error struct {
@@ -17,3 +23,20 @@ type ErrorWithStatusCode struct {
Error
StatusCode int `json:"status_code"`
}
type usagePromptTokensDetails struct {
CachedTokens int `json:"cached_tokens"`
AudioTokens int `json:"audio_tokens"`
// TextTokens could be zero for pure text chats
TextTokens int `json:"text_tokens"`
ImageTokens int `json:"image_tokens"`
}
type usageCompletionTokensDetails struct {
ReasoningTokens int `json:"reasoning_tokens"`
AudioTokens int `json:"audio_tokens"`
AcceptedPredictionTokens int `json:"accepted_prediction_tokens"`
RejectedPredictionTokens int `json:"rejected_prediction_tokens"`
// TextTokens could be zero for pure text chats
TextTokens int `json:"text_tokens"`
}