diff --git a/common/model-ratio.go b/common/model-ratio.go index 77167f9..d866394 100644 --- a/common/model-ratio.go +++ b/common/model-ratio.go @@ -54,6 +54,7 @@ var defaultModelRatio = map[string]float64{ "gpt-4-turbo": 5, // $0.01 / 1K tokens "gpt-4-turbo-2024-04-09": 5, // $0.01 / 1K tokens //"gpt-3.5-turbo-0301": 0.75, //deprecated + "gpt-3.5-turbo": 0.25, "gpt-3.5-turbo-0613": 0.75, "gpt-3.5-turbo-16k": 1.5, // $0.003 / 1K tokens "gpt-3.5-turbo-16k-0613": 1.5, diff --git a/dto/openai_response.go b/dto/openai_response.go index 5d13773..0f603ad 100644 --- a/dto/openai_response.go +++ b/dto/openai_response.go @@ -128,7 +128,9 @@ type CompletionsStreamResponse struct { } type Usage struct { - PromptTokens int `json:"prompt_tokens"` - CompletionTokens int `json:"completion_tokens"` - TotalTokens int `json:"total_tokens"` + PromptTokens int `json:"prompt_tokens"` + CompletionTokens int `json:"completion_tokens"` + TotalTokens int `json:"total_tokens"` + PromptTokensDetails InputTokenDetails `json:"prompt_tokens_details"` + CompletionTokenDetails OutputTokenDetails `json:"completion_tokens_details"` } diff --git a/dto/realtime.go b/dto/realtime.go index cca99f3..e28d813 100644 --- a/dto/realtime.go +++ b/dto/realtime.go @@ -47,6 +47,7 @@ type InputTokenDetails struct { CachedTokens int `json:"cached_tokens"` TextTokens int `json:"text_tokens"` AudioTokens int `json:"audio_tokens"` + ImageTokens int `json:"image_tokens"` } type OutputTokenDetails struct { diff --git a/relay/common/relay_info.go b/relay/common/relay_info.go index 21e3691..76ae036 100644 --- a/relay/common/relay_info.go +++ b/relay/common/relay_info.go @@ -41,6 +41,7 @@ type RelayInfo struct { OutputAudioFormat string RealtimeTools []dto.RealTimeTool IsFirstRequest bool + AudioUsage bool } func GenRelayInfoWs(c *gin.Context, ws *websocket.Conn) *RelayInfo { diff --git a/relay/relay-text.go b/relay/relay-text.go index 463947f..061a5e2 100644 --- a/relay/relay-text.go +++ b/relay/relay-text.go @@ -207,7 +207,12 @@ func TextHelper(c *gin.Context) (openaiErr *dto.OpenAIErrorWithStatusCode) { service.ResetStatusCode(openaiErr, statusCodeMappingStr) return openaiErr } - postConsumeQuota(c, relayInfo, textRequest.Model, usage.(*dto.Usage), ratio, preConsumedQuota, userQuota, modelRatio, groupRatio, modelPrice, getModelPriceSuccess, "") + + if strings.HasPrefix(relayInfo.UpstreamModelName, "gpt-4o-audio") { + service.PostAudioConsumeQuota(c, relayInfo, usage.(*dto.Usage), ratio, preConsumedQuota, userQuota, modelRatio, groupRatio, modelPrice, getModelPriceSuccess, "") + } else { + postConsumeQuota(c, relayInfo, textRequest.Model, usage.(*dto.Usage), ratio, preConsumedQuota, userQuota, modelRatio, groupRatio, modelPrice, getModelPriceSuccess, "") + } return nil } diff --git a/service/log.go b/service/log.go index e5354cd..4372130 100644 --- a/service/log.go +++ b/service/log.go @@ -19,12 +19,26 @@ func GenerateTextOtherInfo(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, m return other } -func GenerateWssOtherInfo(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.RealtimeUsage, modelRatio, groupRatio, completionRatio, modelPrice float64) map[string]interface{} { +func GenerateWssOtherInfo(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.RealtimeUsage, modelRatio, groupRatio, completionRatio, audioRatio, audioCompletionRatio, modelPrice float64) map[string]interface{} { info := GenerateTextOtherInfo(ctx, relayInfo, modelRatio, groupRatio, completionRatio, modelPrice) info["ws"] = true info["audio_input"] = usage.InputTokenDetails.AudioTokens info["audio_output"] = usage.OutputTokenDetails.AudioTokens info["text_input"] = usage.InputTokenDetails.TextTokens info["text_output"] = usage.OutputTokenDetails.TextTokens + info["audio_ratio"] = audioRatio + info["audio_completion_ratio"] = audioCompletionRatio + return info +} + +func GenerateAudioOtherInfo(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.Usage, modelRatio, groupRatio, completionRatio, audioRatio, audioCompletionRatio, modelPrice float64) map[string]interface{} { + info := GenerateTextOtherInfo(ctx, relayInfo, modelRatio, groupRatio, completionRatio, modelPrice) + info["audio"] = true + info["audio_input"] = usage.PromptTokensDetails.AudioTokens + info["audio_output"] = usage.CompletionTokenDetails.AudioTokens + info["text_input"] = usage.PromptTokensDetails.TextTokens + info["text_output"] = usage.CompletionTokenDetails.TextTokens + info["audio_ratio"] = audioRatio + info["audio_completion_ratio"] = audioCompletionRatio return info } diff --git a/service/quota.go b/service/quota.go index 1974ad7..695c073 100644 --- a/service/quota.go +++ b/service/quota.go @@ -9,7 +9,6 @@ import ( "one-api/dto" "one-api/model" relaycommon "one-api/relay/common" - "strings" "time" ) @@ -124,18 +123,79 @@ func PostWssConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, mod } logModel := modelName - if strings.HasPrefix(logModel, "gpt-4-gizmo") { - logModel = "gpt-4-gizmo-*" - logContent += fmt.Sprintf(",模型 %s", modelName) - } - if strings.HasPrefix(logModel, "gpt-4o-gizmo") { - logModel = "gpt-4o-gizmo-*" - logContent += fmt.Sprintf(",模型 %s", modelName) - } if extraContent != "" { logContent += ", " + extraContent } - other := GenerateWssOtherInfo(ctx, relayInfo, usage, modelRatio, groupRatio, completionRatio, modelPrice) + other := GenerateWssOtherInfo(ctx, relayInfo, usage, modelRatio, groupRatio, completionRatio, audioRatio, audioCompletionRatio, modelPrice) model.RecordConsumeLog(ctx, relayInfo.UserId, relayInfo.ChannelId, usage.InputTokens, usage.OutputTokens, logModel, tokenName, quota, logContent, relayInfo.TokenId, userQuota, int(useTimeSeconds), relayInfo.IsStream, other) } + +func PostAudioConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, + usage *dto.Usage, ratio float64, preConsumedQuota int, userQuota int, modelRatio float64, + groupRatio float64, + modelPrice float64, usePrice bool, extraContent string) { + + useTimeSeconds := time.Now().Unix() - relayInfo.StartTime.Unix() + textInputTokens := usage.PromptTokensDetails.TextTokens + textOutTokens := usage.CompletionTokenDetails.TextTokens + + audioInputTokens := usage.PromptTokensDetails.AudioTokens + audioOutTokens := usage.CompletionTokenDetails.AudioTokens + + tokenName := ctx.GetString("token_name") + completionRatio := common.GetCompletionRatio(relayInfo.UpstreamModelName) + audioRatio := common.GetAudioRatio(relayInfo.UpstreamModelName) + audioCompletionRatio := common.GetAudioCompletionRatio(relayInfo.UpstreamModelName) + + quota := 0 + if !usePrice { + quota = int(math.Round(float64(textInputTokens) + float64(textOutTokens)*completionRatio)) + quota += int(math.Round(float64(audioInputTokens)*audioRatio + float64(audioOutTokens)*audioRatio*audioCompletionRatio)) + quota = int(math.Round(float64(quota) * ratio)) + if ratio != 0 && quota <= 0 { + quota = 1 + } + } else { + quota = int(modelPrice * common.QuotaPerUnit * groupRatio) + } + totalTokens := usage.TotalTokens + var logContent string + if !usePrice { + logContent = fmt.Sprintf("模型倍率 %.2f,补全倍率 %.2f,音频倍率 %.2f,音频补全倍率 %.2f,分组倍率 %.2f", modelRatio, completionRatio, audioRatio, audioCompletionRatio, groupRatio) + } else { + logContent = fmt.Sprintf("模型价格 %.2f,分组倍率 %.2f", modelPrice, groupRatio) + } + + // record all the consume log even if quota is 0 + if totalTokens == 0 { + // in this case, must be some error happened + // we cannot just return, because we may have to return the pre-consumed quota + quota = 0 + logContent += fmt.Sprintf("(可能是上游超时)") + common.LogError(ctx, fmt.Sprintf("total tokens is 0, cannot consume quota, userId %d, channelId %d, "+ + "tokenId %d, model %s, pre-consumed quota %d", relayInfo.UserId, relayInfo.ChannelId, relayInfo.TokenId, relayInfo.UpstreamModelName, preConsumedQuota)) + } else { + quotaDelta := quota - preConsumedQuota + if quotaDelta != 0 { + err := model.PostConsumeTokenQuota(relayInfo, userQuota, quotaDelta, preConsumedQuota, true) + if err != nil { + common.LogError(ctx, "error consuming token remain quota: "+err.Error()) + } + } + err := model.CacheUpdateUserQuota(relayInfo.UserId) + if err != nil { + common.LogError(ctx, "error update user quota cache: "+err.Error()) + } + model.UpdateUserUsedQuotaAndRequestCount(relayInfo.UserId, quota) + model.UpdateChannelUsedQuota(relayInfo.ChannelId, quota) + } + + logModel := relayInfo.UpstreamModelName + if extraContent != "" { + logContent += ", " + extraContent + } + other := GenerateAudioOtherInfo(ctx, relayInfo, usage, modelRatio, groupRatio, completionRatio, audioRatio, audioCompletionRatio, modelPrice) + model.RecordConsumeLog(ctx, relayInfo.UserId, relayInfo.ChannelId, usage.PromptTokens, usage.CompletionTokens, logModel, + tokenName, quota, logContent, relayInfo.TokenId, userQuota, int(useTimeSeconds), relayInfo.IsStream, other) +} diff --git a/web/src/components/LogsTable.js b/web/src/components/LogsTable.js index 93fa4f1..c88d73c 100644 --- a/web/src/components/LogsTable.js +++ b/web/src/components/LogsTable.js @@ -24,10 +24,11 @@ import { } from '@douyinfe/semi-ui'; import { ITEMS_PER_PAGE } from '../constants'; import { + renderAudioModelPrice, renderModelPrice, renderNumber, renderQuota, - stringToColor, + stringToColor } from '../helpers/render'; import Paragraph from '@douyinfe/semi-ui/lib/es/typography/paragraph'; import { getLogOther } from '../helpers/other.js'; @@ -537,7 +538,7 @@ const LogsTable = () => { // value: content, // }) } - if (other?.ws) { + if (other?.ws || other?.audio) { expandDataLocal.push({ key: '语音输入', value: other.audio_input, @@ -560,14 +561,30 @@ const LogsTable = () => { value: logs[i].content, }) if (logs[i].type === 2) { - let content = renderModelPrice( + let content = ''; + if (other?.ws || other?.audio) { + content = renderAudioModelPrice( + other.text_input, + other.text_output, + other.model_ratio, + other.model_price, + other.completion_ratio, + other.audio_input, + other.audio_output, + other?.audio_ratio, + other?.audio_completion_ratio, + other.group_ratio, + ); + } else { + content = renderModelPrice( logs[i].prompt_tokens, logs[i].completion_tokens, other.model_ratio, other.model_price, other.completion_ratio, other.group_ratio, - ); + ); + } expandDataLocal.push({ key: '计费过程', value: content, diff --git a/web/src/helpers/render.js b/web/src/helpers/render.js index 7e93967..d89f3f1 100644 --- a/web/src/helpers/render.js +++ b/web/src/helpers/render.js @@ -173,6 +173,59 @@ export function renderModelPrice( } } +export function renderAudioModelPrice( + inputTokens, + completionTokens, + modelRatio, + modelPrice = -1, + completionRatio, + audioInputTokens, + audioCompletionTokens, + audioRatio, + audioCompletionRatio, + groupRatio, +) { + // 1 ratio = $0.002 / 1K tokens + if (modelPrice !== -1) { + return '模型价格:$' + modelPrice + ' * 分组倍率:' + groupRatio + ' = $' + modelPrice * groupRatio; + } else { + if (completionRatio === undefined) { + completionRatio = 0; + } + // 这里的 *2 是因为 1倍率=0.002刀,请勿删除 + let inputRatioPrice = modelRatio * 2.0; + let completionRatioPrice = modelRatio * 2.0 * completionRatio; + let price = + (inputTokens / 1000000) * inputRatioPrice * groupRatio + + (completionTokens / 1000000) * completionRatioPrice * groupRatio + + (audioInputTokens / 1000000) * inputRatioPrice * audioRatio * groupRatio + + (audioCompletionTokens / 1000000) * inputRatioPrice * audioRatio * audioCompletionRatio * groupRatio; + return ( + <> +
+

提示:${inputRatioPrice} * {groupRatio} = ${inputRatioPrice * groupRatio} / 1M tokens

+

补全:${completionRatioPrice} * {groupRatio} = ${completionRatioPrice * groupRatio} / 1M tokens

+

音频提示:${inputRatioPrice} * {groupRatio} * {audioRatio} = ${inputRatioPrice * audioRatio * groupRatio} / 1M tokens

+

音频补全:${inputRatioPrice} * {groupRatio} * {audioRatio} * {audioCompletionRatio} = ${inputRatioPrice * audioRatio * audioCompletionRatio * groupRatio} / 1M tokens

+

+

+ 提示 {inputTokens} tokens / 1M tokens * ${inputRatioPrice} + 补全{' '} + {completionTokens} tokens / 1M tokens * ${completionRatioPrice} + +

+

+ 音频提示 {audioInputTokens} tokens / 1M tokens * ${inputRatioPrice} * {audioRatio} + 音频补全 {audioCompletionTokens} tokens / 1M tokens * ${inputRatioPrice} * {audioRatio} * {audioCompletionRatio} +

+

+ (文字 + 音频) * 分组 {groupRatio} = + ${price.toFixed(6)} +

+

仅供参考,以实际扣费为准

+
+ + ); + } +} + export function renderQuotaWithPrompt(quota, digits) { let displayInCurrency = localStorage.getItem('display_in_currency'); displayInCurrency = displayInCurrency === 'true';