From 6708eed8a0d00d9783b5eb02f384f7b1f90f190e Mon Sep 17 00:00:00 2001
From: "Laisky.Cai"
Date: Thu, 13 Mar 2025 09:33:03 +0000
Subject: [PATCH] fix: refactor cost calculation logic for web-search tools and
 improve quota handling

---
 relay/adaptor/openai/adaptor.go | 76 +++++++++++++++++----------
 relay/controller/helper.go      | 18 ++++++--
 2 files changed, 53 insertions(+), 41 deletions(-)

diff --git a/relay/adaptor/openai/adaptor.go b/relay/adaptor/openai/adaptor.go
index 0e955f0a..9be0d353 100644
--- a/relay/adaptor/openai/adaptor.go
+++ b/relay/adaptor/openai/adaptor.go
@@ -199,44 +199,46 @@ func (a *Adaptor) DoResponse(c *gin.Context,
     // -------------------------------------
     // calculate web-search tool cost
     // -------------------------------------
-    searchContextSize := "medium"
-    var req *model.GeneralOpenAIRequest
-    if vi, ok := c.Get(ctxkey.ConvertedRequest); ok {
-        if req, ok = vi.(*model.GeneralOpenAIRequest); ok {
-            if req != nil &&
-                req.WebSearchOptions != nil &&
-                req.WebSearchOptions.SearchContextSize != nil {
-                searchContextSize = *req.WebSearchOptions.SearchContextSize
-            }
-
-            switch {
-            case strings.HasPrefix(meta.ActualModelName, "gpt-4o-search"):
-                switch searchContextSize {
-                case "low":
-                    usage.ToolsCost += int64(math.Ceil(30 / 1000 * ratio.QuotaPerUsd))
-                case "medium":
-                    usage.ToolsCost += int64(math.Ceil(35 / 1000 * ratio.QuotaPerUsd))
-                case "high":
-                    usage.ToolsCost += int64(math.Ceil(40 / 1000 * ratio.QuotaPerUsd))
-                default:
-                    return nil, ErrorWrapper(
-                        errors.Errorf("invalid search context size %q", searchContextSize),
-                        "invalid search context size: "+searchContextSize,
-                        http.StatusBadRequest)
+    if usage != nil {
+        searchContextSize := "medium"
+        var req *model.GeneralOpenAIRequest
+        if vi, ok := c.Get(ctxkey.ConvertedRequest); ok {
+            if req, ok = vi.(*model.GeneralOpenAIRequest); ok {
+                if req != nil &&
+                    req.WebSearchOptions != nil &&
+                    req.WebSearchOptions.SearchContextSize != nil {
+                    searchContextSize = *req.WebSearchOptions.SearchContextSize
                 }
-            case strings.HasPrefix(meta.ActualModelName, "gpt-4o-mini-search"):
-                switch searchContextSize {
-                case "low":
-                    usage.ToolsCost += int64(math.Ceil(25 / 1000 * ratio.QuotaPerUsd))
-                case "medium":
-                    usage.ToolsCost += int64(math.Ceil(27.5 / 1000 * ratio.QuotaPerUsd))
-                case "high":
-                    usage.ToolsCost += int64(math.Ceil(30 / 1000 * ratio.QuotaPerUsd))
-                default:
-                    return nil, ErrorWrapper(
-                        errors.Errorf("invalid search context size %q", searchContextSize),
-                        "invalid search context size: "+searchContextSize,
-                        http.StatusBadRequest)
+
+                switch {
+                case strings.HasPrefix(meta.ActualModelName, "gpt-4o-search"):
+                    switch searchContextSize {
+                    case "low":
+                        usage.ToolsCost += int64(math.Ceil(30 / 1000 * ratio.QuotaPerUsd))
+                    case "medium":
+                        usage.ToolsCost += int64(math.Ceil(35 / 1000 * ratio.QuotaPerUsd))
+                    case "high":
+                        usage.ToolsCost += int64(math.Ceil(40 / 1000 * ratio.QuotaPerUsd))
+                    default:
+                        return nil, ErrorWrapper(
+                            errors.Errorf("invalid search context size %q", searchContextSize),
+                            "invalid search context size: "+searchContextSize,
+                            http.StatusBadRequest)
+                    }
+                case strings.HasPrefix(meta.ActualModelName, "gpt-4o-mini-search"):
+                    switch searchContextSize {
+                    case "low":
+                        usage.ToolsCost += int64(math.Ceil(25 / 1000 * ratio.QuotaPerUsd))
+                    case "medium":
+                        usage.ToolsCost += int64(math.Ceil(27.5 / 1000 * ratio.QuotaPerUsd))
+                    case "high":
+                        usage.ToolsCost += int64(math.Ceil(30 / 1000 * ratio.QuotaPerUsd))
+                    default:
+                        return nil, ErrorWrapper(
+                            errors.Errorf("invalid search context size %q", searchContextSize),
+                            "invalid search context size: "+searchContextSize,
+                            http.StatusBadRequest)
+                    }
                 }
             }
         }
diff --git a/relay/controller/helper.go b/relay/controller/helper.go
index f623193f..3db90fd6 100644
--- a/relay/controller/helper.go
+++ b/relay/controller/helper.go
@@ -92,29 +92,37 @@ func preConsumeQuota(ctx context.Context, textRequest *relaymodel.GeneralOpenAIR
     return preConsumedQuota, nil
 }

-func postConsumeQuota(ctx context.Context, usage *relaymodel.Usage, meta *meta.Meta, textRequest *relaymodel.GeneralOpenAIRequest, ratio float64, preConsumedQuota int64, modelRatio float64, groupRatio float64, systemPromptReset bool) {
+func postConsumeQuota(ctx context.Context,
+    usage *relaymodel.Usage,
+    meta *meta.Meta,
+    textRequest *relaymodel.GeneralOpenAIRequest,
+    ratio float64,
+    preConsumedQuota int64,
+    modelRatio float64,
+    groupRatio float64,
+    systemPromptReset bool) (quota int64) {
     if usage == nil {
         logger.Error(ctx, "usage is nil, which is unexpected")
         return
     }
-    var quota int64
     completionRatio := billingratio.GetCompletionRatio(textRequest.Model, meta.ChannelType)
     promptTokens := usage.PromptTokens
     // It appears that DeepSeek's official service automatically merges ReasoningTokens into CompletionTokens,
     // but the behavior of third-party providers may differ, so for now we do not add them manually.
     // completionTokens := usage.CompletionTokens + usage.CompletionTokensDetails.ReasoningTokens
     completionTokens := usage.CompletionTokens
-    quota = int64(math.Ceil((float64(promptTokens) + float64(completionTokens)*completionRatio) * ratio))
+    quota = int64(math.Ceil((float64(promptTokens)+float64(completionTokens)*completionRatio)*ratio)) + usage.ToolsCost
     if ratio != 0 && quota <= 0 {
         quota = 1
     }
+
     totalTokens := promptTokens + completionTokens
     if totalTokens == 0 {
         // in this case, must be some error happened
         // we cannot just return, because we may have to return the pre-consumed quota
         quota = 0
     }
-    quotaDelta := quota - preConsumedQuota + usage.ToolsCost
+    quotaDelta := quota - preConsumedQuota
     err := model.PostConsumeTokenQuota(meta.TokenId, quotaDelta)
     if err != nil {
         logger.Error(ctx, "error consuming token remain quota: "+err.Error())
@@ -145,6 +153,8 @@ func postConsumeQuota(ctx context.Context, usage *relaymodel.Usage, meta *meta.M
     })
     model.UpdateUserUsedQuotaAndRequestCount(meta.UserId, quota)
     model.UpdateChannelUsedQuota(meta.ChannelId, quota)
+
+    return quota
 }

 func getMappedModelName(modelName string, mapping map[string]string) (string, bool) {
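
Note on the cost conversion: the adaptor hunk charges each web-search call at the provider's per-1,000-call price (USD 30/35/40 for gpt-4o-search and USD 25/27.5/30 for gpt-4o-mini-search at low/medium/high search context) converted into quota units via ratio.QuotaPerUsd, and postConsumeQuota now folds usage.ToolsCost into the quota it returns instead of adding it to the delta, so the tool cost is billed once and also reflected in the user and channel usage counters. One detail worth double-checking: in Go, the untyped constant expression 30 / 1000 is integer division and folds to 0, so as written the whole-dollar branches add nothing and only the 27.5 price survives; spelling the prices as float literals keeps the math in floating point. The sketch below is a standalone illustration of the intended conversion, not code from the patch: webSearchToolQuota and the quotaPerUsd constant are hypothetical names, and 500000 quota units per USD is only an assumed rate standing in for ratio.QuotaPerUsd.

package main

import (
    "fmt"
    "math"
)

// Assumed conversion rate: quota units per US dollar. The real adaptor reads
// ratio.QuotaPerUsd from relay/billing/ratio; 500000 is only an illustrative value.
const quotaPerUsd = 500000.0

// webSearchToolQuota (hypothetical helper, not part of the patch) converts the
// per-1,000-call web-search price for a model/context-size pair into quota units
// for a single call. Prices mirror the switch in relay/adaptor/openai/adaptor.go;
// float literals keep the division in floating point (30.0/1000 is 0.03, while
// the untyped integer constant 30/1000 would fold to 0).
func webSearchToolQuota(model, searchContextSize string) (int64, error) {
    usdPerThousandCalls := map[string]map[string]float64{
        "gpt-4o-search":      {"low": 30.0, "medium": 35.0, "high": 40.0},
        "gpt-4o-mini-search": {"low": 25.0, "medium": 27.5, "high": 30.0},
    }
    perModel, ok := usdPerThousandCalls[model]
    if !ok {
        return 0, fmt.Errorf("no web-search pricing for model %q", model)
    }
    price, ok := perModel[searchContextSize]
    if !ok {
        return 0, fmt.Errorf("invalid search context size %q", searchContextSize)
    }
    // USD per call = price / 1000; quota = ceil(USD per call * quotaPerUsd).
    return int64(math.Ceil(price / 1000 * quotaPerUsd)), nil
}

func main() {
    quota, err := webSearchToolQuota("gpt-4o-search", "medium")
    if err != nil {
        panic(err)
    }
    // With the assumed 500000 quota/USD rate: 35/1000 * 500000 = 17500.
    fmt.Println(quota)
}

Under the same assumed rate, the helper.go change works out as follows: with a token-based quota of 800, ToolsCost of 17500, and preConsumedQuota of 1000, quota becomes 18300 and quotaDelta = 18300 - 1000 = 17300, which is the additional amount deducted after the response completes.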