From 2f34798aa9558253cec816e3f540efb56705925a Mon Sep 17 00:00:00 2001 From: "Laisky.Cai" Date: Thu, 13 Mar 2025 09:33:03 +0000 Subject: [PATCH] fix: refactor cost calculation logic for web-search tools and improve quota handling --- relay/adaptor/openai/adaptor.go | 76 +++++++++++++++++---------------- relay/controller/helper.go | 15 +++++-- 2 files changed, 51 insertions(+), 40 deletions(-) diff --git a/relay/adaptor/openai/adaptor.go b/relay/adaptor/openai/adaptor.go index aa0de7aa..b6efde3d 100644 --- a/relay/adaptor/openai/adaptor.go +++ b/relay/adaptor/openai/adaptor.go @@ -208,44 +208,46 @@ func (a *Adaptor) DoResponse(c *gin.Context, // ------------------------------------- // calculate web-search tool cost // ------------------------------------- - searchContextSize := "medium" - var req *model.GeneralOpenAIRequest - if vi, ok := c.Get(ctxkey.ConvertedRequest); ok { - if req, ok = vi.(*model.GeneralOpenAIRequest); ok { - if req != nil && - req.WebSearchOptions != nil && - req.WebSearchOptions.SearchContextSize != nil { - searchContextSize = *req.WebSearchOptions.SearchContextSize - } - - switch { - case strings.HasPrefix(meta.ActualModelName, "gpt-4o-search"): - switch searchContextSize { - case "low": - usage.ToolsCost += int64(math.Ceil(30 / 1000 * ratio.QuotaPerUsd)) - case "medium": - usage.ToolsCost += int64(math.Ceil(35 / 1000 * ratio.QuotaPerUsd)) - case "high": - usage.ToolsCost += int64(math.Ceil(40 / 1000 * ratio.QuotaPerUsd)) - default: - return nil, ErrorWrapper( - errors.Errorf("invalid search context size %q", searchContextSize), - "invalid search context size: "+searchContextSize, - http.StatusBadRequest) + if usage != nil { + searchContextSize := "medium" + var req *model.GeneralOpenAIRequest + if vi, ok := c.Get(ctxkey.ConvertedRequest); ok { + if req, ok = vi.(*model.GeneralOpenAIRequest); ok { + if req != nil && + req.WebSearchOptions != nil && + req.WebSearchOptions.SearchContextSize != nil { + searchContextSize = *req.WebSearchOptions.SearchContextSize } - case strings.HasPrefix(meta.ActualModelName, "gpt-4o-mini-search"): - switch searchContextSize { - case "low": - usage.ToolsCost += int64(math.Ceil(25 / 1000 * ratio.QuotaPerUsd)) - case "medium": - usage.ToolsCost += int64(math.Ceil(27.5 / 1000 * ratio.QuotaPerUsd)) - case "high": - usage.ToolsCost += int64(math.Ceil(30 / 1000 * ratio.QuotaPerUsd)) - default: - return nil, ErrorWrapper( - errors.Errorf("invalid search context size %q", searchContextSize), - "invalid search context size: "+searchContextSize, - http.StatusBadRequest) + + switch { + case strings.HasPrefix(meta.ActualModelName, "gpt-4o-search"): + switch searchContextSize { + case "low": + usage.ToolsCost += int64(math.Ceil(30 / 1000 * ratio.QuotaPerUsd)) + case "medium": + usage.ToolsCost += int64(math.Ceil(35 / 1000 * ratio.QuotaPerUsd)) + case "high": + usage.ToolsCost += int64(math.Ceil(40 / 1000 * ratio.QuotaPerUsd)) + default: + return nil, ErrorWrapper( + errors.Errorf("invalid search context size %q", searchContextSize), + "invalid search context size: "+searchContextSize, + http.StatusBadRequest) + } + case strings.HasPrefix(meta.ActualModelName, "gpt-4o-mini-search"): + switch searchContextSize { + case "low": + usage.ToolsCost += int64(math.Ceil(25 / 1000 * ratio.QuotaPerUsd)) + case "medium": + usage.ToolsCost += int64(math.Ceil(27.5 / 1000 * ratio.QuotaPerUsd)) + case "high": + usage.ToolsCost += int64(math.Ceil(30 / 1000 * ratio.QuotaPerUsd)) + default: + return nil, ErrorWrapper( + errors.Errorf("invalid search context size %q", searchContextSize), + "invalid search context size: "+searchContextSize, + http.StatusBadRequest) + } } } } diff --git a/relay/controller/helper.go b/relay/controller/helper.go index 1b368998..737b3446 100644 --- a/relay/controller/helper.go +++ b/relay/controller/helper.go @@ -96,7 +96,15 @@ func preConsumeQuota(c *gin.Context, textRequest *relaymodel.GeneralOpenAIReques return preConsumedQuota, nil } -func postConsumeQuota(ctx context.Context, usage *relaymodel.Usage, meta *meta.Meta, textRequest *relaymodel.GeneralOpenAIRequest, ratio float64, preConsumedQuota int64, modelRatio float64, groupRatio float64, systemPromptReset bool) (quota int64) { +func postConsumeQuota(ctx context.Context, + usage *relaymodel.Usage, + meta *meta.Meta, + textRequest *relaymodel.GeneralOpenAIRequest, + ratio float64, + preConsumedQuota int64, + modelRatio float64, + groupRatio float64, + systemPromptReset bool) (quota int64) { if usage == nil { logger.Error(ctx, "usage is nil, which is unexpected") return @@ -108,17 +116,18 @@ func postConsumeQuota(ctx context.Context, usage *relaymodel.Usage, meta *meta.M // but the behavior of third-party providers may differ, so for now we do not add them manually. // completionTokens := usage.CompletionTokens + usage.CompletionTokensDetails.ReasoningTokens completionTokens := usage.CompletionTokens - quota = int64(math.Ceil((float64(promptTokens) + float64(completionTokens)*completionRatio) * ratio)) + quota = int64(math.Ceil((float64(promptTokens)+float64(completionTokens)*completionRatio)*ratio)) + usage.ToolsCost if ratio != 0 && quota <= 0 { quota = 1 } + totalTokens := promptTokens + completionTokens if totalTokens == 0 { // in this case, must be some error happened // we cannot just return, because we may have to return the pre-consumed quota quota = 0 } - quotaDelta := quota - preConsumedQuota + usage.ToolsCost + quotaDelta := quota - preConsumedQuota err := model.PostConsumeTokenQuota(meta.TokenId, quotaDelta) if err != nil { logger.Error(ctx, "error consuming token remain quota: "+err.Error())