From 6708eed8a0d00d9783b5eb02f384f7b1f90f190e Mon Sep 17 00:00:00 2001
From: "Laisky.Cai"
Date: Thu, 13 Mar 2025 09:33:03 +0000
Subject: [PATCH] fix: refactor cost calculation logic for web-search tools and
 improve quota handling

---
 relay/adaptor/openai/adaptor.go | 76 +++++++++++++++++----------
 relay/controller/helper.go      | 18 ++++++--
 2 files changed, 53 insertions(+), 41 deletions(-)

diff --git a/relay/adaptor/openai/adaptor.go b/relay/adaptor/openai/adaptor.go
index 0e955f0a..9be0d353 100644
--- a/relay/adaptor/openai/adaptor.go
+++ b/relay/adaptor/openai/adaptor.go
@@ -199,44 +199,46 @@ func (a *Adaptor) DoResponse(c *gin.Context,
     // -------------------------------------
     // calculate web-search tool cost
     // -------------------------------------
-    searchContextSize := "medium"
-    var req *model.GeneralOpenAIRequest
-    if vi, ok := c.Get(ctxkey.ConvertedRequest); ok {
-        if req, ok = vi.(*model.GeneralOpenAIRequest); ok {
-            if req != nil &&
-                req.WebSearchOptions != nil &&
-                req.WebSearchOptions.SearchContextSize != nil {
-                searchContextSize = *req.WebSearchOptions.SearchContextSize
-            }
-
-            switch {
-            case strings.HasPrefix(meta.ActualModelName, "gpt-4o-search"):
-                switch searchContextSize {
-                case "low":
-                    usage.ToolsCost += int64(math.Ceil(30 / 1000 * ratio.QuotaPerUsd))
-                case "medium":
-                    usage.ToolsCost += int64(math.Ceil(35 / 1000 * ratio.QuotaPerUsd))
-                case "high":
-                    usage.ToolsCost += int64(math.Ceil(40 / 1000 * ratio.QuotaPerUsd))
-                default:
-                    return nil, ErrorWrapper(
-                        errors.Errorf("invalid search context size %q", searchContextSize),
-                        "invalid search context size: "+searchContextSize,
-                        http.StatusBadRequest)
+    if usage != nil {
+        searchContextSize := "medium"
+        var req *model.GeneralOpenAIRequest
+        if vi, ok := c.Get(ctxkey.ConvertedRequest); ok {
+            if req, ok = vi.(*model.GeneralOpenAIRequest); ok {
+                if req != nil &&
+                    req.WebSearchOptions != nil &&
+                    req.WebSearchOptions.SearchContextSize != nil {
+                    searchContextSize = *req.WebSearchOptions.SearchContextSize
                 }
-            case strings.HasPrefix(meta.ActualModelName, "gpt-4o-mini-search"):
-                switch searchContextSize {
-                case "low":
-                    usage.ToolsCost += int64(math.Ceil(25 / 1000 * ratio.QuotaPerUsd))
-                case "medium":
-                    usage.ToolsCost += int64(math.Ceil(27.5 / 1000 * ratio.QuotaPerUsd))
-                case "high":
-                    usage.ToolsCost += int64(math.Ceil(30 / 1000 * ratio.QuotaPerUsd))
-                default:
-                    return nil, ErrorWrapper(
-                        errors.Errorf("invalid search context size %q", searchContextSize),
-                        "invalid search context size: "+searchContextSize,
-                        http.StatusBadRequest)
+
+                switch {
+                case strings.HasPrefix(meta.ActualModelName, "gpt-4o-search"):
+                    switch searchContextSize {
+                    case "low":
+                        usage.ToolsCost += int64(math.Ceil(30 / 1000 * ratio.QuotaPerUsd))
+                    case "medium":
+                        usage.ToolsCost += int64(math.Ceil(35 / 1000 * ratio.QuotaPerUsd))
+                    case "high":
+                        usage.ToolsCost += int64(math.Ceil(40 / 1000 * ratio.QuotaPerUsd))
+                    default:
+                        return nil, ErrorWrapper(
+                            errors.Errorf("invalid search context size %q", searchContextSize),
+                            "invalid search context size: "+searchContextSize,
+                            http.StatusBadRequest)
+                    }
+                case strings.HasPrefix(meta.ActualModelName, "gpt-4o-mini-search"):
+                    switch searchContextSize {
+                    case "low":
+                        usage.ToolsCost += int64(math.Ceil(25 / 1000 * ratio.QuotaPerUsd))
+                    case "medium":
+                        usage.ToolsCost += int64(math.Ceil(27.5 / 1000 * ratio.QuotaPerUsd))
+                    case "high":
+                        usage.ToolsCost += int64(math.Ceil(30 / 1000 * ratio.QuotaPerUsd))
+                    default:
+                        return nil, ErrorWrapper(
+                            errors.Errorf("invalid search context size %q", searchContextSize),
+                            "invalid search context size: "+searchContextSize,
+                            http.StatusBadRequest)
+                    }
                 }
             }
         }
diff --git a/relay/controller/helper.go b/relay/controller/helper.go
index f623193f..3db90fd6 100644
--- a/relay/controller/helper.go
+++ b/relay/controller/helper.go
@@ -92,29 +92,37 @@ func preConsumeQuota(ctx context.Context, textRequest *relaymodel.GeneralOpenAIR
     return preConsumedQuota, nil
 }

-func postConsumeQuota(ctx context.Context, usage *relaymodel.Usage, meta *meta.Meta, textRequest *relaymodel.GeneralOpenAIRequest, ratio float64, preConsumedQuota int64, modelRatio float64, groupRatio float64, systemPromptReset bool) {
+func postConsumeQuota(ctx context.Context,
+    usage *relaymodel.Usage,
+    meta *meta.Meta,
+    textRequest *relaymodel.GeneralOpenAIRequest,
+    ratio float64,
+    preConsumedQuota int64,
+    modelRatio float64,
+    groupRatio float64,
+    systemPromptReset bool) (quota int64) {
     if usage == nil {
         logger.Error(ctx, "usage is nil, which is unexpected")
         return
     }
-    var quota int64
     completionRatio := billingratio.GetCompletionRatio(textRequest.Model, meta.ChannelType)
     promptTokens := usage.PromptTokens
     // It appears that DeepSeek's official service automatically merges ReasoningTokens into CompletionTokens,
     // but the behavior of third-party providers may differ, so for now we do not add them manually.
     // completionTokens := usage.CompletionTokens + usage.CompletionTokensDetails.ReasoningTokens
     completionTokens := usage.CompletionTokens
-    quota = int64(math.Ceil((float64(promptTokens) + float64(completionTokens)*completionRatio) * ratio))
+    quota = int64(math.Ceil((float64(promptTokens)+float64(completionTokens)*completionRatio)*ratio)) + usage.ToolsCost
     if ratio != 0 && quota <= 0 {
         quota = 1
     }
+
     totalTokens := promptTokens + completionTokens
     if totalTokens == 0 {
         // in this case, must be some error happened
         // we cannot just return, because we may have to return the pre-consumed quota
         quota = 0
     }
-    quotaDelta := quota - preConsumedQuota + usage.ToolsCost
+    quotaDelta := quota - preConsumedQuota
     err := model.PostConsumeTokenQuota(meta.TokenId, quotaDelta)
     if err != nil {
         logger.Error(ctx, "error consuming token remain quota: "+err.Error())
@@ -145,6 +153,8 @@ func postConsumeQuota(ctx context.Context, usage *relaymodel.Usage, meta *meta.M
     })
     model.UpdateUserUsedQuotaAndRequestCount(meta.UserId, quota)
     model.UpdateChannelUsedQuota(meta.ChannelId, quota)
+
+    return quota
 }

 func getMappedModelName(modelName string, mapping map[string]string) (string, bool) {
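
Note on the cost conversion: the adaptor hunk charges each web-search call at the provider's per-1,000-call price (USD 30/35/40 for gpt-4o-search and USD 25/27.5/30 for gpt-4o-mini-search at low/medium/high search context) converted into quota units via ratio.QuotaPerUsd, and postConsumeQuota now folds usage.ToolsCost into the quota it returns instead of adding it to the delta, so the tool cost is billed once and also reflected in the user and channel usage counters. One detail worth double-checking: in Go, the untyped constant expression 30 / 1000 is integer division and folds to 0, so as written the whole-dollar branches add nothing and only the 27.5 price survives; spelling the prices as float literals keeps the math in floating point. The sketch below is a standalone illustration of the intended conversion, not code from the patch: webSearchToolQuota and the quotaPerUsd constant are hypothetical names, and 500000 quota units per USD is only an assumed rate standing in for ratio.QuotaPerUsd.

package main

import (
    "fmt"
    "math"
)

// Assumed conversion rate: quota units per US dollar. The real adaptor reads
// ratio.QuotaPerUsd from relay/billing/ratio; 500000 is only an illustrative value.
const quotaPerUsd = 500000.0

// webSearchToolQuota (hypothetical helper, not part of the patch) converts the
// per-1,000-call web-search price for a model/context-size pair into quota units
// for a single call. Prices mirror the switch in relay/adaptor/openai/adaptor.go;
// float literals keep the division in floating point (30.0/1000 is 0.03, while
// the untyped integer constant 30/1000 would fold to 0).
func webSearchToolQuota(model, searchContextSize string) (int64, error) {
    usdPerThousandCalls := map[string]map[string]float64{
        "gpt-4o-search":      {"low": 30.0, "medium": 35.0, "high": 40.0},
        "gpt-4o-mini-search": {"low": 25.0, "medium": 27.5, "high": 30.0},
    }
    perModel, ok := usdPerThousandCalls[model]
    if !ok {
        return 0, fmt.Errorf("no web-search pricing for model %q", model)
    }
    price, ok := perModel[searchContextSize]
    if !ok {
        return 0, fmt.Errorf("invalid search context size %q", searchContextSize)
    }
    // USD per call = price / 1000; quota = ceil(USD per call * quotaPerUsd).
    return int64(math.Ceil(price / 1000 * quotaPerUsd)), nil
}

func main() {
    quota, err := webSearchToolQuota("gpt-4o-search", "medium")
    if err != nil {
        panic(err)
    }
    // With the assumed 500000 quota/USD rate: 35/1000 * 500000 = 17500.
    fmt.Println(quota)
}

Under the same assumed rate, the helper.go change works out as follows: with a token-based quota of 800, ToolsCost of 17500, and preConsumedQuota of 1000, quota becomes 18300 and quotaDelta = 18300 - 1000 = 17300, which is the additional amount deducted after the response completes.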