fix: refactor cost calculation logic for web-search tools and improve quota handling

2025-12-27 02:05:56 +08:00 · 2025-03-13 09:33:03 +00:00
parent ad63c9e66f
commit 6708eed8a0
2 changed files with 53 additions and 41 deletions
--- a/relay/adaptor/openai/adaptor.go
+++ b/relay/adaptor/openai/adaptor.go
@@ -199,44 +199,46 @@ func (a *Adaptor) DoResponse(c *gin.Context,
 	// -------------------------------------
 	// calculate web-search tool cost
 	// -------------------------------------
-	searchContextSize := "medium"
+	if usage != nil {
-	var req *model.GeneralOpenAIRequest
+		searchContextSize := "medium"
-	if vi, ok := c.Get(ctxkey.ConvertedRequest); ok {
+		var req *model.GeneralOpenAIRequest
-		if req, ok = vi.(*model.GeneralOpenAIRequest); ok {
+		if vi, ok := c.Get(ctxkey.ConvertedRequest); ok {
-			if req != nil &&
+			if req, ok = vi.(*model.GeneralOpenAIRequest); ok {
-				req.WebSearchOptions != nil &&
+				if req != nil &&
-				req.WebSearchOptions.SearchContextSize != nil {
+					req.WebSearchOptions != nil &&
-				searchContextSize = *req.WebSearchOptions.SearchContextSize
+					req.WebSearchOptions.SearchContextSize != nil {
-			}
+					searchContextSize = *req.WebSearchOptions.SearchContextSize
 			switch {
 			case strings.HasPrefix(meta.ActualModelName, "gpt-4o-search"):
 				switch searchContextSize {
 				case "low":
 					usage.ToolsCost += int64(math.Ceil(30 / 1000 * ratio.QuotaPerUsd))
 				case "medium":
 					usage.ToolsCost += int64(math.Ceil(35 / 1000 * ratio.QuotaPerUsd))
 				case "high":
 					usage.ToolsCost += int64(math.Ceil(40 / 1000 * ratio.QuotaPerUsd))
 				default:
 					return nil, ErrorWrapper(
 						errors.Errorf("invalid search context size %q", searchContextSize),
 						"invalid search context size: "+searchContextSize,
 						http.StatusBadRequest)
 				}
-			case strings.HasPrefix(meta.ActualModelName, "gpt-4o-mini-search"):
+
-				switch searchContextSize {
+				switch {
-				case "low":
+				case strings.HasPrefix(meta.ActualModelName, "gpt-4o-search"):
-					usage.ToolsCost += int64(math.Ceil(25 / 1000 * ratio.QuotaPerUsd))
+					switch searchContextSize {
-				case "medium":
+					case "low":
-					usage.ToolsCost += int64(math.Ceil(27.5 / 1000 * ratio.QuotaPerUsd))
+						usage.ToolsCost += int64(math.Ceil(30 / 1000 * ratio.QuotaPerUsd))
-				case "high":
+					case "medium":
-					usage.ToolsCost += int64(math.Ceil(30 / 1000 * ratio.QuotaPerUsd))
+						usage.ToolsCost += int64(math.Ceil(35 / 1000 * ratio.QuotaPerUsd))
-				default:
+					case "high":
-					return nil, ErrorWrapper(
+						usage.ToolsCost += int64(math.Ceil(40 / 1000 * ratio.QuotaPerUsd))
-						errors.Errorf("invalid search context size %q", searchContextSize),
+					default:
-						"invalid search context size: "+searchContextSize,
+						return nil, ErrorWrapper(
-						http.StatusBadRequest)
+							errors.Errorf("invalid search context size %q", searchContextSize),
 							"invalid search context size: "+searchContextSize,
 							http.StatusBadRequest)
 					}
 				case strings.HasPrefix(meta.ActualModelName, "gpt-4o-mini-search"):
 					switch searchContextSize {
 					case "low":
 						usage.ToolsCost += int64(math.Ceil(25 / 1000 * ratio.QuotaPerUsd))
 					case "medium":
 						usage.ToolsCost += int64(math.Ceil(27.5 / 1000 * ratio.QuotaPerUsd))
 					case "high":
 						usage.ToolsCost += int64(math.Ceil(30 / 1000 * ratio.QuotaPerUsd))
 					default:
 						return nil, ErrorWrapper(
 							errors.Errorf("invalid search context size %q", searchContextSize),
 							"invalid search context size: "+searchContextSize,
 							http.StatusBadRequest)
 					}
 				}
 			}
 		}
--- a/relay/controller/helper.go
+++ b/relay/controller/helper.go
@@ -92,29 +92,37 @@ func preConsumeQuota(ctx context.Context, textRequest *relaymodel.GeneralOpenAIR
 	return preConsumedQuota, nil
 }
-func postConsumeQuota(ctx context.Context, usage *relaymodel.Usage, meta *meta.Meta, textRequest *relaymodel.GeneralOpenAIRequest, ratio float64, preConsumedQuota int64, modelRatio float64, groupRatio float64, systemPromptReset bool) {
+func postConsumeQuota(ctx context.Context,
 	usage *relaymodel.Usage,
 	meta *meta.Meta,
 	textRequest *relaymodel.GeneralOpenAIRequest,
 	ratio float64,
 	preConsumedQuota int64,
 	modelRatio float64,
 	groupRatio float64,
 	systemPromptReset bool) (quota int64) {
 	if usage == nil {
 		logger.Error(ctx, "usage is nil, which is unexpected")
 		return
 	}
 	var quota int64
 	completionRatio := billingratio.GetCompletionRatio(textRequest.Model, meta.ChannelType)
 	promptTokens := usage.PromptTokens
 	// It appears that DeepSeek's official service automatically merges ReasoningTokens into CompletionTokens,
 	// but the behavior of third-party providers may differ, so for now we do not add them manually.
 	// completionTokens := usage.CompletionTokens + usage.CompletionTokensDetails.ReasoningTokens
 	completionTokens := usage.CompletionTokens
-	quota = int64(math.Ceil((float64(promptTokens) + float64(completionTokens)*completionRatio) * ratio))
+	quota = int64(math.Ceil((float64(promptTokens)+float64(completionTokens)*completionRatio)*ratio)) + usage.ToolsCost
 	if ratio != 0 && quota <= 0 {
 		quota = 1
 	}
 	totalTokens := promptTokens + completionTokens
 	if totalTokens == 0 {
 		// in this case, some error must have happened
 		// we cannot just return, because we may have to return the pre-consumed quota
 		quota = 0
 	}
-	quotaDelta := quota - preConsumedQuota + usage.ToolsCost
+	quotaDelta := quota - preConsumedQuota
 	err := model.PostConsumeTokenQuota(meta.TokenId, quotaDelta)
 	if err != nil {
 		logger.Error(ctx, "error consuming token remain quota: "+err.Error())
@@ -145,6 +153,8 @@ func postConsumeQuota(ctx context.Context, usage *relaymodel.Usage, meta *meta.M
 	})
 	model.UpdateUserUsedQuotaAndRequestCount(meta.UserId, quota)
 	model.UpdateChannelUsedQuota(meta.ChannelId, quota)
 	return quota
 }
 func getMappedModelName(modelName string, mapping map[string]string) (string, bool) {