fix: refactor cost calculation logic for web-search tools and improve quota handling

This commit is contained in:
Laisky.Cai 2025-03-13 09:33:03 +00:00
parent ad63c9e66f
commit 6708eed8a0
2 changed files with 53 additions and 41 deletions

View File

@ -199,44 +199,46 @@ func (a *Adaptor) DoResponse(c *gin.Context,
// -------------------------------------
// calculate web-search tool cost
// -------------------------------------
searchContextSize := "medium"
var req *model.GeneralOpenAIRequest
if vi, ok := c.Get(ctxkey.ConvertedRequest); ok {
if req, ok = vi.(*model.GeneralOpenAIRequest); ok {
if req != nil &&
req.WebSearchOptions != nil &&
req.WebSearchOptions.SearchContextSize != nil {
searchContextSize = *req.WebSearchOptions.SearchContextSize
}
switch {
case strings.HasPrefix(meta.ActualModelName, "gpt-4o-search"):
switch searchContextSize {
case "low":
usage.ToolsCost += int64(math.Ceil(30 / 1000 * ratio.QuotaPerUsd))
case "medium":
usage.ToolsCost += int64(math.Ceil(35 / 1000 * ratio.QuotaPerUsd))
case "high":
usage.ToolsCost += int64(math.Ceil(40 / 1000 * ratio.QuotaPerUsd))
default:
return nil, ErrorWrapper(
errors.Errorf("invalid search context size %q", searchContextSize),
"invalid search context size: "+searchContextSize,
http.StatusBadRequest)
if usage != nil {
searchContextSize := "medium"
var req *model.GeneralOpenAIRequest
if vi, ok := c.Get(ctxkey.ConvertedRequest); ok {
if req, ok = vi.(*model.GeneralOpenAIRequest); ok {
if req != nil &&
req.WebSearchOptions != nil &&
req.WebSearchOptions.SearchContextSize != nil {
searchContextSize = *req.WebSearchOptions.SearchContextSize
}
case strings.HasPrefix(meta.ActualModelName, "gpt-4o-mini-search"):
switch searchContextSize {
case "low":
usage.ToolsCost += int64(math.Ceil(25 / 1000 * ratio.QuotaPerUsd))
case "medium":
usage.ToolsCost += int64(math.Ceil(27.5 / 1000 * ratio.QuotaPerUsd))
case "high":
usage.ToolsCost += int64(math.Ceil(30 / 1000 * ratio.QuotaPerUsd))
default:
return nil, ErrorWrapper(
errors.Errorf("invalid search context size %q", searchContextSize),
"invalid search context size: "+searchContextSize,
http.StatusBadRequest)
switch {
case strings.HasPrefix(meta.ActualModelName, "gpt-4o-search"):
switch searchContextSize {
case "low":
usage.ToolsCost += int64(math.Ceil(30 / 1000 * ratio.QuotaPerUsd))
case "medium":
usage.ToolsCost += int64(math.Ceil(35 / 1000 * ratio.QuotaPerUsd))
case "high":
usage.ToolsCost += int64(math.Ceil(40 / 1000 * ratio.QuotaPerUsd))
default:
return nil, ErrorWrapper(
errors.Errorf("invalid search context size %q", searchContextSize),
"invalid search context size: "+searchContextSize,
http.StatusBadRequest)
}
case strings.HasPrefix(meta.ActualModelName, "gpt-4o-mini-search"):
switch searchContextSize {
case "low":
usage.ToolsCost += int64(math.Ceil(25 / 1000 * ratio.QuotaPerUsd))
case "medium":
usage.ToolsCost += int64(math.Ceil(27.5 / 1000 * ratio.QuotaPerUsd))
case "high":
usage.ToolsCost += int64(math.Ceil(30 / 1000 * ratio.QuotaPerUsd))
default:
return nil, ErrorWrapper(
errors.Errorf("invalid search context size %q", searchContextSize),
"invalid search context size: "+searchContextSize,
http.StatusBadRequest)
}
}
}
}

View File

@ -92,29 +92,37 @@ func preConsumeQuota(ctx context.Context, textRequest *relaymodel.GeneralOpenAIR
return preConsumedQuota, nil
}
func postConsumeQuota(ctx context.Context, usage *relaymodel.Usage, meta *meta.Meta, textRequest *relaymodel.GeneralOpenAIRequest, ratio float64, preConsumedQuota int64, modelRatio float64, groupRatio float64, systemPromptReset bool) {
func postConsumeQuota(ctx context.Context,
usage *relaymodel.Usage,
meta *meta.Meta,
textRequest *relaymodel.GeneralOpenAIRequest,
ratio float64,
preConsumedQuota int64,
modelRatio float64,
groupRatio float64,
systemPromptReset bool) (quota int64) {
if usage == nil {
logger.Error(ctx, "usage is nil, which is unexpected")
return
}
var quota int64
completionRatio := billingratio.GetCompletionRatio(textRequest.Model, meta.ChannelType)
promptTokens := usage.PromptTokens
// It appears that DeepSeek's official service automatically merges ReasoningTokens into CompletionTokens,
// but the behavior of third-party providers may differ, so for now we do not add them manually.
// completionTokens := usage.CompletionTokens + usage.CompletionTokensDetails.ReasoningTokens
completionTokens := usage.CompletionTokens
quota = int64(math.Ceil((float64(promptTokens) + float64(completionTokens)*completionRatio) * ratio))
quota = int64(math.Ceil((float64(promptTokens)+float64(completionTokens)*completionRatio)*ratio)) + usage.ToolsCost
if ratio != 0 && quota <= 0 {
quota = 1
}
totalTokens := promptTokens + completionTokens
if totalTokens == 0 {
// in this case, some error must have occurred upstream
// we cannot just return, because we may have to return the pre-consumed quota
quota = 0
}
quotaDelta := quota - preConsumedQuota + usage.ToolsCost
quotaDelta := quota - preConsumedQuota
err := model.PostConsumeTokenQuota(meta.TokenId, quotaDelta)
if err != nil {
logger.Error(ctx, "error consuming token remain quota: "+err.Error())
@ -145,6 +153,8 @@ func postConsumeQuota(ctx context.Context, usage *relaymodel.Usage, meta *meta.M
})
model.UpdateUserUsedQuotaAndRequestCount(meta.UserId, quota)
model.UpdateChannelUsedQuota(meta.ChannelId, quota)
return quota
}
func getMappedModelName(modelName string, mapping map[string]string) (string, bool) {