fix: refactor cost calculation logic for web-search tools and improve quota handling
parent ad63c9e66f · commit 6708eed8a0
@@ -199,44 +199,46 @@ func (a *Adaptor) DoResponse(c *gin.Context,
     // -------------------------------------
     // calculate web-search tool cost
     // -------------------------------------
-    searchContextSize := "medium"
-    var req *model.GeneralOpenAIRequest
-    if vi, ok := c.Get(ctxkey.ConvertedRequest); ok {
-        if req, ok = vi.(*model.GeneralOpenAIRequest); ok {
-            if req != nil &&
-                req.WebSearchOptions != nil &&
-                req.WebSearchOptions.SearchContextSize != nil {
-                searchContextSize = *req.WebSearchOptions.SearchContextSize
-            }
-
-            switch {
-            case strings.HasPrefix(meta.ActualModelName, "gpt-4o-search"):
-                switch searchContextSize {
-                case "low":
-                    usage.ToolsCost += int64(math.Ceil(30 / 1000 * ratio.QuotaPerUsd))
-                case "medium":
-                    usage.ToolsCost += int64(math.Ceil(35 / 1000 * ratio.QuotaPerUsd))
-                case "high":
-                    usage.ToolsCost += int64(math.Ceil(40 / 1000 * ratio.QuotaPerUsd))
-                default:
-                    return nil, ErrorWrapper(
-                        errors.Errorf("invalid search context size %q", searchContextSize),
-                        "invalid search context size: "+searchContextSize,
-                        http.StatusBadRequest)
-                }
-            case strings.HasPrefix(meta.ActualModelName, "gpt-4o-mini-search"):
-                switch searchContextSize {
-                case "low":
-                    usage.ToolsCost += int64(math.Ceil(25 / 1000 * ratio.QuotaPerUsd))
-                case "medium":
-                    usage.ToolsCost += int64(math.Ceil(27.5 / 1000 * ratio.QuotaPerUsd))
-                case "high":
-                    usage.ToolsCost += int64(math.Ceil(30 / 1000 * ratio.QuotaPerUsd))
-                default:
-                    return nil, ErrorWrapper(
-                        errors.Errorf("invalid search context size %q", searchContextSize),
-                        "invalid search context size: "+searchContextSize,
-                        http.StatusBadRequest)
-                }
-            }
-        }
-    }
+    if usage != nil {
+        searchContextSize := "medium"
+        var req *model.GeneralOpenAIRequest
+        if vi, ok := c.Get(ctxkey.ConvertedRequest); ok {
+            if req, ok = vi.(*model.GeneralOpenAIRequest); ok {
+                if req != nil &&
+                    req.WebSearchOptions != nil &&
+                    req.WebSearchOptions.SearchContextSize != nil {
+                    searchContextSize = *req.WebSearchOptions.SearchContextSize
+                }
+
+                switch {
+                case strings.HasPrefix(meta.ActualModelName, "gpt-4o-search"):
+                    switch searchContextSize {
+                    case "low":
+                        usage.ToolsCost += int64(math.Ceil(30 / 1000 * ratio.QuotaPerUsd))
+                    case "medium":
+                        usage.ToolsCost += int64(math.Ceil(35 / 1000 * ratio.QuotaPerUsd))
+                    case "high":
+                        usage.ToolsCost += int64(math.Ceil(40 / 1000 * ratio.QuotaPerUsd))
+                    default:
+                        return nil, ErrorWrapper(
+                            errors.Errorf("invalid search context size %q", searchContextSize),
+                            "invalid search context size: "+searchContextSize,
+                            http.StatusBadRequest)
+                    }
+                case strings.HasPrefix(meta.ActualModelName, "gpt-4o-mini-search"):
+                    switch searchContextSize {
+                    case "low":
+                        usage.ToolsCost += int64(math.Ceil(25 / 1000 * ratio.QuotaPerUsd))
+                    case "medium":
+                        usage.ToolsCost += int64(math.Ceil(27.5 / 1000 * ratio.QuotaPerUsd))
+                    case "high":
+                        usage.ToolsCost += int64(math.Ceil(30 / 1000 * ratio.QuotaPerUsd))
+                    default:
+                        return nil, ErrorWrapper(
+                            errors.Errorf("invalid search context size %q", searchContextSize),
+                            "invalid search context size: "+searchContextSize,
+                            http.StatusBadRequest)
+                    }
+                }
+            }
+        }
+    }
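For orientation: the hunk above adds a flat per-request surcharge for the built-in web-search tool on top of the token-based cost. The price is a USD figure per 1,000 calls, keyed by model family and search context size, converted into quota units through ratio.QuotaPerUsd and accumulated into usage.ToolsCost. The sketch below restates that table as a standalone helper; webSearchSurcharge and the quotaPerUsd placeholder value are illustrative only, not identifiers from this repository, and the prices are written as float literals so the division by 1000 is carried out in floating point.

package main

import (
    "fmt"
    "math"
    "strings"
)

// quotaPerUsd stands in for ratio.QuotaPerUsd; the value is a placeholder.
const quotaPerUsd = 500000.0

// webSearchSurcharge (hypothetical helper) returns the extra quota charged for
// one web-search request, from a USD-per-1,000-calls price table.
func webSearchSurcharge(model, searchContextSize string) (int64, error) {
    var usdPerThousand float64
    switch {
    case strings.HasPrefix(model, "gpt-4o-mini-search"):
        switch searchContextSize {
        case "low":
            usdPerThousand = 25.0
        case "medium":
            usdPerThousand = 27.5
        case "high":
            usdPerThousand = 30.0
        default:
            return 0, fmt.Errorf("invalid search context size %q", searchContextSize)
        }
    case strings.HasPrefix(model, "gpt-4o-search"):
        switch searchContextSize {
        case "low":
            usdPerThousand = 30.0
        case "medium":
            usdPerThousand = 35.0
        case "high":
            usdPerThousand = 40.0
        default:
            return 0, fmt.Errorf("invalid search context size %q", searchContextSize)
        }
    default:
        return 0, nil // model has no web-search surcharge
    }
    // one call costs usdPerThousand/1000 USD; round up after converting to quota units
    return int64(math.Ceil(usdPerThousand / 1000 * quotaPerUsd)), nil
}

func main() {
    cost, err := webSearchSurcharge("gpt-4o-search-preview", "medium")
    fmt.Println(cost, err) // 17500 <nil> with the placeholder quotaPerUsd
}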
@@ -92,29 +92,37 @@ func preConsumeQuota(ctx context.Context, textRequest *relaymodel.GeneralOpenAIR
     return preConsumedQuota, nil
 }
 
-func postConsumeQuota(ctx context.Context, usage *relaymodel.Usage, meta *meta.Meta, textRequest *relaymodel.GeneralOpenAIRequest, ratio float64, preConsumedQuota int64, modelRatio float64, groupRatio float64, systemPromptReset bool) {
+func postConsumeQuota(ctx context.Context,
+    usage *relaymodel.Usage,
+    meta *meta.Meta,
+    textRequest *relaymodel.GeneralOpenAIRequest,
+    ratio float64,
+    preConsumedQuota int64,
+    modelRatio float64,
+    groupRatio float64,
+    systemPromptReset bool) (quota int64) {
     if usage == nil {
         logger.Error(ctx, "usage is nil, which is unexpected")
         return
     }
-    var quota int64
+
     completionRatio := billingratio.GetCompletionRatio(textRequest.Model, meta.ChannelType)
     promptTokens := usage.PromptTokens
     // It appears that DeepSeek's official service automatically merges ReasoningTokens into CompletionTokens,
     // but the behavior of third-party providers may differ, so for now we do not add them manually.
     // completionTokens := usage.CompletionTokens + usage.CompletionTokensDetails.ReasoningTokens
     completionTokens := usage.CompletionTokens
-    quota = int64(math.Ceil((float64(promptTokens) + float64(completionTokens)*completionRatio) * ratio))
+    quota = int64(math.Ceil((float64(promptTokens)+float64(completionTokens)*completionRatio)*ratio)) + usage.ToolsCost
     if ratio != 0 && quota <= 0 {
         quota = 1
     }
 
     totalTokens := promptTokens + completionTokens
     if totalTokens == 0 {
         // in this case, must be some error happened
         // we cannot just return, because we may have to return the pre-consumed quota
         quota = 0
     }
-    quotaDelta := quota - preConsumedQuota + usage.ToolsCost
+    quotaDelta := quota - preConsumedQuota
     err := model.PostConsumeTokenQuota(meta.TokenId, quotaDelta)
     if err != nil {
         logger.Error(ctx, "error consuming token remain quota: "+err.Error())
@@ -145,6 +153,8 @@ func postConsumeQuota(ctx context.Context, usage *relaymodel.Usage, meta *meta.M
     })
     model.UpdateUserUsedQuotaAndRequestCount(meta.UserId, quota)
     model.UpdateChannelUsedQuota(meta.ChannelId, quota)
+
+    return quota
 }
 
 func getMappedModelName(modelName string, mapping map[string]string) (string, bool) {
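Taken together, the quota hunks move the web-search surcharge into the main figure: usage.ToolsCost is now added to quota itself, the amount settled against the pre-consumed quota is simply quota - preConsumedQuota, and postConsumeQuota returns quota to its caller. The apparent motivation is that ToolsCost previously reached the token account through quotaDelta but was missing from the per-user and per-channel usage updates, which receive quota. A minimal sketch of the resulting arithmetic, with a hypothetical settle helper and made-up numbers:

package main

import (
    "fmt"
    "math"
)

// settle mirrors the arithmetic of postConsumeQuota after this commit; every
// input below is illustrative, not a value taken from the repository.
func settle(promptTokens, completionTokens int, completionRatio, ratio float64,
    toolsCost, preConsumedQuota int64) (quota, quotaDelta int64) {
    quota = int64(math.Ceil((float64(promptTokens)+float64(completionTokens)*completionRatio)*ratio)) + toolsCost
    if ratio != 0 && quota <= 0 {
        quota = 1
    }
    if promptTokens+completionTokens == 0 {
        // upstream returned no tokens at all; charge nothing and let the
        // pre-consumed amount come back through the negative delta
        quota = 0
    }
    // ToolsCost already sits inside quota, so it is not added again here
    quotaDelta = quota - preConsumedQuota
    return quota, quotaDelta
}

func main() {
    // 1,000 prompt and 500 completion tokens, completion billed at 4x,
    // combined ratio 7.5, a 17,500-quota web-search surcharge,
    // and 20,000 quota pre-consumed at request time.
    quota, delta := settle(1000, 500, 4, 7.5, 17500, 20000)
    fmt.Println(quota, delta) // 40000 20000
}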