From bdb695895f03d530a5a1c380345e54b9ac344033 Mon Sep 17 00:00:00 2001 From: "Laisky.Cai" Date: Tue, 14 Jan 2025 13:37:00 +0000 Subject: [PATCH] fix: enhance token usage calculations and improve logging in OpenAI handler --- relay/adaptor/openai/main.go | 16 ++++++++++------ relay/controller/text.go | 13 ++++++++++--- relay/model/misc.go | 18 +++++++++++------- 3 files changed, 31 insertions(+), 16 deletions(-) diff --git a/relay/adaptor/openai/main.go b/relay/adaptor/openai/main.go index 095a6adb..f986ed09 100644 --- a/relay/adaptor/openai/main.go +++ b/relay/adaptor/openai/main.go @@ -118,8 +118,10 @@ func Handler(c *gin.Context, resp *http.Response, promptTokens int, modelName st StatusCode: resp.StatusCode, }, nil } + // Reset response body resp.Body = io.NopCloser(bytes.NewBuffer(responseBody)) + logger.Debugf(c.Request.Context(), "handler response: %s", string(responseBody)) // We shouldn't set the header before we parse the response body, because the parse part may fail. // And then we will have to send an error response, but in this case, the header has already been set. @@ -148,19 +150,21 @@ func Handler(c *gin.Context, resp *http.Response, promptTokens int, modelName st CompletionTokens: completionTokens, TotalTokens: promptTokens + completionTokens, } - } else { - // Convert the more expensive audio tokens to uniformly priced text tokens - textResponse.Usage.PromptTokens = textResponse.CompletionTokensDetails.TextTokens + + } else if textResponse.PromptTokensDetails.AudioTokens+textResponse.CompletionTokensDetails.AudioTokens > 0 { + // Convert the more expensive audio tokens to uniformly priced text tokens. + // Note that when there are no audio tokens in prompt and completion, + // OpenAI will return empty PromptTokensDetails and CompletionTokensDetails, which can be misleading. + textResponse.Usage.PromptTokens = textResponse.PromptTokensDetails.TextTokens + int(math.Ceil( - float64(textResponse.CompletionTokensDetails.AudioTokens)* + float64(textResponse.PromptTokensDetails.AudioTokens)* ratio.GetAudioPromptRatio(modelName), )) textResponse.Usage.CompletionTokens = textResponse.CompletionTokensDetails.TextTokens + int(math.Ceil( float64(textResponse.CompletionTokensDetails.AudioTokens)* - ratio.GetAudioPromptRatio(modelName)* - ratio.GetAudioCompletionRatio(modelName), + ratio.GetAudioPromptRatio(modelName)*ratio.GetAudioCompletionRatio(modelName), )) + textResponse.Usage.TotalTokens = textResponse.Usage.PromptTokens + textResponse.Usage.CompletionTokens } diff --git a/relay/controller/text.go b/relay/controller/text.go index f3120664..0dc06019 100644 --- a/relay/controller/text.go +++ b/relay/controller/text.go @@ -2,9 +2,11 @@ package controller import ( "bytes" + "context" "encoding/json" "io" "net/http" + "time" "github.com/gin-gonic/gin" "github.com/pkg/errors" @@ -91,18 +93,23 @@ func RelayTextHelper(c *gin.Context) *relaymodel.ErrorWithStatusCode { } // post-consume quota + quotaId := c.GetInt(ctxkey.Id) + requestId := c.GetString(ctxkey.RequestId) go func() { + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + quota := postConsumeQuota(ctx, usage, meta, textRequest, ratio, preConsumedQuota, modelRatio, groupRatio, systemPromptReset) // also update user request cost if quota != 0 { docu := model.NewUserRequestCost( - c.GetInt(ctxkey.Id), - c.GetString(ctxkey.RequestId), + quotaId, + requestId, quota, ) if err = docu.Insert(); err != nil { - logger.Errorf(c, "insert user request cost failed: %+v", err) + logger.Errorf(ctx, "insert user request cost failed: %+v", err) } } }() diff --git a/relay/model/misc.go b/relay/model/misc.go index ff3f061d..62c3fe6f 100644 --- a/relay/model/misc.go +++ b/relay/model/misc.go @@ -1,10 +1,12 @@ package model type Usage struct { - PromptTokens int `json:"prompt_tokens"` - CompletionTokens int `json:"completion_tokens"` - TotalTokens int `json:"total_tokens"` - PromptTokensDetails usagePromptTokensDetails `gorm:"-" json:"prompt_tokens_details,omitempty"` + PromptTokens int `json:"prompt_tokens"` + CompletionTokens int `json:"completion_tokens"` + TotalTokens int `json:"total_tokens"` + // PromptTokensDetails may be empty for some models + PromptTokensDetails usagePromptTokensDetails `gorm:"-" json:"prompt_tokens_details,omitempty"` + // CompletionTokensDetails may be empty for some models CompletionTokensDetails usageCompletionTokensDetails `gorm:"-" json:"completion_tokens_details,omitempty"` ServiceTier string `gorm:"-" json:"service_tier,omitempty"` SystemFingerprint string `gorm:"-" json:"system_fingerprint,omitempty"` @@ -25,8 +27,9 @@ type ErrorWithStatusCode struct { type usagePromptTokensDetails struct { CachedTokens int `json:"cached_tokens"` AudioTokens int `json:"audio_tokens"` - TextTokens int `json:"text_tokens"` - ImageTokens int `json:"image_tokens"` + // TextTokens could be zero for pure text chats + TextTokens int `json:"text_tokens"` + ImageTokens int `json:"image_tokens"` } type usageCompletionTokensDetails struct { @@ -34,5 +37,6 @@ type usageCompletionTokensDetails struct { AudioTokens int `json:"audio_tokens"` AcceptedPredictionTokens int `json:"accepted_prediction_tokens"` RejectedPredictionTokens int `json:"rejected_prediction_tokens"` - TextTokens int `json:"text_tokens"` + // TextTokens could be zero for pure text chats + TextTokens int `json:"text_tokens"` }