From ca9aaaf07dec21a4537671b0a55e5d2ad57985e9 Mon Sep 17 00:00:00 2001
From: "Laisky.Cai"
Date: Tue, 14 Jan 2025 13:37:00 +0000
Subject: [PATCH] fix: enhance token usage calculations and improve logging in
 OpenAI handler

---
 relay/adaptor/openai/main.go | 16 ++++++++++------
 relay/model/misc.go          | 18 +++++++++++-------
 2 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/relay/adaptor/openai/main.go b/relay/adaptor/openai/main.go
index 095a6adb..f986ed09 100644
--- a/relay/adaptor/openai/main.go
+++ b/relay/adaptor/openai/main.go
@@ -118,8 +118,10 @@ func Handler(c *gin.Context, resp *http.Response, promptTokens int, modelName st
 			StatusCode: resp.StatusCode,
 		}, nil
 	}
+	// Reset response body
 	resp.Body = io.NopCloser(bytes.NewBuffer(responseBody))
+	logger.Debugf(c.Request.Context(), "handler response: %s", string(responseBody))
 
 	// We shouldn't set the header before we parse the response body, because the parse part may fail.
 	// And then we will have to send an error response, but in this case, the header has already been set.
@@ -148,19 +150,21 @@ func Handler(c *gin.Context, resp *http.Response, promptTokens int, modelName st
 			CompletionTokens: completionTokens,
 			TotalTokens:      promptTokens + completionTokens,
 		}
-	} else {
-		// Convert the more expensive audio tokens to uniformly priced text tokens
-		textResponse.Usage.PromptTokens = textResponse.CompletionTokensDetails.TextTokens +
+
+	} else if textResponse.PromptTokensDetails.AudioTokens+textResponse.CompletionTokensDetails.AudioTokens > 0 {
+		// Convert the more expensive audio tokens to uniformly priced text tokens.
+		// Note that when there are no audio tokens in prompt and completion,
+		// OpenAI will return empty PromptTokensDetails and CompletionTokensDetails, which can be misleading.
+		textResponse.Usage.PromptTokens = textResponse.PromptTokensDetails.TextTokens +
 			int(math.Ceil(
-				float64(textResponse.CompletionTokensDetails.AudioTokens)*
+				float64(textResponse.PromptTokensDetails.AudioTokens)*
 					ratio.GetAudioPromptRatio(modelName),
 			))
 		textResponse.Usage.CompletionTokens = textResponse.CompletionTokensDetails.TextTokens +
 			int(math.Ceil(
 				float64(textResponse.CompletionTokensDetails.AudioTokens)*
-					ratio.GetAudioPromptRatio(modelName)*
-					ratio.GetAudioCompletionRatio(modelName),
+					ratio.GetAudioPromptRatio(modelName)*ratio.GetAudioCompletionRatio(modelName),
 			))
+		textResponse.Usage.TotalTokens = textResponse.Usage.PromptTokens + textResponse.Usage.CompletionTokens
 	}
 
diff --git a/relay/model/misc.go b/relay/model/misc.go
index ff3f061d..62c3fe6f 100644
--- a/relay/model/misc.go
+++ b/relay/model/misc.go
@@ -1,10 +1,12 @@
 package model
 
 type Usage struct {
-	PromptTokens        int                      `json:"prompt_tokens"`
-	CompletionTokens    int                      `json:"completion_tokens"`
-	TotalTokens         int                      `json:"total_tokens"`
-	PromptTokensDetails usagePromptTokensDetails `gorm:"-" json:"prompt_tokens_details,omitempty"`
+	PromptTokens     int `json:"prompt_tokens"`
+	CompletionTokens int `json:"completion_tokens"`
+	TotalTokens      int `json:"total_tokens"`
+	// PromptTokensDetails may be empty for some models
+	PromptTokensDetails usagePromptTokensDetails `gorm:"-" json:"prompt_tokens_details,omitempty"`
+	// CompletionTokensDetails may be empty for some models
 	CompletionTokensDetails usageCompletionTokensDetails `gorm:"-" json:"completion_tokens_details,omitempty"`
 	ServiceTier             string                       `gorm:"-" json:"service_tier,omitempty"`
 	SystemFingerprint       string                       `gorm:"-" json:"system_fingerprint,omitempty"`
@@ -25,8 +27,9 @@ type ErrorWithStatusCode struct {
 type usagePromptTokensDetails struct {
 	CachedTokens int `json:"cached_tokens"`
 	AudioTokens  int `json:"audio_tokens"`
-	TextTokens   int `json:"text_tokens"`
-	ImageTokens  int `json:"image_tokens"`
+	// TextTokens could be zero for pure text chats
+	TextTokens  int `json:"text_tokens"`
+	ImageTokens int `json:"image_tokens"`
 }
 
 type usageCompletionTokensDetails struct {
@@ -34,5 +37,6 @@ type usageCompletionTokensDetails struct {
 	AudioTokens              int `json:"audio_tokens"`
 	AcceptedPredictionTokens int `json:"accepted_prediction_tokens"`
 	RejectedPredictionTokens int `json:"rejected_prediction_tokens"`
-	TextTokens               int `json:"text_tokens"`
+	// TextTokens could be zero for pure text chats
+	TextTokens int `json:"text_tokens"`
 }