From ca9aaaf07dec21a4537671b0a55e5d2ad57985e9 Mon Sep 17 00:00:00 2001
From: "Laisky.Cai"
Date: Tue, 14 Jan 2025 13:37:00 +0000
Subject: [PATCH] fix: enhance token usage calculations and improve logging in
 OpenAI handler

---
 relay/adaptor/openai/main.go | 16 ++++++++++------
 relay/model/misc.go          | 18 +++++++++++-------
 2 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/relay/adaptor/openai/main.go b/relay/adaptor/openai/main.go
index 095a6adb..f986ed09 100644
--- a/relay/adaptor/openai/main.go
+++ b/relay/adaptor/openai/main.go
@@ -118,8 +118,10 @@ func Handler(c *gin.Context, resp *http.Response, promptTokens int, modelName st
 			StatusCode: resp.StatusCode,
 		}, nil
 	}
+	// Reset response body
 	resp.Body = io.NopCloser(bytes.NewBuffer(responseBody))
+	logger.Debugf(c.Request.Context(), "handler response: %s", string(responseBody))
 
 	// We shouldn't set the header before we parse the response body, because the parse part may fail.
 	// And then we will have to send an error response, but in this case, the header has already been set.
@@ -148,19 +150,21 @@ func Handler(c *gin.Context, resp *http.Response, promptTokens int, modelName st
 			CompletionTokens: completionTokens,
 			TotalTokens:      promptTokens + completionTokens,
 		}
-	} else {
-		// Convert the more expensive audio tokens to uniformly priced text tokens
-		textResponse.Usage.PromptTokens = textResponse.CompletionTokensDetails.TextTokens +
+
+	} else if textResponse.PromptTokensDetails.AudioTokens+textResponse.CompletionTokensDetails.AudioTokens > 0 {
+		// Convert the more expensive audio tokens to uniformly priced text tokens.
+		// Note that when there are no audio tokens in prompt and completion,
+		// OpenAI will return empty PromptTokensDetails and CompletionTokensDetails, which can be misleading.
+		textResponse.Usage.PromptTokens = textResponse.PromptTokensDetails.TextTokens +
 			int(math.Ceil(
-				float64(textResponse.CompletionTokensDetails.AudioTokens)*
+				float64(textResponse.PromptTokensDetails.AudioTokens)*
 					ratio.GetAudioPromptRatio(modelName),
 			))
 		textResponse.Usage.CompletionTokens = textResponse.CompletionTokensDetails.TextTokens +
 			int(math.Ceil(
 				float64(textResponse.CompletionTokensDetails.AudioTokens)*
-					ratio.GetAudioPromptRatio(modelName)*
-					ratio.GetAudioCompletionRatio(modelName),
+					ratio.GetAudioPromptRatio(modelName)*ratio.GetAudioCompletionRatio(modelName),
 			))
+		textResponse.Usage.TotalTokens = textResponse.Usage.PromptTokens + textResponse.Usage.CompletionTokens
 	}
 
diff --git a/relay/model/misc.go b/relay/model/misc.go
index ff3f061d..62c3fe6f 100644
--- a/relay/model/misc.go
+++ b/relay/model/misc.go
@@ -1,10 +1,12 @@
 package model
 
 type Usage struct {
-	PromptTokens        int                      `json:"prompt_tokens"`
-	CompletionTokens    int                      `json:"completion_tokens"`
-	TotalTokens         int                      `json:"total_tokens"`
-	PromptTokensDetails usagePromptTokensDetails `gorm:"-" json:"prompt_tokens_details,omitempty"`
+	PromptTokens     int `json:"prompt_tokens"`
+	CompletionTokens int `json:"completion_tokens"`
+	TotalTokens      int `json:"total_tokens"`
+	// PromptTokensDetails may be empty for some models
+	PromptTokensDetails usagePromptTokensDetails `gorm:"-" json:"prompt_tokens_details,omitempty"`
+	// CompletionTokensDetails may be empty for some models
 	CompletionTokensDetails usageCompletionTokensDetails `gorm:"-" json:"completion_tokens_details,omitempty"`
 	ServiceTier             string                       `gorm:"-" json:"service_tier,omitempty"`
 	SystemFingerprint       string                       `gorm:"-" json:"system_fingerprint,omitempty"`
@@ -25,8 +27,9 @@ type ErrorWithStatusCode struct {
 type usagePromptTokensDetails struct {
 	CachedTokens int `json:"cached_tokens"`
 	AudioTokens  int `json:"audio_tokens"`
-	TextTokens   int `json:"text_tokens"`
-	ImageTokens  int `json:"image_tokens"`
+	// TextTokens could be zero for pure text chats
+	TextTokens  int `json:"text_tokens"`
+	ImageTokens int `json:"image_tokens"`
 }
 
 type usageCompletionTokensDetails struct {
@@ -34,5 +37,6 @@ type usageCompletionTokensDetails struct {
 	AudioTokens              int `json:"audio_tokens"`
 	AcceptedPredictionTokens int `json:"accepted_prediction_tokens"`
 	RejectedPredictionTokens int `json:"rejected_prediction_tokens"`
-	TextTokens               int `json:"text_tokens"`
+	// TextTokens could be zero for pure text chats
+	TextTokens int `json:"text_tokens"`
 }