From bdb695895f03d530a5a1c380345e54b9ac344033 Mon Sep 17 00:00:00 2001
From: "Laisky.Cai" <github@laisky.com>
Date: Tue, 14 Jan 2025 13:37:00 +0000
Subject: [PATCH] fix: enhance token usage calculations and improve logging in
 OpenAI handler

---
 relay/adaptor/openai/main.go | 16 ++++++++++------
 relay/controller/text.go     | 13 ++++++++++---
 relay/model/misc.go          | 18 +++++++++++-------
 3 files changed, 31 insertions(+), 16 deletions(-)

diff --git a/relay/adaptor/openai/main.go b/relay/adaptor/openai/main.go
index 095a6adb..f986ed09 100644
--- a/relay/adaptor/openai/main.go
+++ b/relay/adaptor/openai/main.go
@@ -118,8 +118,10 @@ func Handler(c *gin.Context, resp *http.Response, promptTokens int, modelName st
 			StatusCode: resp.StatusCode,
 		}, nil
 	}
+
 	// Reset response body
 	resp.Body = io.NopCloser(bytes.NewBuffer(responseBody))
+	logger.Debugf(c.Request.Context(), "handler response: %s", string(responseBody))
 
 	// We shouldn't set the header before we parse the response body, because the parse part may fail.
 	// And then we will have to send an error response, but in this case, the header has already been set.
@@ -148,19 +150,21 @@ func Handler(c *gin.Context, resp *http.Response, promptTokens int, modelName st
 			CompletionTokens: completionTokens,
 			TotalTokens:      promptTokens + completionTokens,
 		}
-	} else {
-		// Convert the more expensive audio tokens to uniformly priced text tokens
-		textResponse.Usage.PromptTokens = textResponse.CompletionTokensDetails.TextTokens +
+	} else if textResponse.PromptTokensDetails.AudioTokens+textResponse.CompletionTokensDetails.AudioTokens > 0 {
+		// Convert the more expensive audio tokens to uniformly priced text tokens.
+		// Note that when there are no audio tokens in prompt and completion,
+		// OpenAI will return empty PromptTokensDetails and CompletionTokensDetails, which can be misleading.
+		textResponse.Usage.PromptTokens = textResponse.PromptTokensDetails.TextTokens +
 			int(math.Ceil(
-				float64(textResponse.CompletionTokensDetails.AudioTokens)*
+				float64(textResponse.PromptTokensDetails.AudioTokens)*
 					ratio.GetAudioPromptRatio(modelName),
 			))
 		textResponse.Usage.CompletionTokens = textResponse.CompletionTokensDetails.TextTokens +
 			int(math.Ceil(
 				float64(textResponse.CompletionTokensDetails.AudioTokens)*
-					ratio.GetAudioPromptRatio(modelName)*
-					ratio.GetAudioCompletionRatio(modelName),
+					ratio.GetAudioPromptRatio(modelName)*ratio.GetAudioCompletionRatio(modelName),
 			))
+
 		textResponse.Usage.TotalTokens = textResponse.Usage.PromptTokens +
 			textResponse.Usage.CompletionTokens
 	}
diff --git a/relay/controller/text.go b/relay/controller/text.go
index f3120664..0dc06019 100644
--- a/relay/controller/text.go
+++ b/relay/controller/text.go
@@ -2,9 +2,11 @@ package controller
 
 import (
 	"bytes"
+	"context"
 	"encoding/json"
 	"io"
 	"net/http"
+	"time"
 
 	"github.com/gin-gonic/gin"
 	"github.com/pkg/errors"
@@ -91,18 +93,29 @@ func RelayTextHelper(c *gin.Context) *relaymodel.ErrorWithStatusCode {
 	}
 
 	// post-consume quota
+	// Read the values needed from the gin context up front so the goroutine
+	// below never accesses c.
+	quotaId := c.GetInt(ctxkey.Id)
+	requestId := c.GetString(ctxkey.RequestId)
 	go func() {
+		// Detached context with its own 30s deadline; it shadows the outer ctx
+		// for the rest of this goroutine.
+		ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+		defer cancel()
+
 		quota := postConsumeQuota(ctx, usage, meta, textRequest, ratio, preConsumedQuota, modelRatio, groupRatio, systemPromptReset)
 
 		// also update user request cost
 		if quota != 0 {
 			docu := model.NewUserRequestCost(
-				c.GetInt(ctxkey.Id),
-				c.GetString(ctxkey.RequestId),
+				quotaId,
+				requestId,
 				quota,
 			)
-			if err = docu.Insert(); err != nil {
-				logger.Errorf(c, "insert user request cost failed: %+v", err)
+			// Keep err local to the goroutine instead of assigning to the
+			// enclosing function's err from a concurrent context.
+			if err := docu.Insert(); err != nil {
+				logger.Errorf(ctx, "insert user request cost failed: %+v", err)
 			}
 		}
 	}()
diff --git a/relay/model/misc.go b/relay/model/misc.go
index ff3f061d..62c3fe6f 100644
--- a/relay/model/misc.go
+++ b/relay/model/misc.go
@@ -1,10 +1,12 @@
 package model
 
 type Usage struct {
-	PromptTokens            int                          `json:"prompt_tokens"`
-	CompletionTokens        int                          `json:"completion_tokens"`
-	TotalTokens             int                          `json:"total_tokens"`
-	PromptTokensDetails     usagePromptTokensDetails     `gorm:"-" json:"prompt_tokens_details,omitempty"`
+	PromptTokens     int `json:"prompt_tokens"`
+	CompletionTokens int `json:"completion_tokens"`
+	TotalTokens      int `json:"total_tokens"`
+	// PromptTokensDetails may be empty for some models
+	PromptTokensDetails usagePromptTokensDetails `gorm:"-" json:"prompt_tokens_details,omitempty"`
+	// CompletionTokensDetails may be empty for some models
 	CompletionTokensDetails usageCompletionTokensDetails `gorm:"-" json:"completion_tokens_details,omitempty"`
 	ServiceTier             string                       `gorm:"-" json:"service_tier,omitempty"`
 	SystemFingerprint       string                       `gorm:"-" json:"system_fingerprint,omitempty"`
@@ -25,8 +27,9 @@ type ErrorWithStatusCode struct {
 type usagePromptTokensDetails struct {
 	CachedTokens int `json:"cached_tokens"`
 	AudioTokens  int `json:"audio_tokens"`
-	TextTokens   int `json:"text_tokens"`
-	ImageTokens  int `json:"image_tokens"`
+	// TextTokens may be zero for text-only chats, where the upstream can return empty token details.
+	TextTokens  int `json:"text_tokens"`
+	ImageTokens int `json:"image_tokens"`
 }
 
 type usageCompletionTokensDetails struct {
@@ -34,5 +37,6 @@ type usageCompletionTokensDetails struct {
 	AudioTokens              int `json:"audio_tokens"`
 	AcceptedPredictionTokens int `json:"accepted_prediction_tokens"`
 	RejectedPredictionTokens int `json:"rejected_prediction_tokens"`
-	TextTokens               int `json:"text_tokens"`
+	// TextTokens may be zero for text-only chats, where the upstream can return empty token details.
+	TextTokens int `json:"text_tokens"`
 }