fix: enhance token usage calculations and improve logging in OpenAI handler

Author: Laisky.Cai
Date:   2025-01-14 13:37:00 +00:00
commit bdb695895f (parent 40be30e36f)

3 changed files with 31 additions and 16 deletions


@@ -118,8 +118,10 @@ func Handler(c *gin.Context, resp *http.Response, promptTokens int, modelName st
 			StatusCode: resp.StatusCode,
 		}, nil
 	}
+
 	// Reset response body
 	resp.Body = io.NopCloser(bytes.NewBuffer(responseBody))
+	logger.Debugf(c.Request.Context(), "handler response: %s", string(responseBody))
 
 	// We shouldn't set the header before we parse the response body, because the parse part may fail.
 	// And then we will have to send an error response, but in this case, the header has already been set.
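The new Debugf call only works because the body has just been re-wrapped: io.ReadAll consumes resp.Body, so it must be restored before anything downstream reads it again. A minimal standalone sketch of that pattern, assuming a hypothetical helper drainAndRestore that is not part of this repository:

    package handler

    import (
        "bytes"
        "io"
        "log"
        "net/http"
    )

    // drainAndRestore reads the whole body, logs it, and puts an equivalent
    // reader back so later consumers of resp.Body still see the payload.
    func drainAndRestore(resp *http.Response) ([]byte, error) {
        body, err := io.ReadAll(resp.Body)
        if err != nil {
            return nil, err
        }
        resp.Body.Close()
        resp.Body = io.NopCloser(bytes.NewBuffer(body))
        log.Printf("handler response: %s", body)
        return body, nil
    }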
@@ -148,19 +150,21 @@ func Handler(c *gin.Context, resp *http.Response, promptTokens int, modelName st
 			CompletionTokens: completionTokens,
 			TotalTokens:      promptTokens + completionTokens,
 		}
-	} else {
-		// Convert the more expensive audio tokens to uniformly priced text tokens
-		textResponse.Usage.PromptTokens = textResponse.CompletionTokensDetails.TextTokens +
+	} else if textResponse.PromptTokensDetails.AudioTokens+textResponse.CompletionTokensDetails.AudioTokens > 0 {
+		// Convert the more expensive audio tokens to uniformly priced text tokens.
+		// Note that when there are no audio tokens in prompt and completion,
+		// OpenAI will return empty PromptTokensDetails and CompletionTokensDetails, which can be misleading.
+		textResponse.Usage.PromptTokens = textResponse.PromptTokensDetails.TextTokens +
 			int(math.Ceil(
-				float64(textResponse.CompletionTokensDetails.AudioTokens)*
+				float64(textResponse.PromptTokensDetails.AudioTokens)*
 					ratio.GetAudioPromptRatio(modelName),
 			))
 		textResponse.Usage.CompletionTokens = textResponse.CompletionTokensDetails.TextTokens +
 			int(math.Ceil(
 				float64(textResponse.CompletionTokensDetails.AudioTokens)*
-					ratio.GetAudioPromptRatio(modelName)*
-					ratio.GetAudioCompletionRatio(modelName),
+					ratio.GetAudioPromptRatio(modelName)*ratio.GetAudioCompletionRatio(modelName),
 			))
 		textResponse.Usage.TotalTokens = textResponse.Usage.PromptTokens +
 			textResponse.Usage.CompletionTokens
 	}
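Besides adding the empty-details guard, the hunk also fixes the prompt side to read PromptTokensDetails instead of CompletionTokensDetails. A worked example of the conversion, with made-up ratios (the real values come from ratio.GetAudioPromptRatio and ratio.GetAudioCompletionRatio for the given model):

    package main

    import (
        "fmt"
        "math"
    )

    func main() {
        // Illustrative numbers only; not actual OpenAI pricing.
        promptText, promptAudio := 100, 50        // text/audio tokens in the prompt
        completionText, completionAudio := 80, 40 // text/audio tokens in the completion
        audioPromptRatio := 10.0                  // assumed price of one audio prompt token, in text tokens
        audioCompletionRatio := 2.0               // assumed completion surcharge on top of the prompt ratio

        promptTokens := promptText + int(math.Ceil(float64(promptAudio)*audioPromptRatio))
        completionTokens := completionText + int(math.Ceil(float64(completionAudio)*audioPromptRatio*audioCompletionRatio))

        fmt.Println(promptTokens)                    // 100 + 500 = 600
        fmt.Println(completionTokens)                // 80 + 800 = 880
        fmt.Println(promptTokens + completionTokens) // 1480
    }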


@@ -2,9 +2,11 @@ package controller
 import (
 	"bytes"
+	"context"
 	"encoding/json"
 	"io"
 	"net/http"
+	"time"
 
 	"github.com/gin-gonic/gin"
 	"github.com/pkg/errors"
@@ -91,18 +93,23 @@ func RelayTextHelper(c *gin.Context) *relaymodel.ErrorWithStatusCode {
 	}
 
 	// post-consume quota
+	quotaId := c.GetInt(ctxkey.Id)
+	requestId := c.GetString(ctxkey.RequestId)
 	go func() {
+		ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+		defer cancel()
 		quota := postConsumeQuota(ctx, usage, meta, textRequest, ratio, preConsumedQuota, modelRatio, groupRatio, systemPromptReset)
 		// also update user request cost
 		if quota != 0 {
 			docu := model.NewUserRequestCost(
-				c.GetInt(ctxkey.Id),
-				c.GetString(ctxkey.RequestId),
+				quotaId,
+				requestId,
 				quota,
 			)
 			if err = docu.Insert(); err != nil {
-				logger.Errorf(c, "insert user request cost failed: %+v", err)
+				logger.Errorf(ctx, "insert user request cost failed: %+v", err)
 			}
 		}
 	}()
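Two Go pitfalls motivate this hunk: gin may recycle a *gin.Context once the handler returns, so request-scoped values must be copied out before the goroutine starts, and background work should run on a context detached from the request so it neither dies with the request nor hangs forever. A minimal sketch of the same pattern; saveCost and the context keys are hypothetical stand-ins, not this repository's identifiers:

    package handler

    import (
        "context"
        "time"

        "github.com/gin-gonic/gin"
    )

    // saveCost is a hypothetical persistence call standing in for the billing write.
    func saveCost(ctx context.Context, userID int, requestID string) error { return nil }

    func relay(c *gin.Context) {
        // Copy everything the goroutine needs while the request context is still valid.
        userID := c.GetInt("id")                 // hypothetical key
        requestID := c.GetString("request_id")   // hypothetical key

        go func() {
            // Detach from the request but bound the work with a timeout.
            ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
            defer cancel()
            _ = saveCost(ctx, userID, requestID)
        }()
    }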


@@ -4,7 +4,9 @@ type Usage struct {
 	PromptTokens     int `json:"prompt_tokens"`
 	CompletionTokens int `json:"completion_tokens"`
 	TotalTokens      int `json:"total_tokens"`
+	// PromptTokensDetails may be empty for some models
 	PromptTokensDetails usagePromptTokensDetails `gorm:"-" json:"prompt_tokens_details,omitempty"`
+	// CompletionTokensDetails may be empty for some models
 	CompletionTokensDetails usageCompletionTokensDetails `gorm:"-" json:"completion_tokens_details,omitempty"`
 	ServiceTier       string `gorm:"-" json:"service_tier,omitempty"`
 	SystemFingerprint string `gorm:"-" json:"system_fingerprint,omitempty"`
@@ -25,6 +27,7 @@ type ErrorWithStatusCode struct {
 type usagePromptTokensDetails struct {
 	CachedTokens int `json:"cached_tokens"`
 	AudioTokens  int `json:"audio_tokens"`
+	// TextTokens could be zero for pure text chats
 	TextTokens  int `json:"text_tokens"`
 	ImageTokens int `json:"image_tokens"`
 }
@@ -34,5 +37,6 @@ type usageCompletionTokensDetails struct {
 	AudioTokens              int `json:"audio_tokens"`
 	AcceptedPredictionTokens int `json:"accepted_prediction_tokens"`
 	RejectedPredictionTokens int `json:"rejected_prediction_tokens"`
+	// TextTokens could be zero for pure text chats
 	TextTokens int `json:"text_tokens"`
 }
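These comments document the behavior that made the old else-branch dangerous: for a pure text chat the details objects may be absent from the JSON, so the nested structs stay zero-valued, and recomputing usage from them would wipe the real counts. A small illustration; the payload is made up, not a recorded API response:

    package main

    import (
        "encoding/json"
        "fmt"
    )

    type usage struct {
        PromptTokens        int `json:"prompt_tokens"`
        CompletionTokens    int `json:"completion_tokens"`
        PromptTokensDetails struct {
            AudioTokens int `json:"audio_tokens"`
            TextTokens  int `json:"text_tokens"`
        } `json:"prompt_tokens_details"`
    }

    func main() {
        // No *_details fields at all, as some models return for plain text chats.
        var u usage
        _ = json.Unmarshal([]byte(`{"prompt_tokens":12,"completion_tokens":7}`), &u)
        fmt.Println(u.PromptTokensDetails.AudioTokens, u.PromptTokensDetails.TextTokens) // 0 0
        // Rebuilding usage from these zero-valued details would report 0 tokens,
        // which is why the handler now checks AudioTokens > 0 before converting.
    }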