fix: enhance token usage calculations and improve logging in OpenAI handler

Author: Laisky.Cai
Date:   2025-01-14 13:37:00 +00:00
commit bdb695895f (parent 40be30e36f)

3 changed files with 31 additions and 16 deletions


@@ -118,8 +118,10 @@ func Handler(c *gin.Context, resp *http.Response, promptTokens int, modelName st
 			StatusCode: resp.StatusCode,
 		}, nil
 	}
+
 	// Reset response body
 	resp.Body = io.NopCloser(bytes.NewBuffer(responseBody))
+	logger.Debugf(c.Request.Context(), "handler response: %s", string(responseBody))
 
 	// We shouldn't set the header before we parse the response body, because the parse part may fail.
 	// And then we will have to send an error response, but in this case, the header has already been set.
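The new Debugf call only works because the body has just been re-wrapped: io.ReadAll consumes resp.Body, so it must be restored before anything downstream reads it again. A minimal standalone sketch of that pattern, assuming a hypothetical helper drainAndRestore that is not part of this repository:

    package handler

    import (
        "bytes"
        "io"
        "log"
        "net/http"
    )

    // drainAndRestore reads the whole body, logs it, and puts an equivalent
    // reader back so later consumers of resp.Body still see the payload.
    func drainAndRestore(resp *http.Response) ([]byte, error) {
        body, err := io.ReadAll(resp.Body)
        if err != nil {
            return nil, err
        }
        resp.Body.Close()
        resp.Body = io.NopCloser(bytes.NewBuffer(body))
        log.Printf("handler response: %s", body)
        return body, nil
    }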
@@ -148,19 +150,21 @@ func Handler(c *gin.Context, resp *http.Response, promptTokens int, modelName st
 			CompletionTokens: completionTokens,
 			TotalTokens:      promptTokens + completionTokens,
 		}
-	} else {
-		// Convert the more expensive audio tokens to uniformly priced text tokens
-		textResponse.Usage.PromptTokens = textResponse.CompletionTokensDetails.TextTokens +
+	} else if textResponse.PromptTokensDetails.AudioTokens+textResponse.CompletionTokensDetails.AudioTokens > 0 {
+		// Convert the more expensive audio tokens to uniformly priced text tokens.
+		// Note that when there are no audio tokens in prompt and completion,
+		// OpenAI will return empty PromptTokensDetails and CompletionTokensDetails, which can be misleading.
+		textResponse.Usage.PromptTokens = textResponse.PromptTokensDetails.TextTokens +
 			int(math.Ceil(
-				float64(textResponse.CompletionTokensDetails.AudioTokens)*
+				float64(textResponse.PromptTokensDetails.AudioTokens)*
 					ratio.GetAudioPromptRatio(modelName),
 			))
 		textResponse.Usage.CompletionTokens = textResponse.CompletionTokensDetails.TextTokens +
 			int(math.Ceil(
 				float64(textResponse.CompletionTokensDetails.AudioTokens)*
-					ratio.GetAudioPromptRatio(modelName)*
-					ratio.GetAudioCompletionRatio(modelName),
+					ratio.GetAudioPromptRatio(modelName)*ratio.GetAudioCompletionRatio(modelName),
 			))
 		textResponse.Usage.TotalTokens = textResponse.Usage.PromptTokens +
 			textResponse.Usage.CompletionTokens
 	}
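Besides adding the empty-details guard, the hunk also fixes the prompt side to read PromptTokensDetails instead of CompletionTokensDetails. A worked example of the conversion, with made-up ratios (the real values come from ratio.GetAudioPromptRatio and ratio.GetAudioCompletionRatio for the given model):

    package main

    import (
        "fmt"
        "math"
    )

    func main() {
        // Illustrative numbers only; not actual OpenAI pricing.
        promptText, promptAudio := 100, 50        // text/audio tokens in the prompt
        completionText, completionAudio := 80, 40 // text/audio tokens in the completion
        audioPromptRatio := 10.0                  // assumed price of one audio prompt token, in text tokens
        audioCompletionRatio := 2.0               // assumed completion surcharge on top of the prompt ratio

        promptTokens := promptText + int(math.Ceil(float64(promptAudio)*audioPromptRatio))
        completionTokens := completionText + int(math.Ceil(float64(completionAudio)*audioPromptRatio*audioCompletionRatio))

        fmt.Println(promptTokens)                    // 100 + 500 = 600
        fmt.Println(completionTokens)                // 80 + 800 = 880
        fmt.Println(promptTokens + completionTokens) // 1480
    }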


@@ -2,9 +2,11 @@ package controller
 import (
 	"bytes"
+	"context"
 	"encoding/json"
 	"io"
 	"net/http"
+	"time"
 
 	"github.com/gin-gonic/gin"
 	"github.com/pkg/errors"
@@ -91,18 +93,23 @@ func RelayTextHelper(c *gin.Context) *relaymodel.ErrorWithStatusCode {
 	}
 
 	// post-consume quota
+	quotaId := c.GetInt(ctxkey.Id)
+	requestId := c.GetString(ctxkey.RequestId)
 	go func() {
+		ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+		defer cancel()
 		quota := postConsumeQuota(ctx, usage, meta, textRequest, ratio, preConsumedQuota, modelRatio, groupRatio, systemPromptReset)
 		// also update user request cost
 		if quota != 0 {
 			docu := model.NewUserRequestCost(
-				c.GetInt(ctxkey.Id),
-				c.GetString(ctxkey.RequestId),
+				quotaId,
+				requestId,
 				quota,
 			)
 			if err = docu.Insert(); err != nil {
-				logger.Errorf(c, "insert user request cost failed: %+v", err)
+				logger.Errorf(ctx, "insert user request cost failed: %+v", err)
 			}
 		}
 	}()
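Two Go pitfalls motivate this hunk: gin may recycle a *gin.Context once the handler returns, so request-scoped values must be copied out before the goroutine starts, and background work should run on a context detached from the request so it neither dies with the request nor hangs forever. A minimal sketch of the same pattern; saveCost and the context keys are hypothetical stand-ins, not this repository's identifiers:

    package handler

    import (
        "context"
        "time"

        "github.com/gin-gonic/gin"
    )

    // saveCost is a hypothetical persistence call standing in for the billing write.
    func saveCost(ctx context.Context, userID int, requestID string) error { return nil }

    func relay(c *gin.Context) {
        // Copy everything the goroutine needs while the request context is still valid.
        userID := c.GetInt("id")                 // hypothetical key
        requestID := c.GetString("request_id")   // hypothetical key

        go func() {
            // Detach from the request but bound the work with a timeout.
            ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
            defer cancel()
            _ = saveCost(ctx, userID, requestID)
        }()
    }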


@@ -4,7 +4,9 @@ type Usage struct {
 	PromptTokens     int `json:"prompt_tokens"`
 	CompletionTokens int `json:"completion_tokens"`
 	TotalTokens      int `json:"total_tokens"`
+	// PromptTokensDetails may be empty for some models
 	PromptTokensDetails usagePromptTokensDetails `gorm:"-" json:"prompt_tokens_details,omitempty"`
+	// CompletionTokensDetails may be empty for some models
 	CompletionTokensDetails usageCompletionTokensDetails `gorm:"-" json:"completion_tokens_details,omitempty"`
 	ServiceTier       string `gorm:"-" json:"service_tier,omitempty"`
 	SystemFingerprint string `gorm:"-" json:"system_fingerprint,omitempty"`
@@ -25,6 +27,7 @@ type ErrorWithStatusCode struct {
 type usagePromptTokensDetails struct {
 	CachedTokens int `json:"cached_tokens"`
 	AudioTokens  int `json:"audio_tokens"`
+	// TextTokens could be zero for pure text chats
 	TextTokens  int `json:"text_tokens"`
 	ImageTokens int `json:"image_tokens"`
 }
@@ -34,5 +37,6 @@ type usageCompletionTokensDetails struct {
 	AudioTokens              int `json:"audio_tokens"`
 	AcceptedPredictionTokens int `json:"accepted_prediction_tokens"`
 	RejectedPredictionTokens int `json:"rejected_prediction_tokens"`
+	// TextTokens could be zero for pure text chats
 	TextTokens int `json:"text_tokens"`
 }
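These comments document the behavior that made the old else-branch dangerous: for a pure text chat the details objects may be absent from the JSON, so the nested structs stay zero-valued, and recomputing usage from them would wipe the real counts. A small illustration; the payload is made up, not a recorded API response:

    package main

    import (
        "encoding/json"
        "fmt"
    )

    type usage struct {
        PromptTokens        int `json:"prompt_tokens"`
        CompletionTokens    int `json:"completion_tokens"`
        PromptTokensDetails struct {
            AudioTokens int `json:"audio_tokens"`
            TextTokens  int `json:"text_tokens"`
        } `json:"prompt_tokens_details"`
    }

    func main() {
        // No *_details fields at all, as some models return for plain text chats.
        var u usage
        _ = json.Unmarshal([]byte(`{"prompt_tokens":12,"completion_tokens":7}`), &u)
        fmt.Println(u.PromptTokensDetails.AudioTokens, u.PromptTokensDetails.TextTokens) // 0 0
        // Rebuilding usage from these zero-valued details would report 0 tokens,
        // which is why the handler now checks AudioTokens > 0 before converting.
    }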