mirror of
https://github.com/songquanpeng/one-api.git
synced 2025-11-11 10:53:42 +08:00
fix: enhance token usage calculations and improve logging in OpenAI handler
This commit is contained in:
@@ -118,8 +118,10 @@ func Handler(c *gin.Context, resp *http.Response, promptTokens int, modelName st
|
||||
StatusCode: resp.StatusCode,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Reset response body
|
||||
resp.Body = io.NopCloser(bytes.NewBuffer(responseBody))
|
||||
logger.Debugf(c.Request.Context(), "handler response: %s", string(responseBody))
|
||||
|
||||
// We shouldn't set the header before we parse the response body, because the parse part may fail.
|
||||
// And then we will have to send an error response, but in this case, the header has already been set.
|
||||
@@ -148,19 +150,21 @@ func Handler(c *gin.Context, resp *http.Response, promptTokens int, modelName st
|
||||
CompletionTokens: completionTokens,
|
||||
TotalTokens: promptTokens + completionTokens,
|
||||
}
|
||||
} else {
|
||||
// Convert the more expensive audio tokens to uniformly priced text tokens
|
||||
textResponse.Usage.PromptTokens = textResponse.CompletionTokensDetails.TextTokens +
|
||||
} else if textResponse.PromptTokensDetails.AudioTokens+textResponse.CompletionTokensDetails.AudioTokens > 0 {
|
||||
// Convert the more expensive audio tokens to uniformly priced text tokens.
|
||||
// Note that when there are no audio tokens in prompt and completion,
|
||||
// OpenAI will return empty PromptTokensDetails and CompletionTokensDetails, which can be misleading.
|
||||
textResponse.Usage.PromptTokens = textResponse.PromptTokensDetails.TextTokens +
|
||||
int(math.Ceil(
|
||||
float64(textResponse.CompletionTokensDetails.AudioTokens)*
|
||||
float64(textResponse.PromptTokensDetails.AudioTokens)*
|
||||
ratio.GetAudioPromptRatio(modelName),
|
||||
))
|
||||
textResponse.Usage.CompletionTokens = textResponse.CompletionTokensDetails.TextTokens +
|
||||
int(math.Ceil(
|
||||
float64(textResponse.CompletionTokensDetails.AudioTokens)*
|
||||
ratio.GetAudioPromptRatio(modelName)*
|
||||
ratio.GetAudioCompletionRatio(modelName),
|
||||
ratio.GetAudioPromptRatio(modelName)*ratio.GetAudioCompletionRatio(modelName),
|
||||
))
|
||||
|
||||
textResponse.Usage.TotalTokens = textResponse.Usage.PromptTokens +
|
||||
textResponse.Usage.CompletionTokens
|
||||
}
|
||||
|
||||
@@ -2,9 +2,11 @@ package controller
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"io"
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/pkg/errors"
|
||||
@@ -91,18 +93,23 @@ func RelayTextHelper(c *gin.Context) *relaymodel.ErrorWithStatusCode {
|
||||
}
|
||||
|
||||
// post-consume quota
|
||||
quotaId := c.GetInt(ctxkey.Id)
|
||||
requestId := c.GetString(ctxkey.RequestId)
|
||||
go func() {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
||||
defer cancel()
|
||||
|
||||
quota := postConsumeQuota(ctx, usage, meta, textRequest, ratio, preConsumedQuota, modelRatio, groupRatio, systemPromptReset)
|
||||
|
||||
// also update user request cost
|
||||
if quota != 0 {
|
||||
docu := model.NewUserRequestCost(
|
||||
c.GetInt(ctxkey.Id),
|
||||
c.GetString(ctxkey.RequestId),
|
||||
quotaId,
|
||||
requestId,
|
||||
quota,
|
||||
)
|
||||
if err = docu.Insert(); err != nil {
|
||||
logger.Errorf(c, "insert user request cost failed: %+v", err)
|
||||
logger.Errorf(ctx, "insert user request cost failed: %+v", err)
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
@@ -1,10 +1,12 @@
|
||||
package model
|
||||
|
||||
type Usage struct {
|
||||
PromptTokens int `json:"prompt_tokens"`
|
||||
CompletionTokens int `json:"completion_tokens"`
|
||||
TotalTokens int `json:"total_tokens"`
|
||||
PromptTokensDetails usagePromptTokensDetails `gorm:"-" json:"prompt_tokens_details,omitempty"`
|
||||
PromptTokens int `json:"prompt_tokens"`
|
||||
CompletionTokens int `json:"completion_tokens"`
|
||||
TotalTokens int `json:"total_tokens"`
|
||||
// PromptTokensDetails may be empty for some models
|
||||
PromptTokensDetails usagePromptTokensDetails `gorm:"-" json:"prompt_tokens_details,omitempty"`
|
||||
// CompletionTokensDetails may be empty for some models
|
||||
CompletionTokensDetails usageCompletionTokensDetails `gorm:"-" json:"completion_tokens_details,omitempty"`
|
||||
ServiceTier string `gorm:"-" json:"service_tier,omitempty"`
|
||||
SystemFingerprint string `gorm:"-" json:"system_fingerprint,omitempty"`
|
||||
@@ -25,8 +27,9 @@ type ErrorWithStatusCode struct {
|
||||
type usagePromptTokensDetails struct {
|
||||
CachedTokens int `json:"cached_tokens"`
|
||||
AudioTokens int `json:"audio_tokens"`
|
||||
TextTokens int `json:"text_tokens"`
|
||||
ImageTokens int `json:"image_tokens"`
|
||||
// TextTokens could be zero for pure text chats
|
||||
TextTokens int `json:"text_tokens"`
|
||||
ImageTokens int `json:"image_tokens"`
|
||||
}
|
||||
|
||||
type usageCompletionTokensDetails struct {
|
||||
@@ -34,5 +37,6 @@ type usageCompletionTokensDetails struct {
|
||||
AudioTokens int `json:"audio_tokens"`
|
||||
AcceptedPredictionTokens int `json:"accepted_prediction_tokens"`
|
||||
RejectedPredictionTokens int `json:"rejected_prediction_tokens"`
|
||||
TextTokens int `json:"text_tokens"`
|
||||
// TextTokens could be zero for pure text chats
|
||||
TextTokens int `json:"text_tokens"`
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user