fix: enhance token usage calculations and improve logging in OpenAI handler

This commit is contained in:
Laisky.Cai
2025-01-14 13:37:00 +00:00
parent 2fc6caaae5
commit ca9aaaf07d
2 changed files with 21 additions and 13 deletions

View File

@@ -118,8 +118,10 @@ func Handler(c *gin.Context, resp *http.Response, promptTokens int, modelName st
StatusCode: resp.StatusCode,
}, nil
}
// Reset response body
resp.Body = io.NopCloser(bytes.NewBuffer(responseBody))
logger.Debugf(c.Request.Context(), "handler response: %s", string(responseBody))
// We shouldn't set the header before we parse the response body, because the parse part may fail.
// And then we will have to send an error response, but in this case, the header has already been set.
@@ -148,19 +150,21 @@ func Handler(c *gin.Context, resp *http.Response, promptTokens int, modelName st
CompletionTokens: completionTokens,
TotalTokens: promptTokens + completionTokens,
}
} else {
// Convert the more expensive audio tokens to uniformly priced text tokens
textResponse.Usage.PromptTokens = textResponse.CompletionTokensDetails.TextTokens +
} else if textResponse.PromptTokensDetails.AudioTokens+textResponse.CompletionTokensDetails.AudioTokens > 0 {
// Convert the more expensive audio tokens to uniformly priced text tokens.
// Note that when there are no audio tokens in prompt and completion,
// OpenAI will return empty PromptTokensDetails and CompletionTokensDetails, which can be misleading.
textResponse.Usage.PromptTokens = textResponse.PromptTokensDetails.TextTokens +
int(math.Ceil(
float64(textResponse.CompletionTokensDetails.AudioTokens)*
float64(textResponse.PromptTokensDetails.AudioTokens)*
ratio.GetAudioPromptRatio(modelName),
))
textResponse.Usage.CompletionTokens = textResponse.CompletionTokensDetails.TextTokens +
int(math.Ceil(
float64(textResponse.CompletionTokensDetails.AudioTokens)*
ratio.GetAudioPromptRatio(modelName)*
ratio.GetAudioCompletionRatio(modelName),
ratio.GetAudioPromptRatio(modelName)*ratio.GetAudioCompletionRatio(modelName),
))
textResponse.Usage.TotalTokens = textResponse.Usage.PromptTokens +
textResponse.Usage.CompletionTokens
}