mirror of
				https://github.com/songquanpeng/one-api.git
				synced 2025-11-04 07:43:41 +08:00 
			
		
		
		
	perf: use a goroutine to handle quota post consumption (#364)
This commit is contained in:
		@@ -305,51 +305,54 @@ func relayTextHelper(c *gin.Context, relayMode int) *OpenAIErrorWithStatusCode {
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	var textResponse TextResponse
 | 
			
		||||
	tokenName := c.GetString("token_name")
 | 
			
		||||
	channelId := c.GetInt("channel_id")
 | 
			
		||||
 | 
			
		||||
	defer func() {
 | 
			
		||||
		c.Writer.Flush()
 | 
			
		||||
		if consumeQuota {
 | 
			
		||||
			quota := 0
 | 
			
		||||
			completionRatio := 1.0
 | 
			
		||||
			if strings.HasPrefix(textRequest.Model, "gpt-3.5") {
 | 
			
		||||
				completionRatio = 1.333333
 | 
			
		||||
			}
 | 
			
		||||
			if strings.HasPrefix(textRequest.Model, "gpt-4") {
 | 
			
		||||
				completionRatio = 2
 | 
			
		||||
			}
 | 
			
		||||
		// c.Writer.Flush()
 | 
			
		||||
		go func() {
 | 
			
		||||
			if consumeQuota {
 | 
			
		||||
				quota := 0
 | 
			
		||||
				completionRatio := 1.0
 | 
			
		||||
				if strings.HasPrefix(textRequest.Model, "gpt-3.5") {
 | 
			
		||||
					completionRatio = 1.333333
 | 
			
		||||
				}
 | 
			
		||||
				if strings.HasPrefix(textRequest.Model, "gpt-4") {
 | 
			
		||||
					completionRatio = 2
 | 
			
		||||
				}
 | 
			
		||||
 | 
			
		||||
			promptTokens = textResponse.Usage.PromptTokens
 | 
			
		||||
			completionTokens = textResponse.Usage.CompletionTokens
 | 
			
		||||
				promptTokens = textResponse.Usage.PromptTokens
 | 
			
		||||
				completionTokens = textResponse.Usage.CompletionTokens
 | 
			
		||||
 | 
			
		||||
			quota = promptTokens + int(float64(completionTokens)*completionRatio)
 | 
			
		||||
			quota = int(float64(quota) * ratio)
 | 
			
		||||
			if ratio != 0 && quota <= 0 {
 | 
			
		||||
				quota = 1
 | 
			
		||||
				quota = promptTokens + int(float64(completionTokens)*completionRatio)
 | 
			
		||||
				quota = int(float64(quota) * ratio)
 | 
			
		||||
				if ratio != 0 && quota <= 0 {
 | 
			
		||||
					quota = 1
 | 
			
		||||
				}
 | 
			
		||||
				totalTokens := promptTokens + completionTokens
 | 
			
		||||
				if totalTokens == 0 {
 | 
			
		||||
					// in this case, must be some error happened
 | 
			
		||||
					// we cannot just return, because we may have to return the pre-consumed quota
 | 
			
		||||
					quota = 0
 | 
			
		||||
				}
 | 
			
		||||
				quotaDelta := quota - preConsumedQuota
 | 
			
		||||
				err := model.PostConsumeTokenQuota(tokenId, quotaDelta)
 | 
			
		||||
				if err != nil {
 | 
			
		||||
					common.SysError("error consuming token remain quota: " + err.Error())
 | 
			
		||||
				}
 | 
			
		||||
				err = model.CacheUpdateUserQuota(userId)
 | 
			
		||||
				if err != nil {
 | 
			
		||||
					common.SysError("error update user quota cache: " + err.Error())
 | 
			
		||||
				}
 | 
			
		||||
				if quota != 0 {
 | 
			
		||||
					logContent := fmt.Sprintf("模型倍率 %.2f,分组倍率 %.2f", modelRatio, groupRatio)
 | 
			
		||||
					model.RecordConsumeLog(userId, promptTokens, completionTokens, textRequest.Model, tokenName, quota, logContent)
 | 
			
		||||
					model.UpdateUserUsedQuotaAndRequestCount(userId, quota)
 | 
			
		||||
 | 
			
		||||
					model.UpdateChannelUsedQuota(channelId, quota)
 | 
			
		||||
				}
 | 
			
		||||
			}
 | 
			
		||||
			totalTokens := promptTokens + completionTokens
 | 
			
		||||
			if totalTokens == 0 {
 | 
			
		||||
				// in this case, must be some error happened
 | 
			
		||||
				// we cannot just return, because we may have to return the pre-consumed quota
 | 
			
		||||
				quota = 0
 | 
			
		||||
			}
 | 
			
		||||
			quotaDelta := quota - preConsumedQuota
 | 
			
		||||
			err := model.PostConsumeTokenQuota(tokenId, quotaDelta)
 | 
			
		||||
			if err != nil {
 | 
			
		||||
				common.SysError("error consuming token remain quota: " + err.Error())
 | 
			
		||||
			}
 | 
			
		||||
			err = model.CacheUpdateUserQuota(userId)
 | 
			
		||||
			if err != nil {
 | 
			
		||||
				common.SysError("error update user quota cache: " + err.Error())
 | 
			
		||||
			}
 | 
			
		||||
			if quota != 0 {
 | 
			
		||||
				tokenName := c.GetString("token_name")
 | 
			
		||||
				logContent := fmt.Sprintf("模型倍率 %.2f,分组倍率 %.2f", modelRatio, groupRatio)
 | 
			
		||||
				model.RecordConsumeLog(userId, promptTokens, completionTokens, textRequest.Model, tokenName, quota, logContent)
 | 
			
		||||
				model.UpdateUserUsedQuotaAndRequestCount(userId, quota)
 | 
			
		||||
				channelId := c.GetInt("channel_id")
 | 
			
		||||
				model.UpdateChannelUsedQuota(channelId, quota)
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
		}()
 | 
			
		||||
	}()
 | 
			
		||||
	switch apiType {
 | 
			
		||||
	case APITypeOpenAI:
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user