diff --git a/model/ability.go b/model/ability.go index bc11a689..550d64fe 100644 --- a/model/ability.go +++ b/model/ability.go @@ -44,7 +44,7 @@ func GetRandomSatisfiedChannel(group string, model string, ignoreFirstPriority b err = channelQuery.Order("RAND()").First(&ability).Error } if err != nil { - return nil, err + return nil, errors.Wrap(err, "get random satisfied channel") } channel := Channel{} channel.Id = ability.ChannelId diff --git a/relay/adaptor/gemini/constants.go b/relay/adaptor/gemini/constants.go index d220b25f..a2a6bcbe 100644 --- a/relay/adaptor/gemini/constants.go +++ b/relay/adaptor/gemini/constants.go @@ -19,6 +19,9 @@ var ModelsSupportSystemInstruction = []string{ // "gemini-1.5-pro-experimental", "gemini-2.0-flash", "gemini-2.0-flash-exp", "gemini-2.0-flash-thinking-exp-01-21", + "gemini-2.0-flash-lite", + "gemini-2.0-flash-exp-image-generation", + "gemini-2.0-pro-exp-02-05", } // IsModelSupportSystemInstruction check if the model support system instruction. diff --git a/relay/adaptor/gemini/main.go b/relay/adaptor/gemini/main.go index ca6f1c16..91ffaf82 100644 --- a/relay/adaptor/gemini/main.go +++ b/relay/adaptor/gemini/main.go @@ -62,6 +62,9 @@ func ConvertRequest(textRequest model.GeneralOpenAIRequest) *ChatRequest { Temperature: textRequest.Temperature, TopP: textRequest.TopP, MaxOutputTokens: textRequest.MaxTokens, + ResponseModalities: []string{ + "TEXT", "IMAGE", + }, }, } if textRequest.ResponseFormat != nil { @@ -256,19 +259,52 @@ func responseGeminiChat2OpenAI(response *ChatResponse) *openai.TextResponse { if candidate.Content.Parts[0].FunctionCall != nil { choice.Message.ToolCalls = getToolCalls(&candidate) } else { + // Handle text and image content var builder strings.Builder + var contentItems []model.MessageContent + for _, part := range candidate.Content.Parts { - if i > 0 { - builder.WriteString("\n") + if part.Text != "" { + // For text parts: insert a newline only between text parts already written to builder + if builder.Len() > 0 { + builder.WriteString("\n") + } + builder.WriteString(part.Text) + + 
// Add to content items + contentItems = append(contentItems, model.MessageContent{ + Type: model.ContentTypeText, + Text: part.Text, + }) + } + + if part.InlineData != nil && part.InlineData.MimeType != "" && part.InlineData.Data != "" { + // For inline image data + imageURL := &model.ImageURL{ + // The data is already base64 encoded + Url: fmt.Sprintf("data:%s;base64,%s", part.InlineData.MimeType, part.InlineData.Data), + } + + contentItems = append(contentItems, model.MessageContent{ + Type: model.ContentTypeImageURL, + ImageURL: imageURL, + }) } - builder.WriteString(part.Text) } - choice.Message.Content = builder.String() + + // If we have multiple content types, use structured content format + if len(contentItems) > 1 || (len(contentItems) == 1 && contentItems[0].Type != model.ContentTypeText) { + choice.Message.Content = contentItems + } else { + // Otherwise use the simple string content format + choice.Message.Content = builder.String() + } } } else { choice.Message.Content = "" choice.FinishReason = candidate.FinishReason } + fullTextResponse.Choices = append(fullTextResponse.Choices, choice) } return &fullTextResponse @@ -359,6 +395,7 @@ func Handler(c *gin.Context, resp *http.Response, promptTokens int, modelName st if err != nil { return openai.ErrorWrapper(err, "read_response_body_failed", http.StatusInternalServerError), nil } + err = resp.Body.Close() if err != nil { return openai.ErrorWrapper(err, "close_response_body_failed", http.StatusInternalServerError), nil diff --git a/relay/adaptor/gemini/model.go b/relay/adaptor/gemini/model.go index cc668197..6ca21e65 100644 --- a/relay/adaptor/gemini/model.go +++ b/relay/adaptor/gemini/model.go @@ -6,6 +6,19 @@ type ChatRequest struct { GenerationConfig ChatGenerationConfig `json:"generation_config,omitempty"` Tools []ChatTools `json:"tools,omitempty"` SystemInstruction *ChatContent `json:"system_instruction,omitempty"` + ModelVersion string `json:"model_version,omitempty"` + UsageMetadata *UsageMetadata 
`json:"usage_metadata,omitempty"` +} + +type UsageMetadata struct { + PromptTokenCount int `json:"promptTokenCount,omitempty"` + TotalTokenCount int `json:"totalTokenCount,omitempty"` + PromptTokensDetails []PromptTokensDetails `json:"promptTokensDetails,omitempty"` +} + +type PromptTokensDetails struct { + Modality string `json:"modality,omitempty"` + TokenCount int `json:"tokenCount,omitempty"` } type EmbeddingRequest struct { @@ -66,14 +79,15 @@ type ChatTools struct { } type ChatGenerationConfig struct { - ResponseMimeType string `json:"responseMimeType,omitempty"` - ResponseSchema any `json:"responseSchema,omitempty"` - Temperature *float64 `json:"temperature,omitempty"` - TopP *float64 `json:"topP,omitempty"` - TopK float64 `json:"topK,omitempty"` - MaxOutputTokens int `json:"maxOutputTokens,omitempty"` - CandidateCount int `json:"candidateCount,omitempty"` - StopSequences []string `json:"stopSequences,omitempty"` + ResponseMimeType string `json:"responseMimeType,omitempty"` + ResponseSchema any `json:"responseSchema,omitempty"` + Temperature *float64 `json:"temperature,omitempty"` + TopP *float64 `json:"topP,omitempty"` + TopK float64 `json:"topK,omitempty"` + MaxOutputTokens int `json:"maxOutputTokens,omitempty"` + CandidateCount int `json:"candidateCount,omitempty"` + StopSequences []string `json:"stopSequences,omitempty"` + ResponseModalities []string `json:"responseModalities,omitempty"` } type FunctionCallingConfig struct { diff --git a/relay/adaptor/geminiv2/constants.go b/relay/adaptor/geminiv2/constants.go index 4274cc39..dbf75ced 100644 --- a/relay/adaptor/geminiv2/constants.go +++ b/relay/adaptor/geminiv2/constants.go @@ -12,4 +12,5 @@ var ModelList = []string{ "gemini-2.0-flash-lite", "gemini-2.0-flash-thinking-exp-01-21", "gemini-2.0-pro-exp-02-05", + "gemini-2.0-flash-exp-image-generation", } diff --git a/relay/billing/ratio/model.go b/relay/billing/ratio/model.go index bfb9af5d..b5063587 100644 --- a/relay/billing/ratio/model.go +++ 
b/relay/billing/ratio/model.go @@ -147,26 +147,27 @@ var ModelRatio = map[string]float64{ // "gemma-2-2b-it": 0, // "gemma-2-9b-it": 0, // "gemma-2-27b-it": 0, - "gemini-pro": 0.25 * MilliTokensUsd, // $0.00025 / 1k characters -> $0.001 / 1k tokens - "gemini-1.0-pro": 0.125 * MilliTokensUsd, - "gemini-1.0-pro-vision": 0.125 * MilliTokensUsd, - "gemini-1.5-pro": 1.25 * MilliTokensUsd, - "gemini-1.5-pro-001": 1.25 * MilliTokensUsd, - "gemini-1.5-pro-002": 1.25 * MilliTokensUsd, - "gemini-1.5-pro-experimental": 1.25 * MilliTokensUsd, - "gemini-1.5-flash": 0.075 * MilliTokensUsd, - "gemini-1.5-flash-001": 0.075 * MilliTokensUsd, - "gemini-1.5-flash-002": 0.075 * MilliTokensUsd, - "gemini-1.5-flash-8b": 0.0375 * MilliTokensUsd, - "gemini-2.0-flash": 0.15 * MilliTokensUsd, - "gemini-2.0-flash-exp": 0.075 * MilliTokensUsd, - "gemini-2.0-flash-001": 0.15 * MilliTokensUsd, - "gemini-2.0-flash-lite": 0.075 * MilliTokensUsd, - "gemini-2.0-flash-lite-001": 0.075 * MilliTokensUsd, - "gemini-2.0-flash-lite-preview-02-05": 0.075 * MilliTokensUsd, - "gemini-2.0-flash-thinking-exp-01-21": 0.075 * MilliTokensUsd, - "gemini-2.0-pro-exp-02-05": 1.25 * MilliTokensUsd, - "aqa": 1, + "gemini-pro": 0.25 * MilliTokensUsd, // $0.00025 / 1k characters -> $0.001 / 1k tokens + "gemini-1.0-pro": 0.125 * MilliTokensUsd, + "gemini-1.0-pro-vision": 0.125 * MilliTokensUsd, + "gemini-1.5-pro": 1.25 * MilliTokensUsd, + "gemini-1.5-pro-001": 1.25 * MilliTokensUsd, + "gemini-1.5-pro-002": 1.25 * MilliTokensUsd, + "gemini-1.5-pro-experimental": 1.25 * MilliTokensUsd, + "gemini-1.5-flash": 0.075 * MilliTokensUsd, + "gemini-1.5-flash-001": 0.075 * MilliTokensUsd, + "gemini-1.5-flash-002": 0.075 * MilliTokensUsd, + "gemini-1.5-flash-8b": 0.0375 * MilliTokensUsd, + "gemini-2.0-flash": 0.15 * MilliTokensUsd, + "gemini-2.0-flash-exp": 0.075 * MilliTokensUsd, + "gemini-2.0-flash-001": 0.15 * MilliTokensUsd, + "gemini-2.0-flash-lite": 0.075 * MilliTokensUsd, + "gemini-2.0-flash-lite-001": 0.075 * 
MilliTokensUsd, + "gemini-2.0-flash-lite-preview-02-05": 0.075 * MilliTokensUsd, + "gemini-2.0-flash-thinking-exp-01-21": 0.075 * MilliTokensUsd, + "gemini-2.0-pro-exp-02-05": 1.25 * MilliTokensUsd, + "gemini-2.0-flash-exp-image-generation": 0.075 * MilliTokensUsd, + "aqa": 1, // https://open.bigmodel.cn/pricing "glm-zero-preview": 0.01 * KiloRmb, "glm-4-plus": 0.05 * KiloRmb,