feat: enhance Gemini API to support image response modalities and update model ratios

This commit is contained in:
Laisky.Cai
2025-03-17 01:22:33 +00:00
parent c893672635
commit 34c7523f01
4 changed files with 66 additions and 13 deletions

View File

@@ -7,6 +7,7 @@ import (
"gorm.io/gorm"
"github.com/pkg/errors"
"github.com/songquanpeng/one-api/common"
"github.com/songquanpeng/one-api/common/utils"
)
@@ -42,7 +43,7 @@ func GetRandomSatisfiedChannel(group string, model string, ignoreFirstPriority b
err = channelQuery.Order("RAND()").First(&ability).Error
}
if err != nil {
return nil, err
return nil, errors.Wrap(err, "get random satisfied channel")
}
channel := Channel{}
channel.Id = ability.ChannelId

View File

@@ -64,6 +64,9 @@ func ConvertRequest(textRequest model.GeneralOpenAIRequest) *ChatRequest {
Temperature: textRequest.Temperature,
TopP: textRequest.TopP,
MaxOutputTokens: textRequest.MaxTokens,
ResponseModalities: []string{
"TEXT", "IMAGE",
},
},
}
if textRequest.ResponseFormat != nil {
@@ -258,19 +261,52 @@ func responseGeminiChat2OpenAI(response *ChatResponse) *openai.TextResponse {
if candidate.Content.Parts[0].FunctionCall != nil {
choice.Message.ToolCalls = getToolCalls(&candidate)
} else {
// Handle text and image content
var builder strings.Builder
var contentItems []model.MessageContent
for _, part := range candidate.Content.Parts {
if i > 0 {
builder.WriteString("\n")
if part.Text != "" {
// For text parts
if i > 0 {
builder.WriteString("\n")
}
builder.WriteString(part.Text)
// Add to content items
contentItems = append(contentItems, model.MessageContent{
Type: model.ContentTypeText,
Text: part.Text,
})
}
if part.InlineData != nil && part.InlineData.MimeType != "" && part.InlineData.Data != "" {
// For inline image data
imageURL := &model.ImageURL{
// The data is already base64 encoded
Url: fmt.Sprintf("data:%s;base64,%s", part.InlineData.MimeType, part.InlineData.Data),
}
contentItems = append(contentItems, model.MessageContent{
Type: model.ContentTypeImageURL,
ImageURL: imageURL,
})
}
builder.WriteString(part.Text)
}
choice.Message.Content = builder.String()
// If we have multiple content types, use structured content format
if len(contentItems) > 1 || (len(contentItems) == 1 && contentItems[0].Type != model.ContentTypeText) {
choice.Message.Content = contentItems
} else {
// Otherwise use the simple string content format
choice.Message.Content = builder.String()
}
}
} else {
choice.Message.Content = ""
choice.FinishReason = candidate.FinishReason
}
fullTextResponse.Choices = append(fullTextResponse.Choices, choice)
}
return &fullTextResponse
@@ -361,6 +397,7 @@ func Handler(c *gin.Context, resp *http.Response, promptTokens int, modelName st
if err != nil {
return openai.ErrorWrapper(err, "read_response_body_failed", http.StatusInternalServerError), nil
}
err = resp.Body.Close()
if err != nil {
return openai.ErrorWrapper(err, "close_response_body_failed", http.StatusInternalServerError), nil

View File

@@ -6,6 +6,19 @@ type ChatRequest struct {
GenerationConfig ChatGenerationConfig `json:"generation_config,omitempty"`
Tools []ChatTools `json:"tools,omitempty"`
SystemInstruction *ChatContent `json:"system_instruction,omitempty"`
ModelVersion string `json:"model_version,omitempty"`
UsageMetadata *UsageMetadata `json:"usage_metadata,omitempty"`
}
type UsageMetadata struct {
PromptTokenCount int `json:"promptTokenCount,omitempty"`
TotalTokenCount int `json:"totalTokenCount,omitempty"`
PromptTokensDetails []PromptTokensDetails `json:"promptTokensDetails,omitempty"`
}
type PromptTokensDetails struct {
Modality string `json:"modality,omitempty"`
TokenCount int `json:"tokenCount,omitempty"`
}
type EmbeddingRequest struct {
@@ -66,12 +79,13 @@ type ChatTools struct {
}
type ChatGenerationConfig struct {
ResponseMimeType string `json:"responseMimeType,omitempty"`
ResponseSchema any `json:"responseSchema,omitempty"`
Temperature *float64 `json:"temperature,omitempty"`
TopP *float64 `json:"topP,omitempty"`
TopK float64 `json:"topK,omitempty"`
MaxOutputTokens int `json:"maxOutputTokens,omitempty"`
CandidateCount int `json:"candidateCount,omitempty"`
StopSequences []string `json:"stopSequences,omitempty"`
ResponseMimeType string `json:"responseMimeType,omitempty"`
ResponseSchema any `json:"responseSchema,omitempty"`
Temperature *float64 `json:"temperature,omitempty"`
TopP *float64 `json:"topP,omitempty"`
TopK float64 `json:"topK,omitempty"`
MaxOutputTokens int `json:"maxOutputTokens,omitempty"`
CandidateCount int `json:"candidateCount,omitempty"`
StopSequences []string `json:"stopSequences,omitempty"`
ResponseModalities []string `json:"responseModalities,omitempty"`
}

View File

@@ -12,4 +12,5 @@ var ModelList = []string{
"gemini-2.0-flash-lite-preview-02-05",
"gemini-2.0-flash-thinking-exp-01-21",
"gemini-2.0-pro-exp-02-05",
"gemini-2.0-flash-exp-image-generation",
}