feat: enhance Gemini API to support image response modalities and update model ratios

@@ -7,6 +7,7 @@ import (
 
 	"gorm.io/gorm"
 
+	"github.com/pkg/errors"
 	"github.com/songquanpeng/one-api/common"
 	"github.com/songquanpeng/one-api/common/utils"
 )

@@ -42,7 +43,7 @@ func GetRandomSatisfiedChannel(group string, model string, ignoreFirstPriority b
 		err = channelQuery.Order("RAND()").First(&ability).Error
 	}
 	if err != nil {
-		return nil, err
+		return nil, errors.Wrap(err, "get random satisfied channel")
 	}
 	channel := Channel{}
 	channel.Id = ability.ChannelId
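
For context on the error-handling change above: errors.Wrap from github.com/pkg/errors prepends a message and records a stack trace while keeping the original error reachable, so callers can still test for the underlying cause. A minimal standalone sketch (illustrative, not one-api code):

    package main

    import (
        "fmt"

        "github.com/pkg/errors"
    )

    // find stands in for the channel lookup; the sentinel error is
    // illustrative (gorm would return gorm.ErrRecordNotFound).
    func find() error {
        return errors.New("record not found")
    }

    func main() {
        if err := find(); err != nil {
            // Wrap prepends context and records a stack trace;
            // the original error stays reachable via errors.Cause.
            wrapped := errors.Wrap(err, "get random satisfied channel")
            fmt.Println(wrapped)               // get random satisfied channel: record not found
            fmt.Println(errors.Cause(wrapped)) // record not found
        }
    }
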
@@ -64,6 +64,9 @@ func ConvertRequest(textRequest model.GeneralOpenAIRequest) *ChatRequest {
 			Temperature:     textRequest.Temperature,
 			TopP:            textRequest.TopP,
 			MaxOutputTokens: textRequest.MaxTokens,
+			ResponseModalities: []string{
+				"TEXT", "IMAGE",
+			},
 		},
 	}
 	if textRequest.ResponseFormat != nil {
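
The new ResponseModalities field maps onto Gemini's generationConfig.responseModalities (see the struct-tag change further down), asking the model to return text and, where supported, inline images; note the diff sets it unconditionally for every converted request. A sketch of the resulting wire format, using a trimmed re-declaration of the config struct (field set reduced for illustration):

    package main

    import (
        "encoding/json"
        "fmt"
    )

    // Trimmed copy of ChatGenerationConfig from the diff, re-declared
    // only to show how the field serializes; not the adaptor's type.
    type generationConfig struct {
        MaxOutputTokens    int      `json:"maxOutputTokens,omitempty"`
        ResponseModalities []string `json:"responseModalities,omitempty"`
    }

    func main() {
        b, _ := json.Marshal(generationConfig{
            MaxOutputTokens:    1024,
            ResponseModalities: []string{"TEXT", "IMAGE"},
        })
        fmt.Println(string(b))
        // {"maxOutputTokens":1024,"responseModalities":["TEXT","IMAGE"]}
    }
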
@@ -258,19 +261,52 @@ func responseGeminiChat2OpenAI(response *ChatResponse) *openai.TextResponse {
 			if candidate.Content.Parts[0].FunctionCall != nil {
 				choice.Message.ToolCalls = getToolCalls(&candidate)
 			} else {
+				// Handle text and image content
 				var builder strings.Builder
+				var contentItems []model.MessageContent
+
 				for _, part := range candidate.Content.Parts {
-					if i > 0 {
-						builder.WriteString("\n")
+					if part.Text != "" {
+						// For text parts
+						if i > 0 {
+							builder.WriteString("\n")
+						}
+						builder.WriteString(part.Text)
+
+						// Add to content items
+						contentItems = append(contentItems, model.MessageContent{
+							Type: model.ContentTypeText,
+							Text: part.Text,
+						})
 					}
-					builder.WriteString(part.Text)
+
+					if part.InlineData != nil && part.InlineData.MimeType != "" && part.InlineData.Data != "" {
+						// For inline image data
+						imageURL := &model.ImageURL{
+							// The data is already base64 encoded
+							Url: fmt.Sprintf("data:%s;base64,%s", part.InlineData.MimeType, part.InlineData.Data),
+						}
+
+						contentItems = append(contentItems, model.MessageContent{
+							Type:     model.ContentTypeImageURL,
+							ImageURL: imageURL,
+						})
+					}
 				}
-				choice.Message.Content = builder.String()
+
+				// If we have multiple content types, use structured content format
+				if len(contentItems) > 1 || (len(contentItems) == 1 && contentItems[0].Type != model.ContentTypeText) {
+					choice.Message.Content = contentItems
+				} else {
+					// Otherwise use the simple string content format
+					choice.Message.Content = builder.String()
+				}
 			}
 		} else {
 			choice.Message.Content = ""
 			choice.FinishReason = candidate.FinishReason
 		}
 
 		fullTextResponse.Choices = append(fullTextResponse.Choices, choice)
 	}
 	return &fullTextResponse
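
Two things worth noting in the hunk above. First, the `i` in the `if i > 0` guard appears to come from the enclosing candidates loop (the part loop discards its own index), so the newline separator keys off the candidate index, exactly as in the old code. Second, an image part reaches the client as an OpenAI-style image_url whose URL is a base64 data URL. A standalone sketch of that packing, with stand-in types for the relay model package's ImageURL and MessageContent:

    package main

    import (
        "encoding/base64"
        "fmt"
    )

    // Stand-ins for model.ImageURL and model.MessageContent from the diff.
    type imageURL struct {
        Url string `json:"url"`
    }

    type messageContent struct {
        Type     string    `json:"type"`
        Text     string    `json:"text,omitempty"`
        ImageURL *imageURL `json:"image_url,omitempty"`
    }

    func main() {
        // Gemini's InlineData.Data is already base64; we encode a fake
        // payload here only to produce a well-formed data URL.
        data := base64.StdEncoding.EncodeToString([]byte("fake-png-bytes"))
        part := messageContent{
            Type: "image_url",
            ImageURL: &imageURL{
                Url: fmt.Sprintf("data:%s;base64,%s", "image/png", data),
            },
        }
        fmt.Println(part.ImageURL.Url) // data:image/png;base64,...
    }

Per the condition in the diff, content stays a plain string when only text parts are present; the structured slice is used as soon as an image part (or any non-text part) shows up.
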
@@ -361,6 +397,7 @@ func Handler(c *gin.Context, resp *http.Response, promptTokens int, modelName st
 	if err != nil {
 		return openai.ErrorWrapper(err, "read_response_body_failed", http.StatusInternalServerError), nil
 	}
+
 	err = resp.Body.Close()
 	if err != nil {
 		return openai.ErrorWrapper(err, "close_response_body_failed", http.StatusInternalServerError), nil
@@ -6,6 +6,19 @@ type ChatRequest struct {
 	GenerationConfig  ChatGenerationConfig `json:"generation_config,omitempty"`
 	Tools             []ChatTools          `json:"tools,omitempty"`
 	SystemInstruction *ChatContent         `json:"system_instruction,omitempty"`
+	ModelVersion      string               `json:"model_version,omitempty"`
+	UsageMetadata     *UsageMetadata       `json:"usage_metadata,omitempty"`
 }
 
+type UsageMetadata struct {
+	PromptTokenCount    int                   `json:"promptTokenCount,omitempty"`
+	TotalTokenCount     int                   `json:"totalTokenCount,omitempty"`
+	PromptTokensDetails []PromptTokensDetails `json:"promptTokensDetails,omitempty"`
+}
+
+type PromptTokensDetails struct {
+	Modality   string `json:"modality,omitempty"`
+	TokenCount int    `json:"tokenCount,omitempty"`
+}
+
 type EmbeddingRequest struct {
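
UsageMetadata and PromptTokensDetails mirror Gemini's usageMetadata response block, which breaks prompt tokens down per modality — useful once image tokens are billed differently from text. A decoding sketch with local copies of the structs; the sample JSON is illustrative, not a captured response:

    package main

    import (
        "encoding/json"
        "fmt"
    )

    // Local copies of the structs added in the diff.
    type promptTokensDetails struct {
        Modality   string `json:"modality,omitempty"`
        TokenCount int    `json:"tokenCount,omitempty"`
    }

    type usageMetadata struct {
        PromptTokenCount    int                   `json:"promptTokenCount,omitempty"`
        TotalTokenCount     int                   `json:"totalTokenCount,omitempty"`
        PromptTokensDetails []promptTokensDetails `json:"promptTokensDetails,omitempty"`
    }

    func main() {
        raw := []byte(`{
            "promptTokenCount": 12,
            "totalTokenCount": 1312,
            "promptTokensDetails": [{"modality": "TEXT", "tokenCount": 12}]
        }`)
        var usage usageMetadata
        if err := json.Unmarshal(raw, &usage); err != nil {
            panic(err)
        }
        for _, d := range usage.PromptTokensDetails {
            fmt.Printf("%s: %d prompt tokens\n", d.Modality, d.TokenCount)
        }
    }
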
@@ -66,12 +79,13 @@ type ChatTools struct {
 }
 
 type ChatGenerationConfig struct {
-	ResponseMimeType string   `json:"responseMimeType,omitempty"`
-	ResponseSchema   any      `json:"responseSchema,omitempty"`
-	Temperature      *float64 `json:"temperature,omitempty"`
-	TopP             *float64 `json:"topP,omitempty"`
-	TopK             float64  `json:"topK,omitempty"`
-	MaxOutputTokens  int      `json:"maxOutputTokens,omitempty"`
-	CandidateCount   int      `json:"candidateCount,omitempty"`
-	StopSequences    []string `json:"stopSequences,omitempty"`
+	ResponseMimeType   string   `json:"responseMimeType,omitempty"`
+	ResponseSchema     any      `json:"responseSchema,omitempty"`
+	Temperature        *float64 `json:"temperature,omitempty"`
+	TopP               *float64 `json:"topP,omitempty"`
+	TopK               float64  `json:"topK,omitempty"`
+	MaxOutputTokens    int      `json:"maxOutputTokens,omitempty"`
+	CandidateCount     int      `json:"candidateCount,omitempty"`
+	StopSequences      []string `json:"stopSequences,omitempty"`
+	ResponseModalities []string `json:"responseModalities,omitempty"`
 }
@@ -12,4 +12,5 @@ var ModelList = []string{
 	"gemini-2.0-flash-lite-preview-02-05",
 	"gemini-2.0-flash-thinking-exp-01-21",
 	"gemini-2.0-pro-exp-02-05",
+	"gemini-2.0-flash-exp-image-generation",
 }
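
The newly listed gemini-2.0-flash-exp-image-generation was, at the time of this change, the experimental Gemini variant documented to honor the IMAGE response modality requested in ConvertRequest above; the other models in the list respond with text only.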