one-api/relay/adaptor/vertexai/imagen/model.go

package imagen

// CreateImageRequest is the request body for the Imagen API.
//
// https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/imagen-api
type CreateImageRequest struct {
	Instances  []createImageInstance `json:"instances" binding:"required,min=1"`
	Parameters createImageParameters `json:"parameters" binding:"required"`
}

type createImageInstance struct {
	Prompt          string           `json:"prompt"`
	ReferenceImages []ReferenceImage `json:"referenceImages,omitempty"`
	Image           *promptImage     `json:"image,omitempty"` // Keeping for backward compatibility
}

// ReferenceImage represents a reference image for the Imagen edit API.
//
// https://console.cloud.google.com/vertex-ai/publishers/google/model-garden/imagen-3.0-capability-001?project=ai-ca-447000
type ReferenceImage struct {
	ReferenceType  string             `json:"referenceType" binding:"required,oneof=REFERENCE_TYPE_RAW REFERENCE_TYPE_MASK"`
	ReferenceId    int                `json:"referenceId"`
	ReferenceImage ReferenceImageData `json:"referenceImage"`
	// MaskImageConfig is used when ReferenceType is "REFERENCE_TYPE_MASK"
	// to describe how the mask image is applied.
	MaskImageConfig *MaskImageConfig `json:"maskImageConfig,omitempty"`
}

// ReferenceImageData contains the actual image data.
type ReferenceImageData struct {
	BytesBase64Encoded string  `json:"bytesBase64Encoded,omitempty"`
	GcsUri             *string `json:"gcsUri,omitempty"`
	MimeType           *string `json:"mimeType,omitempty" binding:"omitempty,oneof=image/jpeg image/png"`
}

// MaskImageConfig specifies how to use the mask image.
type MaskImageConfig struct {
	// MaskMode sets the mask mode for mask editing.
	// Use MASK_MODE_USER_PROVIDED to supply your own base64-encoded mask image,
	// MASK_MODE_BACKGROUND to mask out the background automatically without a
	// user-provided mask, or MASK_MODE_SEMANTIC to generate semantic object
	// masks automatically from the object class IDs listed in MaskClasses.
	MaskMode    string   `json:"maskMode" binding:"required,oneof=MASK_MODE_USER_PROVIDED MASK_MODE_BACKGROUND MASK_MODE_SEMANTIC"`
	MaskClasses []int    `json:"maskClasses,omitempty"` // Object class IDs used when MaskMode is MASK_MODE_SEMANTIC
	Dilation    *float64 `json:"dilation,omitempty"`    // Dilation percentage of the provided mask. Min: 0, Max: 1, Default: 0.03
}
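
// Illustrative sketch only, not part of the upstream adaptor: it shows how
// ReferenceImage and MaskImageConfig combine for an inpainting-removal edit.
// The function name, prompt, and the rawB64/maskB64 arguments are placeholders.
func exampleMaskEditRequest(rawB64, maskB64 string) CreateImageRequest {
	editMode := "EDIT_MODE_INPAINT_REMOVAL"
	return CreateImageRequest{
		Instances: []createImageInstance{{
			Prompt: "remove the object covered by the mask", // placeholder prompt
			ReferenceImages: []ReferenceImage{
				{
					// The source image to edit.
					ReferenceType:  "REFERENCE_TYPE_RAW",
					ReferenceId:    1,
					ReferenceImage: ReferenceImageData{BytesBase64Encoded: rawB64},
				},
				{
					// The user-provided mask marking the region to edit.
					ReferenceType:  "REFERENCE_TYPE_MASK",
					ReferenceId:    2,
					ReferenceImage: ReferenceImageData{BytesBase64Encoded: maskB64},
					MaskImageConfig: &MaskImageConfig{
						MaskMode: "MASK_MODE_USER_PROVIDED",
					},
				},
			},
		}},
		Parameters: createImageParameters{
			SampleCount: 1,
			EditMode:    &editMode,
		},
	}
}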

// promptImage is the image to be used as a prompt for the Imagen API.
// It can be either a base64-encoded image or a GCS URI.
type promptImage struct {
	BytesBase64Encoded *string `json:"bytesBase64Encoded,omitempty"`
	GcsUri             *string `json:"gcsUri,omitempty"`
	MimeType           *string `json:"mimeType,omitempty" binding:"omitempty,oneof=image/jpeg image/png"`
}

type createImageParameters struct {
	SampleCount int     `json:"sampleCount" binding:"required,min=1"`
	Mode        *string `json:"mode,omitempty" binding:"omitempty,oneof=upscaled"`
	// EditMode sets the edit mode for mask editing.
	// Use EDIT_MODE_INPAINT_REMOVAL for inpainting removal,
	// EDIT_MODE_INPAINT_INSERTION for inpainting insertion,
	// EDIT_MODE_OUTPAINT for outpainting,
	// or EDIT_MODE_BGSWAP for background swap.
	EditMode      *string        `json:"editMode,omitempty" binding:"omitempty,oneof=EDIT_MODE_INPAINT_REMOVAL EDIT_MODE_INPAINT_INSERTION EDIT_MODE_OUTPAINT EDIT_MODE_BGSWAP"`
	UpscaleConfig *upscaleConfig `json:"upscaleConfig,omitempty"`
	Seed          *int64         `json:"seed,omitempty"`
}

type upscaleConfig struct {
	// UpscaleFactor is the factor to which the image will be upscaled.
	// If not specified, the upscale factor will be determined from
	// the longer side of the input image and sampleImageSize.
	// Available values: x2 or x4.
	UpscaleFactor *string `json:"upscaleFactor,omitempty" binding:"omitempty,oneof=x2 x4"`
}
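
// Illustrative sketch only, not part of the upstream adaptor: an upscale
// request built from these types. The function name and the imageB64 argument
// are placeholders; "upscaled" and "x2" are values accepted by the binding
// tags above.
func exampleUpscaleRequest(imageB64 string) CreateImageRequest {
	mode := "upscaled"
	factor := "x2"
	return CreateImageRequest{
		Instances: []createImageInstance{{
			Image: &promptImage{BytesBase64Encoded: &imageB64}, // source image to upscale
		}},
		Parameters: createImageParameters{
			SampleCount: 1,
			Mode:        &mode,
			UpscaleConfig: &upscaleConfig{
				UpscaleFactor: &factor,
			},
		},
	}
}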

// CreateImageResponse is the response body for the Imagen API.
type CreateImageResponse struct {
	Predictions []createImageResponsePrediction `json:"predictions"`
}

type createImageResponsePrediction struct {
	MimeType           string `json:"mimeType"`
	BytesBase64Encoded string `json:"bytesBase64Encoded"`
}
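
// Illustrative sketch only, not part of the upstream adaptor: collecting the
// generated images from a decoded CreateImageResponse. The function name is a
// placeholder; decoding the HTTP body into CreateImageResponse is assumed to
// happen elsewhere.
func exampleCollectImages(resp CreateImageResponse) []string {
	images := make([]string, 0, len(resp.Predictions))
	for _, p := range resp.Predictions {
		// Each prediction carries one base64-encoded image and its MIME type.
		images = append(images, p.BytesBase64Encoded)
	}
	return images
}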

// VQAResponse is the response body for the Visual Question Answering API.
//
// https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/visual-question-answering
type VQAResponse struct {
	Predictions []string `json:"predictions"`
}
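
// Illustrative sketch only, not part of the upstream adaptor: reading the
// answer out of a decoded VQA response, where each prediction is a plain
// answer string. The function name is a placeholder.
func exampleFirstAnswer(resp VQAResponse) (string, bool) {
	if len(resp.Predictions) == 0 {
		return "", false
	}
	return resp.Predictions[0], true
}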