From adba54acd34784bbf33a55259e10ebab51aa4263 Mon Sep 17 00:00:00 2001
From: "Laisky.Cai"
Date: Tue, 16 Jul 2024 23:48:54 +0800
Subject: [PATCH 01/13] fix: implement improved headers for anthropic to support 8k outputs (#1654)

---
 common/config/config.go | 1 -
 relay/adaptor/anthropic/adaptor.go | 13 ++++++++++--
 relay/adaptor/vertexai/claude/model.go | 22 ++++++++++----------
 relay/adaptor/vertexai/registry.go | 1 -
 relay/adaptor/vertexai/token.go | 1 -
 relay/channeltype/url.go | 2 +-
 relay/meta/relay_meta.go | 28 ++++++++++++++------------
 7 files changed, 38 insertions(+), 30 deletions(-)

diff --git a/common/config/config.go b/common/config/config.go
index 9b55e413..11da0b96 100644
--- a/common/config/config.go
+++ b/common/config/config.go
@@ -147,7 +147,6 @@ var InitialRootAccessToken = os.Getenv("INITIAL_ROOT_ACCESS_TOKEN")
 var GeminiVersion = env.String("GEMINI_VERSION", "v1")
-
 var OnlyOneLogFile = env.Bool("ONLY_ONE_LOG_FILE", false)
 var RelayProxy = env.String("RELAY_PROXY", "")

diff --git a/relay/adaptor/anthropic/adaptor.go b/relay/adaptor/anthropic/adaptor.go
index b1136e84..bd0949be 100644
--- a/relay/adaptor/anthropic/adaptor.go
+++ b/relay/adaptor/anthropic/adaptor.go
@@ -3,12 +3,14 @@ package anthropic
 import (
 	"errors"
 	"fmt"
+	"io"
+	"net/http"
+	"strings"
+
 	"github.com/gin-gonic/gin"
 	"github.com/songquanpeng/one-api/relay/adaptor"
 	"github.com/songquanpeng/one-api/relay/meta"
 	"github.com/songquanpeng/one-api/relay/model"
-	"io"
-	"net/http"
 )

 type Adaptor struct {
@@ -31,6 +33,13 @@ func (a *Adaptor) SetupRequestHeader(c *gin.Context, req *http.Request, meta *me
 	}
 	req.Header.Set("anthropic-version", anthropicVersion)
 	req.Header.Set("anthropic-beta", "messages-2023-12-15")
+
+	// https://x.com/alexalbert__/status/1812921642143900036
+	// claude-3-5-sonnet supports up to 8k output tokens
+	if strings.HasPrefix(meta.ActualModelName, "claude-3-5-sonnet") {
+		req.Header.Set("anthropic-beta", "max-tokens-3-5-sonnet-2024-07-15")
+	}
+
 	return nil
 }

diff --git a/relay/adaptor/vertexai/claude/model.go b/relay/adaptor/vertexai/claude/model.go
index 2f13f598..e1bd5dd4 100644
--- a/relay/adaptor/vertexai/claude/model.go
+++ b/relay/adaptor/vertexai/claude/model.go
@@ -4,16 +4,16 @@ import "github.com/songquanpeng/one-api/relay/adaptor/anthropic"

 type Request struct {
 	// AnthropicVersion must be "vertex-2023-10-16"
-	AnthropicVersion string `json:"anthropic_version"`
+	AnthropicVersion string `json:"anthropic_version"`
 	// Model string `json:"model"`
-	Messages []anthropic.Message `json:"messages"`
-	System string `json:"system,omitempty"`
-	MaxTokens int `json:"max_tokens,omitempty"`
-	StopSequences []string `json:"stop_sequences,omitempty"`
-	Stream bool `json:"stream,omitempty"`
-	Temperature float64 `json:"temperature,omitempty"`
-	TopP float64 `json:"top_p,omitempty"`
-	TopK int `json:"top_k,omitempty"`
-	Tools []anthropic.Tool `json:"tools,omitempty"`
-	ToolChoice any `json:"tool_choice,omitempty"`
+	Messages []anthropic.Message `json:"messages"`
+	System string `json:"system,omitempty"`
+	MaxTokens int `json:"max_tokens,omitempty"`
+	StopSequences []string `json:"stop_sequences,omitempty"`
+	Stream bool `json:"stream,omitempty"`
+	Temperature float64 `json:"temperature,omitempty"`
+	TopP float64 `json:"top_p,omitempty"`
+	TopK int `json:"top_k,omitempty"`
+	Tools []anthropic.Tool `json:"tools,omitempty"`
+	ToolChoice any `json:"tool_choice,omitempty"`
 }

diff --git a/relay/adaptor/vertexai/registry.go b/relay/adaptor/vertexai/registry.go
index f9547ebf..41099f02 100644
---
a/relay/adaptor/vertexai/registry.go +++ b/relay/adaptor/vertexai/registry.go @@ -32,7 +32,6 @@ func init() { } } - type innerAIAdapter interface { ConvertRequest(c *gin.Context, relayMode int, request *model.GeneralOpenAIRequest) (any, error) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Meta) (usage *model.Usage, err *model.ErrorWithStatusCode) diff --git a/relay/adaptor/vertexai/token.go b/relay/adaptor/vertexai/token.go index e5fa7b48..0a5e0aad 100644 --- a/relay/adaptor/vertexai/token.go +++ b/relay/adaptor/vertexai/token.go @@ -26,7 +26,6 @@ type ApplicationDefaultCredentials struct { UniverseDomain string `json:"universe_domain"` } - var Cache = cache.New(50*time.Minute, 55*time.Minute) const defaultScope = "https://www.googleapis.com/auth/cloud-platform" diff --git a/relay/channeltype/url.go b/relay/channeltype/url.go index 47241063..20a24ab0 100644 --- a/relay/channeltype/url.go +++ b/relay/channeltype/url.go @@ -43,7 +43,7 @@ var ChannelBaseURLs = []string{ "https://api.together.xyz", // 39 "https://ark.cn-beijing.volces.com", // 40 "https://api.novita.ai/v3/openai", // 41 - "", // 42 + "", // 42 } func init() { diff --git a/relay/meta/relay_meta.go b/relay/meta/relay_meta.go index 9714ebb5..04977db5 100644 --- a/relay/meta/relay_meta.go +++ b/relay/meta/relay_meta.go @@ -10,20 +10,22 @@ import ( ) type Meta struct { - Mode int - ChannelType int - ChannelId int - TokenId int - TokenName string - UserId int - Group string - ModelMapping map[string]string - BaseURL string - APIKey string - APIType int - Config model.ChannelConfig - IsStream bool + Mode int + ChannelType int + ChannelId int + TokenId int + TokenName string + UserId int + Group string + ModelMapping map[string]string + BaseURL string + APIKey string + APIType int + Config model.ChannelConfig + IsStream bool + // OriginModelName is the model name from the raw user request OriginModelName string + // ActualModelName is the model name after mapping ActualModelName string RequestURLPath string PromptTokens int // only for DoResponse From 2a892c193702be2a42f1079f3832244928674575 Mon Sep 17 00:00:00 2001 From: JustSong Date: Wed, 17 Jul 2024 22:50:52 +0800 Subject: [PATCH 02/13] revert: feat: fast build linux/arm64 frontend (#1645) This reverts commit 1c44d7e1cdd45f3ebf7dbd4378a5b2ead7ade8f5. --- ...image-en.yml => docker-image-amd64-en.yml} | 3 +- ...ocker-image.yml => docker-image-amd64.yml} | 3 +- .github/workflows/docker-image-arm64.yml | 69 +++++++++++++++++++ Dockerfile | 2 +- 4 files changed, 72 insertions(+), 5 deletions(-) rename .github/workflows/{docker-image-en.yml => docker-image-amd64-en.yml} (94%) rename .github/workflows/{docker-image.yml => docker-image-amd64.yml} (95%) create mode 100644 .github/workflows/docker-image-arm64.yml diff --git a/.github/workflows/docker-image-en.yml b/.github/workflows/docker-image-amd64-en.yml similarity index 94% rename from .github/workflows/docker-image-en.yml rename to .github/workflows/docker-image-amd64-en.yml index e894448f..31c01e80 100644 --- a/.github/workflows/docker-image-en.yml +++ b/.github/workflows/docker-image-amd64-en.yml @@ -1,4 +1,4 @@ -name: Publish Docker image (English) +name: Publish Docker image (amd64, English) on: push: @@ -51,7 +51,6 @@ jobs: uses: docker/build-push-action@v3 with: context: . 
- platforms: linux/amd64,linux/arm64 push: true tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} \ No newline at end of file diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image-amd64.yml similarity index 95% rename from .github/workflows/docker-image.yml rename to .github/workflows/docker-image-amd64.yml index 57224135..1b9983c6 100644 --- a/.github/workflows/docker-image.yml +++ b/.github/workflows/docker-image-amd64.yml @@ -1,4 +1,4 @@ -name: Publish Docker image +name: Publish Docker image (amd64) on: push: @@ -56,7 +56,6 @@ jobs: uses: docker/build-push-action@v3 with: context: . - platforms: linux/amd64,linux/arm64 push: true tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} \ No newline at end of file diff --git a/.github/workflows/docker-image-arm64.yml b/.github/workflows/docker-image-arm64.yml new file mode 100644 index 00000000..dc2b4b97 --- /dev/null +++ b/.github/workflows/docker-image-arm64.yml @@ -0,0 +1,69 @@ +name: Publish Docker image (arm64) + +on: + push: + tags: + - 'v*.*.*' + - '!*-alpha*' + workflow_dispatch: + inputs: + name: + description: 'reason' + required: false +jobs: + push_to_registries: + name: Push Docker image to multiple registries + runs-on: ubuntu-latest + permissions: + packages: write + contents: read + steps: + - name: Check out the repo + uses: actions/checkout@v3 + + - name: Check repository URL + run: | + REPO_URL=$(git config --get remote.origin.url) + if [[ $REPO_URL == *"pro" ]]; then + exit 1 + fi + + - name: Save version info + run: | + git describe --tags > VERSION + + - name: Set up QEMU + uses: docker/setup-qemu-action@v2 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v2 + + - name: Log in to Docker Hub + uses: docker/login-action@v2 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - name: Log in to the Container registry + uses: docker/login-action@v2 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract metadata (tags, labels) for Docker + id: meta + uses: docker/metadata-action@v4 + with: + images: | + justsong/one-api + ghcr.io/${{ github.repository }} + + - name: Build and push Docker images + uses: docker/build-push-action@v3 + with: + context: . + platforms: linux/amd64,linux/arm64 + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 8154e4dc..29b4ca71 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM --platform=$BUILDPLATFORM node:16 as builder +FROM node:16 as builder WORKDIR /web COPY ./VERSION . 
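A quick illustration of the header selection added in PATCH 01 above: the 8k limit applies to output tokens (max_tokens), and the beta header is chosen from the mapped model name. A minimal runnable sketch; betaHeaderFor is a hypothetical helper (the real logic lives inline in SetupRequestHeader), and the model ids in main are only examples:

package main

import (
    "fmt"
    "strings"
)

// betaHeaderFor mirrors the selection in PATCH 01: claude-3-5-sonnet models
// get the 8k-output beta header, everything else keeps the generic messages
// beta header. The helper itself is hypothetical, for illustration only.
func betaHeaderFor(actualModelName string) string {
    if strings.HasPrefix(actualModelName, "claude-3-5-sonnet") {
        return "max-tokens-3-5-sonnet-2024-07-15"
    }
    return "messages-2023-12-15"
}

func main() {
    fmt.Println(betaHeaderFor("claude-3-5-sonnet-20240620")) // max-tokens-3-5-sonnet-2024-07-15
    fmt.Println(betaHeaderFor("claude-3-opus-20240229"))     // messages-2023-12-15
}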
From 39383e553280f4987fa62824c5090c8d85acc392 Mon Sep 17 00:00:00 2001 From: igophper <34326532+igophper@users.noreply.github.com> Date: Mon, 22 Jul 2024 22:38:50 +0800 Subject: [PATCH 03/13] fix: support embedding models for doubao (#1662) Fixes #1594 --- relay/adaptor/doubao/main.go | 6 +++- relay/controller/text.go | 54 +++++++++++++++++++----------------- 2 files changed, 33 insertions(+), 27 deletions(-) diff --git a/relay/adaptor/doubao/main.go b/relay/adaptor/doubao/main.go index ea26e6ba..dd43d06c 100644 --- a/relay/adaptor/doubao/main.go +++ b/relay/adaptor/doubao/main.go @@ -7,8 +7,12 @@ import ( ) func GetRequestURL(meta *meta.Meta) (string, error) { - if meta.Mode == relaymode.ChatCompletions { + switch meta.Mode { + case relaymode.ChatCompletions: return fmt.Sprintf("%s/api/v3/chat/completions", meta.BaseURL), nil + case relaymode.Embeddings: + return fmt.Sprintf("%s/api/v3/embeddings", meta.BaseURL), nil + default: } return "", fmt.Errorf("unsupported relay mode %d for doubao", meta.Mode) } diff --git a/relay/controller/text.go b/relay/controller/text.go index 0d3c56b0..52ee9949 100644 --- a/relay/controller/text.go +++ b/relay/controller/text.go @@ -10,6 +10,7 @@ import ( "github.com/gin-gonic/gin" "github.com/songquanpeng/one-api/common/logger" "github.com/songquanpeng/one-api/relay" + "github.com/songquanpeng/one-api/relay/adaptor" "github.com/songquanpeng/one-api/relay/adaptor/openai" "github.com/songquanpeng/one-api/relay/apitype" "github.com/songquanpeng/one-api/relay/billing" @@ -31,9 +32,8 @@ func RelayTextHelper(c *gin.Context) *model.ErrorWithStatusCode { meta.IsStream = textRequest.Stream // map model name - var isModelMapped bool meta.OriginModelName = textRequest.Model - textRequest.Model, isModelMapped = getMappedModelName(textRequest.Model, meta.ModelMapping) + textRequest.Model, _ = getMappedModelName(textRequest.Model, meta.ModelMapping) meta.ActualModelName = textRequest.Model // get model ratio & group ratio modelRatio := billingratio.GetModelRatio(textRequest.Model, meta.ChannelType) @@ -55,30 +55,9 @@ func RelayTextHelper(c *gin.Context) *model.ErrorWithStatusCode { adaptor.Init(meta) // get request body - var requestBody io.Reader - if meta.APIType == apitype.OpenAI { - // no need to convert request for openai - shouldResetRequestBody := isModelMapped || meta.ChannelType == channeltype.Baichuan // frequency_penalty 0 is not acceptable for baichuan - if shouldResetRequestBody { - jsonStr, err := json.Marshal(textRequest) - if err != nil { - return openai.ErrorWrapper(err, "json_marshal_failed", http.StatusInternalServerError) - } - requestBody = bytes.NewBuffer(jsonStr) - } else { - requestBody = c.Request.Body - } - } else { - convertedRequest, err := adaptor.ConvertRequest(c, meta.Mode, textRequest) - if err != nil { - return openai.ErrorWrapper(err, "convert_request_failed", http.StatusInternalServerError) - } - jsonData, err := json.Marshal(convertedRequest) - if err != nil { - return openai.ErrorWrapper(err, "json_marshal_failed", http.StatusInternalServerError) - } - logger.Debugf(ctx, "converted request: \n%s", string(jsonData)) - requestBody = bytes.NewBuffer(jsonData) + requestBody, err := getRequestBody(c, meta, textRequest, adaptor) + if err != nil { + return openai.ErrorWrapper(err, "convert_request_failed", http.StatusInternalServerError) } // do request @@ -103,3 +82,26 @@ func RelayTextHelper(c *gin.Context) *model.ErrorWithStatusCode { go postConsumeQuota(ctx, usage, meta, textRequest, ratio, preConsumedQuota, modelRatio, groupRatio) 
return nil } + +func getRequestBody(c *gin.Context, meta *meta.Meta, textRequest *model.GeneralOpenAIRequest, adaptor adaptor.Adaptor) (io.Reader, error) { + if meta.APIType == apitype.OpenAI && meta.OriginModelName == meta.ActualModelName && meta.ChannelType != channeltype.Baichuan { + // no need to convert request for openai + return c.Request.Body, nil + } + + // get request body + var requestBody io.Reader + convertedRequest, err := adaptor.ConvertRequest(c, meta.Mode, textRequest) + if err != nil { + logger.Debugf(c.Request.Context(), "converted request failed: %s\n", err.Error()) + return nil, err + } + jsonData, err := json.Marshal(convertedRequest) + if err != nil { + logger.Debugf(c.Request.Context(), "converted request json_marshal_failed: %s\n", err.Error()) + return nil, err + } + logger.Debugf(c.Request.Context(), "converted request: \n%s", string(jsonData)) + requestBody = bytes.NewBuffer(jsonData) + return requestBody, nil +} From 5f03c856b486a9216371cc25957aa25cc9e337e9 Mon Sep 17 00:00:00 2001 From: zijiren <84728412+zijiren233@users.noreply.github.com> Date: Mon, 22 Jul 2024 22:39:22 +0800 Subject: [PATCH 04/13] feat: fast build linux/arm64 frontend (#1663) * feat: fast build linux/arm64 frontend * fix: dockerfile as replace to AS * fix: trim space --- .github/workflows/docker-image-amd64.yml | 61 ------------------- ...image-amd64-en.yml => docker-image-en.yml} | 10 ++- ...ocker-image-arm64.yml => docker-image.yml} | 3 +- Dockerfile | 2 +- 4 files changed, 11 insertions(+), 65 deletions(-) delete mode 100644 .github/workflows/docker-image-amd64.yml rename .github/workflows/{docker-image-amd64-en.yml => docker-image-en.yml} (85%) rename .github/workflows/{docker-image-arm64.yml => docker-image.yml} (96%) diff --git a/.github/workflows/docker-image-amd64.yml b/.github/workflows/docker-image-amd64.yml deleted file mode 100644 index 1b9983c6..00000000 --- a/.github/workflows/docker-image-amd64.yml +++ /dev/null @@ -1,61 +0,0 @@ -name: Publish Docker image (amd64) - -on: - push: - tags: - - 'v*.*.*' - workflow_dispatch: - inputs: - name: - description: 'reason' - required: false -jobs: - push_to_registries: - name: Push Docker image to multiple registries - runs-on: ubuntu-latest - permissions: - packages: write - contents: read - steps: - - name: Check out the repo - uses: actions/checkout@v3 - - - name: Check repository URL - run: | - REPO_URL=$(git config --get remote.origin.url) - if [[ $REPO_URL == *"pro" ]]; then - exit 1 - fi - - - name: Save version info - run: | - git describe --tags > VERSION - - - name: Log in to Docker Hub - uses: docker/login-action@v2 - with: - username: ${{ secrets.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - - - name: Log in to the Container registry - uses: docker/login-action@v2 - with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Extract metadata (tags, labels) for Docker - id: meta - uses: docker/metadata-action@v4 - with: - images: | - justsong/one-api - ghcr.io/${{ github.repository }} - - - name: Build and push Docker images - uses: docker/build-push-action@v3 - with: - context: . 
- push: true - tags: ${{ steps.meta.outputs.tags }} - labels: ${{ steps.meta.outputs.labels }} \ No newline at end of file diff --git a/.github/workflows/docker-image-amd64-en.yml b/.github/workflows/docker-image-en.yml similarity index 85% rename from .github/workflows/docker-image-amd64-en.yml rename to .github/workflows/docker-image-en.yml index 31c01e80..30cd0e38 100644 --- a/.github/workflows/docker-image-amd64-en.yml +++ b/.github/workflows/docker-image-en.yml @@ -1,4 +1,4 @@ -name: Publish Docker image (amd64, English) +name: Publish Docker image (English) on: push: @@ -34,6 +34,13 @@ jobs: - name: Translate run: | python ./i18n/translate.py --repository_path . --json_file_path ./i18n/en.json + + - name: Set up QEMU + uses: docker/setup-qemu-action@v2 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v2 + - name: Log in to Docker Hub uses: docker/login-action@v2 with: @@ -51,6 +58,7 @@ jobs: uses: docker/build-push-action@v3 with: context: . + platforms: linux/amd64,linux/arm64 push: true tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} \ No newline at end of file diff --git a/.github/workflows/docker-image-arm64.yml b/.github/workflows/docker-image.yml similarity index 96% rename from .github/workflows/docker-image-arm64.yml rename to .github/workflows/docker-image.yml index dc2b4b97..56f1d6ad 100644 --- a/.github/workflows/docker-image-arm64.yml +++ b/.github/workflows/docker-image.yml @@ -1,10 +1,9 @@ -name: Publish Docker image (arm64) +name: Publish Docker image on: push: tags: - 'v*.*.*' - - '!*-alpha*' workflow_dispatch: inputs: name: diff --git a/Dockerfile b/Dockerfile index 29b4ca71..ade561e4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM node:16 as builder +FROM --platform=$BUILDPLATFORM node:16 AS builder WORKDIR /web COPY ./VERSION . 
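Back on PATCH 03 above: doubao now routes chat completions and embeddings to different upstream paths. A minimal runnable sketch of the resulting URL mapping; the relaymode constants are stubbed locally here and may not match the real values in relay/relaymode:

package main

import "fmt"

// Local stand-ins for relaymode.ChatCompletions and relaymode.Embeddings;
// the real constants live in relay/relaymode.
const (
    modeChatCompletions = iota + 1
    modeEmbeddings
)

// getRequestURL mirrors doubao.GetRequestURL from PATCH 03.
func getRequestURL(baseURL string, mode int) (string, error) {
    switch mode {
    case modeChatCompletions:
        return fmt.Sprintf("%s/api/v3/chat/completions", baseURL), nil
    case modeEmbeddings:
        return fmt.Sprintf("%s/api/v3/embeddings", baseURL), nil
    default:
        return "", fmt.Errorf("unsupported relay mode %d for doubao", mode)
    }
}

func main() {
    u, _ := getRequestURL("https://ark.cn-beijing.volces.com", modeEmbeddings)
    fmt.Println(u) // https://ark.cn-beijing.volces.com/api/v3/embeddings
}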
From 296ab013b8877ff44cb13c53ae6f3a7ab3e905d8 Mon Sep 17 00:00:00 2001 From: TAKO <20227709+HynoR@users.noreply.github.com> Date: Mon, 22 Jul 2024 22:44:08 +0800 Subject: [PATCH 05/13] feat: support gpt-4o mini (#1665) * feat: support gpt-4o mini * feat: fix gpt-4o mini image price --- relay/adaptor/openai/constants.go | 1 + relay/adaptor/openai/token.go | 14 ++++++++++++-- relay/billing/ratio/model.go | 23 ++++++++++++++--------- 3 files changed, 27 insertions(+), 11 deletions(-) diff --git a/relay/adaptor/openai/constants.go b/relay/adaptor/openai/constants.go index 924f4b2a..156a50e7 100644 --- a/relay/adaptor/openai/constants.go +++ b/relay/adaptor/openai/constants.go @@ -8,6 +8,7 @@ var ModelList = []string{ "gpt-4-32k", "gpt-4-32k-0314", "gpt-4-32k-0613", "gpt-4-turbo-preview", "gpt-4-turbo", "gpt-4-turbo-2024-04-09", "gpt-4o", "gpt-4o-2024-05-13", + "gpt-4o-mini", "gpt-4o-mini-2024-07-18", "gpt-4-vision-preview", "text-embedding-ada-002", "text-embedding-3-small", "text-embedding-3-large", "text-curie-001", "text-babbage-001", "text-ada-001", "text-davinci-002", "text-davinci-003", diff --git a/relay/adaptor/openai/token.go b/relay/adaptor/openai/token.go index 8378b264..7c8468b9 100644 --- a/relay/adaptor/openai/token.go +++ b/relay/adaptor/openai/token.go @@ -110,7 +110,7 @@ func CountTokenMessages(messages []model.Message, model string) int { if imageUrl["detail"] != nil { detail = imageUrl["detail"].(string) } - imageTokens, err := countImageTokens(url, detail) + imageTokens, err := countImageTokens(url, detail, model) if err != nil { logger.SysError("error counting image tokens: " + err.Error()) } else { @@ -134,11 +134,15 @@ const ( lowDetailCost = 85 highDetailCostPerTile = 170 additionalCost = 85 + // gpt-4o-mini cost higher than other model + gpt4oMiniLowDetailCost = 2833 + gpt4oMiniHighDetailCost = 5667 + gpt4oMiniAdditionalCost = 2833 ) // https://platform.openai.com/docs/guides/vision/calculating-costs // https://github.com/openai/openai-cookbook/blob/05e3f9be4c7a2ae7ecf029a7c32065b024730ebe/examples/How_to_count_tokens_with_tiktoken.ipynb -func countImageTokens(url string, detail string) (_ int, err error) { +func countImageTokens(url string, detail string, model string) (_ int, err error) { var fetchSize = true var width, height int // Reference: https://platform.openai.com/docs/guides/vision/low-or-high-fidelity-image-understanding @@ -172,6 +176,9 @@ func countImageTokens(url string, detail string) (_ int, err error) { } switch detail { case "low": + if strings.HasPrefix(model, "gpt-4o-mini") { + return gpt4oMiniLowDetailCost, nil + } return lowDetailCost, nil case "high": if fetchSize { @@ -191,6 +198,9 @@ func countImageTokens(url string, detail string) (_ int, err error) { height = int(float64(height) * ratio) } numSquares := int(math.Ceil(float64(width)/512) * math.Ceil(float64(height)/512)) + if strings.HasPrefix(model, "gpt-4o-mini") { + return numSquares*gpt4oMiniHighDetailCost + gpt4oMiniAdditionalCost, nil + } result := numSquares*highDetailCostPerTile + additionalCost return result, nil default: diff --git a/relay/billing/ratio/model.go b/relay/billing/ratio/model.go index 8a7d5743..799fa90f 100644 --- a/relay/billing/ratio/model.go +++ b/relay/billing/ratio/model.go @@ -28,15 +28,17 @@ var ModelRatio = map[string]float64{ "gpt-4-32k": 30, "gpt-4-32k-0314": 30, "gpt-4-32k-0613": 30, - "gpt-4-1106-preview": 5, // $0.01 / 1K tokens - "gpt-4-0125-preview": 5, // $0.01 / 1K tokens - "gpt-4-turbo-preview": 5, // $0.01 / 1K tokens - "gpt-4-turbo": 5, // $0.01 / 
1K tokens - "gpt-4-turbo-2024-04-09": 5, // $0.01 / 1K tokens - "gpt-4o": 2.5, // $0.005 / 1K tokens - "gpt-4o-2024-05-13": 2.5, // $0.005 / 1K tokens - "gpt-4-vision-preview": 5, // $0.01 / 1K tokens - "gpt-3.5-turbo": 0.25, // $0.0005 / 1K tokens + "gpt-4-1106-preview": 5, // $0.01 / 1K tokens + "gpt-4-0125-preview": 5, // $0.01 / 1K tokens + "gpt-4-turbo-preview": 5, // $0.01 / 1K tokens + "gpt-4-turbo": 5, // $0.01 / 1K tokens + "gpt-4-turbo-2024-04-09": 5, // $0.01 / 1K tokens + "gpt-4o": 2.5, // $0.005 / 1K tokens + "gpt-4o-2024-05-13": 2.5, // $0.005 / 1K tokens + "gpt-4o-mini": 0.075, // $0.00015 / 1K tokens + "gpt-4o-mini-2024-07-18": 0.075, // $0.00015 / 1K tokens + "gpt-4-vision-preview": 5, // $0.01 / 1K tokens + "gpt-3.5-turbo": 0.25, // $0.0005 / 1K tokens "gpt-3.5-turbo-0301": 0.75, "gpt-3.5-turbo-0613": 0.75, "gpt-3.5-turbo-16k": 1.5, // $0.003 / 1K tokens @@ -308,6 +310,9 @@ func GetCompletionRatio(name string, channelType int) float64 { return 4.0 / 3.0 } if strings.HasPrefix(name, "gpt-4") { + if strings.HasPrefix(name, "gpt-4o-mini") { + return 4 + } if strings.HasPrefix(name, "gpt-4-turbo") || strings.HasPrefix(name, "gpt-4o") || strings.HasSuffix(name, "preview") { From c936198ac8cf3c51bb839d11efea0bfd06fedc57 Mon Sep 17 00:00:00 2001 From: "Laisky.Cai" Date: Mon, 22 Jul 2024 22:51:19 +0800 Subject: [PATCH 06/13] feat: add Proxy channel type and relay mode (#1678) Add the Proxy channel type and relay mode to support proxying requests to custom upstream services. --- controller/relay.go | 5 + middleware/auth.go | 6 + relay/adaptor.go | 3 + relay/adaptor/proxy/adaptor.go | 89 ++++++++++++ relay/apitype/define.go | 1 + relay/channeltype/define.go | 1 + relay/channeltype/helper.go | 2 + relay/channeltype/url.go | 1 + relay/controller/proxy.go | 41 ++++++ relay/meta/relay_meta.go | 11 +- relay/relaymode/define.go | 2 + relay/relaymode/helper.go | 2 + router/relay.go | 1 + web/air/src/constants/channel.constants.js | 14 +- web/berry/src/constants/ChannelConstants.js | 6 + .../src/constants/channel.constants.js | 85 ++++++------ web/default/src/pages/Channel/EditChannel.js | 128 ++++++++++-------- 17 files changed, 292 insertions(+), 106 deletions(-) create mode 100644 relay/adaptor/proxy/adaptor.go create mode 100644 relay/controller/proxy.go diff --git a/controller/relay.go b/controller/relay.go index 932e023b..49358e25 100644 --- a/controller/relay.go +++ b/controller/relay.go @@ -34,6 +34,8 @@ func relayHelper(c *gin.Context, relayMode int) *model.ErrorWithStatusCode { fallthrough case relaymode.AudioTranscription: err = controller.RelayAudioHelper(c, relayMode) + case relaymode.Proxy: + err = controller.RelayProxyHelper(c, relayMode) default: err = controller.RelayTextHelper(c) } @@ -85,12 +87,15 @@ func Relay(c *gin.Context) { channelId := c.GetInt(ctxkey.ChannelId) lastFailedChannelId = channelId channelName := c.GetString(ctxkey.ChannelName) + // BUG: bizErr is in race condition go processChannelRelayError(ctx, userId, channelId, channelName, bizErr) } if bizErr != nil { if bizErr.StatusCode == http.StatusTooManyRequests { bizErr.Error.Message = "当前分组上游负载已饱和,请稍后再试" } + + // BUG: bizErr is in race condition bizErr.Error.Message = helper.MessageWithRequestId(bizErr.Error.Message, requestId) c.JSON(bizErr.StatusCode, gin.H{ "error": bizErr.Error, diff --git a/middleware/auth.go b/middleware/auth.go index 5cba490a..e0019838 100644 --- a/middleware/auth.go +++ b/middleware/auth.go @@ -140,6 +140,12 @@ func TokenAuth() func(c *gin.Context) { return } } + + // set channel 
id for proxy relay
+	if channelId := c.Param("channelid"); channelId != "" {
+		c.Set(ctxkey.SpecificChannelId, channelId)
+	}
+
 	c.Next()
 	}
 }

diff --git a/relay/adaptor.go b/relay/adaptor.go
index 7fc83651..711e63bd 100644
--- a/relay/adaptor.go
+++ b/relay/adaptor.go
@@ -15,6 +15,7 @@ import (
 	"github.com/songquanpeng/one-api/relay/adaptor/ollama"
 	"github.com/songquanpeng/one-api/relay/adaptor/openai"
 	"github.com/songquanpeng/one-api/relay/adaptor/palm"
+	"github.com/songquanpeng/one-api/relay/adaptor/proxy"
 	"github.com/songquanpeng/one-api/relay/adaptor/tencent"
 	"github.com/songquanpeng/one-api/relay/adaptor/vertexai"
 	"github.com/songquanpeng/one-api/relay/adaptor/xunfei"
@@ -58,6 +59,8 @@ func GetAdaptor(apiType int) adaptor.Adaptor {
 		return &deepl.Adaptor{}
 	case apitype.VertexAI:
 		return &vertexai.Adaptor{}
+	case apitype.Proxy:
+		return &proxy.Adaptor{}
 	}
 	return nil
 }

diff --git a/relay/adaptor/proxy/adaptor.go b/relay/adaptor/proxy/adaptor.go
new file mode 100644
index 00000000..670c7628
--- /dev/null
+++ b/relay/adaptor/proxy/adaptor.go
@@ -0,0 +1,89 @@
+package proxy
+
+import (
+	"fmt"
+	"io"
+	"net/http"
+	"strings"
+
+	"github.com/gin-gonic/gin"
+	"github.com/pkg/errors"
+	"github.com/songquanpeng/one-api/relay/adaptor"
+	channelhelper "github.com/songquanpeng/one-api/relay/adaptor"
+	"github.com/songquanpeng/one-api/relay/meta"
+	"github.com/songquanpeng/one-api/relay/model"
+	relaymodel "github.com/songquanpeng/one-api/relay/model"
+)
+
+var _ adaptor.Adaptor = new(Adaptor)
+
+const channelName = "proxy"
+
+type Adaptor struct{}
+
+func (a *Adaptor) Init(meta *meta.Meta) {
+}
+
+func (a *Adaptor) ConvertRequest(c *gin.Context, relayMode int, request *model.GeneralOpenAIRequest) (any, error) {
+	return nil, errors.New("not implemented")
+}
+
+func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Meta) (usage *model.Usage, err *model.ErrorWithStatusCode) {
+	for k, v := range resp.Header {
+		for _, vv := range v {
+			c.Writer.Header().Set(k, vv)
+		}
+	}
+
+	c.Writer.WriteHeader(resp.StatusCode)
+	if _, gerr := io.Copy(c.Writer, resp.Body); gerr != nil {
+		return nil, &relaymodel.ErrorWithStatusCode{
+			StatusCode: http.StatusInternalServerError,
+			Error: relaymodel.Error{
+				Message: gerr.Error(),
+			},
+		}
+	}
+
+	return nil, nil
+}
+
+func (a *Adaptor) GetModelList() (models []string) {
+	return nil
+}
+
+func (a *Adaptor) GetChannelName() string {
+	return channelName
+}
+
+// GetRequestURL removes the static prefix and returns the real request url for the upstream service
+func (a *Adaptor) GetRequestURL(meta *meta.Meta) (string, error) {
+	prefix := fmt.Sprintf("/v1/oneapi/proxy/%d", meta.ChannelId)
+	return meta.BaseURL + strings.TrimPrefix(meta.RequestURLPath, prefix), nil
+}
+
+func (a *Adaptor) SetupRequestHeader(c *gin.Context, req *http.Request, meta *meta.Meta) error {
+	for k, v := range c.Request.Header {
+		req.Header.Set(k, v[0])
+	}
+
+	// remove unnecessary headers
+	req.Header.Del("Host")
+	req.Header.Del("Content-Length")
+	req.Header.Del("Accept-Encoding")
+	req.Header.Del("Connection")
+
+	// set authorization header
+	req.Header.Set("Authorization", meta.APIKey)
+
+	return nil
+}
+
+func (a *Adaptor) ConvertImageRequest(request *model.ImageRequest) (any, error) {
+	return nil, errors.Errorf("not implemented")
+}
+
+func (a *Adaptor) DoRequest(c *gin.Context, meta *meta.Meta, requestBody io.Reader) (*http.Response, error) {
+	return channelhelper.DoRequestHelper(a, c, meta, requestBody)
+}

diff --git a/relay/apitype/define.go
b/relay/apitype/define.go index 212a1b6b..cf7b6a0d 100644 --- a/relay/apitype/define.go +++ b/relay/apitype/define.go @@ -18,6 +18,7 @@ const ( Cloudflare DeepL VertexAI + Proxy Dummy // this one is only for count, do not add any channel after this ) diff --git a/relay/channeltype/define.go b/relay/channeltype/define.go index d1e7fcef..e3b0c98e 100644 --- a/relay/channeltype/define.go +++ b/relay/channeltype/define.go @@ -44,5 +44,6 @@ const ( Doubao Novita VertextAI + Proxy Dummy ) diff --git a/relay/channeltype/helper.go b/relay/channeltype/helper.go index 67270a67..fae3357f 100644 --- a/relay/channeltype/helper.go +++ b/relay/channeltype/helper.go @@ -37,6 +37,8 @@ func ToAPIType(channelType int) int { apiType = apitype.DeepL case VertextAI: apiType = apitype.VertexAI + case Proxy: + apiType = apitype.Proxy } return apiType diff --git a/relay/channeltype/url.go b/relay/channeltype/url.go index 20a24ab0..b5026713 100644 --- a/relay/channeltype/url.go +++ b/relay/channeltype/url.go @@ -44,6 +44,7 @@ var ChannelBaseURLs = []string{ "https://ark.cn-beijing.volces.com", // 40 "https://api.novita.ai/v3/openai", // 41 "", // 42 + "", // 43 } func init() { diff --git a/relay/controller/proxy.go b/relay/controller/proxy.go new file mode 100644 index 00000000..dcaf15a9 --- /dev/null +++ b/relay/controller/proxy.go @@ -0,0 +1,41 @@ +// Package controller is a package for handling the relay controller +package controller + +import ( + "fmt" + "net/http" + + "github.com/gin-gonic/gin" + "github.com/songquanpeng/one-api/common/logger" + "github.com/songquanpeng/one-api/relay" + "github.com/songquanpeng/one-api/relay/adaptor/openai" + "github.com/songquanpeng/one-api/relay/meta" + relaymodel "github.com/songquanpeng/one-api/relay/model" +) + +// RelayProxyHelper is a helper function to proxy the request to the upstream service +func RelayProxyHelper(c *gin.Context, relayMode int) *relaymodel.ErrorWithStatusCode { + ctx := c.Request.Context() + meta := meta.GetByContext(c) + + adaptor := relay.GetAdaptor(meta.APIType) + if adaptor == nil { + return openai.ErrorWrapper(fmt.Errorf("invalid api type: %d", meta.APIType), "invalid_api_type", http.StatusBadRequest) + } + adaptor.Init(meta) + + resp, err := adaptor.DoRequest(c, meta, c.Request.Body) + if err != nil { + logger.Errorf(ctx, "DoRequest failed: %s", err.Error()) + return openai.ErrorWrapper(err, "do_request_failed", http.StatusInternalServerError) + } + + // do response + _, respErr := adaptor.DoResponse(c, resp, meta) + if respErr != nil { + logger.Errorf(ctx, "respErr is not nil: %+v", respErr) + return respErr + } + + return nil +} diff --git a/relay/meta/relay_meta.go b/relay/meta/relay_meta.go index 04977db5..b1761e9a 100644 --- a/relay/meta/relay_meta.go +++ b/relay/meta/relay_meta.go @@ -18,11 +18,12 @@ type Meta struct { UserId int Group string ModelMapping map[string]string - BaseURL string - APIKey string - APIType int - Config model.ChannelConfig - IsStream bool + // BaseURL is the proxy url set in the channel config + BaseURL string + APIKey string + APIType int + Config model.ChannelConfig + IsStream bool // OriginModelName is the model name from the raw user request OriginModelName string // ActualModelName is the model name after mapping diff --git a/relay/relaymode/define.go b/relay/relaymode/define.go index 96d09438..aa771205 100644 --- a/relay/relaymode/define.go +++ b/relay/relaymode/define.go @@ -11,4 +11,6 @@ const ( AudioSpeech AudioTranscription AudioTranslation + // Proxy is a special relay mode for proxying requests to 
custom upstream + Proxy ) diff --git a/relay/relaymode/helper.go b/relay/relaymode/helper.go index 926dd42e..2cde5b85 100644 --- a/relay/relaymode/helper.go +++ b/relay/relaymode/helper.go @@ -24,6 +24,8 @@ func GetByPath(path string) int { relayMode = AudioTranscription } else if strings.HasPrefix(path, "/v1/audio/translations") { relayMode = AudioTranslation + } else if strings.HasPrefix(path, "/v1/oneapi/proxy") { + relayMode = Proxy } return relayMode } diff --git a/router/relay.go b/router/relay.go index 65072c86..094ea5fb 100644 --- a/router/relay.go +++ b/router/relay.go @@ -19,6 +19,7 @@ func SetRelayRouter(router *gin.Engine) { relayV1Router := router.Group("/v1") relayV1Router.Use(middleware.RelayPanicRecover(), middleware.TokenAuth(), middleware.Distribute()) { + relayV1Router.Any("/oneapi/proxy/:channelid/*target", controller.Relay) relayV1Router.POST("/completions", controller.Relay) relayV1Router.POST("/chat/completions", controller.Relay) relayV1Router.POST("/edits", controller.Relay) diff --git a/web/air/src/constants/channel.constants.js b/web/air/src/constants/channel.constants.js index 4bf035f9..18293f5f 100644 --- a/web/air/src/constants/channel.constants.js +++ b/web/air/src/constants/channel.constants.js @@ -1,10 +1,13 @@ export const CHANNEL_OPTIONS = [ { key: 1, text: 'OpenAI', value: 1, color: 'green' }, { key: 14, text: 'Anthropic Claude', value: 14, color: 'black' }, + { key: 33, text: 'AWS', value: 33, color: 'black' }, { key: 3, text: 'Azure OpenAI', value: 3, color: 'olive' }, { key: 11, text: 'Google PaLM2', value: 11, color: 'orange' }, { key: 24, text: 'Google Gemini', value: 24, color: 'orange' }, { key: 28, text: 'Mistral AI', value: 28, color: 'orange' }, + { key: 41, text: 'Novita', value: 41, color: 'purple' }, + { key: 40, text: '字节跳动豆包', value: 40, color: 'blue' }, { key: 15, text: '百度文心千帆', value: 15, color: 'blue' }, { key: 17, text: '阿里通义千问', value: 17, color: 'orange' }, { key: 18, text: '讯飞星火认知', value: 18, color: 'blue' }, @@ -17,6 +20,15 @@ export const CHANNEL_OPTIONS = [ { key: 29, text: 'Groq', value: 29, color: 'orange' }, { key: 30, text: 'Ollama', value: 30, color: 'black' }, { key: 31, text: '零一万物', value: 31, color: 'green' }, + { key: 32, text: '阶跃星辰', value: 32, color: 'blue' }, + { key: 34, text: 'Coze', value: 34, color: 'blue' }, + { key: 35, text: 'Cohere', value: 35, color: 'blue' }, + { key: 36, text: 'DeepSeek', value: 36, color: 'black' }, + { key: 37, text: 'Cloudflare', value: 37, color: 'orange' }, + { key: 38, text: 'DeepL', value: 38, color: 'black' }, + { key: 39, text: 'together.ai', value: 39, color: 'blue' }, + { key: 42, text: 'VertexAI', value: 42, color: 'blue' }, + { key: 43, text: 'Proxy', value: 43, color: 'blue' }, { key: 8, text: '自定义渠道', value: 8, color: 'pink' }, { key: 22, text: '知识库:FastGPT', value: 22, color: 'blue' }, { key: 21, text: '知识库:AI Proxy', value: 21, color: 'purple' }, @@ -34,4 +46,4 @@ export const CHANNEL_OPTIONS = [ for (let i = 0; i < CHANNEL_OPTIONS.length; i++) { CHANNEL_OPTIONS[i].label = CHANNEL_OPTIONS[i].text; -} \ No newline at end of file +} diff --git a/web/berry/src/constants/ChannelConstants.js b/web/berry/src/constants/ChannelConstants.js index ac2e73a6..acfda37b 100644 --- a/web/berry/src/constants/ChannelConstants.js +++ b/web/berry/src/constants/ChannelConstants.js @@ -167,6 +167,12 @@ export const CHANNEL_OPTIONS = { value: 42, color: 'primary' }, + 43: { + key: 43, + text: 'Proxy', + value: 43, + color: 'primary' + }, 41: { key: 41, text: 'Novita', diff --git 
a/web/default/src/constants/channel.constants.js b/web/default/src/constants/channel.constants.js index b17f56c0..b2a71016 100644 --- a/web/default/src/constants/channel.constants.js +++ b/web/default/src/constants/channel.constants.js @@ -1,44 +1,45 @@ export const CHANNEL_OPTIONS = [ - {key: 1, text: 'OpenAI', value: 1, color: 'green'}, - {key: 14, text: 'Anthropic Claude', value: 14, color: 'black'}, - {key: 33, text: 'AWS', value: 33, color: 'black'}, - {key: 3, text: 'Azure OpenAI', value: 3, color: 'olive'}, - {key: 11, text: 'Google PaLM2', value: 11, color: 'orange'}, - {key: 24, text: 'Google Gemini', value: 24, color: 'orange'}, - {key: 28, text: 'Mistral AI', value: 28, color: 'orange'}, - {key: 41, text: 'Novita', value: 41, color: 'purple'}, - {key: 40, text: '字节跳动豆包', value: 40, color: 'blue'}, - {key: 15, text: '百度文心千帆', value: 15, color: 'blue'}, - {key: 17, text: '阿里通义千问', value: 17, color: 'orange'}, - {key: 18, text: '讯飞星火认知', value: 18, color: 'blue'}, - {key: 16, text: '智谱 ChatGLM', value: 16, color: 'violet'}, - {key: 19, text: '360 智脑', value: 19, color: 'blue'}, - {key: 25, text: 'Moonshot AI', value: 25, color: 'black'}, - {key: 23, text: '腾讯混元', value: 23, color: 'teal'}, - {key: 26, text: '百川大模型', value: 26, color: 'orange'}, - {key: 27, text: 'MiniMax', value: 27, color: 'red'}, - {key: 29, text: 'Groq', value: 29, color: 'orange'}, - {key: 30, text: 'Ollama', value: 30, color: 'black'}, - {key: 31, text: '零一万物', value: 31, color: 'green'}, - {key: 32, text: '阶跃星辰', value: 32, color: 'blue'}, - {key: 34, text: 'Coze', value: 34, color: 'blue'}, - {key: 35, text: 'Cohere', value: 35, color: 'blue'}, - {key: 36, text: 'DeepSeek', value: 36, color: 'black'}, - {key: 37, text: 'Cloudflare', value: 37, color: 'orange'}, - {key: 38, text: 'DeepL', value: 38, color: 'black'}, - {key: 39, text: 'together.ai', value: 39, color: 'blue'}, - {key: 42, text: 'VertexAI', value: 42, color: 'blue'}, - {key: 8, text: '自定义渠道', value: 8, color: 'pink'}, - {key: 22, text: '知识库:FastGPT', value: 22, color: 'blue'}, - {key: 21, text: '知识库:AI Proxy', value: 21, color: 'purple'}, - {key: 20, text: '代理:OpenRouter', value: 20, color: 'black'}, - {key: 2, text: '代理:API2D', value: 2, color: 'blue'}, - {key: 5, text: '代理:OpenAI-SB', value: 5, color: 'brown'}, - {key: 7, text: '代理:OhMyGPT', value: 7, color: 'purple'}, - {key: 10, text: '代理:AI Proxy', value: 10, color: 'purple'}, - {key: 4, text: '代理:CloseAI', value: 4, color: 'teal'}, - {key: 6, text: '代理:OpenAI Max', value: 6, color: 'violet'}, - {key: 9, text: '代理:AI.LS', value: 9, color: 'yellow'}, - {key: 12, text: '代理:API2GPT', value: 12, color: 'blue'}, - {key: 13, text: '代理:AIGC2D', value: 13, color: 'purple'} + { key: 1, text: 'OpenAI', value: 1, color: 'green' }, + { key: 14, text: 'Anthropic Claude', value: 14, color: 'black' }, + { key: 33, text: 'AWS', value: 33, color: 'black' }, + { key: 3, text: 'Azure OpenAI', value: 3, color: 'olive' }, + { key: 11, text: 'Google PaLM2', value: 11, color: 'orange' }, + { key: 24, text: 'Google Gemini', value: 24, color: 'orange' }, + { key: 28, text: 'Mistral AI', value: 28, color: 'orange' }, + { key: 41, text: 'Novita', value: 41, color: 'purple' }, + { key: 40, text: '字节跳动豆包', value: 40, color: 'blue' }, + { key: 15, text: '百度文心千帆', value: 15, color: 'blue' }, + { key: 17, text: '阿里通义千问', value: 17, color: 'orange' }, + { key: 18, text: '讯飞星火认知', value: 18, color: 'blue' }, + { key: 16, text: '智谱 ChatGLM', value: 16, color: 'violet' }, + { key: 19, text: '360 智脑', value: 19, color: 'blue' 
}, + { key: 25, text: 'Moonshot AI', value: 25, color: 'black' }, + { key: 23, text: '腾讯混元', value: 23, color: 'teal' }, + { key: 26, text: '百川大模型', value: 26, color: 'orange' }, + { key: 27, text: 'MiniMax', value: 27, color: 'red' }, + { key: 29, text: 'Groq', value: 29, color: 'orange' }, + { key: 30, text: 'Ollama', value: 30, color: 'black' }, + { key: 31, text: '零一万物', value: 31, color: 'green' }, + { key: 32, text: '阶跃星辰', value: 32, color: 'blue' }, + { key: 34, text: 'Coze', value: 34, color: 'blue' }, + { key: 35, text: 'Cohere', value: 35, color: 'blue' }, + { key: 36, text: 'DeepSeek', value: 36, color: 'black' }, + { key: 37, text: 'Cloudflare', value: 37, color: 'orange' }, + { key: 38, text: 'DeepL', value: 38, color: 'black' }, + { key: 39, text: 'together.ai', value: 39, color: 'blue' }, + { key: 42, text: 'VertexAI', value: 42, color: 'blue' }, + { key: 43, text: 'Proxy', value: 43, color: 'blue' }, + { key: 8, text: '自定义渠道', value: 8, color: 'pink' }, + { key: 22, text: '知识库:FastGPT', value: 22, color: 'blue' }, + { key: 21, text: '知识库:AI Proxy', value: 21, color: 'purple' }, + { key: 20, text: '代理:OpenRouter', value: 20, color: 'black' }, + { key: 2, text: '代理:API2D', value: 2, color: 'blue' }, + { key: 5, text: '代理:OpenAI-SB', value: 5, color: 'brown' }, + { key: 7, text: '代理:OhMyGPT', value: 7, color: 'purple' }, + { key: 10, text: '代理:AI Proxy', value: 10, color: 'purple' }, + { key: 4, text: '代理:CloseAI', value: 4, color: 'teal' }, + { key: 6, text: '代理:OpenAI Max', value: 6, color: 'violet' }, + { key: 9, text: '代理:AI.LS', value: 9, color: 'yellow' }, + { key: 12, text: '代理:API2GPT', value: 12, color: 'blue' }, + { key: 13, text: '代理:AIGC2D', value: 13, color: 'purple' } ]; diff --git a/web/default/src/pages/Channel/EditChannel.js b/web/default/src/pages/Channel/EditChannel.js index 64ff22f5..b967907e 100644 --- a/web/default/src/pages/Channel/EditChannel.js +++ b/web/default/src/pages/Channel/EditChannel.js @@ -170,7 +170,7 @@ const EditChannel = () => { showInfo('请填写渠道名称和渠道密钥!'); return; } - if (inputs.models.length === 0) { + if (inputs.type !== 43 && inputs.models.length === 0) { showInfo('请至少选择一个模型!'); return; } @@ -370,63 +370,75 @@ const EditChannel = () => { ) } - - { - copy(value).then(); - }} - selection - onChange={handleInputChange} - value={inputs.models} - autoComplete='new-password' - options={modelOptions} - /> - -
- - - - 填入 - } - placeholder='输入自定义模型名称' - value={customModel} - onChange={(e, { value }) => { - setCustomModel(value); - }} - onKeyDown={(e) => { - if (e.key === 'Enter') { - addCustomModel(); - e.preventDefault(); - } - }} - /> -
- - - + { + inputs.type !== 43 && ( + + { + copy(value).then(); + }} + selection + onChange={handleInputChange} + value={inputs.models} + autoComplete='new-password' + options={modelOptions} + /> + + ) + } + { + inputs.type !== 43 && ( +
+ + + + 填入 + } + placeholder='输入自定义模型名称' + value={customModel} + onChange={(e, { value }) => { + setCustomModel(value); + }} + onKeyDown={(e) => { + if (e.key === 'Enter') { + addCustomModel(); + e.preventDefault(); + } + }} + /> +
+ ) + } + { + inputs.type !== 43 && ( + + + + ) + } { inputs.type === 33 && ( From 36039e329e0cef61056b34ab364ee9072ca08501 Mon Sep 17 00:00:00 2001 From: Junyan Qin <1010553892@qq.com> Date: Tue, 6 Aug 2024 23:33:43 +0800 Subject: [PATCH 07/13] docs: update introduction for QChatGPT (#1707) --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 987fde7d..f8225121 100644 --- a/README.md +++ b/README.md @@ -251,9 +251,9 @@ docker run --name chatgpt-web -d -p 3002:3002 -e OPENAI_API_BASE_URL=https://ope #### QChatGPT - QQ机器人 项目主页:https://github.com/RockChinQ/QChatGPT -根据文档完成部署后,在`config.py`设置配置项`openai_config`的`reverse_proxy`为 One API 后端地址,设置`api_key`为 One API 生成的key,并在配置项`completion_api_params`的`model`参数设置为 One API 支持的模型名称。 +根据[文档](https://qchatgpt.rockchin.top)完成部署后,在 `data/provider.json`设置`requester.openai-chat-completions.base-url`为 One API 实例地址,并填写 API Key 到 `keys.openai` 组中,设置 `model` 为要使用的模型名称。 -可安装 [Switcher 插件](https://github.com/RockChinQ/Switcher)在运行时切换所使用的模型。 +运行期间可以通过`!model`命令查看、切换可用模型。 ### 部署到第三方平台
From 402fcf7f79e22697a00a10f4e9dc176799cb28ba Mon Sep 17 00:00:00 2001 From: Shenghang Tsai Date: Tue, 6 Aug 2024 23:42:25 +0800 Subject: [PATCH 08/13] feat: add SiliconFlow (#1717) * Add SiliconFlow * Update README.md * Update README.md * Update channel.constants.js * Update ChannelConstants.js * Update channel.constants.js * Update ChannelConstants.js * Update compatible.go * Update README.md --- README.md | 1 + relay/adaptor/openai/compatible.go | 4 +++ relay/adaptor/siliconflow/constants.go | 36 +++++++++++++++++++ relay/channeltype/define.go | 1 + relay/channeltype/url.go | 1 + web/air/src/constants/channel.constants.js | 1 + web/berry/src/constants/ChannelConstants.js | 6 ++++ .../src/constants/channel.constants.js | 1 + 8 files changed, 51 insertions(+) create mode 100644 relay/adaptor/siliconflow/constants.go diff --git a/README.md b/README.md index f8225121..5f9947b0 100644 --- a/README.md +++ b/README.md @@ -89,6 +89,7 @@ _✨ 通过标准的 OpenAI API 格式访问所有的大模型,开箱即用 + [x] [DeepL](https://www.deepl.com/) + [x] [together.ai](https://www.together.ai/) + [x] [novita.ai](https://www.novita.ai/) + + [x] [硅基流动 SiliconCloud](https://siliconflow.cn/siliconcloud) 2. 支持配置镜像以及众多[第三方代理服务](https://iamazing.cn/page/openai-api-third-party-services)。 3. 支持通过**负载均衡**的方式访问多个渠道。 4. 支持 **stream 模式**,可以通过流式传输实现打字机效果。 diff --git a/relay/adaptor/openai/compatible.go b/relay/adaptor/openai/compatible.go index 3445249c..0512f05c 100644 --- a/relay/adaptor/openai/compatible.go +++ b/relay/adaptor/openai/compatible.go @@ -13,6 +13,7 @@ import ( "github.com/songquanpeng/one-api/relay/adaptor/novita" "github.com/songquanpeng/one-api/relay/adaptor/stepfun" "github.com/songquanpeng/one-api/relay/adaptor/togetherai" + "github.com/songquanpeng/one-api/relay/adaptor/siliconflow" "github.com/songquanpeng/one-api/relay/channeltype" ) @@ -30,6 +31,7 @@ var CompatibleChannels = []int{ channeltype.DeepSeek, channeltype.TogetherAI, channeltype.Novita, + channeltype.SiliconFlow, } func GetCompatibleChannelMeta(channelType int) (string, []string) { @@ -60,6 +62,8 @@ func GetCompatibleChannelMeta(channelType int) (string, []string) { return "doubao", doubao.ModelList case channeltype.Novita: return "novita", novita.ModelList + case channeltype.SiliconFlow: + return "siliconflow", siliconflow.ModelList default: return "openai", ModelList } diff --git a/relay/adaptor/siliconflow/constants.go b/relay/adaptor/siliconflow/constants.go new file mode 100644 index 00000000..0bf54761 --- /dev/null +++ b/relay/adaptor/siliconflow/constants.go @@ -0,0 +1,36 @@ +package siliconflow + +// https://docs.siliconflow.cn/docs/getting-started + +var ModelList = []string{ + "deepseek-ai/deepseek-llm-67b-chat", + "Qwen/Qwen1.5-14B-Chat", + "Qwen/Qwen1.5-7B-Chat", + "Qwen/Qwen1.5-110B-Chat", + "Qwen/Qwen1.5-32B-Chat", + "01-ai/Yi-1.5-6B-Chat", + "01-ai/Yi-1.5-9B-Chat-16K", + "01-ai/Yi-1.5-34B-Chat-16K", + "THUDM/chatglm3-6b", + "deepseek-ai/DeepSeek-V2-Chat", + "THUDM/glm-4-9b-chat", + "Qwen/Qwen2-72B-Instruct", + "Qwen/Qwen2-7B-Instruct", + "Qwen/Qwen2-57B-A14B-Instruct", + "deepseek-ai/DeepSeek-Coder-V2-Instruct", + "Qwen/Qwen2-1.5B-Instruct", + "internlm/internlm2_5-7b-chat", + "BAAI/bge-large-en-v1.5", + "BAAI/bge-large-zh-v1.5", + "Pro/Qwen/Qwen2-7B-Instruct", + "Pro/Qwen/Qwen2-1.5B-Instruct", + "Pro/Qwen/Qwen1.5-7B-Chat", + "Pro/THUDM/glm-4-9b-chat", + "Pro/THUDM/chatglm3-6b", + "Pro/01-ai/Yi-1.5-9B-Chat-16K", + "Pro/01-ai/Yi-1.5-6B-Chat", + "Pro/google/gemma-2-9b-it", + "Pro/internlm/internlm2_5-7b-chat", + 
"Pro/meta-llama/Meta-Llama-3-8B-Instruct", + "Pro/mistralai/Mistral-7B-Instruct-v0.2", +} diff --git a/relay/channeltype/define.go b/relay/channeltype/define.go index e3b0c98e..a261cff8 100644 --- a/relay/channeltype/define.go +++ b/relay/channeltype/define.go @@ -45,5 +45,6 @@ const ( Novita VertextAI Proxy + SiliconFlow Dummy ) diff --git a/relay/channeltype/url.go b/relay/channeltype/url.go index b5026713..8727faea 100644 --- a/relay/channeltype/url.go +++ b/relay/channeltype/url.go @@ -45,6 +45,7 @@ var ChannelBaseURLs = []string{ "https://api.novita.ai/v3/openai", // 41 "", // 42 "", // 43 + "https://api.siliconflow.cn", // 44 } func init() { diff --git a/web/air/src/constants/channel.constants.js b/web/air/src/constants/channel.constants.js index 18293f5f..04fe94f1 100644 --- a/web/air/src/constants/channel.constants.js +++ b/web/air/src/constants/channel.constants.js @@ -29,6 +29,7 @@ export const CHANNEL_OPTIONS = [ { key: 39, text: 'together.ai', value: 39, color: 'blue' }, { key: 42, text: 'VertexAI', value: 42, color: 'blue' }, { key: 43, text: 'Proxy', value: 43, color: 'blue' }, + { key: 44, text: 'SiliconFlow', value: 44, color: 'blue' }, { key: 8, text: '自定义渠道', value: 8, color: 'pink' }, { key: 22, text: '知识库:FastGPT', value: 22, color: 'blue' }, { key: 21, text: '知识库:AI Proxy', value: 21, color: 'purple' }, diff --git a/web/berry/src/constants/ChannelConstants.js b/web/berry/src/constants/ChannelConstants.js index acfda37b..98ea7ca5 100644 --- a/web/berry/src/constants/ChannelConstants.js +++ b/web/berry/src/constants/ChannelConstants.js @@ -173,6 +173,12 @@ export const CHANNEL_OPTIONS = { value: 43, color: 'primary' }, + 44: { + key: 44, + text: 'SiliconFlow', + value: 44, + color: 'primary' + }, 41: { key: 41, text: 'Novita', diff --git a/web/default/src/constants/channel.constants.js b/web/default/src/constants/channel.constants.js index b2a71016..04e361a1 100644 --- a/web/default/src/constants/channel.constants.js +++ b/web/default/src/constants/channel.constants.js @@ -29,6 +29,7 @@ export const CHANNEL_OPTIONS = [ { key: 39, text: 'together.ai', value: 39, color: 'blue' }, { key: 42, text: 'VertexAI', value: 42, color: 'blue' }, { key: 43, text: 'Proxy', value: 43, color: 'blue' }, + { key: 44, text: 'SiliconFlow', value: 44, color: 'blue' }, { key: 8, text: '自定义渠道', value: 8, color: 'pink' }, { key: 22, text: '知识库:FastGPT', value: 22, color: 'blue' }, { key: 21, text: '知识库:AI Proxy', value: 21, color: 'purple' }, From e7e99e558afab7423f70f6ebd6a9949fcb616dbd Mon Sep 17 00:00:00 2001 From: SLKun Date: Tue, 6 Aug 2024 23:43:20 +0800 Subject: [PATCH 09/13] feat: update Ollama embedding API to latest version with multi-text embedding support (#1715) --- relay/adaptor/ollama/adaptor.go | 2 +- relay/adaptor/ollama/main.go | 25 +++++++++++++++++-------- relay/adaptor/ollama/model.go | 12 ++++++++---- 3 files changed, 26 insertions(+), 13 deletions(-) diff --git a/relay/adaptor/ollama/adaptor.go b/relay/adaptor/ollama/adaptor.go index 66702c5d..ad1f8983 100644 --- a/relay/adaptor/ollama/adaptor.go +++ b/relay/adaptor/ollama/adaptor.go @@ -24,7 +24,7 @@ func (a *Adaptor) GetRequestURL(meta *meta.Meta) (string, error) { // https://github.com/ollama/ollama/blob/main/docs/api.md fullRequestURL := fmt.Sprintf("%s/api/chat", meta.BaseURL) if meta.Mode == relaymode.Embeddings { - fullRequestURL = fmt.Sprintf("%s/api/embeddings", meta.BaseURL) + fullRequestURL = fmt.Sprintf("%s/api/embed", meta.BaseURL) } return fullRequestURL, nil } diff --git a/relay/adaptor/ollama/main.go 
b/relay/adaptor/ollama/main.go index 936a7e14..6a1d334d 100644 --- a/relay/adaptor/ollama/main.go +++ b/relay/adaptor/ollama/main.go @@ -157,8 +157,15 @@ func StreamHandler(c *gin.Context, resp *http.Response) (*model.ErrorWithStatusC func ConvertEmbeddingRequest(request model.GeneralOpenAIRequest) *EmbeddingRequest { return &EmbeddingRequest{ - Model: request.Model, - Prompt: strings.Join(request.ParseInput(), " "), + Model: request.Model, + Input: request.ParseInput(), + Options: &Options{ + Seed: int(request.Seed), + Temperature: request.Temperature, + TopP: request.TopP, + FrequencyPenalty: request.FrequencyPenalty, + PresencePenalty: request.PresencePenalty, + }, } } @@ -201,15 +208,17 @@ func embeddingResponseOllama2OpenAI(response *EmbeddingResponse) *openai.Embeddi openAIEmbeddingResponse := openai.EmbeddingResponse{ Object: "list", Data: make([]openai.EmbeddingResponseItem, 0, 1), - Model: "text-embedding-v1", + Model: response.Model, Usage: model.Usage{TotalTokens: 0}, } - openAIEmbeddingResponse.Data = append(openAIEmbeddingResponse.Data, openai.EmbeddingResponseItem{ - Object: `embedding`, - Index: 0, - Embedding: response.Embedding, - }) + for i, embedding := range response.Embeddings { + openAIEmbeddingResponse.Data = append(openAIEmbeddingResponse.Data, openai.EmbeddingResponseItem{ + Object: `embedding`, + Index: i, + Embedding: embedding, + }) + } return &openAIEmbeddingResponse } diff --git a/relay/adaptor/ollama/model.go b/relay/adaptor/ollama/model.go index 8baf56a0..29430e1c 100644 --- a/relay/adaptor/ollama/model.go +++ b/relay/adaptor/ollama/model.go @@ -37,11 +37,15 @@ type ChatResponse struct { } type EmbeddingRequest struct { - Model string `json:"model"` - Prompt string `json:"prompt"` + Model string `json:"model"` + Input []string `json:"input"` + // Truncate bool `json:"truncate,omitempty"` + Options *Options `json:"options,omitempty"` + // KeepAlive string `json:"keep_alive,omitempty"` } type EmbeddingResponse struct { - Error string `json:"error,omitempty"` - Embedding []float64 `json:"embedding,omitempty"` + Error string `json:"error,omitempty"` + Model string `json:"model"` + Embeddings [][]float64 `json:"embeddings"` } From b4bfa418a8cfde3fe0dec14e90e4d43feed3cfe1 Mon Sep 17 00:00:00 2001 From: longkeyy Date: Tue, 6 Aug 2024 23:43:33 +0800 Subject: [PATCH 10/13] feat: update gemini model and price (#1705) --- relay/adaptor/gemini/constants.go | 3 +-- relay/adaptor/groq/constants.go | 9 +++++++-- relay/billing/ratio/model.go | 27 +++++++++++++++------------ 3 files changed, 23 insertions(+), 16 deletions(-) diff --git a/relay/adaptor/gemini/constants.go b/relay/adaptor/gemini/constants.go index f65e6bfc..b0f84dfc 100644 --- a/relay/adaptor/gemini/constants.go +++ b/relay/adaptor/gemini/constants.go @@ -3,6 +3,5 @@ package gemini // https://ai.google.dev/models/gemini var ModelList = []string{ - "gemini-pro", "gemini-1.0-pro-001", "gemini-1.5-pro", - "gemini-pro-vision", "gemini-1.0-pro-vision-001", "embedding-001", "text-embedding-004", + "gemini-pro", "gemini-1.0-pro", "gemini-1.5-flash", "gemini-1.5-pro", "text-embedding-004", "aqa", } diff --git a/relay/adaptor/groq/constants.go b/relay/adaptor/groq/constants.go index 1aa2574b..559851ee 100644 --- a/relay/adaptor/groq/constants.go +++ b/relay/adaptor/groq/constants.go @@ -4,9 +4,14 @@ package groq var ModelList = []string{ "gemma-7b-it", - "llama2-7b-2048", - "llama2-70b-4096", "mixtral-8x7b-32768", "llama3-8b-8192", "llama3-70b-8192", + "gemma2-9b-it", + "llama-3.1-405b-reasoning", + 
"llama-3.1-70b-versatile", + "llama-3.1-8b-instant", + "llama3-groq-70b-8192-tool-use-preview", + "llama3-groq-8b-8192-tool-use-preview", + "whisper-large-v3", } diff --git a/relay/billing/ratio/model.go b/relay/billing/ratio/model.go index 799fa90f..7bc6cd54 100644 --- a/relay/billing/ratio/model.go +++ b/relay/billing/ratio/model.go @@ -98,12 +98,11 @@ var ModelRatio = map[string]float64{ "bge-large-en": 0.002 * RMB, "tao-8k": 0.002 * RMB, // https://ai.google.dev/pricing - "PaLM-2": 1, - "gemini-pro": 1, // $0.00025 / 1k characters -> $0.001 / 1k tokens - "gemini-pro-vision": 1, // $0.00025 / 1k characters -> $0.001 / 1k tokens - "gemini-1.0-pro-vision-001": 1, - "gemini-1.0-pro-001": 1, - "gemini-1.5-pro": 1, + "gemini-pro": 1, // $0.00025 / 1k characters -> $0.001 / 1k tokens + "gemini-1.0-pro": 1, + "gemini-1.5-flash": 1, + "gemini-1.5-pro": 1, + "aqa": 1, // https://open.bigmodel.cn/pricing "glm-4": 0.1 * RMB, "glm-4v": 0.1 * RMB, @@ -158,12 +157,16 @@ var ModelRatio = map[string]float64{ "mistral-large-latest": 8.0 / 1000 * USD, "mistral-embed": 0.1 / 1000 * USD, // https://wow.groq.com/#:~:text=inquiries%C2%A0here.-,Model,-Current%20Speed - "llama3-70b-8192": 0.59 / 1000 * USD, - "mixtral-8x7b-32768": 0.27 / 1000 * USD, - "llama3-8b-8192": 0.05 / 1000 * USD, - "gemma-7b-it": 0.1 / 1000 * USD, - "llama2-70b-4096": 0.64 / 1000 * USD, - "llama2-7b-2048": 0.1 / 1000 * USD, + "gemma-7b-it": 0.07 / 1000000 * USD, + "mixtral-8x7b-32768": 0.24 / 1000000 * USD, + "llama3-8b-8192": 0.05 / 1000000 * USD, + "llama3-70b-8192": 0.59 / 1000000 * USD, + "gemma2-9b-it": 0.20 / 1000000 * USD, + "llama-3.1-405b-reasoning": 0.89 / 1000000 * USD, + "llama-3.1-70b-versatile": 0.59 / 1000000 * USD, + "llama-3.1-8b-instant": 0.05 / 1000000 * USD, + "llama3-groq-70b-8192-tool-use-preview": 0.89 / 1000000 * USD, + "llama3-groq-8b-8192-tool-use-preview": 0.19 / 1000000 * USD, // https://platform.lingyiwanwu.com/docs#-计费单元 "yi-34b-chat-0205": 2.5 / 1000 * RMB, "yi-34b-chat-200k": 12.0 / 1000 * RMB, From 04bb3ef3923ba4b0931f0940e65f06b29cd53df8 Mon Sep 17 00:00:00 2001 From: MotorBottle <71703952+MotorBottle@users.noreply.github.com> Date: Tue, 6 Aug 2024 23:44:37 +0800 Subject: [PATCH 11/13] feat: add Max Tokens and Context Window Setting Options for Ollama Channel (#1694) * Update main.go with max_tokens param * Update model.go with max_tokens param * Update model.go * Update main.go * Update main.go * Adds num_ctx param for Ollama Channel * Added num_ctx param for ollama adapter * Added num_ctx param for ollama adapter * Improved data process logic --- relay/adaptor/ollama/main.go | 8 ++++++-- relay/adaptor/ollama/model.go | 2 ++ relay/model/general.go | 1 + 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/relay/adaptor/ollama/main.go b/relay/adaptor/ollama/main.go index 6a1d334d..43317ff6 100644 --- a/relay/adaptor/ollama/main.go +++ b/relay/adaptor/ollama/main.go @@ -31,6 +31,8 @@ func ConvertRequest(request model.GeneralOpenAIRequest) *ChatRequest { TopP: request.TopP, FrequencyPenalty: request.FrequencyPenalty, PresencePenalty: request.PresencePenalty, + NumPredict: request.MaxTokens, + NumCtx: request.NumCtx, }, Stream: request.Stream, } @@ -118,8 +120,10 @@ func StreamHandler(c *gin.Context, resp *http.Response) (*model.ErrorWithStatusC common.SetEventStreamHeaders(c) for scanner.Scan() { - data := strings.TrimPrefix(scanner.Text(), "}") - data = data + "}" + data := scanner.Text() + if strings.HasPrefix(data, "}") { + data = strings.TrimPrefix(data, "}") + "}" + } var ollamaResponse 
ChatResponse
		err := json.Unmarshal([]byte(data), &ollamaResponse)

diff --git a/relay/adaptor/ollama/model.go b/relay/adaptor/ollama/model.go
index 29430e1c..7039984f 100644
--- a/relay/adaptor/ollama/model.go
+++ b/relay/adaptor/ollama/model.go
@@ -7,6 +7,8 @@ type Options struct {
 	TopP float64 `json:"top_p,omitempty"`
 	FrequencyPenalty float64 `json:"frequency_penalty,omitempty"`
 	PresencePenalty float64 `json:"presence_penalty,omitempty"`
+	NumPredict int `json:"num_predict,omitempty"`
+	NumCtx int `json:"num_ctx,omitempty"`
 }

 type Message struct {

diff --git a/relay/model/general.go b/relay/model/general.go
index 229a61c1..c34c1c2d 100644
--- a/relay/model/general.go
+++ b/relay/model/general.go
@@ -29,6 +29,7 @@ type GeneralOpenAIRequest struct {
 	Dimensions int `json:"dimensions,omitempty"`
 	Instruction string `json:"instruction,omitempty"`
 	Size string `json:"size,omitempty"`
+	NumCtx int `json:"num_ctx,omitempty"`
 }

 func (r GeneralOpenAIRequest) ParseInput() []string {

From 2af6f6a166604f346a1a326fb481cf1741eadd31 Mon Sep 17 00:00:00 2001
From: TAKO <20227709+HynoR@users.noreply.github.com>
Date: Tue, 6 Aug 2024 23:45:15 +0800
Subject: [PATCH 12/13] feat: add Cloudflare New Free Model Llama 3.1 8b (#1703)

---
 relay/adaptor/cloudflare/constant.go | 1 +
 1 file changed, 1 insertion(+)

diff --git a/relay/adaptor/cloudflare/constant.go b/relay/adaptor/cloudflare/constant.go
index dee79a76..54052aa6 100644
--- a/relay/adaptor/cloudflare/constant.go
+++ b/relay/adaptor/cloudflare/constant.go
@@ -1,6 +1,7 @@ package cloudflare

 var ModelList = []string{
+	"@cf/meta/llama-3.1-8b-instruct",
 	"@cf/meta/llama-2-7b-chat-fp16",
 	"@cf/meta/llama-2-7b-chat-int8",
 	"@cf/mistral/mistral-7b-instruct-v0.1",

From f9774698e9a59823a65c0ede475e28c29638f396 Mon Sep 17 00:00:00 2001
From: longkeyy
Date: Tue, 6 Aug 2024 23:51:08 +0800
Subject: [PATCH 13/13] feat: synchronize with the official release of the groq model (#1677)

update groq: add gemma2-9b-it and the llama3.1 family; fix pricing unit (k/token -> m/token)
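Finally, back on PATCH 09 above: the Ollama adaptor now targets /api/embed, which takes multiple inputs and returns one vector per input, so the response conversion fans out into indexed OpenAI-style items. A minimal runnable sketch with the types trimmed to the fields the patch touches; the model name in main is illustrative:

package main

import "fmt"

// Trimmed stand-ins for the patch's EmbeddingResponse and the OpenAI-style
// response item; the real types carry JSON tags and more fields.
type embeddingResponse struct {
    Model      string
    Embeddings [][]float64
}

type embeddingItem struct {
    Object    string
    Index     int
    Embedding []float64
}

// fanOut mirrors embeddingResponseOllama2OpenAI from PATCH 09: one item per
// returned vector, indexed in input order.
func fanOut(resp embeddingResponse) []embeddingItem {
    items := make([]embeddingItem, 0, len(resp.Embeddings))
    for i, e := range resp.Embeddings {
        items = append(items, embeddingItem{Object: "embedding", Index: i, Embedding: e})
    }
    return items
}

func main() {
    resp := embeddingResponse{Model: "all-minilm", Embeddings: [][]float64{{0.1, 0.2}, {0.3, 0.4}}}
    for _, it := range fanOut(resp) {
        fmt.Println(it.Index, it.Embedding)
    }
}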