From 61f4cb2f6568be5ac065e6d776165771ea68bf31 Mon Sep 17 00:00:00 2001 From: RockChinQ <1010553892@qq.com> Date: Tue, 30 Jan 2024 16:58:11 +0800 Subject: [PATCH] =?UTF-8?q?perf:=20=E5=AE=8C=E5=96=84=E6=A8=A1=E5=9E=8B?= =?UTF-8?q?=E4=BF=A1=E6=81=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pkg/provider/requester/modelmgr.py | 111 ++++++++++++++++++----------- 1 file changed, 69 insertions(+), 42 deletions(-) diff --git a/pkg/provider/requester/modelmgr.py b/pkg/provider/requester/modelmgr.py index 6510c1de..ab5ff9e4 100644 --- a/pkg/provider/requester/modelmgr.py +++ b/pkg/provider/requester/modelmgr.py @@ -47,95 +47,113 @@ class ModelManager: token_mgr=openai_token_mgr, requester=openai_chat_completion, tool_call_supported=True, - tokenizer=tiktoken_tokenizer + tokenizer=tiktoken_tokenizer, + max_tokens=16385 ), entities.LLMModelInfo( name="gpt-3.5-turbo-16k", token_mgr=openai_token_mgr, requester=openai_chat_completion, tool_call_supported=True, - tokenizer=tiktoken_tokenizer + tokenizer=tiktoken_tokenizer, + max_tokens=16385 ), entities.LLMModelInfo( name="gpt-3.5-turbo-0613", token_mgr=openai_token_mgr, requester=openai_chat_completion, tool_call_supported=True, - tokenizer=tiktoken_tokenizer + tokenizer=tiktoken_tokenizer, + max_tokens=4096 ), entities.LLMModelInfo( name="gpt-3.5-turbo-16k-0613", token_mgr=openai_token_mgr, requester=openai_chat_completion, tool_call_supported=True, - tokenizer=tiktoken_tokenizer + tokenizer=tiktoken_tokenizer, + max_tokens=16385 ), entities.LLMModelInfo( name="gpt-3.5-turbo-0301", token_mgr=openai_token_mgr, requester=openai_chat_completion, tool_call_supported=True, - tokenizer=tiktoken_tokenizer + tokenizer=tiktoken_tokenizer, + max_tokens=4096 + ) + ] + + self.model_list.extend(model_list) + + gpt4_model_list = [ + entities.LLMModelInfo( + name="gpt-4-0125-preview", + token_mgr=openai_token_mgr, + requester=openai_chat_completion, + tool_call_supported=True, + tokenizer=tiktoken_tokenizer, + max_tokens=128000 + ), + entities.LLMModelInfo( + name="gpt-4-turbo-preview", + token_mgr=openai_token_mgr, + requester=openai_chat_completion, + tool_call_supported=True, + tokenizer=tiktoken_tokenizer, + max_tokens=128000 ), entities.LLMModelInfo( name="gpt-4-1106-preview", token_mgr=openai_token_mgr, requester=openai_chat_completion, tool_call_supported=True, - tokenizer=tiktoken_tokenizer + tokenizer=tiktoken_tokenizer, + max_tokens=128000 ), entities.LLMModelInfo( name="gpt-4-vision-preview", token_mgr=openai_token_mgr, requester=openai_chat_completion, tool_call_supported=True, - tokenizer=tiktoken_tokenizer + tokenizer=tiktoken_tokenizer, + max_tokens=128000 ), entities.LLMModelInfo( name="gpt-4", token_mgr=openai_token_mgr, requester=openai_chat_completion, tool_call_supported=True, - tokenizer=tiktoken_tokenizer - ), - entities.LLMModelInfo( - name="gpt-4-32k", - token_mgr=openai_token_mgr, - requester=openai_chat_completion, - tool_call_supported=True, - tokenizer=tiktoken_tokenizer + tokenizer=tiktoken_tokenizer, + max_tokens=8192 ), entities.LLMModelInfo( name="gpt-4-0613", token_mgr=openai_token_mgr, requester=openai_chat_completion, tool_call_supported=True, - tokenizer=tiktoken_tokenizer + tokenizer=tiktoken_tokenizer, + max_tokens=8192 + ), + entities.LLMModelInfo( + name="gpt-4-32k", + token_mgr=openai_token_mgr, + requester=openai_chat_completion, + tool_call_supported=True, + tokenizer=tiktoken_tokenizer, + max_tokens=32768 ), entities.LLMModelInfo( name="gpt-4-32k-0613", token_mgr=openai_token_mgr, requester=openai_chat_completion, tool_call_supported=True, - tokenizer=tiktoken_tokenizer - ), - entities.LLMModelInfo( - name="gpt-4-0314", - token_mgr=openai_token_mgr, - requester=openai_chat_completion, - tool_call_supported=True, - tokenizer=tiktoken_tokenizer - ), - entities.LLMModelInfo( - name="gpt-4-32k-0314", - token_mgr=openai_token_mgr, - requester=openai_chat_completion, - tool_call_supported=True, - tokenizer=tiktoken_tokenizer - ), + tokenizer=tiktoken_tokenizer, + max_tokens=32768 + ) ] - self.model_list.extend(model_list) + self.model_list.extend(gpt4_model_list) one_api_model_list = [ entities.LLMModelInfo( @@ -144,7 +162,8 @@ class ModelManager: token_mgr=openai_token_mgr, requester=openai_chat_completion, tool_call_supported=False, - tokenizer=tiktoken_tokenizer + tokenizer=tiktoken_tokenizer, + max_tokens=8192 ), entities.LLMModelInfo( name="OneAPI/chatglm_pro", @@ -152,7 +171,8 @@ class ModelManager: token_mgr=openai_token_mgr, requester=openai_chat_completion, tool_call_supported=False, - tokenizer=tiktoken_tokenizer + tokenizer=tiktoken_tokenizer, + max_tokens=128000 ), entities.LLMModelInfo( name="OneAPI/chatglm_std", @@ -160,7 +180,8 @@ class ModelManager: token_mgr=openai_token_mgr, requester=openai_chat_completion, tool_call_supported=False, - tokenizer=tiktoken_tokenizer + tokenizer=tiktoken_tokenizer, + max_tokens=128000 ), entities.LLMModelInfo( name="OneAPI/chatglm_lite", @@ -168,7 +189,8 @@ class ModelManager: token_mgr=openai_token_mgr, requester=openai_chat_completion, tool_call_supported=False, - tokenizer=tiktoken_tokenizer + tokenizer=tiktoken_tokenizer, + max_tokens=128000 ), entities.LLMModelInfo( name="OneAPI/qwen-v1", @@ -176,7 +198,8 @@ class ModelManager: token_mgr=openai_token_mgr, requester=openai_chat_completion, tool_call_supported=False, - tokenizer=tiktoken_tokenizer + tokenizer=tiktoken_tokenizer, + max_tokens=6000 ), entities.LLMModelInfo( name="OneAPI/qwen-plus-v1", @@ -184,7 +207,8 @@ class ModelManager: token_mgr=openai_token_mgr, requester=openai_chat_completion, tool_call_supported=False, - tokenizer=tiktoken_tokenizer + tokenizer=tiktoken_tokenizer, + max_tokens=30000 ), entities.LLMModelInfo( name="OneAPI/ERNIE-Bot", @@ -192,7 +216,8 @@ class ModelManager: token_mgr=openai_token_mgr, requester=openai_chat_completion, tool_call_supported=False, - tokenizer=tiktoken_tokenizer + tokenizer=tiktoken_tokenizer, + max_tokens=2000 ), entities.LLMModelInfo( name="OneAPI/ERNIE-Bot-turbo", @@ -200,7 +225,8 @@ class ModelManager: token_mgr=openai_token_mgr, requester=openai_chat_completion, tool_call_supported=False, - tokenizer=tiktoken_tokenizer + tokenizer=tiktoken_tokenizer, + max_tokens=7000 ), entities.LLMModelInfo( name="OneAPI/gemini-pro", @@ -208,7 +234,8 @@ class ModelManager: token_mgr=openai_token_mgr, requester=openai_chat_completion, tool_call_supported=False, - tokenizer=tiktoken_tokenizer + tokenizer=tiktoken_tokenizer, + max_tokens=30720 ), ]