mirror of
https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web.git
synced 2025-11-16 14:03:43 +08:00
fix: #352
This commit is contained in:
@@ -31,13 +31,13 @@ async function handle(req: NextRequest) {
|
||||
// const pineconeIndex = pinecone.Index(serverConfig.pineconeIndex!);
|
||||
const apiKey = getOpenAIApiKey(token);
|
||||
const baseUrl = getOpenAIBaseUrl(reqBody.baseUrl);
|
||||
const embeddings = new OpenAIEmbeddings(
|
||||
{
|
||||
modelName: serverConfig.ragEmbeddingModel ?? "text-embedding-3-large",
|
||||
openAIApiKey: apiKey,
|
||||
const embeddings = new OpenAIEmbeddings({
|
||||
modelName: serverConfig.ragEmbeddingModel ?? "text-embedding-3-large",
|
||||
openAIApiKey: apiKey,
|
||||
configuration: {
|
||||
baseURL: baseUrl,
|
||||
},
|
||||
{ basePath: baseUrl },
|
||||
);
|
||||
});
|
||||
// const vectorStore = await PineconeStore.fromExistingIndex(embeddings, {
|
||||
// pineconeIndex,
|
||||
// });
|
||||
|
||||
@@ -94,13 +94,11 @@ async function handle(req: NextRequest) {
|
||||
baseUrl: process.env.OLLAMA_BASE_URL,
|
||||
});
|
||||
} else {
|
||||
embeddings = new OpenAIEmbeddings(
|
||||
{
|
||||
modelName: serverConfig.ragEmbeddingModel,
|
||||
openAIApiKey: apiKey,
|
||||
},
|
||||
{ basePath: baseUrl },
|
||||
);
|
||||
embeddings = new OpenAIEmbeddings({
|
||||
modelName: serverConfig.ragEmbeddingModel,
|
||||
openAIApiKey: apiKey,
|
||||
configuration: { baseURL: baseUrl },
|
||||
});
|
||||
}
|
||||
// https://js.langchain.com/docs/integrations/vectorstores/pinecone
|
||||
// https://js.langchain.com/docs/integrations/vectorstores/qdrant
|
||||
@@ -181,12 +179,10 @@ async function handle(req: NextRequest) {
|
||||
}
|
||||
|
||||
/**
 * Wraps the bytes of a Node.js `Buffer` in a `Blob`.
 *
 * @param buffer - Source bytes. Only the bytes that belong to this buffer
 *   are used, even when it is a view over a larger backing `ArrayBuffer`.
 * @param mimeType - Optional MIME type for the blob; defaults to `""`.
 * @returns A `Blob` holding a copy of the buffer's bytes.
 */
function bufferToBlob(buffer: Buffer, mimeType?: string): Blob {
  // Copy into a fresh Uint8Array: the copy owns a plain ArrayBuffer of exactly
  // `buffer.byteLength` bytes, so no byteOffset arithmetic on a (possibly
  // shared or pooled) backing store is needed.
  const arrayBuffer = new Uint8Array(buffer).buffer;
  return new Blob([arrayBuffer], { type: mimeType ?? "" });
}
|
||||
|
||||
function getOpenAIApiKey(token: string) {
|
||||
const serverConfig = getServerSideConfig();
|
||||
const isApiKey = !token.startsWith(ACCESS_CODE_PREFIX);
|
||||
|
||||
@@ -39,7 +39,11 @@ import {
|
||||
ChatPromptTemplate,
|
||||
MessagesPlaceholder,
|
||||
} from "@langchain/core/prompts";
|
||||
import { ChatOpenAI, OpenAIEmbeddings } from "@langchain/openai";
|
||||
import {
|
||||
AzureChatOpenAI,
|
||||
ChatOpenAI,
|
||||
OpenAIEmbeddings,
|
||||
} from "@langchain/openai";
|
||||
import { ChatAnthropic } from "@langchain/anthropic";
|
||||
import {
|
||||
BaseMessage,
|
||||
@@ -74,6 +78,8 @@ export interface RequestBody {
|
||||
returnIntermediateSteps: boolean;
|
||||
useTools: (undefined | string)[];
|
||||
provider: ServiceProvider;
|
||||
max_tokens?: number;
|
||||
max_completion_tokens?: number;
|
||||
}
|
||||
|
||||
export class ResponseBody {
|
||||
@@ -254,14 +260,14 @@ export class AgentApi {
|
||||
},
|
||||
});
|
||||
}
|
||||
return new ChatOpenAI(
|
||||
{
|
||||
temperature: 0,
|
||||
modelName: reqBody.model,
|
||||
openAIApiKey: apiKey,
|
||||
return new ChatOpenAI({
|
||||
temperature: 0,
|
||||
modelName: reqBody.model,
|
||||
openAIApiKey: apiKey,
|
||||
configuration: {
|
||||
baseURL: baseUrl,
|
||||
},
|
||||
{ basePath: baseUrl },
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
getToolEmbeddings(reqBody: RequestBody, apiKey: string, baseUrl: string) {
|
||||
@@ -275,19 +281,19 @@ export class AgentApi {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
return new OpenAIEmbeddings(
|
||||
{
|
||||
openAIApiKey: apiKey,
|
||||
return new OpenAIEmbeddings({
|
||||
openAIApiKey: apiKey,
|
||||
configuration: {
|
||||
baseURL: baseUrl,
|
||||
},
|
||||
{ basePath: baseUrl },
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
getLLM(reqBody: RequestBody, apiKey: string, baseUrl: string) {
|
||||
const serverConfig = getServerSideConfig();
|
||||
if (reqBody.isAzure || serverConfig.isAzure) {
|
||||
console.log("[use Azure ChatOpenAI]");
|
||||
return new ChatOpenAI({
|
||||
return new AzureChatOpenAI({
|
||||
temperature: reqBody.temperature,
|
||||
streaming: reqBody.stream,
|
||||
topP: reqBody.top_p,
|
||||
@@ -299,22 +305,26 @@ export class AgentApi {
|
||||
: serverConfig.azureApiVersion,
|
||||
azureOpenAIApiDeploymentName: reqBody.model,
|
||||
azureOpenAIBasePath: baseUrl,
|
||||
maxTokens: reqBody.max_tokens,
|
||||
maxCompletionTokens: reqBody.max_completion_tokens,
|
||||
});
|
||||
}
|
||||
if (reqBody.provider === ServiceProvider.OpenAI) {
|
||||
console.log("[use ChatOpenAI]");
|
||||
return new ChatOpenAI(
|
||||
{
|
||||
modelName: reqBody.model,
|
||||
openAIApiKey: apiKey,
|
||||
temperature: reqBody.temperature,
|
||||
streaming: reqBody.stream,
|
||||
topP: reqBody.top_p,
|
||||
presencePenalty: reqBody.presence_penalty,
|
||||
frequencyPenalty: reqBody.frequency_penalty,
|
||||
return new ChatOpenAI({
|
||||
modelName: reqBody.model,
|
||||
openAIApiKey: apiKey,
|
||||
temperature: reqBody.temperature,
|
||||
streaming: reqBody.stream,
|
||||
topP: reqBody.top_p,
|
||||
presencePenalty: reqBody.presence_penalty,
|
||||
frequencyPenalty: reqBody.frequency_penalty,
|
||||
maxTokens: reqBody.max_tokens,
|
||||
maxCompletionTokens: reqBody.max_completion_tokens,
|
||||
configuration: {
|
||||
baseURL: baseUrl,
|
||||
},
|
||||
{ basePath: baseUrl },
|
||||
);
|
||||
});
|
||||
}
|
||||
if (reqBody.provider === ServiceProvider.Anthropic) {
|
||||
console.log("[use ChatAnthropic]");
|
||||
@@ -439,11 +449,16 @@ export class AgentApi {
|
||||
});
|
||||
|
||||
const pastMessages = new Array();
|
||||
|
||||
const isO1OrO3 =
|
||||
reqBody.model.startsWith("o1") || reqBody.model.startsWith("o3");
|
||||
reqBody.messages
|
||||
.slice(0, reqBody.messages.length - 1)
|
||||
.forEach((message) => {
|
||||
if (message.role === "system" && typeof message.content === "string")
|
||||
if (
|
||||
!isO1OrO3 &&
|
||||
message.role === "system" &&
|
||||
typeof message.content === "string"
|
||||
)
|
||||
pastMessages.push(new SystemMessage(message.content));
|
||||
if (message.role === "user")
|
||||
typeof message.content === "string"
|
||||
@@ -458,6 +473,15 @@ export class AgentApi {
|
||||
pastMessages.push(new AIMessage(message.content));
|
||||
});
|
||||
|
||||
reqBody.temperature = !isO1OrO3 ? reqBody.temperature : 1;
|
||||
reqBody.presence_penalty = !isO1OrO3 ? reqBody.presence_penalty : 0;
|
||||
reqBody.frequency_penalty = !isO1OrO3 ? reqBody.frequency_penalty : 0;
|
||||
reqBody.top_p = !isO1OrO3 ? reqBody.top_p : 1;
|
||||
|
||||
if (isO1OrO3) {
|
||||
reqBody.max_completion_tokens = reqBody.max_tokens;
|
||||
}
|
||||
|
||||
let llm = this.getLLM(reqBody, apiKey, baseUrl);
|
||||
|
||||
const MEMORY_KEY = "chat_history";
|
||||
|
||||
@@ -49,14 +49,11 @@ async function handle(req: NextRequest) {
|
||||
baseUrl: process.env.OLLAMA_BASE_URL,
|
||||
});
|
||||
} else {
|
||||
ragEmbeddings = new OpenAIEmbeddings(
|
||||
{
|
||||
modelName:
|
||||
process.env.RAG_EMBEDDING_MODEL ?? "text-embedding-3-large",
|
||||
openAIApiKey: apiKey,
|
||||
},
|
||||
{ basePath: baseUrl },
|
||||
);
|
||||
ragEmbeddings = new OpenAIEmbeddings({
|
||||
modelName: process.env.RAG_EMBEDDING_MODEL ?? "text-embedding-3-large",
|
||||
openAIApiKey: apiKey,
|
||||
configuration: { baseURL: baseUrl },
|
||||
});
|
||||
}
|
||||
|
||||
var dalleCallback = async (data: string) => {
|
||||
|
||||
@@ -21,6 +21,7 @@ import {
|
||||
preProcessImageAndWebReferenceContent,
|
||||
preProcessImageContent,
|
||||
stream,
|
||||
streamWithThink,
|
||||
} from "@/app/utils/chat";
|
||||
import { cloudflareAIGatewayUrl } from "@/app/utils/cloudflare";
|
||||
import { DalleSize, DalleQuality, DalleStyle } from "@/app/typing";
|
||||
@@ -49,6 +50,7 @@ import {
|
||||
isVisionModel,
|
||||
isDalle3 as _isDalle3,
|
||||
getWebReferenceMessageTextContent,
|
||||
getTimeoutMSByModel,
|
||||
} from "@/app/utils";
|
||||
|
||||
export interface OpenAIListModelResponse {
|
||||
@@ -73,6 +75,7 @@ export interface RequestPayload {
|
||||
frequency_penalty: number;
|
||||
top_p: number;
|
||||
max_tokens?: number;
|
||||
max_completion_tokens?: number;
|
||||
}
|
||||
|
||||
export interface DalleRequestPayload {
|
||||
@@ -223,7 +226,9 @@ export class ChatGPTApi implements LLMApi {
|
||||
let requestPayload: RequestPayload | DalleRequestPayload;
|
||||
|
||||
const isDalle3 = _isDalle3(options.config.model);
|
||||
const isO1 = options.config.model.startsWith("o1");
|
||||
const isO1OrO3 =
|
||||
options.config.model.startsWith("o1") ||
|
||||
options.config.model.startsWith("o3");
|
||||
if (isDalle3) {
|
||||
const prompt = getMessageTextContent(
|
||||
options.messages.slice(-1)?.pop() as any,
|
||||
@@ -245,23 +250,28 @@ export class ChatGPTApi implements LLMApi {
|
||||
const content = visionModel
|
||||
? await preProcessImageAndWebReferenceContent(v)
|
||||
: getWebReferenceMessageTextContent(v);
|
||||
if (!(isO1 && v.role === "system"))
|
||||
if (!(isO1OrO3 && v.role === "system"))
|
||||
messages.push({ role: v.role, content });
|
||||
}
|
||||
|
||||
// O1 does not yet support images, tools (plugins in ChatGPTNextWeb), system messages, stream, logprobs, temperature, top_p, n, presence_penalty, or frequency_penalty.
|
||||
requestPayload = {
|
||||
messages,
|
||||
stream: !isO1 ? options.config.stream : false,
|
||||
stream: options.config.stream,
|
||||
model: modelConfig.model,
|
||||
temperature: !isO1 ? modelConfig.temperature : 1,
|
||||
presence_penalty: !isO1 ? modelConfig.presence_penalty : 0,
|
||||
frequency_penalty: !isO1 ? modelConfig.frequency_penalty : 0,
|
||||
top_p: !isO1 ? modelConfig.top_p : 1,
|
||||
temperature: !isO1OrO3 ? modelConfig.temperature : 1,
|
||||
presence_penalty: !isO1OrO3 ? modelConfig.presence_penalty : 0,
|
||||
frequency_penalty: !isO1OrO3 ? modelConfig.frequency_penalty : 0,
|
||||
top_p: !isO1OrO3 ? modelConfig.top_p : 1,
|
||||
// max_tokens: Math.max(modelConfig.max_tokens, 1024),
|
||||
// Please do not ask me why not send max_tokens, no reason, this param is just shit, I dont want to explain anymore.
|
||||
};
|
||||
|
||||
// O1 uses max_completion_tokens to control the number of tokens (https://platform.openai.com/docs/guides/reasoning#controlling-costs)
|
||||
if (isO1OrO3) {
|
||||
requestPayload["max_completion_tokens"] = modelConfig.max_tokens;
|
||||
}
|
||||
|
||||
// add max_tokens to vision model
|
||||
if (visionModel) {
|
||||
requestPayload["max_tokens"] = Math.max(modelConfig.max_tokens, 4000);
|
||||
@@ -270,7 +280,7 @@ export class ChatGPTApi implements LLMApi {
|
||||
|
||||
console.log("[Request] openai payload: ", requestPayload);
|
||||
|
||||
const shouldStream = !isDalle3 && !!options.config.stream && !isO1;
|
||||
const shouldStream = !isDalle3 && !!options.config.stream;
|
||||
const controller = new AbortController();
|
||||
options.onController?.(controller);
|
||||
|
||||
@@ -307,15 +317,16 @@ export class ChatGPTApi implements LLMApi {
|
||||
);
|
||||
}
|
||||
if (shouldStream) {
|
||||
let index = -1;
|
||||
// const [tools, funcs] = usePluginStore
|
||||
// .getState()
|
||||
// .getAsTools(
|
||||
// useChatStore.getState().currentSession().mask?.plugin || [],
|
||||
// );
|
||||
// console.log("getAsTools", tools, funcs);
|
||||
const tools = null;
|
||||
const funcs: Record<string, Function> = {};
|
||||
stream(
|
||||
// console.log("getAsTools", tools, funcs);
|
||||
streamWithThink(
|
||||
chatPath,
|
||||
requestPayload,
|
||||
getHeaders(),
|
||||
@@ -330,14 +341,18 @@ export class ChatGPTApi implements LLMApi {
|
||||
delta: {
|
||||
content: string;
|
||||
tool_calls: ChatMessageTool[];
|
||||
reasoning_content: string | null;
|
||||
};
|
||||
}>;
|
||||
|
||||
if (!choices?.length) return { isThinking: false, content: "" };
|
||||
|
||||
const tool_calls = choices[0]?.delta?.tool_calls;
|
||||
if (tool_calls?.length > 0) {
|
||||
const index = tool_calls[0]?.index;
|
||||
const id = tool_calls[0]?.id;
|
||||
const args = tool_calls[0]?.function?.arguments;
|
||||
if (id) {
|
||||
index += 1;
|
||||
runTools.push({
|
||||
id,
|
||||
type: tool_calls[0]?.type,
|
||||
@@ -351,7 +366,37 @@ export class ChatGPTApi implements LLMApi {
|
||||
runTools[index]["function"]["arguments"] += args;
|
||||
}
|
||||
}
|
||||
return choices[0]?.delta?.content;
|
||||
|
||||
const reasoning = choices[0]?.delta?.reasoning_content;
|
||||
const content = choices[0]?.delta?.content;
|
||||
|
||||
// Skip if both content and reasoning_content are empty or null
|
||||
if (
|
||||
(!reasoning || reasoning.length === 0) &&
|
||||
(!content || content.length === 0)
|
||||
) {
|
||||
return {
|
||||
isThinking: false,
|
||||
content: "",
|
||||
};
|
||||
}
|
||||
|
||||
if (reasoning && reasoning.length > 0) {
|
||||
return {
|
||||
isThinking: true,
|
||||
content: reasoning,
|
||||
};
|
||||
} else if (content && content.length > 0) {
|
||||
return {
|
||||
isThinking: false,
|
||||
content: content,
|
||||
};
|
||||
}
|
||||
|
||||
return {
|
||||
isThinking: false,
|
||||
content: "",
|
||||
};
|
||||
},
|
||||
// processToolMessage, include tool_calls message and tool call results
|
||||
(
|
||||
@@ -359,6 +404,8 @@ export class ChatGPTApi implements LLMApi {
|
||||
toolCallMessage: any,
|
||||
toolCallResult: any[],
|
||||
) => {
|
||||
// reset index value
|
||||
index = -1;
|
||||
// @ts-ignore
|
||||
requestPayload?.messages?.splice(
|
||||
// @ts-ignore
|
||||
@@ -381,7 +428,7 @@ export class ChatGPTApi implements LLMApi {
|
||||
// make a fetch request
|
||||
const requestTimeoutId = setTimeout(
|
||||
() => controller.abort(),
|
||||
isDalle3 || isO1 ? REQUEST_TIMEOUT_MS * 2 : REQUEST_TIMEOUT_MS, // dalle3 using b64_json is slow.
|
||||
getTimeoutMSByModel(options.config.model),
|
||||
);
|
||||
|
||||
const res = await fetch(chatPath, chatPayload);
|
||||
|
||||
Reference in New Issue
Block a user