This commit is contained in:
Hk-Gosuto
2025-03-07 16:20:06 +08:00
parent f704e7a271
commit 48fe84024f
7 changed files with 293 additions and 103 deletions

View File

@@ -21,6 +21,7 @@ import {
preProcessImageAndWebReferenceContent,
preProcessImageContent,
stream,
streamWithThink,
} from "@/app/utils/chat";
import { cloudflareAIGatewayUrl } from "@/app/utils/cloudflare";
import { DalleSize, DalleQuality, DalleStyle } from "@/app/typing";
@@ -49,6 +50,7 @@ import {
isVisionModel,
isDalle3 as _isDalle3,
getWebReferenceMessageTextContent,
getTimeoutMSByModel,
} from "@/app/utils";
export interface OpenAIListModelResponse {
@@ -73,6 +75,7 @@ export interface RequestPayload {
frequency_penalty: number;
top_p: number;
max_tokens?: number;
max_completion_tokens?: number;
}
export interface DalleRequestPayload {
@@ -223,7 +226,9 @@ export class ChatGPTApi implements LLMApi {
let requestPayload: RequestPayload | DalleRequestPayload;
const isDalle3 = _isDalle3(options.config.model);
const isO1 = options.config.model.startsWith("o1");
const isO1OrO3 =
options.config.model.startsWith("o1") ||
options.config.model.startsWith("o3");
if (isDalle3) {
const prompt = getMessageTextContent(
options.messages.slice(-1)?.pop() as any,
@@ -245,23 +250,28 @@ export class ChatGPTApi implements LLMApi {
const content = visionModel
? await preProcessImageAndWebReferenceContent(v)
: getWebReferenceMessageTextContent(v);
if (!(isO1 && v.role === "system"))
if (!(isO1OrO3 && v.role === "system"))
messages.push({ role: v.role, content });
}
// O1 not support image, tools (plugin in ChatGPTNextWeb) and system, stream, logprobs, temperature, top_p, n, presence_penalty, frequency_penalty yet.
requestPayload = {
messages,
stream: !isO1 ? options.config.stream : false,
stream: options.config.stream,
model: modelConfig.model,
temperature: !isO1 ? modelConfig.temperature : 1,
presence_penalty: !isO1 ? modelConfig.presence_penalty : 0,
frequency_penalty: !isO1 ? modelConfig.frequency_penalty : 0,
top_p: !isO1 ? modelConfig.top_p : 1,
temperature: !isO1OrO3 ? modelConfig.temperature : 1,
presence_penalty: !isO1OrO3 ? modelConfig.presence_penalty : 0,
frequency_penalty: !isO1OrO3 ? modelConfig.frequency_penalty : 0,
top_p: !isO1OrO3 ? modelConfig.top_p : 1,
// max_tokens: Math.max(modelConfig.max_tokens, 1024),
// Please do not ask me why not send max_tokens, no reason, this param is just shit, I dont want to explain anymore.
};
// O1 使用 max_completion_tokens 控制token数 (https://platform.openai.com/docs/guides/reasoning#controlling-costs)
if (isO1OrO3) {
requestPayload["max_completion_tokens"] = modelConfig.max_tokens;
}
// add max_tokens to vision model
if (visionModel) {
requestPayload["max_tokens"] = Math.max(modelConfig.max_tokens, 4000);
@@ -270,7 +280,7 @@ export class ChatGPTApi implements LLMApi {
console.log("[Request] openai payload: ", requestPayload);
const shouldStream = !isDalle3 && !!options.config.stream && !isO1;
const shouldStream = !isDalle3 && !!options.config.stream;
const controller = new AbortController();
options.onController?.(controller);
@@ -307,15 +317,16 @@ export class ChatGPTApi implements LLMApi {
);
}
if (shouldStream) {
let index = -1;
// const [tools, funcs] = usePluginStore
// .getState()
// .getAsTools(
// useChatStore.getState().currentSession().mask?.plugin || [],
// );
// console.log("getAsTools", tools, funcs);
const tools = null;
const funcs: Record<string, Function> = {};
stream(
// console.log("getAsTools", tools, funcs);
streamWithThink(
chatPath,
requestPayload,
getHeaders(),
@@ -330,14 +341,18 @@ export class ChatGPTApi implements LLMApi {
delta: {
content: string;
tool_calls: ChatMessageTool[];
reasoning_content: string | null;
};
}>;
if (!choices?.length) return { isThinking: false, content: "" };
const tool_calls = choices[0]?.delta?.tool_calls;
if (tool_calls?.length > 0) {
const index = tool_calls[0]?.index;
const id = tool_calls[0]?.id;
const args = tool_calls[0]?.function?.arguments;
if (id) {
index += 1;
runTools.push({
id,
type: tool_calls[0]?.type,
@@ -351,7 +366,37 @@ export class ChatGPTApi implements LLMApi {
runTools[index]["function"]["arguments"] += args;
}
}
return choices[0]?.delta?.content;
const reasoning = choices[0]?.delta?.reasoning_content;
const content = choices[0]?.delta?.content;
// Skip if both content and reasoning_content are empty or null
if (
(!reasoning || reasoning.length === 0) &&
(!content || content.length === 0)
) {
return {
isThinking: false,
content: "",
};
}
if (reasoning && reasoning.length > 0) {
return {
isThinking: true,
content: reasoning,
};
} else if (content && content.length > 0) {
return {
isThinking: false,
content: content,
};
}
return {
isThinking: false,
content: "",
};
},
// processToolMessage, include tool_calls message and tool call results
(
@@ -359,6 +404,8 @@ export class ChatGPTApi implements LLMApi {
toolCallMessage: any,
toolCallResult: any[],
) => {
// reset index value
index = -1;
// @ts-ignore
requestPayload?.messages?.splice(
// @ts-ignore
@@ -381,7 +428,7 @@ export class ChatGPTApi implements LLMApi {
// make a fetch request
const requestTimeoutId = setTimeout(
() => controller.abort(),
isDalle3 || isO1 ? REQUEST_TIMEOUT_MS * 2 : REQUEST_TIMEOUT_MS, // dalle3 using b64_json is slow.
getTimeoutMSByModel(options.config.model),
);
const res = await fetch(chatPath, chatPayload);