mirror of
https://github.com/ChatGPTNextWeb/ChatGPT-Next-Web.git
synced 2025-11-13 04:33:42 +08:00
fix: 修复 vercel 部署接口超时的问题
This commit is contained in:
534
app/api/langchain-tools/duckduckgo_search.ts
Normal file
534
app/api/langchain-tools/duckduckgo_search.ts
Normal file
@@ -0,0 +1,534 @@
|
||||
import { decode } from "html-entities";
|
||||
import { convert as htmlToText } from "html-to-text";
|
||||
import { Tool } from "langchain/tools";
|
||||
|
||||
const SEARCH_REGEX =
|
||||
/DDG\.pageLayout\.load\('d',(\[.+\])\);DDG\.duckbar\.load\('images'/;
|
||||
const IMAGES_REGEX =
|
||||
/;DDG\.duckbar\.load\('images', ({"ads":.+"vqd":{".+":"\d-\d+-\d+"}})\);DDG\.duckbar\.load\('news/;
|
||||
const NEWS_REGEX =
|
||||
/;DDG\.duckbar\.load\('news', ({"ads":.+"vqd":{".+":"\d-\d+-\d+"}})\);DDG\.duckbar\.load\('videos/;
|
||||
const VIDEOS_REGEX =
|
||||
/;DDG\.duckbar\.load\('videos', ({"ads":.+"vqd":{".+":"\d-\d+-\d+"}})\);DDG\.duckbar\.loadModule\('related_searches/;
|
||||
const RELATED_SEARCHES_REGEX =
|
||||
/DDG\.duckbar\.loadModule\('related_searches', ({"ads":.+"vqd":{".+":"\d-\d+-\d+"}})\);DDG\.duckbar\.load\('products/;
|
||||
const VQD_REGEX = /vqd=['"](\d+-\d+(?:-\d+)?)['"]/;
|
||||
|
||||
interface CallbackSearchResult {
|
||||
/** Website description */
|
||||
a: string;
|
||||
/** Unknown */
|
||||
ae: null;
|
||||
/** ddg!bang information (ex. w Wikipedia en.wikipedia.org) */
|
||||
b?: string;
|
||||
/** URL */
|
||||
c: string;
|
||||
/** URL of some sort. */
|
||||
d: string;
|
||||
/** Class name associations. */
|
||||
da?: string;
|
||||
/** Unknown */
|
||||
h: number;
|
||||
/** Website hostname */
|
||||
i: string;
|
||||
/** Unknown */
|
||||
k: null;
|
||||
/** Unknown */
|
||||
m: number;
|
||||
/** Unknown */
|
||||
o: number;
|
||||
/** Unknown */
|
||||
p: number;
|
||||
/** Unknown */
|
||||
s: string;
|
||||
/** Website Title */
|
||||
t: string;
|
||||
/** Website URL */
|
||||
u: string;
|
||||
}
|
||||
|
||||
interface CallbackNextSearch {
|
||||
/** URL to the next page of results */
|
||||
n: string;
|
||||
}
|
||||
|
||||
interface CallbackDuckbarPayload<T> {
|
||||
ads: null | any[];
|
||||
query: string;
|
||||
queryEncoded: string;
|
||||
response_type: string;
|
||||
results: T[];
|
||||
vqd: {
|
||||
[query: string]: string;
|
||||
};
|
||||
}
|
||||
|
||||
interface DuckbarImageResult {
|
||||
/** The height of the image in pixels. */
|
||||
height: number;
|
||||
/** The image URL. */
|
||||
image: string;
|
||||
/** The source of the image. */
|
||||
source: string;
|
||||
/** The thumbnail URL. */
|
||||
thumbnail: string;
|
||||
/** The title (or caption) of the image. */
|
||||
title: string;
|
||||
/** The website URL of where the image came from. */
|
||||
url: string;
|
||||
/** The width of the image in pixels. */
|
||||
width: number;
|
||||
}
|
||||
|
||||
interface DuckbarVideoResult {
|
||||
/** URL of the video */
|
||||
content: string;
|
||||
/** Description of the video */
|
||||
description: string;
|
||||
/** Duration of the video */
|
||||
duration: string;
|
||||
/** Embed HTML for the video */
|
||||
embed_html: string;
|
||||
/** Embed URL for the video */
|
||||
embed_url: string;
|
||||
/** Thumbnail images of the video */
|
||||
images: {
|
||||
large: string;
|
||||
medium: string;
|
||||
motion: string;
|
||||
small: string;
|
||||
};
|
||||
/** Where this search result came from */
|
||||
provider: string;
|
||||
/** ISO timestamp of the upload */
|
||||
published: string;
|
||||
/** What site the video was on */
|
||||
publisher: string;
|
||||
/** Various statistics */
|
||||
statistics: {
|
||||
/** View count of the video */
|
||||
viewCount: number | null;
|
||||
};
|
||||
/** Title of the video */
|
||||
title: string;
|
||||
/** Name of the video uploader(?) */
|
||||
uploader: string;
|
||||
}
|
||||
|
||||
interface DuckbarRelatedSearch {
|
||||
display_text: string;
|
||||
text: string;
|
||||
web_search_url: string;
|
||||
}
|
||||
|
||||
interface DuckbarNewsResult {
|
||||
date: number;
|
||||
excerpt: string;
|
||||
image?: string;
|
||||
relative_time: string;
|
||||
syndicate: string;
|
||||
title: string;
|
||||
url: string;
|
||||
use_relevancy: number;
|
||||
is_old?: number;
|
||||
fetch_image?: number;
|
||||
}
|
||||
|
||||
interface SearchResults {
|
||||
/** Whether there were no results found. */
|
||||
noResults: boolean;
|
||||
/** The VQD of the search query. */
|
||||
vqd: string;
|
||||
/** The web results of the search. */
|
||||
results: SearchResult[];
|
||||
/** The image results of the search. */
|
||||
images?: DuckbarImageResult[];
|
||||
/** The news article results of the search. */
|
||||
news?: NewsResult[];
|
||||
/** The video results of the search. */
|
||||
videos?: VideoResult[];
|
||||
/** The related searches of the query. */
|
||||
related?: RelatedResult[];
|
||||
}
|
||||
|
||||
interface VideoResult {
|
||||
/** The URL of the video. */
|
||||
url: string;
|
||||
/** The title of the video. */
|
||||
title: string;
|
||||
/** The description of the video. */
|
||||
description: string;
|
||||
/** The image URL of the video. */
|
||||
image: string;
|
||||
/** The duration of the video. (i.e. "9:20") */
|
||||
duration: string;
|
||||
/** The ISO timestamp of when the video was published. */
|
||||
published: string;
|
||||
/** Where the video was publised on. (i.e. "YouTube") */
|
||||
publishedOn: string;
|
||||
/** The name of who uploaded the video. */
|
||||
publisher: string;
|
||||
/** The view count of the video. */
|
||||
viewCount?: number;
|
||||
}
|
||||
|
||||
interface NewsResult {
|
||||
/** The timestamp of when the article was created. */
|
||||
date: number;
|
||||
/** An except of the article. */
|
||||
excerpt: string;
|
||||
/** The image URL used in the article. */
|
||||
image?: string;
|
||||
/** The relative time of when the article was posted, in human readable format. */
|
||||
relativeTime: string;
|
||||
/** Where this article was indexed from. */
|
||||
syndicate: string;
|
||||
/** The title of the article. */
|
||||
title: string;
|
||||
/** The URL of the article. */
|
||||
url: string;
|
||||
/** Whether this article is classified as old. */
|
||||
isOld: boolean;
|
||||
}
|
||||
|
||||
interface SearchResult {
|
||||
/** The hostname of the website. (i.e. "google.com") */
|
||||
hostname: string;
|
||||
/** The URL of the result. */
|
||||
url: string;
|
||||
/** The title of the result. */
|
||||
title: string;
|
||||
/**
|
||||
* The sanitized description of the result.
|
||||
* Bold tags will still be present in this string.
|
||||
*/
|
||||
description: string;
|
||||
/** The description of the result. */
|
||||
rawDescription: string;
|
||||
/** The icon of the website. */
|
||||
icon: string;
|
||||
/** The ddg!bang information of the website, if any. */
|
||||
bang?: SearchResultBang;
|
||||
}
|
||||
|
||||
interface SearchResultBang {
|
||||
/** The prefix of the bang. (i.e. "w" for !w) */
|
||||
prefix: string;
|
||||
/** The title of the bang. */
|
||||
title: string;
|
||||
/** The domain of the bang. */
|
||||
domain: string;
|
||||
}
|
||||
|
||||
interface RelatedResult {
|
||||
text: string;
|
||||
raw: string;
|
||||
}
|
||||
|
||||
enum SearchTimeType {
|
||||
/** From any time. */
|
||||
ALL = "a",
|
||||
/** From the past day. */
|
||||
DAY = "d",
|
||||
/** From the past week. */
|
||||
WEEK = "w",
|
||||
/** From the past month. */
|
||||
MONTH = "m",
|
||||
/** From the past year. */
|
||||
YEAR = "y",
|
||||
}
|
||||
|
||||
interface SearchOptions {
|
||||
/** The safe search type of the search. */
|
||||
safeSearch?: SafeSearchType;
|
||||
/** The time range of the searches, can be a SearchTimeType or a date range ("2021-03-16..2021-03-30") */
|
||||
time?: SearchTimeType | string;
|
||||
/** The locale(?) of the search. Defaults to "en-us". */
|
||||
locale?: string;
|
||||
/** The region of the search. Defaults to "wt-wt" or all regions. */
|
||||
region?: string;
|
||||
/** The market region(?) of the search. Defaults to "US". */
|
||||
marketRegion?: string;
|
||||
/** The number to offset the results to. */
|
||||
offset?: number;
|
||||
/**
|
||||
* The string that acts like a key to a search.
|
||||
* Set this if you made a search with the same query.
|
||||
*/
|
||||
vqd?: string;
|
||||
}
|
||||
|
||||
enum SafeSearchType {
|
||||
/** Strict filtering, no NSFW content. */
|
||||
STRICT = 0,
|
||||
/** Moderate filtering. */
|
||||
MODERATE = -1,
|
||||
/** No filtering. */
|
||||
OFF = -2,
|
||||
}
|
||||
|
||||
const defaultOptions: SearchOptions = {
|
||||
safeSearch: SafeSearchType.OFF,
|
||||
time: SearchTimeType.ALL,
|
||||
locale: "en-us",
|
||||
region: "wt-wt",
|
||||
offset: 0,
|
||||
marketRegion: "us",
|
||||
};
|
||||
|
||||
async function search(
|
||||
query: string,
|
||||
options?: SearchOptions,
|
||||
): Promise<SearchResults> {
|
||||
if (!query) throw new Error("Query cannot be empty!");
|
||||
if (!options) options = defaultOptions;
|
||||
else options = sanityCheck(options);
|
||||
|
||||
let vqd = options.vqd!;
|
||||
if (!vqd) vqd = await getVQD(query, "web");
|
||||
|
||||
const queryObject: Record<string, string> = {
|
||||
q: query,
|
||||
...(options.safeSearch !== SafeSearchType.STRICT ? { t: "D" } : {}),
|
||||
l: options.locale!,
|
||||
...(options.safeSearch === SafeSearchType.STRICT ? { p: "1" } : {}),
|
||||
kl: options.region || "wt-wt",
|
||||
s: String(options.offset),
|
||||
dl: "en",
|
||||
ct: "US",
|
||||
ss_mkt: options.marketRegion!,
|
||||
df: options.time! as string,
|
||||
vqd,
|
||||
...(options.safeSearch !== SafeSearchType.STRICT
|
||||
? { ex: String(options.safeSearch) }
|
||||
: {}),
|
||||
sp: "1",
|
||||
bpa: "1",
|
||||
biaexp: "b",
|
||||
msvrtexp: "b",
|
||||
...(options.safeSearch === SafeSearchType.STRICT
|
||||
? {
|
||||
videxp: "a",
|
||||
nadse: "b",
|
||||
eclsexp: "a",
|
||||
stiaexp: "a",
|
||||
tjsexp: "b",
|
||||
related: "b",
|
||||
msnexp: "a",
|
||||
}
|
||||
: {
|
||||
nadse: "b",
|
||||
eclsexp: "b",
|
||||
tjsexp: "b",
|
||||
// cdrexp: 'b'
|
||||
}),
|
||||
};
|
||||
|
||||
const response = await fetch(
|
||||
`https://links.duckduckgo.com/d.js?${queryString(queryObject)}`,
|
||||
);
|
||||
const data = await response.text();
|
||||
|
||||
if (data.includes("DDG.deep.is506"))
|
||||
throw new Error("A server error occurred!");
|
||||
|
||||
const searchResults = JSON.parse(
|
||||
SEARCH_REGEX.exec(data)![1].replace(/\t/g, " "),
|
||||
) as (CallbackSearchResult | CallbackNextSearch)[];
|
||||
|
||||
if (searchResults.length === 1 && !("n" in searchResults[0])) {
|
||||
const onlyResult = searchResults[0] as CallbackSearchResult;
|
||||
/* istanbul ignore next */
|
||||
if (
|
||||
(!onlyResult.da && onlyResult.t === "EOF") ||
|
||||
!onlyResult.a ||
|
||||
onlyResult.d === "google.com search"
|
||||
)
|
||||
return {
|
||||
noResults: true,
|
||||
vqd,
|
||||
results: [],
|
||||
};
|
||||
}
|
||||
|
||||
const results: SearchResults = {
|
||||
noResults: false,
|
||||
vqd,
|
||||
results: [],
|
||||
};
|
||||
|
||||
for (const search of searchResults) {
|
||||
if ("n" in search) continue;
|
||||
let bang: SearchResultBang | undefined;
|
||||
if (search.b) {
|
||||
const [prefix, title, domain] = search.b.split("\t");
|
||||
bang = { prefix, title, domain };
|
||||
}
|
||||
results.results.push({
|
||||
title: search.t,
|
||||
description: decode(search.a),
|
||||
rawDescription: search.a,
|
||||
hostname: search.i,
|
||||
icon: `https://external-content.duckduckgo.com/ip3/${search.i}.ico`,
|
||||
url: search.u,
|
||||
bang,
|
||||
});
|
||||
}
|
||||
|
||||
// Images
|
||||
const imagesMatch = IMAGES_REGEX.exec(data);
|
||||
if (imagesMatch) {
|
||||
const imagesResult = JSON.parse(
|
||||
imagesMatch[1].replace(/\t/g, " "),
|
||||
) as CallbackDuckbarPayload<DuckbarImageResult>;
|
||||
results.images = imagesResult.results.map((i) => {
|
||||
i.title = decode(i.title);
|
||||
return i;
|
||||
});
|
||||
}
|
||||
|
||||
// News
|
||||
const newsMatch = NEWS_REGEX.exec(data);
|
||||
if (newsMatch) {
|
||||
const newsResult = JSON.parse(
|
||||
newsMatch[1].replace(/\t/g, " "),
|
||||
) as CallbackDuckbarPayload<DuckbarNewsResult>;
|
||||
results.news = newsResult.results.map((article) => ({
|
||||
date: article.date,
|
||||
excerpt: decode(article.excerpt),
|
||||
image: article.image,
|
||||
relativeTime: article.relative_time,
|
||||
syndicate: article.syndicate,
|
||||
title: decode(article.title),
|
||||
url: article.url,
|
||||
isOld: !!article.is_old,
|
||||
})) as NewsResult[];
|
||||
}
|
||||
|
||||
// Videos
|
||||
const videosMatch = VIDEOS_REGEX.exec(data);
|
||||
if (videosMatch) {
|
||||
const videoResult = JSON.parse(
|
||||
videosMatch[1].replace(/\t/g, " "),
|
||||
) as CallbackDuckbarPayload<DuckbarVideoResult>;
|
||||
results.videos = [];
|
||||
/* istanbul ignore next */
|
||||
for (const video of videoResult.results) {
|
||||
results.videos.push({
|
||||
url: video.content,
|
||||
title: decode(video.title),
|
||||
description: decode(video.description),
|
||||
image:
|
||||
video.images.large ||
|
||||
video.images.medium ||
|
||||
video.images.small ||
|
||||
video.images.motion,
|
||||
duration: video.duration,
|
||||
publishedOn: video.publisher,
|
||||
published: video.published,
|
||||
publisher: video.uploader,
|
||||
viewCount: video.statistics.viewCount || undefined,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Related Searches
|
||||
const relatedMatch = RELATED_SEARCHES_REGEX.exec(data);
|
||||
if (relatedMatch) {
|
||||
const relatedResult = JSON.parse(
|
||||
relatedMatch[1].replace(/\t/g, " "),
|
||||
) as CallbackDuckbarPayload<DuckbarRelatedSearch>;
|
||||
results.related = [];
|
||||
for (const related of relatedResult.results) {
|
||||
results.related.push({
|
||||
text: related.text,
|
||||
raw: related.display_text,
|
||||
});
|
||||
}
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
function queryString(query: Record<string, string>) {
|
||||
return new URLSearchParams(query).toString();
|
||||
}
|
||||
|
||||
async function getVQD(query: string, ia = "web") {
|
||||
try {
|
||||
const response = await fetch(
|
||||
`https://duckduckgo.com/?${queryString({ q: query, ia })}`,
|
||||
);
|
||||
const data = await response.text();
|
||||
return VQD_REGEX.exec(data)![1];
|
||||
} catch (e) {
|
||||
throw new Error(`Failed to get the VQD for query "${query}".`);
|
||||
}
|
||||
}
|
||||
|
||||
function sanityCheck(options: SearchOptions) {
|
||||
options = Object.assign({}, defaultOptions, options);
|
||||
|
||||
if (!(options.safeSearch! in SafeSearchType))
|
||||
throw new TypeError(
|
||||
`${options.safeSearch} is an invalid safe search type!`,
|
||||
);
|
||||
|
||||
/* istanbul ignore next */
|
||||
if (typeof options.safeSearch! === "string")
|
||||
options.safeSearch = SafeSearchType[
|
||||
options.safeSearch!
|
||||
] as any as SafeSearchType;
|
||||
|
||||
if (typeof options.offset !== "number")
|
||||
throw new TypeError(`Search offset is not a number!`);
|
||||
|
||||
if (options.offset! < 0)
|
||||
throw new RangeError("Search offset cannot be below zero!");
|
||||
|
||||
if (
|
||||
options.time &&
|
||||
!Object.values(SearchTimeType).includes(options.time as SearchTimeType) &&
|
||||
!/\d{4}-\d{2}-\d{2}..\d{4}-\d{2}-\d{2}/.test(options.time as string)
|
||||
)
|
||||
throw new TypeError(`${options.time} is an invalid search time!`);
|
||||
|
||||
if (!options.locale || typeof options.locale! !== "string")
|
||||
throw new TypeError("Search locale must be a string!");
|
||||
|
||||
if (!options.region || typeof options.region! !== "string")
|
||||
throw new TypeError("Search region must be a string!");
|
||||
|
||||
if (!options.marketRegion || typeof options.marketRegion! !== "string")
|
||||
throw new TypeError("Search market region must be a string!");
|
||||
|
||||
if (options.vqd && !/\d-\d+-\d+/.test(options.vqd))
|
||||
throw new Error(`${options.vqd} is an invalid VQD!`);
|
||||
|
||||
return options;
|
||||
}
|
||||
|
||||
export class DuckDuckGo extends Tool {
|
||||
name = "duckduckgo_search";
|
||||
maxResults = 4;
|
||||
|
||||
/** @ignore */
|
||||
async _call(input: string) {
|
||||
const searchResults = await search(input, {
|
||||
safeSearch: SafeSearchType.OFF,
|
||||
});
|
||||
|
||||
if (searchResults.noResults) {
|
||||
return "No good search result found";
|
||||
}
|
||||
|
||||
const results = searchResults.results
|
||||
.slice(0, this.maxResults)
|
||||
.map(({ title, description, url }) => htmlToText(description))
|
||||
.join("\n\n");
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
description =
|
||||
"a search engine. useful for when you need to answer questions about current events. input should be a search query.";
|
||||
}
|
||||
@@ -16,7 +16,7 @@ import { BufferMemory, ChatMessageHistory } from "langchain/memory";
|
||||
import { initializeAgentExecutorWithOptions } from "langchain/agents";
|
||||
import { SerpAPI } from "langchain/tools";
|
||||
import { Calculator } from "langchain/tools/calculator";
|
||||
import { DuckDuckGo } from "@/app/api/langchain-tools/duckduckgo";
|
||||
import { DuckDuckGo } from "@/app/api/langchain-tools/duckduckgo_search";
|
||||
import { HttpGetTool } from "@/app/api/langchain-tools/http_get";
|
||||
|
||||
const serverConfig = getServerSideConfig();
|
||||
@@ -220,4 +220,4 @@ async function handle(req: NextRequest) {
|
||||
export const GET = handle;
|
||||
export const POST = handle;
|
||||
|
||||
export const runtime = "nodejs";
|
||||
export const runtime = "edge";
|
||||
|
||||
Reference in New Issue
Block a user