优化token计数

This commit is contained in:
sijinhui
2023-12-22 23:18:19 +08:00
parent 07c48ef7fd
commit b064e16278
7 changed files with 45 additions and 90 deletions

View File

@@ -20,3 +20,22 @@ export function estimateTokenLength(input: string): number {
return tokenLength;
}
// import { get_encoding } from "tiktoken";
/** Minimal view of the tiktoken encoder surface that getTokenLength relies on. */
type Cl100kEncoder = { encode(text: string): ArrayLike<number> };

// Shared encoder instance, built on first use. Constructing a Tiktoken from the
// cl100k_base rank table on every call repeats the same setup work and leaves
// each instance unreleased, so one instance is kept and reused instead.
let sharedCl100kEncoder: Cl100kEncoder | undefined;

/** Returns the shared cl100k_base encoder, constructing it on the first call. */
function getCl100kEncoder(): Cl100kEncoder {
  if (sharedCl100kEncoder !== undefined) {
    return sharedCl100kEncoder;
  }
  // Loaded with require() inside the function, as in the original code, so the
  // encoder table is only pulled in when a token count is actually requested.
  const { Tiktoken } = require("tiktoken/lite");
  const cl100k_base = require("tiktoken/encoders/cl100k_base.json");
  const encoder: Cl100kEncoder = new Tiktoken(
    cl100k_base.bpe_ranks,
    cl100k_base.special_tokens,
    cl100k_base.pat_str,
  );
  sharedCl100kEncoder = encoder;
  return encoder;
}

/**
 * Counts the tokens in `input` using the cl100k_base encoding from tiktoken.
 *
 * @param input - Text to tokenize.
 * @returns The number of tokens produced by encoding `input`; 0 for an empty
 *   string.
 */
export function getTokenLength(input: string): number {
  // Nothing to encode: skip loading or touching the encoder entirely.
  if (input.length === 0) {
    return 0;
  }
  return getCl100kEncoder().encode(input).length;
}