feat(agent-runner): support scoped token counting

2026-06-27 07:54:19 +00:00 · 2026-06-27 01:31:08 +08:00
parent ae49753f74
commit d0f6fe2cec
10 changed files with 302 additions and 15 deletions
@@ -411,6 +411,20 @@ class ProviderAPIRequester(metaclass=abc.ABCMeta):
        """
        pass

+    async def count_tokens(
+        self,
+        model: RuntimeLLMModel,
+        messages: typing.List[provider_message.Message],
+        funcs: typing.List[resource_tool.LLMTool] = None,
+        extra_args: dict[str, typing.Any] = {},
+    ) -> int:
+        """Count model input tokens before invoking the model.
+
+        Requesters should use the same provider/model conversion path as
+        ``invoke_llm`` so the preflight count matches the actual request shape.
+        """
+        raise NotImplementedError('This requester does not support token counting')
+
    async def invoke_llm_stream(
        self,
        query: pipeline_query.Query,
@@ -521,6 +521,33 @@ class LiteLLMRequester(requester.ProviderAPIRequester):

        return args

+    async def count_tokens(
+        self,
+        model: requester.RuntimeLLMModel,
+        messages: typing.List[provider_message.Message],
+        funcs: typing.List[resource_tool.LLMTool] = None,
+        extra_args: dict[str, typing.Any] = {},
+    ) -> int:
+        """Count input tokens with LiteLLM's model-aware tokenizer."""
+        args = await self._build_completion_args(model, messages, funcs, extra_args, stream=False)
+        count_args: dict[str, typing.Any] = {
+            'model': args['model'],
+            'messages': args['messages'],
+        }
+        if 'tools' in args:
+            count_args['tools'] = args['tools']
+        if 'tool_choice' in args:
+            count_args['tool_choice'] = args['tool_choice']
+
+        try:
+            tokens = litellm.token_counter(**count_args)
+        except Exception as e:
+            self._handle_litellm_error(e)
+
+        if isinstance(tokens, bool) or not isinstance(tokens, int) or tokens < 0:
+            raise errors.RequesterError(f'token counter returned invalid value: {tokens!r}')
+        return tokens
+
    async def invoke_llm(
        self,
        query: pipeline_query.Query,