Files
LangBot/tests/unit_tests/pipeline/test_preproc.py
2026-05-19 12:20:28 +08:00

492 lines
18 KiB
Python

"""
Unit tests for PreProcessor pipeline stage.
Tests cover preprocessing behavior including:
- Normal text message processing
- Empty message handling
- Unsupported message segment handling
- Image/file segment behavior
- Model selection and fallback
"""
from __future__ import annotations
import pytest
from unittest.mock import AsyncMock, Mock
from importlib import import_module
from tests.factories import (
FakeApp,
text_query,
empty_query,
image_query,
group_text_query,
)
# Identifier of the agent runner used throughout these tests: it is the ``id``
# of the fake runner descriptor and the key under 'runner_config' in the
# pipeline config built by make_runner_config().
RUNNER_ID = 'plugin:langbot/local-agent/default'
def get_preproc_module():
    """Import and return the PreProcessor stage module on demand.

    The import happens at call time rather than at module load so that
    loading this test file does not run into circular imports.
    """
    module_path = 'langbot.pkg.pipeline.preproc.preproc'
    return import_module(module_path)
def get_entities_module():
    """Import and return the pipeline entities module on demand."""
    module_path = 'langbot.pkg.pipeline.entities'
    return import_module(module_path)
class FakeAgentRunnerRegistry:
    """Stand-in registry that hands back one fixed descriptor for every lookup."""

    def __init__(self, descriptor):
        # The single descriptor returned by every ``get`` call.
        self.descriptor = descriptor

    async def get(self, runner_id, bound_plugins=None):
        """Return the stored descriptor; both arguments are ignored."""
        fixed_descriptor = self.descriptor
        return fixed_descriptor
def make_host_model_runner_descriptor(
    *,
    multimodal_input: bool = True,
    tool_calling: bool = True,
    knowledge_retrieval: bool = True,
):
    """Build an ``AgentRunnerDescriptor`` for the plugin runner ``RUNNER_ID``.

    The three keyword-only flags are copied into the descriptor's
    ``capabilities`` mapping; everything else (config schema, permissions,
    plugin identity) is fixed.
    """
    # Imported at call time, like the other project modules used in this file.
    from langbot.pkg.agent.runner.descriptor import AgentRunnerDescriptor

    schema = [
        {'name': 'model', 'type': 'model-fallback-selector'},
        {'name': 'prompt', 'type': 'prompt-editor', 'default': []},
        {'name': 'knowledge-bases', 'type': 'knowledge-base-multi-selector', 'default': []},
    ]
    capability_flags = {
        'tool_calling': tool_calling,
        'knowledge_retrieval': knowledge_retrieval,
        'multimodal_input': multimodal_input,
    }
    granted_permissions = {
        'models': ['list', 'invoke', 'stream'],
        'tools': ['list', 'detail', 'call'],
        'knowledge_bases': ['list', 'retrieve'],
    }
    return AgentRunnerDescriptor(
        id=RUNNER_ID,
        source='plugin',
        label={'en_US': 'Local Agent'},
        plugin_author='langbot',
        plugin_name='local-agent',
        runner_name='default',
        config_schema=schema,
        capabilities=capability_flags,
        permissions=granted_permissions,
    )
def set_runner_descriptor(app, descriptor=None):
    """Install a fake agent runner registry on ``app``.

    Args:
        app: Object that receives the ``agent_runner_registry`` attribute.
        descriptor: Descriptor the registry should return. When ``None``, a
            default one is built with ``make_host_model_runner_descriptor()``.
    """
    # Compare against None explicitly so that a caller-supplied descriptor that
    # happens to be falsy is still honoured instead of being silently replaced.
    if descriptor is None:
        descriptor = make_host_model_runner_descriptor()
    app.agent_runner_registry = FakeAgentRunnerRegistry(descriptor)
def make_runner_config(
    *,
    primary: str = 'test-model-uuid',
    fallbacks: list[str] | None = None,
    prompt: list[dict] | None = None,
    knowledge_bases: list[str] | None = None,
):
    """Build a pipeline config dict that selects the ``RUNNER_ID`` runner.

    Args:
        primary: UUID stored as the runner's primary model.
        fallbacks: Fallback model UUIDs; a missing or empty value becomes ``[]``.
        prompt: Prompt entries; ``None`` becomes ``[]``.
        knowledge_bases: Knowledge base ids; a missing or empty value becomes ``[]``.

    Returns:
        A dict with the 'ai', 'output' and 'trigger' sections.
    """
    if prompt is None:
        prompt = []
    runner_settings = {
        'model': {'primary': primary, 'fallbacks': fallbacks or []},
        'prompt': prompt,
        'knowledge-bases': knowledge_bases or [],
    }
    ai_section = {
        'runner': {'id': RUNNER_ID},
        'runner_config': {RUNNER_ID: runner_settings},
    }
    return {
        'ai': ai_section,
        'output': {'misc': {'at-sender': False}},
        'trigger': {'misc': {}},
    }
class TestPreProcessorNormalText:
    """Plain-text messages going through the PreProcessor stage."""

    @pytest.mark.asyncio
    async def test_normal_text_continues(self):
        """A plain text message yields a CONTINUE result carrying a new query."""
        preproc_mod = get_preproc_module()
        pipeline_entities = get_entities_module()
        app = FakeApp()

        # Session manager resolves to a 'person' session.
        session = Mock(launcher_type=Mock(value='person'), launcher_id=12345)
        app.sess_mgr.get_session = AsyncMock(return_value=session)

        # Conversation with an empty prompt and no prior messages.
        conversation = Mock(
            prompt=Mock(messages=[], copy=Mock(return_value=Mock(messages=[]))),
            messages=[],
            update_time=Mock(),
            uuid=None,
        )
        app.sess_mgr.get_conversation = AsyncMock(return_value=conversation)

        # Model manager returns a model advertising func_call and vision.
        model = Mock(
            model_entity=Mock(uuid='test-model-uuid', abilities=['func_call', 'vision']),
        )
        app.model_mgr.get_model_by_uuid = AsyncMock(return_value=model)

        # Tool manager exposes no tools.
        app.tool_mgr.get_all_tools = AsyncMock(return_value=[])

        # Plugin connector returns an event context with empty prompts.
        event_ctx = Mock(event=Mock(default_prompt=[], prompt=[]))
        app.plugin_connector.emit_event = AsyncMock(return_value=event_ctx)

        stage = preproc_mod.PreProcessor(app)
        outcome = await stage.process(text_query("hello world"), 'PreProcessor')

        assert outcome.result_type == pipeline_entities.ResultType.CONTINUE
        assert outcome.new_query is not None

    @pytest.mark.asyncio
    async def test_normal_text_sets_user_message(self):
        """The resulting query carries a user_message whose role is 'user'."""
        preproc_mod = get_preproc_module()
        app = FakeApp()

        session = Mock(launcher_type=Mock(value='person'), launcher_id=12345)
        app.sess_mgr.get_session = AsyncMock(return_value=session)

        conversation = Mock(
            prompt=Mock(messages=[], copy=Mock(return_value=Mock(messages=[]))),
            messages=[],
            uuid=None,
        )
        app.sess_mgr.get_conversation = AsyncMock(return_value=conversation)

        model = Mock(model_entity=Mock(uuid='test-model', abilities=['func_call']))
        app.model_mgr.get_model_by_uuid = AsyncMock(return_value=model)
        app.tool_mgr.get_all_tools = AsyncMock(return_value=[])
        set_runner_descriptor(app)

        event_ctx = Mock(event=Mock(default_prompt=[], prompt=[]))
        app.plugin_connector.emit_event = AsyncMock(return_value=event_ctx)

        stage = preproc_mod.PreProcessor(app)
        outcome = await stage.process(text_query("test message"), 'PreProcessor')

        user_message = outcome.new_query.user_message
        assert user_message is not None
        assert user_message.role == 'user'
class TestPreProcessorEmptyMessage:
    """Empty messages going through the PreProcessor stage."""

    @pytest.mark.asyncio
    async def test_empty_message_continues(self):
        """An empty message yields CONTINUE with an empty user_message content list."""
        preproc_mod = get_preproc_module()
        pipeline_entities = get_entities_module()
        app = FakeApp()

        session = Mock(launcher_type=Mock(value='person'), launcher_id=12345)
        app.sess_mgr.get_session = AsyncMock(return_value=session)

        conversation = Mock(
            prompt=Mock(messages=[], copy=Mock(return_value=Mock(messages=[]))),
            messages=[],
            uuid=None,
        )
        app.sess_mgr.get_conversation = AsyncMock(return_value=conversation)

        # No model is resolved and no tools are available in this scenario.
        app.model_mgr.get_model_by_uuid = AsyncMock(return_value=None)
        app.tool_mgr.get_all_tools = AsyncMock(return_value=[])

        event_ctx = Mock(event=Mock(default_prompt=[], prompt=[]))
        app.plugin_connector.emit_event = AsyncMock(return_value=event_ctx)

        stage = preproc_mod.PreProcessor(app)
        outcome = await stage.process(empty_query(), 'PreProcessor')

        # The pipeline still continues; the provider content list is simply empty.
        assert outcome.result_type == pipeline_entities.ResultType.CONTINUE
        user_message = outcome.new_query.user_message
        assert user_message is not None
        assert user_message.content == []
class TestPreProcessorImageSegment:
    """Tests for image segment handling."""

    @pytest.mark.asyncio
    async def test_image_with_vision_model(self):
        """A base64 image is included when the model lists the 'vision' ability."""
        preproc = get_preproc_module()
        # Resolve the result enum through the shared helper, as the other tests
        # in this file do, instead of reaching through ``preproc.entities``.
        entities = get_entities_module()
        app = FakeApp()

        mock_session = Mock()
        mock_session.launcher_type = Mock(value='person')
        mock_session.launcher_id = 12345
        app.sess_mgr.get_session = AsyncMock(return_value=mock_session)

        mock_conversation = Mock()
        mock_conversation.prompt = Mock(messages=[])
        mock_conversation.prompt.copy = Mock(return_value=Mock(messages=[]))
        mock_conversation.messages = []
        mock_conversation.uuid = None
        app.sess_mgr.get_conversation = AsyncMock(return_value=mock_conversation)

        # Model with vision support
        mock_model = Mock()
        mock_model.model_entity = Mock(uuid='vision-model', abilities=['func_call', 'vision'])
        app.model_mgr.get_model_by_uuid = AsyncMock(return_value=mock_model)
        app.tool_mgr.get_all_tools = AsyncMock(return_value=[])
        # NOTE(review): unlike test_image_without_vision_model, no runner
        # descriptor is installed via set_runner_descriptor() here -- confirm
        # that relying on FakeApp's default is intentional.

        mock_event_ctx = Mock()
        mock_event_ctx.event = Mock(default_prompt=[], prompt=[])
        app.plugin_connector.emit_event = AsyncMock(return_value=mock_event_ctx)

        stage = preproc.PreProcessor(app)

        # Image query; its message chain is replaced below with a base64 image.
        query = image_query(text="look at this", url=None)
        query.pipeline_config = make_runner_config(primary='vision-model')

        # Set base64 on the image component
        import langbot_plugin.api.entities.builtin.platform.message as platform_message

        chain = platform_message.MessageChain([
            platform_message.Plain(text="look at this"),
            platform_message.Image(base64="data:image/png;base64,abc123"),
        ])
        query.message_chain = chain

        result = await stage.process(query, 'PreProcessor')

        assert result.result_type == entities.ResultType.CONTINUE
        content_types = [elem.type for elem in result.new_query.user_message.content]
        assert 'image_base64' in content_types

    @pytest.mark.asyncio
    async def test_image_without_vision_model(self):
        """No 'image_url' element is produced when the model lacks 'vision'."""
        preproc = get_preproc_module()
        # Same helper-based lookup as the rest of the file.
        entities = get_entities_module()
        app = FakeApp()

        mock_session = Mock()
        mock_session.launcher_type = Mock(value='person')
        mock_session.launcher_id = 12345
        app.sess_mgr.get_session = AsyncMock(return_value=mock_session)

        mock_conversation = Mock()
        mock_conversation.prompt = Mock(messages=[])
        mock_conversation.prompt.copy = Mock(return_value=Mock(messages=[]))
        mock_conversation.messages = []
        mock_conversation.uuid = None
        app.sess_mgr.get_conversation = AsyncMock(return_value=mock_conversation)

        # Model WITHOUT vision support
        mock_model = Mock()
        mock_model.model_entity = Mock(uuid='text-only-model', abilities=['func_call'])
        app.model_mgr.get_model_by_uuid = AsyncMock(return_value=mock_model)
        app.tool_mgr.get_all_tools = AsyncMock(return_value=[])
        set_runner_descriptor(app)

        mock_event_ctx = Mock()
        mock_event_ctx.event = Mock(default_prompt=[], prompt=[])
        app.plugin_connector.emit_event = AsyncMock(return_value=mock_event_ctx)

        stage = preproc.PreProcessor(app)
        query = image_query(text="describe this")
        query.pipeline_config = make_runner_config(primary='text-only-model')

        result = await stage.process(query, 'PreProcessor')

        assert result.result_type == entities.ResultType.CONTINUE
        content_types = [elem.type for elem in result.new_query.user_message.content]
        # NOTE(review): only 'image_url' is checked; an image forwarded as
        # 'image_base64' would not be caught -- confirm what image_query() builds.
        assert 'image_url' not in content_types
class TestPreProcessorModelSelection:
    """Model selection and fallback resolution in the PreProcessor stage."""

    @pytest.mark.asyncio
    async def test_primary_model_selected(self):
        """The configured primary model UUID ends up in ``use_llm_model_uuid``."""
        preproc_mod = get_preproc_module()
        app = FakeApp()

        session = Mock(launcher_type=Mock(value='person'), launcher_id=12345)
        app.sess_mgr.get_session = AsyncMock(return_value=session)

        conversation = Mock(
            prompt=Mock(messages=[], copy=Mock(return_value=Mock(messages=[]))),
            messages=[],
            uuid=None,
        )
        app.sess_mgr.get_conversation = AsyncMock(return_value=conversation)

        model = Mock(model_entity=Mock(uuid='primary-model-uuid', abilities=['func_call']))
        app.model_mgr.get_model_by_uuid = AsyncMock(return_value=model)
        app.tool_mgr.get_all_tools = AsyncMock(return_value=[])
        set_runner_descriptor(app)

        event_ctx = Mock(event=Mock(default_prompt=[], prompt=[]))
        app.plugin_connector.emit_event = AsyncMock(return_value=event_ctx)

        stage = preproc_mod.PreProcessor(app)
        query = text_query("hello")
        # Pipeline config naming the primary model.
        query.pipeline_config = make_runner_config(primary='primary-model-uuid')

        outcome = await stage.process(query, 'PreProcessor')

        assert outcome.new_query.use_llm_model_uuid == 'primary-model-uuid'

    @pytest.mark.asyncio
    async def test_fallback_models_resolved(self):
        """Configured fallback UUIDs are stored under '_fallback_model_uuids'."""
        preproc_mod = get_preproc_module()
        app = FakeApp()

        session = Mock(launcher_type=Mock(value='person'), launcher_id=12345)
        app.sess_mgr.get_session = AsyncMock(return_value=session)

        conversation = Mock(
            prompt=Mock(messages=[], copy=Mock(return_value=Mock(messages=[]))),
            messages=[],
            uuid=None,
        )
        app.sess_mgr.get_conversation = AsyncMock(return_value=conversation)

        # One primary and one fallback model, looked up by UUID.
        primary_model = Mock(model_entity=Mock(uuid='primary-uuid', abilities=['func_call']))
        fallback_model = Mock(model_entity=Mock(uuid='fallback-uuid', abilities=['func_call']))
        known_models = (
            ('primary-uuid', primary_model),
            ('fallback-uuid', fallback_model),
        )

        async def lookup_model(uuid):
            # Parameter name is kept as ``uuid`` in case the stage passes it by keyword.
            for known_uuid, known_model in known_models:
                if uuid == known_uuid:
                    return known_model
            raise ValueError(f'Model {uuid} not found')

        app.model_mgr.get_model_by_uuid = AsyncMock(side_effect=lookup_model)
        app.tool_mgr.get_all_tools = AsyncMock(return_value=[])
        set_runner_descriptor(app)

        event_ctx = Mock(event=Mock(default_prompt=[], prompt=[]))
        app.plugin_connector.emit_event = AsyncMock(return_value=event_ctx)

        stage = preproc_mod.PreProcessor(app)
        query = text_query("hello")
        query.pipeline_config = make_runner_config(primary='primary-uuid', fallbacks=['fallback-uuid'])

        outcome = await stage.process(query, 'PreProcessor')

        query_variables = outcome.new_query.variables
        assert '_fallback_model_uuids' in query_variables
        assert 'fallback-uuid' in query_variables['_fallback_model_uuids']
class TestPreProcessorVariables:
    """Variables the PreProcessor stage stores on the resulting query."""

    @staticmethod
    def _build_app(*, launcher_type, launcher_id, conversation_uuid):
        """Return a FakeApp whose managers are stubbed; no model is resolved."""
        app = FakeApp()

        session = Mock(launcher_type=Mock(value=launcher_type), launcher_id=launcher_id)
        app.sess_mgr.get_session = AsyncMock(return_value=session)

        conversation = Mock(
            prompt=Mock(messages=[], copy=Mock(return_value=Mock(messages=[]))),
            messages=[],
            uuid=conversation_uuid,
        )
        app.sess_mgr.get_conversation = AsyncMock(return_value=conversation)

        app.model_mgr.get_model_by_uuid = AsyncMock(return_value=None)
        app.tool_mgr.get_all_tools = AsyncMock(return_value=[])

        event_ctx = Mock(event=Mock(default_prompt=[], prompt=[]))
        app.plugin_connector.emit_event = AsyncMock(return_value=event_ctx)
        return app

    @pytest.mark.asyncio
    async def test_variables_set_from_query(self):
        """Launcher, sender and message-text variables are present on the query."""
        preproc_mod = get_preproc_module()
        app = self._build_app(
            launcher_type='person',
            launcher_id=12345,
            conversation_uuid='conv-123',
        )

        stage = preproc_mod.PreProcessor(app)
        outcome = await stage.process(text_query("hello", sender_id=67890), 'PreProcessor')

        query_variables = outcome.new_query.variables
        assert 'launcher_type' in query_variables
        assert 'launcher_id' in query_variables
        assert 'sender_id' in query_variables
        assert query_variables['sender_id'] == 67890
        assert 'user_message_text' in query_variables

    @pytest.mark.asyncio
    async def test_group_variables_include_group_name(self):
        """Group messages expose 'group_name' and 'sender_name' variables."""
        preproc_mod = get_preproc_module()
        app = self._build_app(
            launcher_type='group',
            launcher_id=99999,
            conversation_uuid=None,
        )

        stage = preproc_mod.PreProcessor(app)
        outcome = await stage.process(group_text_query("hello", group_id=99999), 'PreProcessor')

        query_variables = outcome.new_query.variables
        assert 'group_name' in query_variables
        assert 'sender_name' in query_variables