Fix agent runner steering and lifecycle hardening

This commit is contained in:
huanghuoguoguo
2026-06-12 11:58:09 +08:00
parent 8da5fecfbf
commit 9f95c6bd0d
17 changed files with 547 additions and 28 deletions

View File

@@ -566,6 +566,55 @@ class TestArtifactStoreRealSQLite:
assert result["has_more"] is True
assert result["length"] == 100
@pytest.mark.asyncio
async def test_expired_artifact_is_not_readable_before_cleanup(self, db_engine):
    """Expired artifacts are hidden from reads even though no cleanup has run.

    Registers an artifact whose ``expires_at`` is already one second in the
    past, then checks that both ``get_metadata`` and ``read_artifact`` return
    ``None`` for it without ever invoking a cleanup method.
    """
    # Naive UTC timestamp without the deprecated datetime.utcnow(); stripping
    # tzinfo yields the same kind of value utcnow() used to return.
    now = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
    store = ArtifactStore(db_engine)

    await store.register_artifact(
        artifact_id="art_expired_hidden",
        artifact_type="file",
        source="runner",
        content=b"expired",
        expires_at=now - datetime.timedelta(seconds=1),
    )

    assert await store.get_metadata("art_expired_hidden") is None
    assert await store.read_artifact("art_expired_hidden") is None
@pytest.mark.asyncio
async def test_cleanup_expired_artifacts_deletes_binary_storage(self, db_engine):
    """Cleanup removes an expired artifact and its binary storage row only.

    Two artifacts are registered relative to a fixed ``now``: one expired a
    second earlier, one expiring a day later. After
    ``cleanup_expired_artifacts(now=now)`` exactly one artifact is reported
    removed, the fresh one is still retrievable, and no ``BinaryStorage`` row
    with key ``artifact:art_expired`` remains.
    """
    from sqlalchemy import select
    from langbot.pkg.entity.persistence.bstorage import BinaryStorage

    store = ArtifactStore(db_engine)
    # Naive UTC timestamp without the deprecated datetime.utcnow().
    now = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)

    await store.register_artifact(
        artifact_id="art_expired",
        artifact_type="file",
        source="runner",
        content=b"expired",
        expires_at=now - datetime.timedelta(seconds=1),
    )
    await store.register_artifact(
        artifact_id="art_fresh",
        artifact_type="file",
        source="runner",
        content=b"fresh",
        expires_at=now + datetime.timedelta(days=1),
    )

    removed = await store.cleanup_expired_artifacts(now=now)

    assert removed == 1
    assert await store.get_metadata("art_expired") is None
    assert await store.get_metadata("art_fresh") is not None

    # Inspect the blob table directly: the expired artifact's row must be gone.
    async with store._session_factory() as session:
        result = await session.execute(
            select(BinaryStorage).where(BinaryStorage.unique_key == "artifact:art_expired")
        )
        assert result.scalars().first() is None
@pytest.mark.asyncio
async def test_file_artifact_range_read_and_public_metadata(self, db_engine, tmp_path):
"""File-backed artifacts read ranges without exposing host paths."""

View File

@@ -1,6 +1,8 @@
"""Tests for EventLog, Transcript, and history/event APIs."""
from __future__ import annotations
import datetime
import pytest
from langbot.pkg.agent.runner.host_models import (
@@ -505,6 +507,45 @@ class TestEventLogStoreRealSQLite:
assert cursor is not None
assert int(cursor) > 0
@pytest.mark.asyncio
async def test_cleanup_events_older_than(self, db_engine):
    """EventLog cleanup removes only rows created before the cutoff.

    Two events are appended, then their ``created_at`` columns are rewritten
    to two days before and two days after the cutoff. The cleanup call must
    report one removed row and leave only the newer event retrievable.
    """
    import sqlalchemy
    from langbot.pkg.entity.persistence.event_log import EventLog

    store = EventLogStore(db_engine)
    # Naive UTC timestamp without the deprecated datetime.utcnow().
    cutoff = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
    two_days = datetime.timedelta(days=2)

    # Target created_at for each event, placed on either side of the cutoff.
    created_at_by_event = {
        "evt_cleanup_old": cutoff - two_days,
        "evt_cleanup_new": cutoff + two_days,
    }

    for event_id in created_at_by_event:
        await store.append_event(
            event_id=event_id,
            event_type="message.received",
            source="platform",
            conversation_id="conv_cleanup",
        )

    # Override the stored timestamps so the rows straddle the cutoff.
    async with store._session_factory() as session:
        for event_id, created_at in created_at_by_event.items():
            await session.execute(
                sqlalchemy.update(EventLog)
                .where(EventLog.event_id == event_id)
                .values(created_at=created_at)
            )
        await session.commit()

    removed = await store.cleanup_events_older_than(cutoff)

    assert removed == 1
    assert await store.get_event("evt_cleanup_old") is None
    assert await store.get_event("evt_cleanup_new") is not None
class TestTranscriptStoreRealSQLite:
"""Test TranscriptStore with real SQLite database."""
@@ -637,6 +678,47 @@ class TestTranscriptStoreRealSQLite:
assert cursor is not None
assert int(cursor) > 0
@pytest.mark.asyncio
async def test_cleanup_transcripts_older_than(self, db_engine):
    """Transcript cleanup removes only rows created before the cutoff.

    Two transcript rows are appended to the same conversation, then their
    ``created_at`` columns are rewritten to two days before and two days
    after the cutoff. The cleanup call must report one removed row and the
    conversation page must contain only the "new" item.
    """
    import sqlalchemy
    from langbot.pkg.entity.persistence.transcript import Transcript

    store = TranscriptStore(db_engine)
    # Naive UTC timestamp without the deprecated datetime.utcnow().
    cutoff = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
    two_days = datetime.timedelta(days=2)

    await store.append_transcript(
        transcript_id="trans_cleanup_old",
        event_id="evt_cleanup_old",
        conversation_id="conv_cleanup",
        role="user",
        content="old",
    )
    await store.append_transcript(
        transcript_id="trans_cleanup_new",
        event_id="evt_cleanup_new",
        conversation_id="conv_cleanup",
        role="assistant",
        content="new",
    )

    # Override the stored timestamps so the rows straddle the cutoff.
    created_at_by_transcript = {
        "trans_cleanup_old": cutoff - two_days,
        "trans_cleanup_new": cutoff + two_days,
    }
    async with store._session_factory() as session:
        for transcript_id, created_at in created_at_by_transcript.items():
            await session.execute(
                sqlalchemy.update(Transcript)
                .where(Transcript.transcript_id == transcript_id)
                .values(created_at=created_at)
            )
        await session.commit()

    removed = await store.cleanup_transcripts_older_than(cutoff)
    items, _, _, _ = await store.page_transcript("conv_cleanup", limit=10)

    assert removed == 1
    assert [item["content"] for item in items] == ["new"]
# Fixtures
@pytest.fixture

View File

@@ -28,14 +28,17 @@ RUNNER_ID = "plugin:langbot/local-agent/default"
class FakeLogger:
    """Logger stand-in that records warning messages for later assertions.

    ``debug``, ``info`` and ``error`` discard their input. ``warning`` stores
    ``str(msg)`` in ``self.warnings`` in call order. Every method accepts and
    ignores extra positional/keyword arguments so logging-style calls such as
    ``logger.warning("x=%s", x)`` do not raise.
    """

    def __init__(self):
        # Stringified warning messages, oldest first.
        self.warnings: list[str] = []

    def debug(self, msg, *args, **kwargs):
        """Discard a debug message."""

    def info(self, msg, *args, **kwargs):
        """Discard an info message."""

    def warning(self, msg, *args, **kwargs):
        """Record ``str(msg)``; formatting arguments are not applied."""
        self.warnings.append(str(msg))

    def error(self, msg, *args, **kwargs):
        """Discard an error message."""
@@ -424,6 +427,41 @@ async def test_orchestrator_streams_fake_plugin_deltas(clean_agent_state):
assert [chunk.content for chunk in chunks] == ["hel", "hello"]
@pytest.mark.asyncio
async def test_orchestrator_drops_duplicate_result_sequence(clean_agent_state):
    """A repeated result sequence is dropped; a sequence gap is still delivered.

    The fake plugin emits deltas with sequences 1, 1, 3 and then a completion
    with sequence 4. Only the first and third delta contents may be yielded,
    and a warning must be logged for both the duplicate and the gap.
    """

    def delta(sequence, content):
        # Build one assistant message.delta result payload.
        return {
            "type": "message.delta",
            "sequence": sequence,
            "data": {"chunk": {"role": "assistant", "content": content}},
        }

    db_engine = clean_agent_state
    descriptor = make_descriptor()
    runner_results = [
        delta(1, "first"),
        delta(1, "duplicate"),
        delta(3, "after-gap"),
        {"type": "run.completed", "sequence": 4, "data": {"finish_reason": "stop"}},
    ]
    plugin_connector = FakePluginConnector(results=runner_results)
    ap = FakeApplication(plugin_connector, db_engine)
    orchestrator = AgentRunOrchestrator(ap, FakeRegistry(descriptor))

    yielded_contents = []
    async for message in orchestrator.run_from_query(make_query()):
        yielded_contents.append(message.content)

    assert yielded_contents == ["first", "after-gap"]

    logged = ap.logger.warnings
    assert any("duplicate result sequence 1" in entry for entry in logged)
    assert any("result sequence gap or out-of-order" in entry for entry in logged)
@pytest.mark.asyncio
async def test_orchestrator_applies_state_updates_and_suppresses_protocol_event(clean_agent_state):
"""Test that state.updated events are applied and not yielded to pipeline."""

View File

@@ -8,6 +8,7 @@ import time
from langbot.pkg.agent.runner.session_registry import (
AgentRunSessionRegistry,
AgentRunSession,
MAX_STEERING_QUEUE_ITEMS,
get_session_registry,
)
@@ -258,6 +259,59 @@ class TestSessionRegistryBasic:
assert [item['event']['event_id'] for item in first] == ['event_1']
assert [item['event']['event_id'] for item in second] == ['event_2']
@pytest.mark.asyncio
async def test_enqueue_steering_rejects_when_queue_is_full(self):
    """Enqueueing past MAX_STEERING_QUEUE_ITEMS is rejected and not stored.

    Fills the run's steering queue to the limit (every enqueue must succeed),
    checks that one more enqueue returns a falsy result, and confirms that a
    full pull returns exactly the limit with no ``overflow`` event present.
    """
    run_id = 'run_steering_full'
    registry = AgentRunSessionRegistry()
    await registry.register(
        run_id=run_id,
        runner_id='plugin:test/my-runner/default',
        query_id=1,
        plugin_identity='test/my-runner',
        resources=make_resources(),
        conversation_id='conv_1',
        available_apis={'steering_pull': True},
    )

    for index in range(MAX_STEERING_QUEUE_ITEMS):
        accepted = await registry.enqueue_steering(
            run_id,
            {'event': {'event_id': f'event_{index}'}},
        )
        assert accepted

    overflow_accepted = await registry.enqueue_steering(
        run_id,
        {'event': {'event_id': 'overflow'}},
    )
    assert not overflow_accepted

    items = await registry.pull_steering(run_id, mode='all')
    assert len(items) == MAX_STEERING_QUEUE_ITEMS
    pulled_ids = [item['event']['event_id'] for item in items]
    assert 'overflow' not in pulled_ids
@pytest.mark.asyncio
async def test_unregister_returns_pending_steering_queue(self):
    """Unregister hands back the removed session, including queued steering.

    Registers a run, enqueues one steering item, unregisters the run, and
    checks that the returned session still carries that item in
    ``steering_queue`` while a subsequent ``get`` for the run returns ``None``.
    """
    run_id = 'run_steering_unregister'
    registry = AgentRunSessionRegistry()
    await registry.register(
        run_id=run_id,
        runner_id='plugin:test/my-runner/default',
        query_id=1,
        plugin_identity='test/my-runner',
        resources=make_resources(),
        conversation_id='conv_1',
        available_apis={'steering_pull': True},
    )

    # Fail here with a clear assertion if the enqueue is rejected, instead of
    # with an IndexError on the queue lookup below.
    assert await registry.enqueue_steering(
        run_id,
        {'event': {'event_id': 'event_pending'}},
    )

    session = await registry.unregister(run_id)

    assert session is not None
    assert session['steering_queue'][0]['event']['event_id'] == 'event_pending'
    assert await registry.get(run_id) is None
class TestIsResourceAllowed:
"""Tests for is_resource_allowed validation."""

View File

@@ -0,0 +1,63 @@
from __future__ import annotations
from types import SimpleNamespace
from unittest.mock import AsyncMock, MagicMock
import pytest
from langbot.pkg.agent.runner.errors import RunnerNotFoundError
from langbot.pkg.pipeline.controller import Controller
def make_app():
    """Return a stub application namespace for controller tests.

    The namespace exposes ``instance_config`` (pipeline concurrency of 10), a
    ``MagicMock`` logger, and ``pipeline_mgr``, ``sess_mgr`` and
    ``agent_run_orchestrator`` namespaces whose methods are ``AsyncMock``s.
    ``sess_mgr.get_session`` resolves to an empty ``SimpleNamespace``.
    """
    return SimpleNamespace(
        instance_config=SimpleNamespace(data={'concurrency': {'pipeline': 10}}),
        logger=MagicMock(),
        pipeline_mgr=SimpleNamespace(get_pipeline_by_uuid=AsyncMock()),
        sess_mgr=SimpleNamespace(get_session=AsyncMock(return_value=SimpleNamespace())),
        agent_run_orchestrator=SimpleNamespace(try_claim_steering_from_query=AsyncMock()),
    )
def make_pipeline():
    """Return a stub pipeline bound to the ``test/runner`` plugin.

    The pipeline entity's config selects runner
    ``plugin:test/runner/default``; no MCP servers are bound.
    """
    runner_config = {'ai': {'runner': {'id': 'plugin:test/runner/default'}}}
    entity = SimpleNamespace(config=runner_config)
    return SimpleNamespace(
        pipeline_entity=entity,
        bound_plugins=['test/runner'],
        bound_mcp_servers=[],
    )
@pytest.mark.asyncio
async def test_try_claim_steering_returns_false_when_runner_lookup_fails():
    """A RunnerNotFoundError from the orchestrator yields False plus one warning."""
    app = make_app()
    app.pipeline_mgr.get_pipeline_by_uuid.return_value = make_pipeline()

    # Make the steering claim fail as if the configured runner did not exist.
    lookup_failure = RunnerNotFoundError('plugin:missing/runner/default')
    app.agent_run_orchestrator.try_claim_steering_from_query.side_effect = lookup_failure

    controller = Controller(app)
    query = SimpleNamespace(query_id=1, pipeline_uuid='pipeline-001', variables={})

    outcome = await controller._try_claim_steering_before_session_slot(query)

    assert outcome is False
    app.logger.warning.assert_called_once()
@pytest.mark.asyncio
async def test_try_claim_steering_sets_pipeline_context_before_claiming():
    """A successful claim leaves pipeline config and bound plugins on the query.

    After the call the query must reference the pipeline entity's config
    object, carry the bound plugin list under ``_pipeline_bound_plugins``, and
    the orchestrator must have been awaited exactly once with that query.
    """
    app = make_app()
    pipeline = make_pipeline()
    claim_mock = app.agent_run_orchestrator.try_claim_steering_from_query
    app.pipeline_mgr.get_pipeline_by_uuid.return_value = pipeline
    claim_mock.return_value = True

    controller = Controller(app)
    query = SimpleNamespace(query_id=2, pipeline_uuid='pipeline-002', variables={})

    outcome = await controller._try_claim_steering_before_session_slot(query)

    assert outcome is True
    assert query.pipeline_config is pipeline.pipeline_entity.config
    assert query.variables['_pipeline_bound_plugins'] == ['test/runner']
    claim_mock.assert_awaited_once_with(query)