feat(agent-runner): add admin reconcile primitives

This commit is contained in:
huanghuoguoguo
2026-06-15 19:42:33 +08:00
parent aa4fdd1144
commit e9dd7f423d
4 changed files with 601 additions and 5 deletions
@@ -2,11 +2,15 @@
from __future__ import annotations
import datetime
from types import SimpleNamespace
from unittest.mock import MagicMock
import pytest
import sqlalchemy
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.ext.asyncio import create_async_engine
from sqlalchemy.orm import sessionmaker
from langbot.pkg.agent.runner.run_ledger_store import RunLedgerStore
from langbot.pkg.agent.runner.session_registry import AgentRunSessionRegistry
@@ -29,10 +33,11 @@ class FakeConnection:
class FakeApplication:
def __init__(self, db_engine, admin_plugins=None):
def __init__(self, db_engine, admin_plugins=None, runner_registry=None):
self.logger = MagicMock()
self.persistence_mgr = MagicMock()
self.persistence_mgr.get_db_engine = MagicMock(return_value=db_engine)
self.agent_runner_registry = runner_registry
self.instance_config = SimpleNamespace(
data={
'agent_runner': {
@@ -62,11 +67,21 @@ async def db_engine():
await engine.dispose()
def _handler(db_engine, admin_plugins=None):
class FakeRunnerRegistry:
    """In-memory stand-in for the agent runner registry used by handler tests.

    Every ``list_runners`` call is recorded in ``calls`` so a test can assert
    on the exact keyword arguments the handler forwarded.
    """

    def __init__(self, runners):
        """Store the canned ``runners`` to return and start with no recorded calls."""
        self.runners = runners
        self.calls = []

    async def list_runners(self, *, bound_plugins=None, use_cache=True):
        """Record the keyword arguments of this call and return the canned runners.

        Args:
            bound_plugins: Recorded as-is under the ``'bound_plugins'`` key.
            use_cache: Recorded as-is under the ``'use_cache'`` key.

        Returns:
            The same ``runners`` object that was passed to the constructor.
        """
        self.calls.append({'bound_plugins': bound_plugins, 'use_cache': use_cache})
        return self.runners
def _handler(db_engine, admin_plugins=None, runner_registry=None):
    """Build a ``RuntimeConnectionHandler`` backed by a ``FakeApplication``.

    Args:
        db_engine: Passed to ``FakeApplication`` as its database engine.
        admin_plugins: Forwarded to ``FakeApplication`` unchanged.
        runner_registry: Forwarded to ``FakeApplication`` unchanged.

    Returns:
        A handler constructed with a ``FakeConnection``, a disconnect callback
        that returns ``True``, and the fake application.
    """

    async def fake_disconnect():
        return True

    # Build the fake application once, with every option applied; an earlier
    # construction without ``runner_registry`` was immediately overwritten.
    fake_app = FakeApplication(db_engine, admin_plugins=admin_plugins, runner_registry=runner_registry)
    return RuntimeConnectionHandler(FakeConnection(), fake_disconnect, fake_app)
@@ -353,6 +368,60 @@ async def test_agent_run_admin_permission_string_allows_without_run_id(db_engine
assert [run.run_id for run in page.items] == ['run_1']
@pytest.mark.asyncio
async def test_agent_run_admin_can_list_runner_registry_without_run_id(db_engine):
    """A plugin granted ``agent_run:admin`` can call ``runner_list`` with no run id.

    Also checks that ``include_plugins`` from the request reaches the registry
    as ``bound_plugins`` with ``use_cache`` left at ``True``.
    """
    registered_runner = {
        'id': 'plugin:test/runner/default',
        'source': 'plugin',
        'plugin_author': 'test',
        'plugin_name': 'runner',
        'runner_name': 'default',
        'label': {'en_US': 'Default'},
    }
    admin_entry = {
        'identity': 'langbot/control',
        'permissions': ['agent_run:admin'],
    }
    registry = FakeRunnerRegistry([registered_runner])
    handler = _handler(db_engine, admin_plugins=[admin_entry], runner_registry=registry)

    action = handler.actions['runner_list']
    request = {
        'caller_plugin_identity': 'langbot/control',
        'include_plugins': ['test/runner'],
    }
    response = await action(request)

    assert response.code == 0
    first_item = response.data['items'][0]
    assert first_item['id'] == 'plugin:test/runner/default'

    # Exactly one registry lookup, scoped to the requested plugin.
    expected_call = {
        'bound_plugins': ['test/runner'],
        'use_cache': True,
    }
    assert registry.calls == [expected_call]
@pytest.mark.asyncio
async def test_unconfigured_plugin_cannot_list_runner_registry(db_engine):
    """With no admin plugins configured, ``runner_list`` rejects the caller."""
    empty_registry = FakeRunnerRegistry([])
    action = _handler(db_engine, runner_registry=empty_registry).actions['runner_list']

    response = await action({'caller_plugin_identity': 'test/runner'})

    assert response.code != 0
    error_text = response.message.lower()
    assert 'not authorized' in error_text
@pytest.mark.asyncio
async def test_agent_run_admin_can_get_and_page_cross_scope_with_own_run_session(session_registry, db_engine):
await _register_session(session_registry, available_apis={})
@@ -504,6 +573,13 @@ async def test_agent_run_admin_can_cancel_cross_scope_with_own_run_session(sessi
assert run.run_id == 'run_other'
assert run.cancel_requested_at is not None
assert run.status_reason == 'admin requested'
events, _next_cursor, _prev_cursor, _has_more = await RunLedgerStore(db_engine).page_run_events(
run_id='run_other',
)
assert [event['type'] for event in events] == ['message.completed', 'admin.run_cancel']
assert events[1]['source'] == 'host'
assert events[1]['data']['caller_plugin_identity'] == 'test/runner'
assert events[1]['metadata'] == {'permission': 'agent_run:admin'}
@pytest.mark.asyncio
@@ -682,6 +758,80 @@ async def test_runtime_admin_can_register_list_and_claim_without_run_id(db_engin
assert claimed.data['claimed_by_runtime_id'] == 'runtime_1'
@pytest.mark.asyncio
async def test_runtime_admin_can_reconcile_without_run_id(db_engine):
    """A plugin granted ``runtime:admin`` can call ``runtime_reconcile`` with no run id.

    The test registers a runtime, lets it claim a queued run, then backdates
    both the claim lease and the runtime heartbeat so that reconcile reports
    one stale runtime and one released claim.
    """
    ledger = RunLedgerStore(db_engine)
    await ledger.register_runtime(
        runtime_id='runtime_stale',
        display_name='Runtime Stale',
        heartbeat_deadline_seconds=60,
    )
    await ledger.create_run(
        run_id='claimed_run',
        event_id='evt_claimed',
        binding_id='binding_1',
        runner_id='plugin:other/runner/default',
        status='queued',
        queue_name='default',
    )
    claimed = await ledger.claim_next_run(runtime_id='runtime_stale', queue_name='default', lease_seconds=60)
    assert claimed is not None

    # Move the lease expiry and heartbeat timestamps one second into the past.
    open_session = sessionmaker(db_engine, class_=AsyncSession, expire_on_commit=False)
    one_second_ago = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(seconds=1)
    run_model = agent_run_model.AgentRun
    runtime_model = agent_run_model.AgentRuntime
    expire_lease = (
        sqlalchemy.update(run_model)
        .where(run_model.run_id == 'claimed_run')
        .values(claim_lease_expires_at=one_second_ago)
    )
    expire_heartbeat = (
        sqlalchemy.update(runtime_model)
        .where(runtime_model.runtime_id == 'runtime_stale')
        .values(
            last_heartbeat_at=one_second_ago,
            heartbeat_deadline_at=one_second_ago,
        )
    )
    async with open_session() as db:
        await db.execute(expire_lease)
        await db.execute(expire_heartbeat)
        await db.commit()

    admin_entry = {
        'identity': 'langbot/control',
        'permissions': ['runtime:admin'],
    }
    handler = _handler(db_engine, admin_plugins=[admin_entry])
    action = handler.actions['runtime_reconcile']

    response = await action({'caller_plugin_identity': 'langbot/control'})

    assert response.code == 0
    assert response.data['stale_count'] == 1
    assert response.data['released_claim_count'] == 1
    assert response.data['stale_runtimes'][0]['runtime_id'] == 'runtime_stale'
    assert response.data['released_claims'][0]['run_id'] == 'claimed_run'

    # The ledger itself must reflect what the response reported.
    runtime_row = await ledger.get_runtime('runtime_stale')
    assert runtime_row['status'] == 'stale'
    run_row = await ledger.get_run('claimed_run')
    assert run_row is not None
    assert run_row['status'] == 'queued'
    assert run_row['claimed_by_runtime_id'] is None
    assert run_row['claim_token'] is None
@pytest.mark.asyncio
async def test_unconfigured_plugin_cannot_reconcile_runtime(db_engine):
    """With no admin plugins configured, ``runtime_reconcile`` rejects the caller."""
    action = _handler(db_engine).actions['runtime_reconcile']

    response = await action({'caller_plugin_identity': 'test/runner'})

    assert response.code != 0
    error_text = response.message.lower()
    assert 'not authorized' in error_text
@pytest.mark.asyncio
async def test_disabled_admin_plugin_entry_does_not_grant_access(session_registry, db_engine):
await _register_session(session_registry, available_apis={})
@@ -124,6 +124,84 @@ async def test_expired_claim_can_be_reclaimed(store, db_engine):
assert reclaimed['dispatch_attempts'] == 2
@pytest.mark.asyncio
async def test_release_expired_claims_requeues_runs(store, db_engine):
    """``release_expired_claims`` requeues only runs whose claim lease has lapsed.

    Two runs are created and claimed by different runtimes. One lease is then
    backdated; only that run should be returned and reset, while the other
    keeps its claim.
    """
    for run_id, event_id in (('run-expired-release', 'evt-3'), ('run-active-claim', 'evt-4')):
        await store.create_run(
            run_id=run_id,
            event_id=event_id,
            binding_id='binding-1',
            runner_id='runner-a',
            status='queued',
            queue_name='default',
        )

    first_claim = await store.claim_next_run(runtime_id='runtime-a', queue_name='default', lease_seconds=60)
    second_claim = await store.claim_next_run(runtime_id='runtime-b', queue_name='default', lease_seconds=60)
    assert first_claim is not None
    assert second_claim is not None

    # Backdate the lease of one run so that it alone counts as expired.
    open_session = sessionmaker(db_engine, class_=AsyncSession, expire_on_commit=False)
    async with open_session() as db:
        lapsed_at = datetime.datetime.now(UTC) - datetime.timedelta(seconds=1)
        expire_lease = (
            sqlalchemy.update(AgentRun)
            .where(AgentRun.run_id == 'run-expired-release')
            .values(claim_lease_expires_at=lapsed_at)
        )
        await db.execute(expire_lease)
        await db.commit()

    released = await store.release_expired_claims()

    released_ids = [run['run_id'] for run in released]
    assert released_ids == ['run-expired-release']
    requeued = released[0]
    assert requeued['status'] == 'queued'
    assert requeued['status_reason'] == 'claim lease expired'
    assert requeued['claimed_by_runtime_id'] is None
    assert requeued['claim_token'] is None
    assert requeued['claim_lease_expires_at'] is None

    # The run with a live lease must be left untouched.
    still_claimed = await store.get_run('run-active-claim')
    assert still_claimed is not None
    assert still_claimed['status'] == 'claimed'
    assert still_claimed['claim_token'] == second_claim['claim_token']
@pytest.mark.asyncio
async def test_append_audit_event_uses_next_sequence(store):
    """``append_audit_event`` takes the next sequence number and returns ``None`` for unknown runs."""
    await store.create_run(
        run_id='run-audit',
        event_id='evt-5',
        binding_id='binding-1',
        runner_id='runner-a',
    )
    # Seed one ordinary event at sequence 1 so the audit event must land on 2.
    await store.append_event(
        run_id='run-audit',
        sequence=1,
        event_type='message.completed',
        data={'ok': True},
    )

    audit_event = await store.append_audit_event(
        run_id='run-audit',
        event_type='admin.run_cancel',
        data={'action': 'run_cancel'},
        metadata={'permission': 'agent_run:admin'},
    )

    assert audit_event is not None
    assert audit_event['sequence'] == 2
    assert audit_event['type'] == 'admin.run_cancel'
    assert audit_event['source'] == 'host'
    assert audit_event['data'] == {'action': 'run_cancel'}
    assert audit_event['metadata'] == {'permission': 'agent_run:admin'}

    for_unknown_run = await store.append_audit_event(run_id='missing', event_type='admin.missing')
    assert for_unknown_run is None
@pytest.mark.asyncio
async def test_runtime_register_heartbeat_list_and_mark_stale(store):
registered = await store.register_runtime(