mirror of
https://github.com/langbot-app/LangBot.git
synced 2026-06-02 03:55:55 +00:00
fix(skill): harden mount/reload paths and HTTP errors against stale skill cache
The Box backends behave inconsistently when extra_mounts reference a missing host directory (nsjail aborts the entire sandbox start, Docker silently creates a root-owned empty dir on the host, E2B silently skips the upload). The cache in skill_mgr.skills is only refreshed on in-process mutations, so out-of-band changes — container rebuilds, manual rm in the box volume, anything the LangBot API didn't drive — leave a stale skill that later produces one of those bad mount paths. - box/service.py: build_skill_extra_mounts now filters skills whose package_root is not isdir on the LangBot-visible filesystem and logs a warning, instead of passing the bad mount through to the backend - skill/manager.py: reload_skills (Box path) drops skills whose package_root is missing on the LangBot-side filesystem before they reach the in-memory cache, with a summary warning - api/http/controller/groups/skills.py: file/CRUD handlers now also catch BoxError (RuntimeError subclass, previously slipping past ``except ValueError`` and surfacing as 500); list/get handlers gain a try/except so a transient Box RPC failure becomes a clean 400 instead of a stack trace Tests added for build_skill_extra_mounts (skip missing, skip empty, no skill manager) and SkillManager.reload_skills (drop missing on Box path). Full unit suite: 279 passed. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -2,6 +2,8 @@ from __future__ import annotations
|
|||||||
|
|
||||||
import quart
|
import quart
|
||||||
|
|
||||||
|
from langbot_plugin.box.errors import BoxError
|
||||||
|
|
||||||
from .. import group
|
from .. import group
|
||||||
|
|
||||||
|
|
||||||
@@ -13,7 +15,10 @@ class SkillsRouterGroup(group.RouterGroup):
|
|||||||
@self.route('', methods=['GET', 'POST'], auth_type=group.AuthType.USER_TOKEN_OR_API_KEY)
|
@self.route('', methods=['GET', 'POST'], auth_type=group.AuthType.USER_TOKEN_OR_API_KEY)
|
||||||
async def list_or_create_skills() -> quart.Response:
|
async def list_or_create_skills() -> quart.Response:
|
||||||
if quart.request.method == 'GET':
|
if quart.request.method == 'GET':
|
||||||
skills = await self.ap.skill_service.list_skills()
|
try:
|
||||||
|
skills = await self.ap.skill_service.list_skills()
|
||||||
|
except (ValueError, BoxError) as exc:
|
||||||
|
return self.http_status(400, -1, str(exc))
|
||||||
return self.success(data={'skills': skills})
|
return self.success(data={'skills': skills})
|
||||||
|
|
||||||
data = await quart.request.json
|
data = await quart.request.json
|
||||||
@@ -23,13 +28,16 @@ class SkillsRouterGroup(group.RouterGroup):
|
|||||||
try:
|
try:
|
||||||
skill = await self.ap.skill_service.create_skill(data)
|
skill = await self.ap.skill_service.create_skill(data)
|
||||||
return self.success(data={'skill': skill})
|
return self.success(data={'skill': skill})
|
||||||
except ValueError as exc:
|
except (ValueError, BoxError) as exc:
|
||||||
return self.http_status(400, -1, str(exc))
|
return self.http_status(400, -1, str(exc))
|
||||||
|
|
||||||
@self.route('/<skill_name>', methods=['GET', 'PUT', 'DELETE'], auth_type=group.AuthType.USER_TOKEN_OR_API_KEY)
|
@self.route('/<skill_name>', methods=['GET', 'PUT', 'DELETE'], auth_type=group.AuthType.USER_TOKEN_OR_API_KEY)
|
||||||
async def get_update_delete_skill(skill_name: str) -> quart.Response:
|
async def get_update_delete_skill(skill_name: str) -> quart.Response:
|
||||||
if quart.request.method == 'GET':
|
if quart.request.method == 'GET':
|
||||||
skill = await self.ap.skill_service.get_skill(skill_name)
|
try:
|
||||||
|
skill = await self.ap.skill_service.get_skill(skill_name)
|
||||||
|
except (ValueError, BoxError) as exc:
|
||||||
|
return self.http_status(400, -1, str(exc))
|
||||||
if not skill:
|
if not skill:
|
||||||
return self.http_status(404, -1, 'Skill not found')
|
return self.http_status(404, -1, 'Skill not found')
|
||||||
return self.success(data={'skill': skill})
|
return self.success(data={'skill': skill})
|
||||||
@@ -39,13 +47,13 @@ class SkillsRouterGroup(group.RouterGroup):
|
|||||||
try:
|
try:
|
||||||
skill = await self.ap.skill_service.update_skill(skill_name, data)
|
skill = await self.ap.skill_service.update_skill(skill_name, data)
|
||||||
return self.success(data={'skill': skill})
|
return self.success(data={'skill': skill})
|
||||||
except ValueError as exc:
|
except (ValueError, BoxError) as exc:
|
||||||
return self.http_status(400, -1, str(exc))
|
return self.http_status(400, -1, str(exc))
|
||||||
|
|
||||||
try:
|
try:
|
||||||
await self.ap.skill_service.delete_skill(skill_name)
|
await self.ap.skill_service.delete_skill(skill_name)
|
||||||
return self.success()
|
return self.success()
|
||||||
except ValueError as exc:
|
except (ValueError, BoxError) as exc:
|
||||||
return self.http_status(400, -1, str(exc))
|
return self.http_status(400, -1, str(exc))
|
||||||
|
|
||||||
@self.route('/<skill_name>/files', methods=['GET'], auth_type=group.AuthType.USER_TOKEN_OR_API_KEY)
|
@self.route('/<skill_name>/files', methods=['GET'], auth_type=group.AuthType.USER_TOKEN_OR_API_KEY)
|
||||||
@@ -61,7 +69,7 @@ class SkillsRouterGroup(group.RouterGroup):
|
|||||||
include_hidden=include_hidden,
|
include_hidden=include_hidden,
|
||||||
)
|
)
|
||||||
return self.success(data=result)
|
return self.success(data=result)
|
||||||
except ValueError as exc:
|
except (ValueError, BoxError) as exc:
|
||||||
return self.http_status(400, -1, str(exc))
|
return self.http_status(400, -1, str(exc))
|
||||||
|
|
||||||
@self.route(
|
@self.route(
|
||||||
@@ -73,7 +81,7 @@ class SkillsRouterGroup(group.RouterGroup):
|
|||||||
try:
|
try:
|
||||||
result = await self.ap.skill_service.read_skill_file(skill_name, path)
|
result = await self.ap.skill_service.read_skill_file(skill_name, path)
|
||||||
return self.success(data=result)
|
return self.success(data=result)
|
||||||
except ValueError as exc:
|
except (ValueError, BoxError) as exc:
|
||||||
return self.http_status(400, -1, str(exc))
|
return self.http_status(400, -1, str(exc))
|
||||||
|
|
||||||
# PUT - write file
|
# PUT - write file
|
||||||
@@ -85,7 +93,7 @@ class SkillsRouterGroup(group.RouterGroup):
|
|||||||
try:
|
try:
|
||||||
result = await self.ap.skill_service.write_skill_file(skill_name, path, content)
|
result = await self.ap.skill_service.write_skill_file(skill_name, path, content)
|
||||||
return self.success(data=result)
|
return self.success(data=result)
|
||||||
except ValueError as exc:
|
except (ValueError, BoxError) as exc:
|
||||||
return self.http_status(400, -1, str(exc))
|
return self.http_status(400, -1, str(exc))
|
||||||
|
|
||||||
@self.route('/<skill_name>/preview', methods=['GET'], auth_type=group.AuthType.USER_TOKEN_OR_API_KEY)
|
@self.route('/<skill_name>/preview', methods=['GET'], auth_type=group.AuthType.USER_TOKEN_OR_API_KEY)
|
||||||
@@ -109,7 +117,7 @@ class SkillsRouterGroup(group.RouterGroup):
|
|||||||
try:
|
try:
|
||||||
skill = await self.ap.skill_service.install_from_github(data)
|
skill = await self.ap.skill_service.install_from_github(data)
|
||||||
return self.success(data={'skills': skill})
|
return self.success(data={'skills': skill})
|
||||||
except ValueError as exc:
|
except (ValueError, BoxError) as exc:
|
||||||
return self.http_status(400, -1, str(exc))
|
return self.http_status(400, -1, str(exc))
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
return self.http_status(500, -1, f'Failed to install skill: {exc}')
|
return self.http_status(500, -1, f'Failed to install skill: {exc}')
|
||||||
@@ -128,7 +136,7 @@ class SkillsRouterGroup(group.RouterGroup):
|
|||||||
try:
|
try:
|
||||||
preview = await self.ap.skill_service.preview_install_from_github(data)
|
preview = await self.ap.skill_service.preview_install_from_github(data)
|
||||||
return self.success(data={'skills': preview})
|
return self.success(data={'skills': preview})
|
||||||
except ValueError as exc:
|
except (ValueError, BoxError) as exc:
|
||||||
return self.http_status(400, -1, str(exc))
|
return self.http_status(400, -1, str(exc))
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
return self.http_status(500, -1, f'Failed to preview skill: {exc}')
|
return self.http_status(500, -1, f'Failed to preview skill: {exc}')
|
||||||
@@ -147,7 +155,7 @@ class SkillsRouterGroup(group.RouterGroup):
|
|||||||
source_paths=form.getlist('source_paths'),
|
source_paths=form.getlist('source_paths'),
|
||||||
)
|
)
|
||||||
return self.success(data={'skills': skill})
|
return self.success(data={'skills': skill})
|
||||||
except ValueError as exc:
|
except (ValueError, BoxError) as exc:
|
||||||
return self.http_status(400, -1, str(exc))
|
return self.http_status(400, -1, str(exc))
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
return self.http_status(500, -1, f'Failed to install skill: {exc}')
|
return self.http_status(500, -1, f'Failed to install skill: {exc}')
|
||||||
@@ -164,7 +172,7 @@ class SkillsRouterGroup(group.RouterGroup):
|
|||||||
filename=file.filename or '',
|
filename=file.filename or '',
|
||||||
)
|
)
|
||||||
return self.success(data={'skills': preview})
|
return self.success(data={'skills': preview})
|
||||||
except ValueError as exc:
|
except (ValueError, BoxError) as exc:
|
||||||
return self.http_status(400, -1, str(exc))
|
return self.http_status(400, -1, str(exc))
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
return self.http_status(500, -1, f'Failed to preview skill: {exc}')
|
return self.http_status(500, -1, f'Failed to preview skill: {exc}')
|
||||||
@@ -178,5 +186,5 @@ class SkillsRouterGroup(group.RouterGroup):
|
|||||||
try:
|
try:
|
||||||
result = await self.ap.skill_service.scan_directory_async(path)
|
result = await self.ap.skill_service.scan_directory_async(path)
|
||||||
return self.success(data=result)
|
return self.success(data=result)
|
||||||
except ValueError as exc:
|
except (ValueError, BoxError) as exc:
|
||||||
return self.http_status(400, -1, str(exc))
|
return self.http_status(400, -1, str(exc))
|
||||||
|
|||||||
@@ -192,6 +192,15 @@ class BoxService:
|
|||||||
This ensures that when a container is first created it already has
|
This ensures that when a container is first created it already has
|
||||||
all skill packages mounted, regardless of which skill is currently
|
all skill packages mounted, regardless of which skill is currently
|
||||||
activated.
|
activated.
|
||||||
|
|
||||||
|
Skills whose ``package_root`` is missing or no longer a directory on
|
||||||
|
the LangBot-visible filesystem are skipped with a warning instead of
|
||||||
|
being passed through to the backend. Without this guard the three
|
||||||
|
backends behave inconsistently on a stale mount: nsjail refuses to
|
||||||
|
start the sandbox (failing every exec in the session), Docker
|
||||||
|
silently auto-creates a root-owned empty directory on the host, and
|
||||||
|
E2B silently skips the upload — none of which surfaces an
|
||||||
|
actionable error to the agent or operator.
|
||||||
"""
|
"""
|
||||||
skill_mgr = getattr(self.ap, 'skill_mgr', None)
|
skill_mgr = getattr(self.ap, 'skill_mgr', None)
|
||||||
if skill_mgr is None:
|
if skill_mgr is None:
|
||||||
@@ -205,6 +214,13 @@ class BoxService:
|
|||||||
package_root = str(skill_data.get('package_root', '') or '').strip()
|
package_root = str(skill_data.get('package_root', '') or '').strip()
|
||||||
if not package_root:
|
if not package_root:
|
||||||
continue
|
continue
|
||||||
|
if not os.path.isdir(package_root):
|
||||||
|
self.ap.logger.warning(
|
||||||
|
f'Skill "{skill_name}" package_root missing on filesystem '
|
||||||
|
f'({package_root}); skipping mount to prevent sandbox failures. '
|
||||||
|
f'The skill cache may be stale — consider reloading skills.'
|
||||||
|
)
|
||||||
|
continue
|
||||||
mounts.append(
|
mounts.append(
|
||||||
{
|
{
|
||||||
'host_path': package_root,
|
'host_path': package_root,
|
||||||
|
|||||||
@@ -50,11 +50,32 @@ class SkillManager:
|
|||||||
box_service = getattr(self.ap, 'box_service', None)
|
box_service = getattr(self.ap, 'box_service', None)
|
||||||
if box_service is not None and getattr(box_service, 'available', False):
|
if box_service is not None and getattr(box_service, 'available', False):
|
||||||
try:
|
try:
|
||||||
|
dropped = 0
|
||||||
for skill_data in await box_service.list_skills():
|
for skill_data in await box_service.list_skills():
|
||||||
skill_name = skill_data.get('name')
|
skill_name = skill_data.get('name')
|
||||||
if skill_name:
|
if not skill_name:
|
||||||
self.skills[skill_name] = skill_data
|
continue
|
||||||
self.ap.logger.info(f'Loaded {len(self.skills)} skills from Box runtime')
|
# Drop skills whose package_root is no longer visible on the
|
||||||
|
# LangBot-side filesystem (e.g. Box volume was rebuilt or the
|
||||||
|
# directory was deleted out-of-band). Keeping them in the cache
|
||||||
|
# would cause stale extra_mounts and confusing UI states.
|
||||||
|
package_root = str(skill_data.get('package_root', '') or '').strip()
|
||||||
|
if package_root and not os.path.isdir(package_root):
|
||||||
|
self.ap.logger.warning(
|
||||||
|
f'Skill "{skill_name}" reported by Box runtime but '
|
||||||
|
f'package_root missing on LangBot filesystem '
|
||||||
|
f'({package_root}); dropping from in-memory cache.'
|
||||||
|
)
|
||||||
|
dropped += 1
|
||||||
|
continue
|
||||||
|
self.skills[skill_name] = skill_data
|
||||||
|
if dropped:
|
||||||
|
self.ap.logger.warning(
|
||||||
|
f'Loaded {len(self.skills)} skills from Box runtime '
|
||||||
|
f'({dropped} dropped due to missing package_root).'
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
self.ap.logger.info(f'Loaded {len(self.skills)} skills from Box runtime')
|
||||||
return
|
return
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
self.ap.logger.warning(f'Failed to load skills from Box runtime, falling back to local data: {exc}')
|
self.ap.logger.warning(f'Failed to load skills from Box runtime, falling back to local data: {exc}')
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ from __future__ import annotations
|
|||||||
import asyncio
|
import asyncio
|
||||||
import datetime as dt
|
import datetime as dt
|
||||||
import os
|
import os
|
||||||
|
import tempfile
|
||||||
from types import SimpleNamespace
|
from types import SimpleNamespace
|
||||||
from unittest.mock import AsyncMock, Mock
|
from unittest.mock import AsyncMock, Mock
|
||||||
|
|
||||||
@@ -1222,3 +1223,63 @@ class TestBoxHostMountModeNone:
|
|||||||
mount_path='/project',
|
mount_path='/project',
|
||||||
workdir='/workspace',
|
workdir='/workspace',
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TestBuildSkillExtraMounts:
|
||||||
|
"""Robustness of skill mount construction against a stale skill cache.
|
||||||
|
|
||||||
|
The three sandbox backends behave inconsistently when a skill's
|
||||||
|
package_root no longer exists on disk (nsjail aborts the whole sandbox
|
||||||
|
start, Docker silently auto-creates a root-owned empty directory, E2B
|
||||||
|
silently skips). Mount construction must filter these out up front so
|
||||||
|
the backend never sees a bad mount.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def _make_service(self, logger, skills):
|
||||||
|
app = make_app(logger)
|
||||||
|
app.skill_mgr = SimpleNamespace(skills=skills)
|
||||||
|
client = Mock(spec=BoxRuntimeClient)
|
||||||
|
return BoxService(app, client=client)
|
||||||
|
|
||||||
|
def test_skips_skill_with_missing_package_root(self):
|
||||||
|
logger = Mock()
|
||||||
|
with tempfile.TemporaryDirectory() as live_dir:
|
||||||
|
skills = {
|
||||||
|
'alive': {'name': 'alive', 'package_root': live_dir},
|
||||||
|
'ghost': {'name': 'ghost', 'package_root': '/nonexistent/path/should/never/exist'},
|
||||||
|
}
|
||||||
|
service = self._make_service(logger, skills)
|
||||||
|
query = make_query()
|
||||||
|
|
||||||
|
mounts = service.build_skill_extra_mounts(query)
|
||||||
|
|
||||||
|
assert mounts == [
|
||||||
|
{
|
||||||
|
'host_path': live_dir,
|
||||||
|
'mount_path': '/workspace/.skills/alive',
|
||||||
|
'mode': 'rw',
|
||||||
|
}
|
||||||
|
]
|
||||||
|
# Warning logged so operators can see what was dropped
|
||||||
|
assert any(
|
||||||
|
'ghost' in str(call.args[0]) and 'package_root missing' in str(call.args[0])
|
||||||
|
for call in logger.warning.call_args_list
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_skips_skill_with_empty_package_root(self):
|
||||||
|
logger = Mock()
|
||||||
|
skills = {
|
||||||
|
'no_root': {'name': 'no_root', 'package_root': ''},
|
||||||
|
'whitespace': {'name': 'whitespace', 'package_root': ' '},
|
||||||
|
}
|
||||||
|
service = self._make_service(logger, skills)
|
||||||
|
|
||||||
|
assert service.build_skill_extra_mounts(make_query()) == []
|
||||||
|
|
||||||
|
def test_returns_empty_when_no_skill_manager(self):
|
||||||
|
logger = Mock()
|
||||||
|
app = make_app(logger)
|
||||||
|
# no skill_mgr attribute
|
||||||
|
service = BoxService(app, client=Mock(spec=BoxRuntimeClient))
|
||||||
|
|
||||||
|
assert service.build_skill_extra_mounts(make_query()) == []
|
||||||
|
|||||||
@@ -78,6 +78,36 @@ class TestSkillManagerPackageLoading:
|
|||||||
assert skill_data['instructions'] == 'Updated instructions'
|
assert skill_data['instructions'] == 'Updated instructions'
|
||||||
assert skill_data['description'] == 'Second'
|
assert skill_data['description'] == 'Second'
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_reload_skills_drops_box_skills_with_missing_package_root(self):
|
||||||
|
"""When Box reports a skill whose package_root is gone from the
|
||||||
|
LangBot-visible filesystem, the cache must drop it instead of
|
||||||
|
keeping a stale entry that would later produce a bad mount."""
|
||||||
|
from langbot.pkg.skill.manager import SkillManager
|
||||||
|
|
||||||
|
with tempfile.TemporaryDirectory() as live_dir:
|
||||||
|
ghost_dir = os.path.join(live_dir, '_does_not_exist')
|
||||||
|
box_service = SimpleNamespace(
|
||||||
|
available=True,
|
||||||
|
list_skills=AsyncMock(
|
||||||
|
return_value=[
|
||||||
|
_make_skill_data(name='alive', package_root=live_dir),
|
||||||
|
_make_skill_data(name='ghost', package_root=ghost_dir),
|
||||||
|
]
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
ap = _make_ap()
|
||||||
|
ap.box_service = box_service
|
||||||
|
mgr = SkillManager(ap)
|
||||||
|
|
||||||
|
await mgr.reload_skills()
|
||||||
|
|
||||||
|
assert list(mgr.skills) == ['alive']
|
||||||
|
# Warning fired with the dropped skill name so operators can see it.
|
||||||
|
warning_messages = [str(call.args[0]) for call in ap.logger.warning.call_args_list]
|
||||||
|
assert any('ghost' in msg and 'package_root missing' in msg for msg in warning_messages)
|
||||||
|
|
||||||
|
|
||||||
class TestSkillActivationHelper:
|
class TestSkillActivationHelper:
|
||||||
"""Skill activation is now Tool-Call based.
|
"""Skill activation is now Tool-Call based.
|
||||||
|
|||||||
Reference in New Issue
Block a user