Merge remote-tracking branch 'origin/master' into refactor/eba

# Conflicts:
#	pyproject.toml
#	uv.lock
This commit is contained in:
Junyan Qin
2026-06-11 01:05:14 +08:00
375 changed files with 64717 additions and 6102 deletions
@@ -0,0 +1,22 @@
from __future__ import annotations
from .. import group
@group.group_class('box', '/api/v1/box')
class BoxRouterGroup(group.RouterGroup):
    """HTTP endpoints under ``/api/v1/box`` that expose data from ``self.ap.box_service``."""

    async def initialize(self) -> None:
        """Register the three GET routes of this group (all require a user token)."""

        @self.route('/status', methods=['GET'], auth_type=group.AuthType.USER_TOKEN)
        async def _() -> str:
            # Result of the awaited service call is passed through untouched.
            return self.success(data=await self.ap.box_service.get_status())

        @self.route('/sessions', methods=['GET'], auth_type=group.AuthType.USER_TOKEN)
        async def _() -> str:
            return self.success(data=await self.ap.box_service.get_sessions())

        @self.route('/errors', methods=['GET'], auth_type=group.AuthType.USER_TOKEN)
        async def _() -> str:
            # Unlike the two calls above, get_recent_errors() is invoked without await.
            return self.success(data=self.ap.box_service.get_recent_errors())
@@ -0,0 +1,52 @@
from __future__ import annotations
import asyncio
import quart
from .. import group
@group.group_class('extensions', '/api/v1/extensions')
class ExtensionsRouterGroup(group.RouterGroup):
    """Unified API for installed extensions (plugins, MCP servers, skills)."""

    async def initialize(self) -> None:
        """Register the single listing route of this group."""

        @self.route('', methods=['GET'], auth_type=group.AuthType.USER_TOKEN_OR_API_KEY)
        async def _() -> quart.Response:
            """Return every plugin, MCP server and skill as one name-sorted list.

            The three sources are queried concurrently. A source whose query
            raised (or returned something other than a list) is left out of
            the response instead of failing the whole request.
            """
            plugins, mcp_servers, skills = await asyncio.gather(
                self.ap.plugin_connector.list_plugins(),
                self.ap.mcp_service.get_mcp_servers(contain_runtime_info=True),
                self.ap.skill_service.list_skills(),
                return_exceptions=True,
            )

            def _as_sort_text(value) -> str:
                # Anything that is not a non-empty string sorts as ''.
                return value.lower() if isinstance(value, str) else ''

            def _sort_key(item: dict) -> str:
                kind = item['type']
                if kind == 'plugin':
                    # Walk plugin -> manifest -> manifest -> metadata, tolerating
                    # missing, null or non-dict levels so one odd plugin
                    # cannot break the whole listing.
                    node = item['plugin']
                    for key in ('manifest', 'manifest', 'metadata'):
                        node = node.get(key) if isinstance(node, dict) else None
                    name = node.get('name') if isinstance(node, dict) else None
                    return _as_sort_text(name)
                if kind == 'mcp':
                    return (item['server'].get('name') or '').lower()
                if kind == 'skill':
                    skill = item['skill']
                    return (skill.get('display_name') or skill.get('name') or '').lower()
                return ''

            extensions: list[dict] = []
            # isinstance(..., list) filters out exceptions returned by gather().
            if isinstance(plugins, list):
                extensions.extend({'type': 'plugin', 'plugin': plugin} for plugin in plugins)
            if isinstance(mcp_servers, list):
                extensions.extend({'type': 'mcp', 'server': server} for server in mcp_servers)
            if isinstance(skills, list):
                extensions.extend({'type': 'skill', 'skill': skill} for skill in skills)

            extensions.sort(key=_sort_key)
            return self.success(data={'extensions': extensions})
@@ -73,15 +73,21 @@ class PipelinesRouterGroup(group.RouterGroup):
plugins = await self.ap.plugin_connector.list_plugins(component_kinds=pipeline_component_kinds)
mcp_servers = await self.ap.mcp_service.get_mcp_servers(contain_runtime_info=True)
# Get available skills
available_skills = await self.ap.skill_service.list_skills()
extensions_prefs = pipeline.get('extensions_preferences', {})
return self.success(
data={
'enable_all_plugins': extensions_prefs.get('enable_all_plugins', True),
'enable_all_mcp_servers': extensions_prefs.get('enable_all_mcp_servers', True),
'enable_all_skills': extensions_prefs.get('enable_all_skills', True),
'bound_plugins': extensions_prefs.get('plugins', []),
'available_plugins': plugins,
'bound_mcp_servers': extensions_prefs.get('mcp_servers', []),
'available_mcp_servers': mcp_servers,
'bound_skills': extensions_prefs.get('skills', []),
'available_skills': available_skills,
}
)
elif quart.request.method == 'PUT':
@@ -89,11 +95,19 @@ class PipelinesRouterGroup(group.RouterGroup):
json_data = await quart.request.json
enable_all_plugins = json_data.get('enable_all_plugins', True)
enable_all_mcp_servers = json_data.get('enable_all_mcp_servers', True)
enable_all_skills = json_data.get('enable_all_skills', True)
bound_plugins = json_data.get('bound_plugins', [])
bound_mcp_servers = json_data.get('bound_mcp_servers', [])
bound_skills = json_data.get('bound_skills', [])
await self.ap.pipeline_service.update_pipeline_extensions(
pipeline_uuid, bound_plugins, bound_mcp_servers, enable_all_plugins, enable_all_mcp_servers
pipeline_uuid,
bound_plugins,
bound_mcp_servers,
enable_all_plugins,
enable_all_mcp_servers,
bound_skills=bound_skills,
enable_all_skills=enable_all_skills,
)
return self.success()
@@ -43,8 +43,12 @@ class WebSocketChatRouterGroup(group.RouterGroup):
await quart.websocket.send(json.dumps({'type': 'error', 'message': 'WebSocket adapter not found'}))
return
# Find the owning bot for this pipeline (e.g. a web_page_bot)
owner_bot = self._find_owner_bot(pipeline_uuid)
# Dashboard pipeline-debug sessions must always run under the
# built-in websocket_proxy_bot identity. We deliberately do NOT
# resolve a web_page_bot owner here — even if one is bound to
# the same pipeline, debug requests must not be attributed to
# it. The embed widget path (`/api/v1/embed/<bot>/ws/connect`)
# is the one that carries the page-bot identity.
# 注册连接
connection = await ws_connection_manager.add_connection(
@@ -73,7 +77,7 @@ class WebSocketChatRouterGroup(group.RouterGroup):
)
# 创建接收和发送任务
receive_task = asyncio.create_task(self._handle_receive(connection, websocket_adapter, owner_bot))
receive_task = asyncio.create_task(self._handle_receive(connection, websocket_adapter))
send_task = asyncio.create_task(self._handle_send(connection))
# 等待任务完成
@@ -181,14 +185,7 @@ class WebSocketChatRouterGroup(group.RouterGroup):
except Exception as e:
return self.http_status(500, -1, f'Internal server error: {str(e)}')
def _find_owner_bot(self, pipeline_uuid: str):
"""Find a user-created bot (e.g. web_page_bot) that owns this pipeline."""
for bot in self.ap.platform_mgr.bots:
if bot.bot_entity.adapter == 'web_page_bot' and bot.bot_entity.use_pipeline_uuid == pipeline_uuid:
return bot
return None
async def _handle_receive(self, connection, websocket_adapter, owner_bot=None):
async def _handle_receive(self, connection, websocket_adapter):
"""处理接收消息的任务"""
try:
while connection.is_active:
@@ -213,7 +210,10 @@ class WebSocketChatRouterGroup(group.RouterGroup):
logger.debug(f'收到消息: {data} from {connection.connection_id}')
# 处理消息(不等待响应,响应会通过broadcast异步发送)
await websocket_adapter.handle_websocket_message(connection, data, owner_bot=owner_bot)
# owner_bot is intentionally NOT passed: the dashboard
# debug WebSocket must always run under the proxy bot,
# never under a coincidentally-bound web_page_bot.
await websocket_adapter.handle_websocket_message(connection, data)
elif message_type == 'disconnect':
# 客户端主动断开
@@ -1,5 +1,6 @@
import quart
import mimetypes
import asyncio
from ... import group
from langbot.pkg.utils import importutil
@@ -35,3 +36,617 @@ class AdaptersRouterGroup(group.RouterGroup):
return quart.Response(
importutil.read_resource_file_bytes(icon_path), mimetype=mimetypes.guess_type(icon_path)[0]
)
# In-memory registry of in-flight Feishu (Lark) registrations, keyed by session id.
_create_app_sessions: dict = {}
_SESSION_TTL = 900  # seconds (15 minutes)


def _cleanup_expired_sessions():
    """Forget sessions older than ``_SESSION_TTL`` and cancel their unfinished tasks."""
    import time

    current = time.time()
    stale_ids = []
    for sid, entry in _create_app_sessions.items():
        # A session without 'created_at' is treated as created at epoch 0.
        if current - entry.get('created_at', 0) > _SESSION_TTL:
            stale_ids.append(sid)

    # Removal happens in a second pass so the dict is not mutated while iterated.
    for sid in stale_ids:
        entry = _create_app_sessions.pop(sid, None)
        if not entry:
            continue
        pending = entry.get('task')
        if pending and not pending.done():
            pending.cancel()
@self.route('/lark/create-app', methods=['POST'])
async def _() -> str:
    """Start Feishu one-click app registration. Returns session_id + QR code URL.

    The SDK registration flow runs as a background task. This handler waits
    up to 10 seconds for either a QR code or an early failure:

    * QR code available -> success with ``session_id``, ``qr_url``, ``expire_at``.
    * flow failed first -> HTTP 502 carrying the recorded error message.
    * nothing in time   -> HTTP 504.

    In both failure cases the session is removed from the store, because the
    caller never receives its id and could not poll or delete it.
    """
    import uuid
    import time
    import lark_oapi as lark
    from lark_oapi.scene.registration.errors import AppAccessDeniedError, AppExpiredError

    _cleanup_expired_sessions()

    session_id = str(uuid.uuid4())
    loop = asyncio.get_running_loop()

    session = {
        'status': 'pending',
        'qr_url': None,
        'expire_at': None,
        'app_id': None,
        'app_secret': None,
        'error': None,
        'created_at': time.time(),
    }
    _create_app_sessions[session_id] = session

    def on_qr_code(info):
        # May be called from a background thread by the SDK;
        # use call_soon_threadsafe to safely update session state.
        def _update():
            session['qr_url'] = info['url']
            session['expire_at'] = time.time() + 600  # 10 minutes
            session['status'] = 'waiting'

        loop.call_soon_threadsafe(_update)

    async def run_registration():
        try:
            result = await lark.aregister_app(
                on_qr_code=on_qr_code,
                source='langbot',
            )
            session['status'] = 'success'
            session['app_id'] = result['client_id']
            session['app_secret'] = result['client_secret']
        except AppAccessDeniedError:
            session['status'] = 'error'
            session['error'] = 'User denied authorization'
        except AppExpiredError:
            session['status'] = 'error'
            session['error'] = 'QR code expired'
        except Exception as e:
            session['status'] = 'error'
            # Fall back to the exception type so the error is never empty
            # (an empty string would be invisible to the waiter below).
            session['error'] = str(e) or type(e).__name__

    task = asyncio.create_task(run_registration())
    session['task'] = task

    # Wait for the QR code or an early failure (max 10 seconds: 20 x 0.5 s).
    for _ in range(20):
        if session['qr_url'] or session['error']:
            break
        await asyncio.sleep(0.5)

    if session['error']:
        task.cancel()
        _create_app_sessions.pop(session_id, None)
        return self.http_status(502, -1, session['error'])

    if not session['qr_url']:
        task.cancel()
        _create_app_sessions.pop(session_id, None)
        session['status'] = 'error'
        session['error'] = 'Timeout waiting for QR code'
        return self.http_status(504, -1, 'Timeout waiting for QR code')

    return self.success(
        data={
            'session_id': session_id,
            'qr_url': session['qr_url'],
            'expire_at': session['expire_at'],
        }
    )
@self.route('/lark/create-app/status/<session_id>', methods=['GET'])
async def _(session_id: str) -> str:
    """Report the state of a Feishu registration session.

    Responds 404 for an unknown id. A ``success`` or ``error`` state is
    delivered once: the session is removed from the store in the same call.
    """
    session = _create_app_sessions.get(session_id)
    if not session:
        return self.http_status(404, -1, 'Session not found')

    state = session['status']
    payload = {'status': state}
    if state == 'success':
        payload['app_id'] = session['app_id']
        payload['app_secret'] = session['app_secret']
    elif state == 'error':
        payload['error'] = session['error']

    # Terminal states are handed out exactly once.
    if state in ('success', 'error'):
        _create_app_sessions.pop(session_id, None)

    return self.success(data=payload)
@self.route('/lark/create-app/<session_id>', methods=['DELETE'])
async def _(session_id: str) -> str:
    """Drop a Feishu registration session and cancel its task if still running.

    Always answers with success, whether or not the session existed.
    """
    removed = _create_app_sessions.pop(session_id, None)
    pending = removed.get('task') if removed else None
    if pending and not pending.done():
        pending.cancel()
    return self.success(data={})
# -----------------------------------------------------------------------
# WeChat QR Code Login
# -----------------------------------------------------------------------

# In-memory registry of in-flight WeChat logins, keyed by session id.
_weixin_login_sessions: dict = {}
_WEIXIN_SESSION_TTL = 600  # seconds (10 minutes)


def _cleanup_expired_weixin_sessions():
    """Forget WeChat login sessions older than the TTL and cancel their unfinished tasks."""
    import time

    current = time.time()
    stale_ids = []
    for sid, entry in _weixin_login_sessions.items():
        # A session without 'created_at' is treated as created at epoch 0.
        if current - entry.get('created_at', 0) > _WEIXIN_SESSION_TTL:
            stale_ids.append(sid)

    for sid in stale_ids:
        entry = _weixin_login_sessions.pop(sid, None)
        if not entry:
            continue
        pending = entry.get('task')
        if pending and not pending.done():
            pending.cancel()
@self.route('/weixin/login', methods=['POST'])
async def _() -> str:
    """Start WeChat QR code login. Returns session_id + QR code data URL.

    The login flow runs as a background task. This handler waits up to
    10 seconds for either a QR code or an early failure:

    * QR code available -> success with ``session_id``, ``qr_data_url``, ``expire_at``.
    * flow failed first -> HTTP 502 carrying the recorded error message.
    * nothing in time   -> HTTP 504.

    In both failure cases the session is removed from the store, because the
    caller never receives its id and could not poll or delete it.
    """
    import uuid
    import time
    from langbot.libs.openclaw_weixin_api.client import OpenClawWeixinClient, DEFAULT_BASE_URL

    _cleanup_expired_weixin_sessions()

    session_id = str(uuid.uuid4())
    loop = asyncio.get_running_loop()

    session = {
        'status': 'pending',
        'qr_data_url': None,
        'expire_at': None,
        'token': None,
        'base_url': None,
        'account_id': None,
        'error': None,
        'created_at': time.time(),
    }
    _weixin_login_sessions[session_id] = session

    client = OpenClawWeixinClient(
        base_url=DEFAULT_BASE_URL,
        token='',
    )

    async def run_login():
        try:

            def on_qrcode(qr_data_url: str, _qr_url: str):
                # Session state is updated on the event loop via call_soon_threadsafe.
                def _update():
                    session['qr_data_url'] = qr_data_url
                    session['expire_at'] = time.time() + 180
                    session['status'] = 'waiting'

                loop.call_soon_threadsafe(_update)

            result = await client.login(
                max_retries=1,
                poll_timeout_ms=180_000,
                on_qrcode=on_qrcode,
            )
            session['status'] = 'success'
            session['token'] = result.token
            session['base_url'] = result.base_url
            session['account_id'] = result.account_id
        except Exception as e:
            error_message = str(e)
            lowered = error_message.lower()
            if 'expired' in lowered or 'max retries exceeded' in lowered:
                session['status'] = 'expired'
                session['error'] = 'QR code expired'
            else:
                session['status'] = 'error'
                # Fall back to the exception type so the error is never empty
                # (an empty string would be invisible to the waiter below).
                session['error'] = error_message or type(e).__name__
        finally:
            # The client is closed on every exit path, including cancellation.
            await client.close()

    task = asyncio.create_task(run_login())
    session['task'] = task

    # Wait for the QR code or an early failure (max 10 seconds: 20 x 0.5 s).
    for _ in range(20):
        if session['qr_data_url'] or session['error']:
            break
        await asyncio.sleep(0.5)

    if session['error']:
        task.cancel()
        _weixin_login_sessions.pop(session_id, None)
        return self.http_status(502, -1, session['error'])

    if not session['qr_data_url']:
        task.cancel()
        _weixin_login_sessions.pop(session_id, None)
        session['status'] = 'error'
        session['error'] = 'Timeout waiting for QR code'
        return self.http_status(504, -1, 'Timeout waiting for QR code')

    return self.success(
        data={
            'session_id': session_id,
            'qr_data_url': session['qr_data_url'],
            'expire_at': session['expire_at'],
        }
    )
@self.route('/weixin/login/status/<session_id>', methods=['GET'])
async def _(session_id: str) -> str:
    """Report the state of a WeChat login session.

    Responds 404 for an unknown id. A ``success``, ``error`` or ``expired``
    state is delivered once: the session is removed in the same call.
    """
    session = _weixin_login_sessions.get(session_id)
    if not session:
        return self.http_status(404, -1, 'Session not found')

    state = session['status']
    payload = {
        'status': state,
        'qr_data_url': session['qr_data_url'],
        'expire_at': session['expire_at'],
    }

    if state == 'success':
        for field in ('token', 'base_url', 'account_id'):
            payload[field] = session[field]
    elif state in ('error', 'expired'):
        payload['error'] = session['error']

    # Terminal states are handed out exactly once.
    if state in ('success', 'error', 'expired'):
        _weixin_login_sessions.pop(session_id, None)

    return self.success(data=payload)
@self.route('/weixin/login/<session_id>', methods=['DELETE'])
async def _(session_id: str) -> str:
    """Drop a WeChat login session and cancel its task if still running.

    Always answers with success, whether or not the session existed.
    """
    removed = _weixin_login_sessions.pop(session_id, None)
    pending = removed.get('task') if removed else None
    if pending and not pending.done():
        pending.cancel()
    return self.success(data={})
# -----------------------------------------------------------------------
# DingTalk Device Flow QR Code Login
# -----------------------------------------------------------------------

# In-memory registry of in-flight DingTalk device flows, keyed by session id.
_dingtalk_sessions: dict = {}
_DINGTALK_SESSION_TTL = 600  # seconds (10 minutes, the QR code validity window)


def _cleanup_expired_dingtalk_sessions():
    """Forget DingTalk sessions older than the TTL and cancel their unfinished tasks."""
    import time

    current = time.time()
    stale_ids = []
    for sid, entry in _dingtalk_sessions.items():
        # A session without 'created_at' is treated as created at epoch 0.
        if current - entry.get('created_at', 0) > _DINGTALK_SESSION_TTL:
            stale_ids.append(sid)

    for sid in stale_ids:
        entry = _dingtalk_sessions.pop(sid, None)
        if not entry:
            continue
        pending = entry.get('task')
        if pending and not pending.done():
            pending.cancel()
@self.route('/dingtalk/create-app', methods=['POST'])
async def _() -> str:
    """Start DingTalk one-click app creation via Device Flow. Returns session_id + QR code URL.

    A background task performs three steps against the DingTalk service:
    init (obtain a nonce), begin (obtain the device code and QR URL), then
    poll until the authorization succeeds, fails or expires.

    This handler waits up to 10 seconds for the QR URL or an early failure:

    * QR URL available  -> success with ``session_id``, ``qr_url``, ``expire_at``.
    * flow failed first -> HTTP 502 carrying the recorded error message.
    * nothing in time   -> HTTP 504.

    In both failure cases the session is removed from the store, because the
    caller never receives its id and could not poll or delete it.
    """
    import uuid
    import time
    import aiohttp

    DINGTALK_BASE_URL = 'https://oapi.dingtalk.com'

    _cleanup_expired_dingtalk_sessions()

    session_id = str(uuid.uuid4())

    session = {
        'status': 'pending',
        'qr_url': None,
        'expire_at': None,
        'client_id': None,
        'client_secret': None,
        'error': None,
        'created_at': time.time(),
        'device_code': None,
        'interval': 5,
    }
    _dingtalk_sessions[session_id] = session

    def fail(message: str) -> None:
        # Single place that moves the session into its terminal error state.
        session['status'] = 'error'
        session['error'] = message

    async def run_device_flow():
        try:
            timeout = aiohttp.ClientTimeout(total=10)
            async with aiohttp.ClientSession(timeout=timeout) as http:
                # Step 1: Init — get nonce
                async with http.post(
                    f'{DINGTALK_BASE_URL}/app/registration/init',
                    json={'source': 'langbot'},
                ) as resp:
                    try:
                        data = await resp.json()
                    except (aiohttp.ContentTypeError, ValueError):
                        fail('Invalid response from DingTalk service')
                        return

                if data.get('errcode', -1) != 0:
                    # `or` keeps the message non-empty even for errmsg == '',
                    # so the waiting handler can detect the failure.
                    fail(data.get('errmsg') or 'Failed to init')
                    return

                nonce = data['nonce']

                # Step 2: Begin — get device_code + QR URL
                async with http.post(
                    f'{DINGTALK_BASE_URL}/app/registration/begin',
                    json={'nonce': nonce},
                ) as resp:
                    try:
                        data = await resp.json()
                    except (aiohttp.ContentTypeError, ValueError):
                        fail('Invalid response from DingTalk service')
                        return

                if data.get('errcode', -1) != 0:
                    fail(data.get('errmsg') or 'Failed to begin authorization')
                    return

                device_code = data['device_code']
                verification_uri_complete = data.get('verification_uri_complete', '')
                expires_in = data.get('expires_in', 7200)
                interval = data.get('interval', 5)

                session['device_code'] = device_code
                session['interval'] = interval
                session['qr_url'] = verification_uri_complete
                session['expire_at'] = time.time() + 600  # QR code valid for ~10 min
                session['status'] = 'waiting'

                # Step 3: Poll for authorization result
                deadline = time.time() + expires_in
                while time.time() < deadline:
                    await asyncio.sleep(interval)
                    async with http.post(
                        f'{DINGTALK_BASE_URL}/app/registration/poll',
                        json={'device_code': device_code},
                    ) as poll_resp:
                        try:
                            poll_data = await poll_resp.json()
                        except (aiohttp.ContentTypeError, ValueError):
                            # Undecodable poll response: try again next round.
                            continue

                    if poll_data.get('errcode', -1) != 0:
                        fail(poll_data.get('errmsg') or 'Poll failed')
                        return

                    status = poll_data.get('status', '')
                    if status == 'SUCCESS':
                        session['status'] = 'success'
                        session['client_id'] = poll_data.get('client_id', '')
                        session['client_secret'] = poll_data.get('client_secret', '')
                        return
                    elif status == 'FAIL':
                        fail(poll_data.get('fail_reason') or 'Authorization failed')
                        return
                    elif status == 'EXPIRED':
                        fail('QR code expired')
                        return
                    # status == 'WAITING': continue polling

                # Timeout
                fail('QR code expired')
        except asyncio.CancelledError:
            return
        except Exception as e:
            fail(str(e) or type(e).__name__)

    task = asyncio.create_task(run_device_flow())
    session['task'] = task

    # Wait for the QR code or an early failure (max 10 seconds: 20 x 0.5 s).
    for _ in range(20):
        if session['qr_url'] or session['error']:
            break
        await asyncio.sleep(0.5)

    if session['error']:
        task.cancel()
        _dingtalk_sessions.pop(session_id, None)
        return self.http_status(502, -1, session['error'])

    if not session['qr_url']:
        task.cancel()
        _dingtalk_sessions.pop(session_id, None)
        fail('Timeout waiting for QR code')
        return self.http_status(504, -1, 'Timeout waiting for QR code')

    return self.success(
        data={
            'session_id': session_id,
            'qr_url': session['qr_url'],
            'expire_at': session['expire_at'],
        }
    )
@self.route('/dingtalk/create-app/status/<session_id>', methods=['GET'])
async def _(session_id: str) -> str:
    """Report the state of a DingTalk Device Flow session.

    Expired sessions are purged first, so an id past its TTL yields 404.
    A ``success`` or ``error`` state is delivered once: the session is
    removed from the store in the same call.
    """
    _cleanup_expired_dingtalk_sessions()

    session = _dingtalk_sessions.get(session_id)
    if not session:
        return self.http_status(404, -1, 'Session not found')

    state = session['status']
    payload = {'status': state}
    if state == 'success':
        payload['client_id'] = session['client_id']
        payload['client_secret'] = session['client_secret']
    elif state == 'error':
        payload['error'] = session['error']

    # Terminal states are handed out exactly once.
    if state in ('success', 'error'):
        _dingtalk_sessions.pop(session_id, None)

    return self.success(data=payload)
@self.route('/dingtalk/create-app/<session_id>', methods=['DELETE'])
async def _(session_id: str) -> str:
    """Drop a DingTalk Device Flow session and cancel its task if still running.

    Always answers with success, whether or not the session existed.
    """
    removed = _dingtalk_sessions.pop(session_id, None)
    pending = removed.get('task') if removed else None
    if pending and not pending.done():
        pending.cancel()
    return self.success(data={})
# -----------------------------------------------------------------------
# WeComBot QR Code One-Click Create
# -----------------------------------------------------------------------

# In-memory registry of in-flight WeComBot creations, keyed by session id.
_wecombot_sessions: dict = {}
_WECOMBOT_SESSION_TTL = 300  # seconds (5 minutes, the WeCom QR validity window)


def _cleanup_expired_wecombot_sessions():
    """Forget WeComBot sessions older than the TTL and cancel their unfinished tasks."""
    import time

    current = time.time()
    stale_ids = []
    for sid, entry in _wecombot_sessions.items():
        # A session without 'created_at' is treated as created at epoch 0.
        if current - entry.get('created_at', 0) > _WECOMBOT_SESSION_TTL:
            stale_ids.append(sid)

    for sid in stale_ids:
        entry = _wecombot_sessions.pop(sid, None)
        if not entry:
            continue
        pending = entry.get('task')
        if pending and not pending.done():
            pending.cancel()
@self.route('/wecombot/create-bot', methods=['POST'])
async def _() -> str:
    """Start WeComBot one-click creation via QR code. Returns session_id + QR code URL.

    A background task asks the WeCom service for a QR code, then polls the
    scan result every 3 seconds until it reports success or the session
    TTL elapses.

    This handler waits up to 10 seconds for the QR URL or an early failure:

    * QR URL available  -> success with ``session_id``, ``qr_url``, ``expire_at``.
    * flow failed first -> HTTP 502 carrying the recorded error message.
    * nothing in time   -> HTTP 504.

    In both failure cases the session is removed from the store, because the
    caller never receives its id and could not poll or delete it.
    """
    import uuid
    import time
    import aiohttp

    WECOM_QC_GENERATE_URL = 'https://work.weixin.qq.com/ai/qc/generate'
    WECOM_QC_QUERY_URL = 'https://work.weixin.qq.com/ai/qc/query_result'

    _cleanup_expired_wecombot_sessions()

    session_id = str(uuid.uuid4())

    session = {
        'status': 'pending',
        'qr_url': None,
        'expire_at': None,
        'botid': None,
        'secret': None,
        'error': None,
        'created_at': time.time(),
        'scode': None,
        'task': None,
    }
    _wecombot_sessions[session_id] = session

    def fail(message: str) -> None:
        # Single place that moves the session into its terminal error state.
        session['status'] = 'error'
        session['error'] = message

    async def run_qr_flow():
        try:
            timeout = aiohttp.ClientTimeout(total=10)
            async with aiohttp.ClientSession(timeout=timeout) as http:
                # Step 1: Generate QR code
                async with http.get(
                    f'{WECOM_QC_GENERATE_URL}?source=langbot&plat=0',
                ) as resp:
                    try:
                        data = await resp.json()
                    except (aiohttp.ContentTypeError, ValueError):
                        fail('Invalid response from WeCom service')
                        return

                qr_info = data.get('data', {})
                if not qr_info.get('scode') or not qr_info.get('auth_url'):
                    # `or` keeps the message non-empty even for errmsg == '',
                    # so the waiting handler can detect the failure.
                    fail(data.get('errmsg') or 'Failed to generate QR code')
                    return

                scode = qr_info['scode']
                auth_url = qr_info['auth_url']

                session['scode'] = scode
                session['qr_url'] = auth_url
                session['expire_at'] = time.time() + _WECOMBOT_SESSION_TTL
                session['status'] = 'waiting'

                # Step 2: Poll for scan result
                deadline = time.time() + _WECOMBOT_SESSION_TTL
                while time.time() < deadline:
                    await asyncio.sleep(3)
                    async with http.get(
                        f'{WECOM_QC_QUERY_URL}?scode={scode}',
                    ) as poll_resp:
                        try:
                            poll_data = await poll_resp.json()
                        except (aiohttp.ContentTypeError, ValueError):
                            # Undecodable poll response: try again next round.
                            continue

                    status = poll_data.get('data', {}).get('status', '')
                    if status == 'success':
                        bot_info = poll_data.get('data', {}).get('bot_info', {})
                        if bot_info.get('botid') and bot_info.get('secret'):
                            session['status'] = 'success'
                            session['botid'] = bot_info['botid']
                            session['secret'] = bot_info['secret']
                            return
                        else:
                            fail('Scan succeeded but bot info is incomplete')
                            return
                    # Any other status: keep polling until the deadline.

                # Timeout
                fail('QR code expired')
        except asyncio.CancelledError:
            return
        except Exception as e:
            fail(str(e) or type(e).__name__)

    task = asyncio.create_task(run_qr_flow())
    session['task'] = task

    # Wait for the QR code or an early failure (max 10 seconds: 20 x 0.5 s).
    for _ in range(20):
        if session['qr_url'] or session['error']:
            break
        await asyncio.sleep(0.5)

    if session['error']:
        task.cancel()
        _wecombot_sessions.pop(session_id, None)
        return self.http_status(502, -1, session['error'])

    if not session['qr_url']:
        task.cancel()
        _wecombot_sessions.pop(session_id, None)
        fail('Timeout waiting for QR code')
        return self.http_status(504, -1, 'Timeout waiting for QR code')

    return self.success(
        data={
            'session_id': session_id,
            'qr_url': session['qr_url'],
            'expire_at': session['expire_at'],
        }
    )
@self.route('/wecombot/create-bot/status/<session_id>', methods=['GET'])
async def _(session_id: str) -> str:
    """Report the state of a WeComBot creation session.

    Expired sessions are purged first, so an id past its TTL yields 404.
    A ``success`` or ``error`` state is delivered once: the session is
    removed from the store in the same call.
    """
    _cleanup_expired_wecombot_sessions()

    session = _wecombot_sessions.get(session_id)
    if not session:
        return self.http_status(404, -1, 'Session not found')

    state = session['status']
    payload = {'status': state}
    if state == 'success':
        payload['botid'] = session['botid']
        payload['secret'] = session['secret']
    elif state == 'error':
        payload['error'] = session['error']

    # Terminal states are handed out exactly once.
    if state in ('success', 'error'):
        _wecombot_sessions.pop(session_id, None)

    return self.success(data=payload)
@self.route('/wecombot/create-bot/<session_id>', methods=['DELETE'])
async def _(session_id: str) -> str:
    """Drop a WeComBot creation session and cancel its task if still running.

    Always answers with success, whether or not the session existed.
    """
    removed = _wecombot_sessions.pop(session_id, None)
    pending = removed.get('task') if removed else None
    if pending and not pending.done():
        pending.cancel()
    return self.success(data={})
@@ -1,14 +1,20 @@
from __future__ import annotations
import base64
import io
import quart
import re
import httpx
import uuid
import os
import zipfile
import yaml
from urllib.parse import urlparse
import posixpath
import sqlalchemy
from .....core import taskmgr
from .....entity.persistence import plugin as persistence_plugin
from .. import group
from langbot_plugin.runtime.plugin.mgr import PluginInstallSource
@@ -39,8 +45,109 @@ def _normalize_plugin_asset_path(filepath: str) -> str | None:
return f'assets/{normalized}'
def _get_request_origin() -> str:
    """Return the public request origin, respecting reverse-proxy headers.

    The first entry of ``X-Forwarded-Proto`` / ``X-Forwarded-Host`` is used
    when it looks sane; otherwise the scheme/host of the request itself is
    used. The forwarded values are client-controllable and the result is
    interpolated into a Content-Security-Policy header, so anything that
    could smuggle extra tokens (spaces, ``;``, quotes, ...) is rejected.

    Returns:
        ``'<scheme>://<host>'``.
    """
    headers = quart.request.headers
    forwarded_proto = headers.get('X-Forwarded-Proto', '').split(',')[0].strip().lower()
    forwarded_host = headers.get('X-Forwarded-Host', '').split(',')[0].strip()

    # Only plain web schemes are meaningful for an origin.
    if forwarded_proto not in ('http', 'https'):
        forwarded_proto = ''
    # Host[:port], including bracketed IPv6 literals; nothing else.
    if not re.fullmatch(r'[A-Za-z0-9._\-\[\]:]+', forwarded_host):
        forwarded_host = ''

    scheme = forwarded_proto or quart.request.scheme
    host = forwarded_host or quart.request.host
    return f'{scheme}://{host}'
@group.group_class('plugins', '/api/v1/plugins')
class PluginsRouterGroup(group.RouterGroup):
@staticmethod
def _normalize_archive_path(path: str) -> str:
normalized = str(path or '').replace('\\', '/').strip('/')
return posixpath.normpath(normalized) if normalized else ''
@classmethod
def _component_source_path(cls, entry) -> str:
if isinstance(entry, dict):
return cls._normalize_archive_path(entry.get('path') or '')
return cls._normalize_archive_path(str(entry or ''))
@classmethod
def _count_component_configs(cls, component_config, archive_names: list[str]) -> int:
normalized_names = [cls._normalize_archive_path(name) for name in archive_names]
component_files: set[str] = set()
if isinstance(component_config, list):
return len(component_config)
if not isinstance(component_config, dict):
return 1 if component_config else 0
for entry in component_config.get('fromFiles') or []:
source_path = cls._component_source_path(entry)
if source_path and source_path in normalized_names:
component_files.add(source_path)
for entry in component_config.get('fromDirs') or []:
source_dir = cls._component_source_path(entry).rstrip('/')
if not source_dir:
continue
prefix = f'{source_dir}/'
for archive_name in normalized_names:
if not archive_name.startswith(prefix):
continue
if archive_name.lower().endswith(('.yaml', '.yml')):
component_files.add(archive_name)
if component_files:
return len(component_files)
return 1 if any(key in component_config for key in ('path', 'name', 'kind')) else 0
@classmethod
def _count_plugin_components(cls, components, archive_names: list[str]) -> dict[str, int]:
if not isinstance(components, dict):
return {}
component_counts: dict[str, int] = {}
for kind, component_config in components.items():
count = cls._count_component_configs(component_config, archive_names)
if count > 0:
component_counts[str(kind)] = count
return component_counts
@staticmethod
def _parse_github_repo_url(repo_url: str) -> dict | None:
raw_url = str(repo_url or '').strip()
if not raw_url:
return None
if not re.match(r'^[a-zA-Z][a-zA-Z0-9+.-]*://', raw_url):
raw_url = f'https://{raw_url}'
parsed = urlparse(raw_url)
if parsed.netloc.lower() not in ('github.com', 'www.github.com'):
return None
parts = [part for part in parsed.path.strip('/').split('/') if part]
if len(parts) < 2:
return None
owner = parts[0]
repo = parts[1]
if repo.endswith('.git'):
repo = repo[:-4]
if not owner or not repo:
return None
ref = ''
subdir = ''
if len(parts) >= 4 and parts[2] in ('tree', 'blob'):
ref = parts[3]
subdir = '/'.join(parts[4:]).strip('/')
return {
'owner': owner,
'repo': repo,
'ref': ref,
'subdir': subdir,
}
async def _check_extensions_limit(self) -> str | None:
"""Check if extensions limit is reached. Returns error response if limit exceeded, None otherwise."""
limitation = self.ap.instance_config.data.get('system', {}).get('limitation', {})
@@ -138,7 +245,15 @@ class PluginsRouterGroup(group.RouterGroup):
return self.http_status(404, -1, 'plugin not found')
if quart.request.method == 'GET':
return self.success(data={'config': plugin['plugin_config']})
result = await self.ap.persistence_mgr.execute_async(
sqlalchemy.select(persistence_plugin.PluginSetting.config)
.where(persistence_plugin.PluginSetting.plugin_author == author)
.where(persistence_plugin.PluginSetting.plugin_name == plugin_name)
)
persisted_config = result.scalar_one_or_none()
config = persisted_config if persisted_config is not None else plugin['plugin_config']
return self.success(data={'config': config})
elif quart.request.method == 'PUT':
data = await quart.request.json
@@ -189,7 +304,7 @@ class PluginsRouterGroup(group.RouterGroup):
# CSP for HTML pages served to sandboxed iframes (opaque origin).
# 'self' doesn't work in sandboxed iframes — use actual server origin.
if mime_type and mime_type.startswith('text/html'):
origin = f'{quart.request.scheme}://{quart.request.host}'
origin = _get_request_origin()
resp.headers['Content-Security-Policy'] = (
f'default-src {origin}; '
f"script-src {origin} 'unsafe-inline'; "
@@ -234,17 +349,37 @@ class PluginsRouterGroup(group.RouterGroup):
data = await quart.request.json
repo_url = data.get('repo_url', '')
# Parse GitHub repository URL to extract owner and repo
# Supports: https://github.com/owner/repo or github.com/owner/repo
pattern = r'github\.com/([^/]+)/([^/]+?)(?:\.git)?(?:/.*)?$'
match = re.search(pattern, repo_url)
if not match:
parsed_repo = self._parse_github_repo_url(repo_url)
if not parsed_repo:
return self.http_status(400, -1, 'Invalid GitHub repository URL')
owner, repo = match.groups()
owner = parsed_repo['owner']
repo = parsed_repo['repo']
requested_ref = parsed_repo['ref']
requested_subdir = parsed_repo['subdir']
try:
if requested_ref:
return self.success(
data={
'releases': [
{
'id': 0,
'tag_name': requested_ref,
'name': requested_ref,
'published_at': '',
'prerelease': False,
'draft': False,
'source_type': 'branch',
'archive_url': f'https://api.github.com/repos/{owner}/{repo}/zipball/{requested_ref}',
}
],
'owner': owner,
'repo': repo,
'source_subdir': requested_subdir,
}
)
# Fetch releases from GitHub API
url = f'https://api.github.com/repos/{owner}/{repo}/releases'
async with httpx.AsyncClient(
@@ -270,7 +405,14 @@ class PluginsRouterGroup(group.RouterGroup):
}
)
return self.success(data={'releases': formatted_releases, 'owner': owner, 'repo': repo})
return self.success(
data={
'releases': formatted_releases,
'owner': owner,
'repo': repo,
'source_subdir': requested_subdir,
}
)
except httpx.RequestError as e:
return self.http_status(500, -1, f'Failed to fetch releases: {str(e)}')
@@ -425,6 +567,62 @@ class PluginsRouterGroup(group.RouterGroup):
return self.success(data={'task_id': wrapper.id})
@self.route('/install/local/preview', methods=['POST'], auth_type=group.AuthType.USER_TOKEN_OR_API_KEY)
async def _() -> str:
    """Inspect an uploaded plugin package without installing it.

    Reads the root-level ``manifest.yaml``/``manifest.yml`` and optional
    ``requirements.txt`` from the uploaded zip and returns the manifest,
    its metadata, per-kind component counts, the requirement lines and
    basic file statistics.

    Responds 400 when the upload is missing, is not a zip archive, has no
    manifest, or the manifest is not valid UTF-8 YAML with a mapping at the
    top level; 500 for any other failure.
    """
    file = (await quart.request.files).get('file')
    if file is None:
        return self.http_status(400, -1, 'file is required')

    file_bytes = file.read()

    def _root_name(name: str) -> str:
        # Comparable form of an archive member name: '/' separators,
        # no surrounding slashes, lower case.
        return name.replace('\\', '/').strip('/').lower()

    try:
        with zipfile.ZipFile(io.BytesIO(file_bytes)) as zf:
            # Directory entries end with '/'; only real files are considered.
            names = [name for name in zf.namelist() if not name.endswith('/')]

            manifest_name = next(
                (name for name in names if _root_name(name) in ('manifest.yaml', 'manifest.yml')),
                None,
            )
            if manifest_name is None:
                return self.http_status(400, -1, 'manifest.yaml is required')

            # A broken manifest is a problem with the upload, not the server.
            try:
                manifest = yaml.safe_load(zf.read(manifest_name).decode('utf-8')) or {}
            except (UnicodeDecodeError, yaml.YAMLError) as exc:
                return self.http_status(400, -1, f'invalid manifest.yaml: {exc}')
            if not isinstance(manifest, dict):
                return self.http_status(400, -1, 'invalid manifest.yaml: top-level mapping expected')

            requirements: list[str] = []
            requirements_name = next(
                (name for name in names if _root_name(name) == 'requirements.txt'),
                None,
            )
            if requirements_name is not None:
                stripped_lines = (
                    line.strip()
                    for line in zf.read(requirements_name).decode('utf-8', errors='ignore').splitlines()
                )
                # Blank lines and '#' comments are not requirements.
                requirements = [line for line in stripped_lines if line and not line.startswith('#')]

            spec = manifest.get('spec')
            # A non-mapping spec simply declares no components.
            components = (spec.get('components') if isinstance(spec, dict) else None) or {}
            component_counts = self._count_plugin_components(components, names)
            component_types = list(component_counts.keys())

            return self.success(
                data={
                    'filename': file.filename or 'local plugin',
                    'size': len(file_bytes),
                    'manifest': manifest,
                    'metadata': manifest.get('metadata') or {},
                    'component_types': component_types,
                    'component_counts': component_counts,
                    'requirements': requirements,
                    'file_count': len(names),
                }
            )
    except zipfile.BadZipFile:
        return self.http_status(400, -1, 'invalid .lbpkg file')
    except Exception as exc:
        return self.http_status(500, -1, f'Failed to preview plugin package: {exc}')
@self.route('/config-files', methods=['POST'], auth_type=group.AuthType.USER_TOKEN)
async def _() -> str:
"""Upload a file for plugin configuration"""
@@ -31,6 +31,9 @@ class MCPRouterGroup(group.RouterGroup):
@self.route('/servers/<server_name>', methods=['GET', 'PUT', 'DELETE'], auth_type=group.AuthType.USER_TOKEN)
async def _(server_name: str) -> str:
"""获取、更新或删除MCP服务器配置"""
from urllib.parse import unquote
server_name = unquote(server_name)
server_data = await self.ap.mcp_service.get_mcp_server_by_name(server_name)
if server_data is None:
@@ -57,6 +60,9 @@ class MCPRouterGroup(group.RouterGroup):
@self.route('/servers/<server_name>/test', methods=['POST'], auth_type=group.AuthType.USER_TOKEN)
async def _(server_name: str) -> str:
"""Test the connection to an MCP server.

Passes the JSON request body as the server configuration to
``mcp_service.test_mcp_server`` and responds with the id of the task it returns.
"""
from urllib.parse import unquote
# Percent-decode the name taken from the URL path before using it.
# NOTE(review): presumably clients send names that are still encoded after routing — confirm.
server_name = unquote(server_name)
server_data = await quart.request.json
task_id = await self.ap.mcp_service.test_mcp_server(server_name=server_name, server_data=server_data)
return self.success(data={'task_id': task_id})
@@ -0,0 +1,190 @@
from __future__ import annotations
import quart
from langbot_plugin.box.errors import BoxError
from .. import group
@group.group_class('skills', '/api/v1/skills')
class SkillsRouterGroup(group.RouterGroup):
    """Skills management API endpoints.

    Handlers delegate to ``self.ap.skill_service`` (``preview_skill`` reads from
    ``self.ap.skill_mgr``). ``ValueError`` and ``BoxError`` are reported as
    HTTP 400; the install/preview handlers also report any other exception as
    HTTP 500.
    """

    async def initialize(self) -> None:
        """Register every skill route on this router group."""

        async def _read_json_object() -> dict | None:
            """Return the JSON request body when it is an object, else ``None``."""
            payload = await quart.request.json
            return payload if isinstance(payload, dict) else None

        def _invalid_body() -> quart.Response:
            """Build the 400 response used when the body is not a JSON object."""
            return self.http_status(400, -1, 'Request body must be a JSON object')

        def _github_payload_error(data: dict) -> str | None:
            """Return the validation error for a GitHub install payload, or ``None``."""
            for field in ('asset_url', 'owner', 'repo'):
                if not data.get(field):
                    return f'Missing required field: {field}'
            # Ignore query string and fragment when checking the file name.
            asset_url = str(data['asset_url']).strip().lower().split('?', 1)[0].split('#', 1)[0]
            if not asset_url.endswith('skill.md') and not data.get('release_tag'):
                return 'Missing required field: release_tag'
            return None

        @self.route('', methods=['GET', 'POST'], auth_type=group.AuthType.USER_TOKEN_OR_API_KEY)
        async def list_or_create_skills() -> quart.Response:
            """List all skills (GET) or create one from the JSON body (POST)."""
            if quart.request.method == 'GET':
                try:
                    skills = await self.ap.skill_service.list_skills()
                except (ValueError, BoxError) as exc:
                    return self.http_status(400, -1, str(exc))
                return self.success(data={'skills': skills})

            data = await _read_json_object()
            if data is None:
                return _invalid_body()
            if not data.get('name'):
                return self.http_status(400, -1, 'Missing required field: name')

            try:
                skill = await self.ap.skill_service.create_skill(data)
                return self.success(data={'skill': skill})
            except (ValueError, BoxError) as exc:
                return self.http_status(400, -1, str(exc))

        @self.route('/<skill_name>', methods=['GET', 'PUT', 'DELETE'], auth_type=group.AuthType.USER_TOKEN_OR_API_KEY)
        async def get_update_delete_skill(skill_name: str) -> quart.Response:
            """Fetch (GET), update (PUT) or delete (DELETE) a single skill."""
            if quart.request.method == 'GET':
                try:
                    skill = await self.ap.skill_service.get_skill(skill_name)
                except (ValueError, BoxError) as exc:
                    return self.http_status(400, -1, str(exc))
                if not skill:
                    return self.http_status(404, -1, 'Skill not found')
                return self.success(data={'skill': skill})

            if quart.request.method == 'PUT':
                data = await _read_json_object()
                if data is None:
                    return _invalid_body()
                try:
                    skill = await self.ap.skill_service.update_skill(skill_name, data)
                    return self.success(data={'skill': skill})
                except (ValueError, BoxError) as exc:
                    return self.http_status(400, -1, str(exc))

            try:
                await self.ap.skill_service.delete_skill(skill_name)
                return self.success()
            except (ValueError, BoxError) as exc:
                return self.http_status(400, -1, str(exc))

        @self.route('/<skill_name>/files', methods=['GET'], auth_type=group.AuthType.USER_TOKEN_OR_API_KEY)
        async def list_skill_files(skill_name: str) -> quart.Response:
            """List files in skill package directory."""
            path = quart.request.args.get('path', '.').strip()
            include_hidden = quart.request.args.get('include_hidden', 'false').lower() == 'true'

            try:
                result = await self.ap.skill_service.list_skill_files(
                    skill_name,
                    path=path,
                    include_hidden=include_hidden,
                )
                return self.success(data=result)
            except (ValueError, BoxError) as exc:
                return self.http_status(400, -1, str(exc))

        @self.route(
            '/<skill_name>/files/<path:path>', methods=['GET', 'PUT'], auth_type=group.AuthType.USER_TOKEN_OR_API_KEY
        )
        async def read_or_write_skill_file(skill_name: str, path: str) -> quart.Response:
            """Read or write a file in skill package."""
            if quart.request.method == 'GET':
                try:
                    result = await self.ap.skill_service.read_skill_file(skill_name, path)
                    return self.success(data=result)
                except (ValueError, BoxError) as exc:
                    return self.http_status(400, -1, str(exc))

            # PUT - write file
            data = await _read_json_object()
            if data is None:
                return _invalid_body()
            # Require ``content`` explicitly: defaulting it to '' would silently
            # truncate the target file whenever the field is omitted.
            content = data.get('content')
            if content is None:
                return self.http_status(400, -1, 'Missing required field: content')

            try:
                result = await self.ap.skill_service.write_skill_file(skill_name, path, content)
                return self.success(data=result)
            except (ValueError, BoxError) as exc:
                return self.http_status(400, -1, str(exc))

        @self.route('/<skill_name>/preview', methods=['GET'], auth_type=group.AuthType.USER_TOKEN_OR_API_KEY)
        async def preview_skill(skill_name: str) -> quart.Response:
            """Return the ``instructions`` of a skill known to the skill manager."""
            skill = self.ap.skill_mgr.get_skill_by_name(skill_name)
            if not skill:
                return self.http_status(404, -1, 'Skill not found')
            return self.success(data={'instructions': skill.get('instructions', '')})

        @self.route('/install/github', methods=['POST'], auth_type=group.AuthType.USER_TOKEN_OR_API_KEY)
        async def install_skill_from_github() -> quart.Response:
            """Install skill(s) from a GitHub asset described by the JSON body."""
            data = await _read_json_object()
            if data is None:
                return _invalid_body()
            error = _github_payload_error(data)
            if error is not None:
                return self.http_status(400, -1, error)

            try:
                skill = await self.ap.skill_service.install_from_github(data)
                return self.success(data={'skills': skill})
            except (ValueError, BoxError) as exc:
                return self.http_status(400, -1, str(exc))
            except Exception as exc:
                return self.http_status(500, -1, f'Failed to install skill: {exc}')

        @self.route('/install/github/preview', methods=['POST'], auth_type=group.AuthType.USER_TOKEN_OR_API_KEY)
        async def preview_skill_from_github() -> quart.Response:
            """Preview the skill(s) a GitHub asset would install."""
            data = await _read_json_object()
            if data is None:
                return _invalid_body()
            error = _github_payload_error(data)
            if error is not None:
                return self.http_status(400, -1, error)

            try:
                preview = await self.ap.skill_service.preview_install_from_github(data)
                return self.success(data={'skills': preview})
            except (ValueError, BoxError) as exc:
                return self.http_status(400, -1, str(exc))
            except Exception as exc:
                return self.http_status(500, -1, f'Failed to preview skill: {exc}')

        @self.route('/install/upload', methods=['POST'], auth_type=group.AuthType.USER_TOKEN_OR_API_KEY)
        async def install_skill_from_upload() -> quart.Response:
            """Install skill(s) from an uploaded zip (multipart field ``file``)."""
            file = (await quart.request.files).get('file')
            if file is None:
                return self.http_status(400, -1, 'file is required')

            form = await quart.request.form
            try:
                skill = await self.ap.skill_service.install_from_zip_upload(
                    file_bytes=file.read(),
                    filename=file.filename or '',
                    source_paths=form.getlist('source_paths'),
                )
                return self.success(data={'skills': skill})
            except (ValueError, BoxError) as exc:
                return self.http_status(400, -1, str(exc))
            except Exception as exc:
                return self.http_status(500, -1, f'Failed to install skill: {exc}')

        @self.route('/install/upload/preview', methods=['POST'], auth_type=group.AuthType.USER_TOKEN_OR_API_KEY)
        async def preview_skill_from_upload() -> quart.Response:
            """Preview the skill(s) contained in an uploaded zip."""
            file = (await quart.request.files).get('file')
            if file is None:
                return self.http_status(400, -1, 'file is required')

            try:
                preview = await self.ap.skill_service.preview_install_from_zip_upload(
                    file_bytes=file.read(),
                    filename=file.filename or '',
                )
                return self.success(data={'skills': preview})
            except (ValueError, BoxError) as exc:
                return self.http_status(400, -1, str(exc))
            except Exception as exc:
                return self.http_status(500, -1, f'Failed to preview skill: {exc}')

        @self.route('/scan', methods=['GET'], auth_type=group.AuthType.USER_TOKEN_OR_API_KEY)
        async def scan_skill_directory() -> quart.Response:
            """Scan the directory given by the ``path`` query parameter for skills."""
            path = quart.request.args.get('path', '').strip()
            if not path:
                return self.http_status(400, -1, 'Missing required parameter: path')

            try:
                result = await self.ap.skill_service.scan_directory_async(path)
                return self.success(data=result)
            except (ValueError, BoxError) as exc:
                return self.http_status(400, -1, str(exc))
@@ -31,6 +31,18 @@ class SystemRouterGroup(group.RouterGroup):
except Exception:
pass
# ``system.outbound_ips`` may be a comma-separated string instead of
# a list when injected via the SYSTEM__OUTBOUND_IPS env var into a
# pre-existing data/config.yaml that lacks the key (env overrides
# only coerce to list when the key already holds one).
outbound_ips = self.ap.instance_config.data.get('system', {}).get('outbound_ips', [])
if isinstance(outbound_ips, str):
outbound_ips = [ip.strip() for ip in outbound_ips.split(',') if ip.strip()]
elif isinstance(outbound_ips, list):
outbound_ips = [str(ip).strip() for ip in outbound_ips if str(ip).strip()]
else:
outbound_ips = []
return self.success(
data={
'version': constants.semantic_version,
@@ -49,6 +61,7 @@ class SystemRouterGroup(group.RouterGroup):
'disable_models_service', False
),
'limitation': self.ap.instance_config.data.get('system', {}).get('limitation', {}),
'outbound_ips': outbound_ips,
'wizard_status': wizard_status,
'wizard_progress': wizard_progress,
}
@@ -140,17 +153,6 @@ class SystemRouterGroup(group.RouterGroup):
async def _() -> str:
return self.success(data=await self.ap.maintenance_service.get_storage_analysis())
@self.route('/debug/exec', methods=['POST'], auth_type=group.AuthType.USER_TOKEN)
async def _() -> str:
"""Execute the raw request body as Python code when debug mode is enabled.

Responds with 403 unless ``constants.debug_mode`` is truthy.
"""
if not constants.debug_mode:
return self.http_status(403, 403, 'Forbidden')
py_code = await quart.request.data
ap = self.ap
# SECURITY: exec() runs caller-supplied code with ``ap`` in its globals; the
# only guards visible here are the debug-mode flag and the route's auth type.
# exec() always returns None, so the response ``data`` is always null.
return self.success(data=exec(py_code, {'ap': ap}))
@self.route(
'/debug/plugin/action',
methods=['POST'],
@@ -146,6 +146,7 @@ class UserRouterGroup(group.RouterGroup):
return self.fail(3, str(e))
except ValueError as e:
traceback.print_exc()
self.ap.logger.warning(f'Space OAuth callback failed: {e}')
return self.fail(1, str(e))
except Exception as e:
traceback.print_exc()
@@ -52,6 +52,9 @@ class ApiKeyService:
async def verify_api_key(self, key: str) -> bool:
"""Verify if an API key is valid"""
if not isinstance(key, str) or not key.startswith('lbk_'):
return False
result = await self.ap.persistence_mgr.execute_async(
sqlalchemy.select(apikey.ApiKey).where(apikey.ApiKey.key == key)
)
+12 -10
View File
@@ -111,11 +111,11 @@ class BotService:
# TODO: 检查配置信息格式
bot_data['uuid'] = str(uuid.uuid4())
# checkout the default pipeline
# bind the most recently updated pipeline if any exist
result = await self.ap.persistence_mgr.execute_async(
sqlalchemy.select(persistence_pipeline.LegacyPipeline).where(
persistence_pipeline.LegacyPipeline.is_default == True
)
sqlalchemy.select(persistence_pipeline.LegacyPipeline)
.order_by(persistence_pipeline.LegacyPipeline.updated_at.desc())
.limit(1)
)
pipeline = result.first()
if pipeline is not None:
@@ -132,24 +132,26 @@ class BotService:
async def update_bot(self, bot_uuid: str, bot_data: dict) -> None:
"""Update bot"""
if 'uuid' in bot_data:
del bot_data['uuid']
update_data = bot_data.copy()
if 'uuid' in update_data:
del update_data['uuid']
# set use_pipeline_name
if 'use_pipeline_uuid' in bot_data:
if 'use_pipeline_uuid' in update_data:
result = await self.ap.persistence_mgr.execute_async(
sqlalchemy.select(persistence_pipeline.LegacyPipeline).where(
persistence_pipeline.LegacyPipeline.uuid == bot_data['use_pipeline_uuid']
persistence_pipeline.LegacyPipeline.uuid == update_data['use_pipeline_uuid']
)
)
pipeline = result.first()
if pipeline is not None:
bot_data['use_pipeline_name'] = pipeline.name
update_data['use_pipeline_name'] = pipeline.name
else:
raise Exception('Pipeline not found')
await self.ap.persistence_mgr.execute_async(
sqlalchemy.update(persistence_bot.Bot).values(bot_data).where(persistence_bot.Bot.uuid == bot_uuid)
sqlalchemy.update(persistence_bot.Bot).values(update_data).where(persistence_bot.Bot.uuid == bot_uuid)
)
await self.ap.platform_mgr.remove_bot(bot_uuid)
+113 -2
View File
@@ -31,15 +31,126 @@ class KnowledgeService:
if not knowledge_engine_plugin_id:
raise ValueError('knowledge_engine_plugin_id is required')
creation_settings = kb_data.get('creation_settings', {})
retrieval_settings = kb_data.get('retrieval_settings', {})
# Validate required fields based on plugin's creation_schema and retrieval_schema
await self._validate_schema_required_fields(
knowledge_engine_plugin_id,
creation_settings,
retrieval_settings,
)
kb = await self.ap.rag_mgr.create_knowledge_base(
name=kb_data.get('name', 'Untitled'),
knowledge_engine_plugin_id=knowledge_engine_plugin_id,
creation_settings=kb_data.get('creation_settings', {}),
retrieval_settings=kb_data.get('retrieval_settings', {}),
creation_settings=creation_settings,
retrieval_settings=retrieval_settings,
description=kb_data.get('description', ''),
)
return kb.uuid
async def _validate_schema_required_fields(
    self,
    plugin_id: str,
    creation_settings: dict,
    retrieval_settings: dict,
) -> None:
    """Check user settings against the plugin's creation and retrieval schemas.

    Each schema is fetched from the plugin connector and handed to
    ``_check_required_fields`` together with the matching settings. The
    creation schema is checked first, then the retrieval schema.

    Args:
        plugin_id: Knowledge Engine plugin ID.
        creation_settings: User-provided creation settings.
        retrieval_settings: User-provided retrieval settings.

    Raises:
        ValueError: Propagated unchanged from the fetch or the check. Any
            other exception is logged as a warning and that schema is skipped.
    """
    checks = (
        ('creation_schema', 'get_rag_creation_schema', creation_settings, 'creation_settings'),
        ('retrieval_schema', 'get_rag_retrieval_schema', retrieval_settings, 'retrieval_settings'),
    )
    for schema_name, getter_name, provided, context in checks:
        try:
            fetch_schema = getattr(self.ap.plugin_connector, getter_name)
            fetched = await fetch_schema(plugin_id)
            self._check_required_fields(fetched, provided, context)
        except ValueError:
            raise
        except Exception as e:
            self.ap.logger.warning(f'Failed to get {schema_name} for validation: {e}')
def _check_required_fields(
    self,
    schema: dict | list,
    settings: dict,
    context: str,
) -> None:
    """Check required fields in schema against provided settings.

    A field is validated only when it is marked ``required`` and its
    ``show_if`` condition (operators ``eq``, ``neq``, ``in``) is met by the
    provided settings. A value counts as missing when it is ``None`` or a
    blank string.

    Args:
        schema: Plugin-defined schema (a list of field items, or a dict
            holding that list under the ``schema`` key).
        settings: User-provided settings values; anything that is not a dict
            is treated as empty so required fields still fail.
        context: Context name for error messages (e.g., 'creation_settings').

    Raises:
        ValueError: If a required field is missing or empty.
    """
    if not schema:
        return

    # schema can be a list directly, or a dict with 'schema' key
    items = schema if isinstance(schema, list) else schema.get('schema', [])
    if not items:
        return

    # A null/invalid settings payload must not crash; the resulting
    # AttributeError would bypass validation instead of failing it.
    if not isinstance(settings, dict):
        settings = {}

    for item in items:
        if not isinstance(item, dict):
            continue
        field_name = item.get('name')
        if not field_name or not item.get('required', False):
            continue

        # Conditionally shown fields are validated only when the condition holds.
        show_if = item.get('show_if')
        if isinstance(show_if, dict):
            depend_field = show_if.get('field')
            operator = show_if.get('operator')
            expected_value = show_if.get('value')
            if depend_field and operator:
                depend_value = settings.get(depend_field)
                if operator == 'eq' and depend_value != expected_value:
                    continue
                if operator == 'neq' and depend_value == expected_value:
                    continue
                if operator == 'in' and isinstance(expected_value, list) and depend_value not in expected_value:
                    continue

        value = settings.get(field_name)
        if value is None or (isinstance(value, str) and value.strip() == ''):
            label = item.get('label')
            if not isinstance(label, dict):
                label = {}
            # No defaults on the lookups: a default of field_name is always
            # truthy and would hide the later locales.
            field_label = label.get('en_US') or label.get('zh_Hans') or label.get('zh_Hant') or field_name
            raise ValueError(f'{field_label} is required ({context}.{field_name})')
async def update_knowledge_base(self, kb_uuid: str, kb_data: dict) -> None:
"""更新知识库"""
# Filter to only mutable fields
+18 -1
View File
@@ -152,7 +152,24 @@ class MCPService:
coroutine = runtime_mcp_session.refresh()
else:
runtime_mcp_session = await self.ap.tool_mgr.mcp_tool_loader.load_mcp_server(server_config=server_data)
coroutine = runtime_mcp_session.start()
# A transient test owns an isolated Box session. Always tear it down
# after the test completes (success or failure) so it does not leak.
test_session = runtime_mcp_session
async def _run_and_cleanup() -> None:
"""Start the transient test session and always shut it down afterwards.

An ``Exception`` raised by the shutdown is logged as a warning and not re-raised.
"""
try:
await test_session.start()
finally:
# Runs whether start() succeeded or raised.
try:
await test_session.shutdown()
except Exception as exc:
self.ap.logger.warning(
f'Failed to tear down transient MCP test session '
f'{test_session.server_name}: {type(exc).__name__}: {exc}'
)
coroutine = _run_and_cleanup()
ctx = taskmgr.TaskContext.new()
wrapper = self.ap.task_mgr.create_user_task(
+8 -8
View File
@@ -113,14 +113,9 @@ class PipelineService:
return pipeline_data['uuid']
async def update_pipeline(self, pipeline_uuid: str, pipeline_data: dict) -> None:
if 'uuid' in pipeline_data:
del pipeline_data['uuid']
if 'for_version' in pipeline_data:
del pipeline_data['for_version']
if 'stages' in pipeline_data:
del pipeline_data['stages']
if 'is_default' in pipeline_data:
del pipeline_data['is_default']
pipeline_data = pipeline_data.copy()
for protected_field in ('uuid', 'for_version', 'stages', 'is_default'):
pipeline_data.pop(protected_field, None)
await self.ap.persistence_mgr.execute_async(
sqlalchemy.update(persistence_pipeline.LegacyPipeline)
@@ -220,6 +215,8 @@ class PipelineService:
bound_mcp_servers: list[str] = None,
enable_all_plugins: bool = True,
enable_all_mcp_servers: bool = True,
bound_skills: list[str] = None,
enable_all_skills: bool = True,
) -> None:
"""Update the bound plugins and MCP servers for a pipeline"""
# Get current pipeline
@@ -237,9 +234,12 @@ class PipelineService:
extensions_preferences = pipeline.extensions_preferences or {}
extensions_preferences['enable_all_plugins'] = enable_all_plugins
extensions_preferences['enable_all_mcp_servers'] = enable_all_mcp_servers
extensions_preferences['enable_all_skills'] = enable_all_skills
extensions_preferences['plugins'] = bound_plugins
if bound_mcp_servers is not None:
extensions_preferences['mcp_servers'] = bound_mcp_servers
if bound_skills is not None:
extensions_preferences['skills'] = bound_skills
await self.ap.persistence_mgr.execute_async(
sqlalchemy.update(persistence_pipeline.LegacyPipeline)
+25 -2
View File
@@ -17,6 +17,24 @@ class ModelProviderService:
def __init__(self, ap: app.Application) -> None:
self.ap = ap
@staticmethod
def _normalize_api_keys(api_keys: str | list[str] | tuple[str, ...] | None) -> list[str]:
    """Return the given API key(s) as unique, stripped, non-empty strings.

    A single string is treated as a one-element collection. Entries that are
    not strings, or are blank after stripping, are dropped. The first
    occurrence of each key wins and input order is kept.
    """
    if api_keys is None:
        return []

    candidates = (api_keys,) if isinstance(api_keys, str) else tuple(api_keys)
    stripped = (entry.strip() for entry in candidates if isinstance(entry, str))
    # dict.fromkeys removes duplicates while preserving first-seen order.
    return list(dict.fromkeys(key for key in stripped if key))
async def get_providers(self) -> list[dict]:
"""Get all providers"""
result = await self.ap.persistence_mgr.execute_async(sqlalchemy.select(persistence_model.ModelProvider))
@@ -59,6 +77,7 @@ class ModelProviderService:
async def create_provider(self, provider_data: dict) -> str:
"""Create a new provider"""
provider_data['uuid'] = str(uuid.uuid4())
provider_data['api_keys'] = self._normalize_api_keys(provider_data.get('api_keys'))
await self.ap.persistence_mgr.execute_async(
sqlalchemy.insert(persistence_model.ModelProvider).values(**provider_data)
)
@@ -72,6 +91,8 @@ class ModelProviderService:
"""Update an existing provider"""
if 'uuid' in provider_data:
del provider_data['uuid']
if 'api_keys' in provider_data:
provider_data['api_keys'] = self._normalize_api_keys(provider_data.get('api_keys'))
await self.ap.persistence_mgr.execute_async(
sqlalchemy.update(persistence_model.ModelProvider)
.where(persistence_model.ModelProvider.uuid == provider_uuid)
@@ -141,6 +162,8 @@ class ModelProviderService:
async def find_or_create_provider(self, requester: str, base_url: str, api_keys: list) -> str:
"""Find existing provider or create new one"""
api_keys = self._normalize_api_keys(api_keys)
# Try to find existing provider with same config
result = await self.ap.persistence_mgr.execute_async(
sqlalchemy.select(persistence_model.ModelProvider).where(
@@ -168,7 +191,7 @@ class ModelProviderService:
'name': provider_name,
'requester': requester,
'base_url': base_url,
'api_keys': api_keys or [],
'api_keys': api_keys,
}
)
@@ -177,7 +200,7 @@ class ModelProviderService:
await self.ap.persistence_mgr.execute_async(
sqlalchemy.update(persistence_model.ModelProvider)
.where(persistence_model.ModelProvider.uuid == '00000000-0000-0000-0000-000000000000')
.values(api_keys=[api_key])
.values(api_keys=self._normalize_api_keys(api_key))
)
await self.ap.model_mgr.reload_provider('00000000-0000-0000-0000-000000000000')
+428
View File
@@ -0,0 +1,428 @@
from __future__ import annotations
import io
import inspect
import os
import posixpath
import zipfile
from typing import Optional
from urllib.parse import quote, unquote, urlparse
import httpx
from ....core import app
from ....skill.utils import parse_frontmatter
_PUBLIC_SKILL_FIELDS = (
'name',
'display_name',
'description',
'instructions',
'package_root',
'created_at',
'updated_at',
)
_GITHUB_ASSET_HOSTS = {
'github.com',
'api.github.com',
'objects.githubusercontent.com',
'githubusercontent.com',
'raw.githubusercontent.com',
'codeload.github.com',
}
class SkillService:
"""Filesystem-backed skill management service."""
ap: app.Application
def __init__(self, ap: app.Application) -> None:
"""Keep a reference to the application instance used by every method."""
self.ap = ap
def _box_service(self):
    """Return ``self.ap.box_service`` when it exists and is ``available``, else ``None``."""
    candidate = getattr(self.ap, 'box_service', None)
    if candidate is None:
        return None
    return candidate if getattr(candidate, 'available', False) else None
def _require_box(self, action: str):
    """Return the Box service or raise if it is not available.

    Box is the only source of truth for skills. Every read and write
    operation goes through it — there is no local-filesystem fallback.

    Args:
        action: Human-readable description of the attempted operation; it
            becomes the start of the error message.

    Raises:
        ValueError: When Box is not initialised, disabled, or unavailable.
    """
    usable_box = self._box_service()
    if usable_box is not None:
        return usable_box

    configured_box = getattr(self.ap, 'box_service', None)
    if configured_box is None:
        reason = 'not initialised'
    elif getattr(configured_box, 'enabled', True):
        detail = getattr(configured_box, '_connector_error', '') or 'currently unavailable'
        reason = f'unavailable: {detail}'
    else:
        reason = 'disabled in config (box.enabled = false)'

    message = (
        f'{action} requires the Box runtime, which is {reason}. '
        f'Enable Box in config.yaml (box.enabled = true) and ensure the '
        f'runtime is reachable before retrying.'
    )
    raise ValueError(message)
def _require_box_for_write(self, action: str) -> None:
    """Backwards-compatible alias of ``_require_box`` that discards the returned service."""
    _ = self._require_box(action)
    return None
@staticmethod
def _serialize_skill(skill: dict) -> dict:
    """Return a copy of ``skill`` limited to the keys in ``_PUBLIC_SKILL_FIELDS`` that it contains."""
    public_view = {}
    for field in _PUBLIC_SKILL_FIELDS:
        if field in skill:
            public_view[field] = skill.get(field)
    return public_view
async def list_skills(self) -> list[dict]:
    """Return the serialized skills reported by Box, or ``[]`` when Box is unavailable."""
    box = self._box_service()
    # When Box is unavailable, surface an empty list rather than raising —
    # the skills page should render cleanly, and the UI separately renders
    # a "Box disabled / unavailable" banner via useBoxStatus.
    if box is None:
        return []
    raw_skills = await box.list_skills()
    return [self._serialize_skill(entry) for entry in raw_skills]
async def get_skill(self, skill_name: str) -> Optional[dict]:
    """Return the serialized skill named ``skill_name``, or ``None`` when Box is unavailable or has no such skill."""
    box = self._box_service()
    if box is None:
        return None
    found = await box.get_skill(skill_name)
    if not found:
        return None
    return self._serialize_skill(found)
async def get_skill_by_name(self, name: str) -> Optional[dict]:
    """Alias of ``get_skill``."""
    skill = await self.get_skill(name)
    return skill
async def create_skill(self, data: dict) -> dict:
    """Create a skill through Box, reload the skill manager, and return the serialized skill."""
    box = self._require_box('Creating a skill')
    new_skill = await box.create_skill(data)
    await self._reload_skills()
    return self._serialize_skill(new_skill)
async def update_skill(self, skill_name: str, data: dict) -> dict:
    """Update a skill through Box, reload the skill manager, and return the serialized skill."""
    box = self._require_box('Editing a skill')
    changed_skill = await box.update_skill(skill_name, data)
    await self._reload_skills()
    return self._serialize_skill(changed_skill)
async def delete_skill(self, skill_name: str) -> bool:
    """Delete a skill through Box and reload the skill manager; returns ``True`` when no error was raised."""
    box = self._require_box('Deleting a skill')
    await box.delete_skill(skill_name)
    await self._reload_skills()
    return True
async def list_skill_files(
    self,
    skill_name: str,
    path: str = '.',
    include_hidden: bool = False,
    max_entries: int = 200,
) -> dict:
    """Forward a file-listing request for ``skill_name`` to Box and return its result."""
    box = self._require_box('Browsing skill files')
    listing = await box.list_skill_files(skill_name, path, include_hidden, max_entries)
    return listing
async def read_skill_file(self, skill_name: str, path: str) -> dict:
    """Forward a file-read request for ``skill_name`` to Box and return its result."""
    box = self._require_box('Reading a skill file')
    file_payload = await box.read_skill_file(skill_name, path)
    return file_payload
async def write_skill_file(self, skill_name: str, path: str, content: str) -> dict:
    """Write ``content`` to a skill file through Box, reload the skill manager, and return Box's result."""
    box = self._require_box('Editing skill files')
    write_outcome = await box.write_skill_file(skill_name, path, content)
    await self._reload_skills()
    return write_outcome
async def install_from_github(self, data: dict) -> list[dict]:
    """Install skill(s) from a GitHub asset and return them serialized.

    A URL that points at a ``SKILL.md`` file is handled by
    ``_install_github_skill_md``. Any other URL is validated, downloaded and
    passed to Box as a zip archive, after which the skill manager is reloaded.

    Args:
        data: Request payload; ``owner``, ``repo`` and ``asset_url`` are
            required, ``release_tag``, ``source_subdir``, ``source_paths``
            and ``source_path`` are optional.
    """
    box = self._require_box('Installing a skill from GitHub')
    owner = str(data['owner']).strip()
    repo = str(data['repo']).strip()
    release_tag = str(data.get('release_tag', '')).strip()
    requested_url = str(data['asset_url']).strip()

    if self._is_github_skill_md_url(requested_url):
        return await self._install_github_skill_md(requested_url, owner=owner, repo=repo, data=data)

    download_url = self._validate_github_asset_url(requested_url, owner=owner, repo=repo, release_tag=release_tag)
    subdir = str(data.get('source_subdir', '') or '').strip()
    archive = await self._download_github_asset(download_url)

    # The archive name is built from the repo and tag; 'source' is used when no tag remains.
    tag_part = release_tag.lstrip('v').replace('/', '-') or 'source'
    archive_name = f'{repo}-{tag_part}.zip'

    installed_skills = await box.install_skill_zip(
        archive,
        archive_name,
        source_paths=data.get('source_paths') or [],
        source_path=str(data.get('source_path', '') or ''),
        source_subdir=subdir,
    )
    await self._reload_skills()
    return [self._serialize_skill(entry) for entry in installed_skills]
async def preview_install_from_github(self, data: dict) -> list[dict]:
    """Return Box's preview of the skill(s) a GitHub asset would install.

    A URL that points at a ``SKILL.md`` file is handled by
    ``_preview_github_skill_md``. Any other URL is validated, downloaded and
    previewed as a zip archive. Nothing is installed.
    """
    box = self._require_box('Previewing a skill from GitHub')
    owner = str(data['owner']).strip()
    repo = str(data['repo']).strip()
    release_tag = str(data.get('release_tag', '')).strip()
    requested_url = str(data['asset_url']).strip()

    if self._is_github_skill_md_url(requested_url):
        return await self._preview_github_skill_md(requested_url, owner=owner, repo=repo)

    download_url = self._validate_github_asset_url(requested_url, owner=owner, repo=repo, release_tag=release_tag)
    subdir = str(data.get('source_subdir', '') or '').strip()
    archive = await self._download_github_asset(download_url)

    tag_part = release_tag.lstrip('v').replace('/', '-') or 'source'
    preview = await box.preview_skill_zip(
        archive,
        f'{repo}-{tag_part}.zip',
        source_subdir=subdir,
    )
    return preview
async def install_from_zip_upload(
    self,
    *,
    file_bytes: bytes,
    filename: str,
    source_paths: list[str] | None = None,
    source_path: str = '',
) -> list[dict]:
    """Install skill(s) from uploaded zip bytes through Box, reload the skill manager, and return them serialized."""
    box = self._require_box('Installing a skill from upload')
    selected_paths = source_paths or []
    installed_skills = await box.install_skill_zip(
        file_bytes,
        filename,
        source_paths=selected_paths,
        source_path=source_path,
    )
    await self._reload_skills()
    return [self._serialize_skill(entry) for entry in installed_skills]
async def preview_install_from_zip_upload(self, *, file_bytes: bytes, filename: str) -> list[dict]:
    """Return Box's preview of the skill(s) contained in uploaded zip bytes."""
    box = self._require_box('Previewing a skill upload')
    preview = await box.preview_skill_zip(file_bytes, filename)
    return preview
async def _install_github_skill_md(self, asset_url: str, *, owner: str, repo: str, data: dict) -> list[dict]:
    """Install the skill directory that contains the given GitHub ``SKILL.md``.

    The directory is repackaged as a zip by
    ``_download_github_skill_directory_as_zip``, installed through Box with an
    empty ``target_suffix``, and the skill manager is reloaded.
    """
    box = self._require_box('Installing a skill from GitHub')
    packaged = await self._download_github_skill_directory_as_zip(
        asset_url,
        owner=owner,
        repo=repo,
    )
    archive, archive_name = packaged[0], packaged[1]
    installed_skills = await box.install_skill_zip(
        archive,
        archive_name,
        source_paths=data.get('source_paths') or [],
        source_path=str(data.get('source_path', '') or ''),
        target_suffix='',
    )
    await self._reload_skills()
    return [self._serialize_skill(entry) for entry in installed_skills]
async def _preview_github_skill_md(self, asset_url: str, *, owner: str, repo: str) -> list[dict]:
    """Return Box's preview for the skill directory that contains the given GitHub ``SKILL.md``."""
    box = self._require_box('Previewing a skill from GitHub')
    packaged = await self._download_github_skill_directory_as_zip(
        asset_url,
        owner=owner,
        repo=repo,
    )
    archive, package_name = packaged[0], packaged[2]
    preview = await box.preview_skill_zip(archive, f'{package_name}.zip', target_suffix='')
    return preview
async def reload_skills(self) -> list[dict]:
    """Reload the skill manager, then return the current skill list."""
    await self._reload_skills()
    refreshed = await self.list_skills()
    return refreshed
async def scan_directory_async(self, path: str) -> dict:
    """Forward a skill-directory scan of ``path`` to Box and return its result."""
    box = self._require_box('Scanning a skill directory')
    scan_result = await box.scan_skill_directory(path)
    return scan_result
async def _reload_skills(self) -> None:
    """Call ``self.ap.skill_mgr.reload_skills()`` when it exists, awaiting the result if it is awaitable."""
    manager = getattr(self.ap, 'skill_mgr', None)
    reloader = getattr(manager, 'reload_skills', None)
    if callable(reloader):
        outcome = reloader()
        if inspect.isawaitable(outcome):
            await outcome
async def _download_github_asset(self, asset_url: str) -> bytes:
    """GET ``asset_url`` (redirects followed, timeout 120) and return the response body.

    ``raise_for_status()`` is called on the response before the body is read.
    """
    client = httpx.AsyncClient(follow_redirects=True, timeout=120)
    async with client:
        response = await client.get(asset_url)
        response.raise_for_status()
        body = response.content
    return body
async def _download_github_skill_directory_as_zip(
    self, asset_url: str, *, owner: str, repo: str
) -> tuple[bytes, str, str]:
    """Download a repository archive and repackage one skill directory as a zip.

    The ``SKILL.md`` URL is parsed to find the ref and file path, the
    repository archive for that ref is downloaded from codeload.github.com,
    and the directory containing the ``SKILL.md`` is copied into a new
    in-memory zip under the resolved package name.

    Returns:
        ``(zip_bytes, filename, package_name)`` where ``filename`` is
        ``f'{package_name}.zip'``.

    Raises:
        ValueError: If the download is not a valid zip, the ``SKILL.md`` is
            missing from it or not valid UTF-8, or the skill directory holds
            no files.
    """
    info = self._parse_github_skill_md_url(asset_url, owner=owner, repo=repo)
    # owner/repo are caller-supplied; percent-encode them so characters such
    # as '?' or '#' cannot change the structure of the archive URL.
    archive_url = (
        f'https://codeload.github.com/{quote(owner, safe="")}/{quote(repo, safe="")}'
        f'/zip/{quote(info["ref"], safe="/")}'
    )
    archive_bytes = await self._download_github_asset(archive_url)

    try:
        source_archive = zipfile.ZipFile(io.BytesIO(archive_bytes), 'r')
    except zipfile.BadZipFile as exc:
        raise ValueError('GitHub repository archive must be a valid .zip archive') from exc

    with source_archive as source_zip:
        skill_entry = self._find_github_skill_archive_entry(source_zip, info['file_path'])
        try:
            skill_md_content = source_zip.read(skill_entry).decode('utf-8')
        except UnicodeDecodeError as exc:
            raise ValueError('GitHub SKILL.md must be valid UTF-8 text') from exc

        package_name = self._resolve_github_skill_md_package_name(skill_md_content, info['package_name'])
        source_skill_dir = posixpath.dirname(posixpath.normpath(skill_entry.filename))

        buffer = io.BytesIO()
        with zipfile.ZipFile(buffer, 'w', zipfile.ZIP_DEFLATED) as target_zip:
            self._copy_github_skill_directory_to_zip(source_zip, target_zip, source_skill_dir, package_name)

    return buffer.getvalue(), f'{package_name}.zip', package_name
def _find_github_skill_archive_entry(self, archive: zipfile.ZipFile, file_path: str) -> zipfile.ZipInfo:
    """Return the archive member whose path below the top-level folder matches ``file_path``.

    The first path component of every member is ignored and the comparison is
    case-insensitive on normalized paths. Directory entries and members with
    no parent folder are skipped.

    Raises:
        ValueError: If no member matches.
    """
    wanted = posixpath.normpath(file_path).lower()
    for entry in archive.infolist():
        if entry.is_dir():
            continue
        _top_folder, separator, inner_path = posixpath.normpath(entry.filename).partition('/')
        if separator and inner_path.lower() == wanted:
            return entry
    raise ValueError(f'GitHub archive does not contain requested SKILL.md: {file_path}')
def _copy_github_skill_directory_to_zip(
    self,
    source_zip: zipfile.ZipFile,
    target_zip: zipfile.ZipFile,
    source_skill_dir: str,
    package_name: str,
) -> None:
    """Copy everything below ``source_skill_dir`` into ``target_zip`` under ``package_name/``.

    Each copied member keeps its timestamp and external attributes and is
    stored deflated. Directory entries are written as empty members with a
    trailing slash.

    Raises:
        ValueError: If a member's relative path would leave the skill
            directory, or if no file was copied.
    """
    base_dir = posixpath.normpath(source_skill_dir)
    base_prefix = f'{base_dir}/'
    files_written = 0

    for entry in source_zip.infolist():
        entry_path = posixpath.normpath(entry.filename)
        inside_skill_dir = entry_path == base_dir or entry_path.startswith(base_prefix)
        if not inside_skill_dir:
            continue

        rel = posixpath.relpath(entry_path, base_dir)
        if rel in ('', '.'):
            # The skill directory itself is not copied.
            continue
        escapes_dir = rel == '..' or rel.startswith('../') or posixpath.isabs(rel)
        if escapes_dir:
            raise ValueError(f'GitHub archive contains an unsafe skill path: {entry.filename}')

        is_directory = entry.is_dir()
        destination = f'{package_name}/{rel}'
        if is_directory and not destination.endswith('/'):
            destination = f'{destination}/'

        copied_info = zipfile.ZipInfo(destination, date_time=entry.date_time)
        copied_info.external_attr = entry.external_attr
        copied_info.compress_type = zipfile.ZIP_DEFLATED

        if is_directory:
            target_zip.writestr(copied_info, b'')
        else:
            target_zip.writestr(copied_info, source_zip.read(entry))
            files_written += 1

    if files_written == 0:
        raise ValueError('GitHub skill directory is empty')
def _uploaded_skill_target_stem(self, filename: str) -> str:
stem = os.path.splitext(os.path.basename(str(filename or '').strip()))[0]
safe_stem = ''.join(ch if ch.isalnum() or ch in ('-', '_') else '-' for ch in stem).strip('-_')
if not safe_stem:
safe_stem = 'uploaded-skill'
return safe_stem
@staticmethod
def _is_github_skill_md_url(asset_url: str) -> bool:
parsed = urlparse(str(asset_url or '').strip())
normalized_path = posixpath.normpath(parsed.path or '/')
return normalized_path.lower().endswith('/skill.md')
def _parse_github_skill_md_url(self, asset_url: str, *, owner: str, repo: str) -> dict:
parsed = urlparse(str(asset_url or '').strip())
if parsed.scheme != 'https' or not parsed.netloc:
raise ValueError('asset_url must be a valid HTTPS GitHub SKILL.md URL')
host = parsed.netloc.lower()
path_parts = [unquote(part) for part in (parsed.path or '').split('/') if part]
if host == 'github.com':
if (
len(path_parts) < 5
or path_parts[0] != owner
or path_parts[1] != repo
or path_parts[2]
not in (
'blob',
'raw',
)
):
raise ValueError('GitHub SKILL.md URL must point to the requested owner/repo blob path')
ref = path_parts[3]
file_path = '/'.join(path_parts[4:])
elif host == 'raw.githubusercontent.com':
if len(path_parts) < 4 or path_parts[0] != owner or path_parts[1] != repo:
raise ValueError('GitHub SKILL.md URL must point to the requested owner/repo raw path')
ref = path_parts[2]
file_path = '/'.join(path_parts[3:])
else:
raise ValueError('asset_url must point to a GitHub SKILL.md file')
normalized_file_path = posixpath.normpath(file_path)
normalized_file_path_lower = normalized_file_path.lower()
if normalized_file_path_lower != 'skill.md' and not normalized_file_path_lower.endswith('/skill.md'):
raise ValueError('GitHub skill import requires a URL ending with SKILL.md')
parent_dir = posixpath.basename(posixpath.dirname(normalized_file_path)) or repo
return {
'ref': ref,
'file_path': normalized_file_path,
'package_name': self._uploaded_skill_target_stem(parent_dir),
}
def _resolve_github_skill_md_package_name(self, content: str, fallback: str) -> str:
    """Pick the package name for an imported SKILL.md.

    The ``name`` entry of the front matter is preferred; when it is missing
    or fails skill-name validation, *fallback* is validated and used
    instead. A ``ValueError`` from validating *fallback* propagates.
    """
    frontmatter, _body = parse_frontmatter(content)
    preferred = str(frontmatter.get('name') or fallback or '').strip()
    try:
        resolved = self._validate_skill_name(preferred)
    except ValueError:
        resolved = self._validate_skill_name(fallback)
    return resolved
@staticmethod
def _validate_github_asset_url(asset_url: str, *, owner: str, repo: str, release_tag: str) -> str:
parsed = urlparse(str(asset_url).strip())
if parsed.scheme != 'https' or not parsed.netloc:
raise ValueError('asset_url must be a valid HTTPS GitHub asset URL')
host = parsed.netloc.lower()
if host not in _GITHUB_ASSET_HOSTS:
raise ValueError('asset_url must point to a GitHub-hosted release asset or archive')
normalized_path = posixpath.normpath(parsed.path or '/')
allowed_prefixes = [
f'/repos/{owner}/{repo}/',
f'/{owner}/{repo}/',
]
if not any(normalized_path.startswith(prefix) for prefix in allowed_prefixes):
raise ValueError('asset_url does not match the requested owner/repo')
if release_tag and release_tag not in parsed.path and release_tag not in parsed.query:
raise ValueError('asset_url does not match the requested release_tag')
return parsed.geturl()
@staticmethod
def _validate_skill_name(name: str) -> str:
name = str(name or '').strip()
if not name:
raise ValueError('Skill name is required')
if not name.replace('-', '').replace('_', '').isalnum():
raise ValueError('Skill name can only contain letters, numbers, hyphens and underscores')
if len(name) > 64:
raise ValueError('Skill name cannot exceed 64 characters')
return name
+5
View File
@@ -0,0 +1,5 @@
"""LangBot Box runtime package."""
from .workspace import BoxWorkspaceSession
__all__ = ['BoxWorkspaceSession']
+364
View File
@@ -0,0 +1,364 @@
from __future__ import annotations
import asyncio
import json
import os
import sys
import typing
from typing import TYPE_CHECKING
from urllib.parse import urlparse
from langbot_plugin.entities.io.actions.enums import CommonAction
from langbot_plugin.runtime.io.handler import Handler
from langbot_plugin.runtime.io.connection import Connection
from langbot_plugin.box.client import ActionRPCBoxClient
from langbot_plugin.box.errors import BoxRuntimeUnavailableError
from langbot_plugin.box.actions import LangBotToBoxAction
from ..utils import platform
from ..utils.managed_runtime import ManagedRuntimeConnector
if TYPE_CHECKING:
from ..core import app as core_app
# Default Docker Compose service name for the standalone Box container.
_DOCKER_BOX_HOST = 'langbot_box'
# Port used whenever an endpoint does not specify one; also the default
# relay / action-RPC port of a locally launched Box runtime.
_DEFAULT_PORT = 5410
# Seconds between two heartbeat pings sent to the Box runtime.
_HEARTBEAT_INTERVAL_SEC = 20
# Top-level keys under ``box`` that are LangBot-internal and should not be
# forwarded to the Box runtime.
_INTERNAL_BOX_CONFIG_KEYS = frozenset({'runtime'})
def _get_box_config(ap) -> dict:
"""Return the 'box' section from instance config.
Environment-variable overrides are handled uniformly by
``LoadConfigStage._apply_env_overrides_to_config`` using the
``SECTION__SUBSECTION__KEY`` convention (e.g. ``BOX__LOCAL__HOST_ROOT``,
``BOX__LOCAL__ALLOWED_MOUNT_ROOTS="/a,/b"``) before this is read, so no
box-specific env parsing is needed here.
"""
instance_config = getattr(ap, 'instance_config', None)
config_data = getattr(instance_config, 'data', {}) if instance_config is not None else {}
return dict(config_data.get('box', {}) or {})
def _get_runtime_endpoint(box_cfg: dict) -> str:
runtime_cfg = box_cfg.get('runtime') or {}
return str(runtime_cfg.get('endpoint', '')).strip()
def _filter_config_for_runtime(box_cfg: dict) -> dict:
    """Return a new dict with the LangBot-internal top-level keys removed.

    The result is what gets forwarded to the Box runtime; *box_cfg* itself
    is not modified.
    """
    forwarded: dict = {}
    for key, value in box_cfg.items():
        if key in _INTERNAL_BOX_CONFIG_KEYS:
            continue
        forwarded[key] = value
    return forwarded
def resolve_box_ws_relay_url(ap: core_app.Application) -> str:
    """Derive the WS relay base URL used for managed-process attach.

    The WS relay serves the ``/v1/sessions/{id}/managed-process/ws`` endpoint
    on the *relay* port (default 5410).

    Resolution order:

    1. An explicit ``box.runtime.endpoint`` — only its scheme, host and port
       are used; ``ws``/``wss`` (and a missing scheme) are mapped to
       ``http``/``https``. Endpoint-specific paths are appended by the SDK
       client.
    2. The Docker Compose service host when running on the ``docker``
       platform.
    3. ``127.0.0.1`` otherwise.
    """
    endpoint = _get_runtime_endpoint(_get_box_config(ap))

    if not endpoint:
        # In Docker, the relay lives on the box runtime container.
        if platform.get_platform() == 'docker':
            default_host = _DOCKER_BOX_HOST
        else:
            default_host = '127.0.0.1'
        return f'http://{default_host}:{_DEFAULT_PORT}'

    parts = urlparse(endpoint)
    websocket_to_http = {'ws': 'http', 'wss': 'https'}
    scheme = parts.scheme or 'ws'
    scheme = websocket_to_http.get(scheme, scheme)
    host = parts.hostname or '127.0.0.1'
    port = parts.port or _DEFAULT_PORT
    return f'{scheme}://{host}:{port}'
class BoxRuntimeConnector(ManagedRuntimeConnector):
    """Connect to the Box runtime via action RPC.

    Transport decision (mirrors Plugin runtime logic):
    1. Docker / --standalone-box / explicit runtime.endpoint -> WebSocket to external Box process
    2. Windows (non-Docker) -> subprocess + WebSocket (Windows lacks async stdio pipe)
    3. Unix / macOS -> subprocess + stdio pipe

    The connector owns up to three background tasks (controller, handler,
    heartbeat); ``dispose()`` cancels them and terminates a locally spawned
    runtime process.
    """

    def __init__(
        self,
        ap: core_app.Application,
        runtime_disconnect_callback: typing.Callable[
            ['BoxRuntimeConnector'], typing.Coroutine[typing.Any, typing.Any, None]
        ]
        | None = None,
    ):
        """Prepare the connector; no connection is opened here.

        Args:
            ap: Application object; its logger and instance config are used.
            runtime_disconnect_callback: Optional coroutine function awaited
                with this connector when a WebSocket connection fails or
                drops.
        """
        super().__init__(ap)
        self.runtime_disconnect_callback = runtime_disconnect_callback
        self.configured_runtime_endpoint = self._load_configured_runtime_endpoint()
        self.ws_relay_base_url = resolve_box_ws_relay_url(ap)
        self.client = ActionRPCBoxClient(logger=ap.logger)
        # Set once a connection is established (see _make_connection_callback).
        self._handler: Handler | None = None
        self._handler_task: asyncio.Task | None = None
        self._ctrl_task: asyncio.Task | None = None
        self._heartbeat_task: asyncio.Task | None = None
        # Parse the relay URL once for reuse.
        parsed = urlparse(self.ws_relay_base_url)
        self._relay_host = parsed.hostname or '127.0.0.1'
        self._relay_port = parsed.port or _DEFAULT_PORT
        # Box config without LangBot-internal keys; sent to the runtime via
        # the LANGBOT_BOX_CONFIG env var and the INIT action.
        self._filtered_box_config = _filter_config_for_runtime(_get_box_config(ap))

    def uses_websocket(self) -> bool:
        """Whether the connector should use WebSocket to reach the Box runtime.

        True when:
        - Running inside Docker (Box runtime is a separate container)
        - The ``--standalone-box`` CLI flag was passed
        - An explicit ``runtime.endpoint`` was configured

        When this is True the Box runtime lives in a separate process with its
        own filesystem view (container, pod sidecar, or remote host), so paths
        it reports (e.g. skill ``package_root``) are NOT resolvable on the
        LangBot side. When False, Box runs as a stdio child process that shares
        LangBot's filesystem.
        """
        return bool(
            self.configured_runtime_endpoint
            or platform.get_platform() == 'docker'
            or platform.use_websocket_to_connect_box_runtime()
        )

    # Backwards-compatible private alias.
    def _uses_websocket(self) -> bool:
        """Alias of :meth:`uses_websocket` kept for existing callers."""
        return self.uses_websocket()

    async def initialize(self) -> None:
        """Connect to the Box runtime and start the heartbeat task.

        Picks the transport (remote WebSocket, local subprocess + WebSocket
        on ``win32`` without a configured endpoint, or local stdio). Any
        exception raised by the chosen transport path propagates, in which
        case the heartbeat is not started.
        """
        if self._uses_websocket():
            if platform.get_platform() == 'win32' and not self.configured_runtime_endpoint:
                await self._start_subprocess_then_ws()
            else:
                await self._connect_remote_ws()
        else:
            await self._start_local_stdio()
        # Start heartbeat after successful connection
        if self._heartbeat_task is None:
            self._heartbeat_task = asyncio.create_task(self._heartbeat_loop())

    # -- heartbeat -----------------------------------------------------------
    async def _heartbeat_loop(self) -> None:
        """Periodically ping the Box runtime to detect silent disconnections.

        Runs until cancelled; ping failures are only logged at debug level.
        """
        while True:
            await asyncio.sleep(_HEARTBEAT_INTERVAL_SEC)
            try:
                await self.ping()
                self.ap.logger.debug('Heartbeat to Box runtime success.')
            except Exception as e:
                self.ap.logger.debug(f'Failed to heartbeat to Box runtime: {e}')

    async def ping(self) -> None:
        """Send a PING action to the runtime.

        Raises:
            BoxRuntimeUnavailableError: If no handler has been set yet.
        """
        if self._handler is None:
            raise BoxRuntimeUnavailableError('Box runtime is not connected')
        await self._handler.call_action(CommonAction.PING, {})

    # -- transport paths -----------------------------------------------------
    async def _start_local_stdio(self) -> None:
        """Launch box server as subprocess and connect via stdio (Unix/macOS).

        Raises:
            BoxRuntimeUnavailableError: If the handshake does not complete
                within 30 seconds or the connection callback reported an error.
        """
        from langbot_plugin.runtime.io.controllers.stdio.client import StdioClientController

        self.ap.logger.info('Use stdio to connect to box runtime')
        python_path = sys.executable
        env = os.environ.copy()
        if self._filtered_box_config:
            env['LANGBOT_BOX_CONFIG'] = json.dumps(self._filtered_box_config)
        # ``connected`` is set on success AND on handshake failure; the
        # ``connect_error`` list tells the two outcomes apart.
        connected = asyncio.Event()
        connect_error: list[Exception] = []
        ctrl = StdioClientController(
            command=python_path,
            # Launched through the same CLI entry point as the plugin runtime
            # (cli.__init__ <subcommand>); `-s` selects the stdio transport,
            # mirroring `rt -s`.
            args=['-m', 'langbot_plugin.cli.__init__', 'box', '-s', '--ws-control-port', str(self._relay_port)],
            env=env,
        )
        self._ctrl_task = asyncio.create_task(
            ctrl.run(self._make_connection_callback('stdio', connected, connect_error))
        )
        try:
            await asyncio.wait_for(connected.wait(), timeout=30.0)
        except asyncio.TimeoutError:
            raise BoxRuntimeUnavailableError('box runtime subprocess did not connect in time')
        if connect_error:
            raise BoxRuntimeUnavailableError(f'box runtime connection failed: {connect_error[0]}')
        # Kept so dispose() can terminate the child process.
        self._subprocess = ctrl.process

    async def _start_subprocess_then_ws(self) -> None:
        """Launch box server as detached subprocess, then connect via WS (Windows)."""
        self.ap.logger.info('(windows) Use cmd to launch box runtime and communicate via ws')
        env = os.environ.copy()
        if self._filtered_box_config:
            env['LANGBOT_BOX_CONFIG'] = json.dumps(self._filtered_box_config)
        python_path = sys.executable
        # Launched through the same CLI entry point as the plugin runtime
        # (cli.__init__ <subcommand>); no flag => WebSocket transport.
        self.runtime_subprocess = await asyncio.create_subprocess_exec(
            python_path,
            '-m',
            'langbot_plugin.cli.__init__',
            'box',
            '--ws-control-port',
            str(self._relay_port),
            env=env,
        )
        self.runtime_subprocess_task = asyncio.create_task(self.runtime_subprocess.wait())
        ws_url = f'ws://localhost:{self._relay_port}/rpc/ws'
        await self._connect_ws(ws_url, '(windows) WebSocket')

    async def _connect_remote_ws(self) -> None:
        """Connect to a remote (or Docker) box server via WebSocket."""
        ws_url = self._resolve_rpc_ws_url()
        self.ap.logger.info(f'Use WebSocket to connect to box runtime ({ws_url})')
        await self._connect_ws(ws_url, 'WebSocket')

    # -- helpers -------------------------------------------------------------
    def _resolve_rpc_ws_url(self) -> str:
        """Determine the action-RPC WebSocket URL.

        All endpoints share a single port; action RPC is at ``/rpc/ws``.
        A configured endpoint contributes only scheme, host and port, with
        ``http``/``https`` mapped to ``ws``/``wss``.
        """
        if self.configured_runtime_endpoint:
            base = self.configured_runtime_endpoint.rstrip('/')
            parsed = urlparse(base)
            scheme = parsed.scheme or 'ws'
            if scheme in ('http', 'https'):
                scheme = 'wss' if scheme == 'https' else 'ws'
            host = parsed.hostname or '127.0.0.1'
            port = parsed.port or _DEFAULT_PORT
            return f'{scheme}://{host}:{port}/rpc/ws'
        if platform.get_platform() == 'docker':
            return f'ws://{_DOCKER_BOX_HOST}:{_DEFAULT_PORT}/rpc/ws'
        return f'ws://localhost:{self._relay_port}/rpc/ws'

    async def _connect_ws(self, ws_url: str, transport_name: str) -> None:
        """Shared WebSocket connection procedure.

        Args:
            ws_url: Action-RPC WebSocket URL to connect to.
            transport_name: Label used in the "connected" log message.

        Raises:
            BoxRuntimeUnavailableError: On a 30 second timeout or when the
                connection / handshake reported an error.
        """
        from langbot_plugin.runtime.io.controllers.ws.client import WebSocketClientController

        connected = asyncio.Event()
        connect_error: list[Exception] = []

        async def on_connect_failed(ctrl, exc):
            # Invoked by the controller when the connection cannot be made.
            if exc is not None:
                self.ap.logger.error(f'Failed to connect to Box runtime ({ws_url}): {exc}')
            else:
                self.ap.logger.error(f'Failed to connect to Box runtime ({ws_url}), trying to reconnect...')
            connect_error.append(exc or BoxRuntimeUnavailableError('ws connection failed'))
            # Wake up the waiter below so it can raise immediately.
            connected.set()
            if self.runtime_disconnect_callback is not None:
                await self.runtime_disconnect_callback(self)

        ctrl = WebSocketClientController(ws_url=ws_url, make_connection_failed_callback=on_connect_failed)
        self._ctrl_task = asyncio.create_task(
            ctrl.run(self._make_connection_callback(transport_name, connected, connect_error))
        )
        try:
            await asyncio.wait_for(connected.wait(), timeout=30.0)
        except asyncio.TimeoutError:
            raise BoxRuntimeUnavailableError(f'box runtime ws connection timed out ({ws_url})')
        if connect_error:
            raise BoxRuntimeUnavailableError(f'box runtime connection failed: {connect_error[0]}')

    def _make_connection_callback(
        self,
        transport_name: str,
        connected: asyncio.Event,
        connect_error: list[Exception],
    ):
        """Build the per-connection callback handed to a controller's ``run``.

        The returned coroutine function installs a ``Handler`` for the new
        connection, performs the PING (+ optional INIT) handshake, signals
        *connected*, and then waits on the handler task so it can report a
        later disconnect.

        Args:
            transport_name: Label used in the "connected" log message.
            connected: Event set when the handshake succeeded or failed.
            connect_error: List that receives the handshake exception, if any.
        """

        async def new_connection_callback(connection: Connection) -> None:
            handler = Handler(connection)
            self._handler = handler
            self.client.set_handler(handler)
            self._handler_task = asyncio.create_task(handler.run())
            try:
                await handler.call_action(CommonAction.PING, {})
                if self._filtered_box_config:
                    await handler.call_action(LangBotToBoxAction.INIT, self._filtered_box_config)
                    self.ap.logger.debug('Sent box configuration to Box runtime via INIT.')
                self.ap.logger.info(f'Connected to Box runtime via {transport_name}.')
                connected.set()
                # Blocks for the lifetime of the connection.
                await self._handler_task
            except Exception as exc:
                if not connected.is_set():
                    # Handshake failure: report it to the waiting connect call.
                    connect_error.append(exc)
                    connected.set()
                    return
            # If we reach here, handler.run() returned normally (connection
            # closed) or raised after the initial handshake succeeded.
            # Either way, treat it as a disconnect.
            if connected.is_set():
                if self._uses_websocket():
                    self.ap.logger.error('Disconnected from Box runtime, trying to reconnect...')
                    if self.runtime_disconnect_callback is not None:
                        await self.runtime_disconnect_callback(self)
                else:
                    self.ap.logger.error(
                        'Disconnected from Box runtime via stdio. '
                        'Cannot automatically reconnect — please restart LangBot.'
                    )

        return new_connection_callback

    # -- lifecycle -----------------------------------------------------------
    def dispose(self) -> None:
        """Cancel background tasks and stop any locally spawned runtime process.

        Task attributes are reset to ``None`` so a later ``initialize()``
        starts fresh ones.
        """
        if self._heartbeat_task is not None:
            self._heartbeat_task.cancel()
            self._heartbeat_task = None
        if self._handler_task is not None:
            self._handler_task.cancel()
            self._handler_task = None
        if self._ctrl_task is not None:
            self._ctrl_task.cancel()
            self._ctrl_task = None
        # stdio-managed subprocess (stored as self._subprocess by _start_local_stdio)
        if hasattr(self, '_subprocess') and self._subprocess is not None and self._subprocess.returncode is None:
            self.ap.logger.info('Terminating managed box runtime process...')
            self._subprocess.terminate()
        # Remaining subprocess cleanup is delegated to _dispose_subprocess,
        # which is not defined in this class — presumably inherited from
        # ManagedRuntimeConnector and covering ``self.runtime_subprocess``
        # from the Windows path (TODO confirm).
        self._dispose_subprocess()

    # -- config helpers ------------------------------------------------------
    def _load_configured_runtime_endpoint(self) -> str:
        """Return the configured ``box.runtime.endpoint`` (``''`` when unset)."""
        return _get_runtime_endpoint(_get_box_config(self.ap))
+98
View File
@@ -0,0 +1,98 @@
"""Three-layer security policy for LangBot Box.
The design separates concerns into three independent layers, aligned with
OpenCode / OpenClaw patterns:
1. **SandboxPolicy** — *where* tools run (host vs sandbox).
2. **ToolPolicy** — *which* tools are allowed (allow/deny lists).
3. **ElevatedPolicy** — *whether* a single exec call may temporarily
   escape the default sandbox boundary.
These three layers are orthogonal:
- ToolPolicy is a hard boundary; ``elevated`` cannot bypass a denied tool.
- SandboxPolicy decides the default execution location.
- ElevatedPolicy only affects ``exec`` and only when the framework allows it.
"""
from __future__ import annotations
import enum
from typing import Sequence
# ── Layer 1: Sandbox Policy ──────────────────────────────────────────
class SandboxMode(str, enum.Enum):
    """When agent execution is routed through the sandbox.

    Members are ``str`` subclasses, so they compare equal to their plain
    string values.
    """

    # Sandbox disabled; all exec runs on the host.
    OFF = 'off'

    # Only non-default sessions are sandboxed (e.g. sub-agents, MCP).
    NON_DEFAULT = 'non_default'

    # Every agent exec call is routed through the sandbox.
    ALL = 'all'
class SandboxPolicy:
    """Layer 1: decide whether an execution context runs inside the sandbox."""

    def __init__(self, mode: SandboxMode = SandboxMode.ALL):
        self.mode = mode

    def should_sandbox(self, *, is_default_session: bool = True) -> bool:
        """Return True when the given context must be sandboxed.

        Args:
            is_default_session: Whether the call originates from the default
                session; only consulted in ``NON_DEFAULT`` mode.
        """
        mode = self.mode
        if mode == SandboxMode.ALL:
            return True
        if mode == SandboxMode.OFF:
            return False
        # NON_DEFAULT: everything except the default session is sandboxed.
        return not is_default_session
# ── Layer 2: Tool Policy ─────────────────────────────────────────────
class ToolPolicy:
    """Layer 2: which tools the current agent/session may use.

    Rules:
    - ``deny`` always takes precedence over ``allow``.
    - An empty ``allow`` list means "all tools allowed" (no allowlist filter).
    - ``elevated`` cannot bypass a denied tool.
    """

    def __init__(
        self,
        allow: Sequence[str] = (),
        deny: Sequence[str] = (),
    ):
        # Stored as frozensets: immutable and O(1) membership tests.
        self._allow: frozenset[str] = frozenset(allow)
        self._deny: frozenset[str] = frozenset(deny)

    def is_tool_allowed(self, tool_name: str) -> bool:
        """Return True when *tool_name* passes both the deny and allow lists."""
        denied = tool_name in self._deny
        passes_allowlist = not self._allow or tool_name in self._allow
        return not denied and passes_allowlist
# ── Layer 3: Elevated Policy ─────────────────────────────────────────
class ElevatedPolicy:
    """Layer 3: whether ``exec`` may request temporary privilege escalation.

    ``elevated`` only applies to the ``exec`` tool. It means "run this
    command outside the default sandbox boundary" (e.g. with network, or
    on the host). The framework decides whether to honor the request.

    ``require_approval`` is stored for callers to consult; it does not
    influence :meth:`is_elevation_permitted`.
    """

    def __init__(self, *, allow_elevated: bool = False, require_approval: bool = True):
        self.require_approval = require_approval
        self.allow_elevated = allow_elevated

    def is_elevation_permitted(self) -> bool:
        """Return the configured ``allow_elevated`` flag."""
        permitted = self.allow_elevated
        return permitted
+865
View File
@@ -0,0 +1,865 @@
from __future__ import annotations
import asyncio
import collections
import datetime as _dt
import enum
import json
import os
from typing import TYPE_CHECKING
import pydantic
from langbot_plugin.box.client import BoxRuntimeClient
from .connector import BoxRuntimeConnector, _get_box_config
from langbot_plugin.box.errors import BoxError, BoxValidationError
from langbot_plugin.box.models import (
BUILTIN_PROFILES,
BoxExecutionResult,
BoxManagedProcessInfo,
BoxManagedProcessSpec,
BoxProfile,
BoxSpec,
)
# Pydantic adapter for ``int``; its use is outside this view — presumably
# for coercing integer config values (TODO confirm).
_INT_ADAPTER = pydantic.TypeAdapter(int)
# Short alias for the UTC timezone object.
_UTC = _dt.timezone.utc
# Capacity of the bounded deque holding the most recent Box errors.
_MAX_RECENT_ERRORS = 50
# Number of bytes in one mebibyte.
_MIB = 1024 * 1024
def _is_path_under(path: str, root: str) -> bool:
"""Check whether *path* equals *root* or is a child of *root*."""
return path == root or path.startswith(f'{root}{os.sep}')
if TYPE_CHECKING:
from ..core import app as core_app
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
class BoxService:
def __init__(
    self,
    ap: core_app.Application,
    client: BoxRuntimeClient | None = None,
    output_limit_chars: int = 4000,
):
    """Set up the service state; no connection is attempted here.

    Args:
        ap: Application object (config, logger, event loop).
        client: Optional pre-built runtime client. When omitted, a
            ``BoxRuntimeConnector`` is created and its client is used.
        output_limit_chars: Stored as ``self.output_limit_chars``; its
            consumers are outside this view.
    """
    self.ap = ap
    self._enabled = self._load_enabled()
    self._runtime_connector: BoxRuntimeConnector | None = None
    if client is None:
        # Always construct a connector — its __init__ is side-effect free
        # (no I/O, no subprocess). When ``box.enabled = false`` we simply
        # skip ``connector.initialize()`` so no connection is attempted.
        self._runtime_connector = BoxRuntimeConnector(ap, runtime_disconnect_callback=self._on_runtime_disconnect)
        client = self._runtime_connector.client
    self.client = client
    self.output_limit_chars = output_limit_chars
    # Values read from configuration by the _load_* helpers (defined
    # elsewhere in the class). The order is kept as-is because later
    # loaders may rely on earlier attributes — TODO confirm.
    self.host_root = self._load_host_root()
    self.allowed_mount_roots = self._load_allowed_mount_roots()
    self.default_workspace = self._load_default_workspace()
    self.profile = self._load_profile()
    self.custom_image = self._load_custom_image()
    self.workspace_quota_mb = self._load_workspace_quota_mb()
    # Bounded history of recent errors (oldest entries are dropped).
    self._recent_errors: collections.deque[dict] = collections.deque(maxlen=_MAX_RECENT_ERRORS)
    # Task created by dispose() to run shutdown() on the event loop.
    self._shutdown_task = None
    # Runtime availability and the last connection error message.
    self._available = False
    self._connector_error: str = ''
    # Guards against starting more than one reconnect loop at a time.
    self._reconnecting = False
    # Optional explicit override for shares_filesystem_with_box. None means
    # "derive from the connector transport". Set by tests / embedders that
    # know the real LangBot<->Box filesystem topology.
    self._shares_filesystem_with_box_override: bool | None = None
@property
def enabled(self) -> bool:
    """Whether Box is enabled in config.

    False means the operator has deliberately turned the sandbox off via
    ``box.enabled = false``. Disabled and "enabled but unavailable" are
    reported as the same ``available = False`` to consumers, but
    distinguished in get_status.
    """
    return self._enabled
async def initialize(self):
    """Prepare the default workspace and connect to the Box runtime.

    Never raises for connection problems: on failure the service is marked
    unavailable and the error text is kept in ``_connector_error``. When
    Box is disabled by config no connection is attempted at all.
    """
    self._ensure_default_workspace()

    if not self._enabled:
        # Disabled by config: do NOT connect to a remote runtime, do NOT
        # fork a stdio subprocess. Every consumer of box_service should
        # gate on ``available`` and degrade gracefully.
        self._available = False
        self._connector_error = 'Box runtime is disabled in config (box.enabled = false)'
        self.ap.logger.info(
            'Box runtime disabled by config; sandbox features (exec/read/write/edit, '
            'skill add/edit, stdio MCP) will be unavailable.'
        )
        return

    connector = self._runtime_connector
    try:
        # With an injected client there is no connector; initialize the
        # client directly instead.
        backend = connector if connector is not None else self.client
        await backend.initialize()
        self._available = True
        self._connector_error = ''
        self.ap.logger.info(
            f'LangBot Box runtime initialized: profile={self.profile.name} '
            f'default_workspace={self.default_workspace or "(none)"}'
        )
    except Exception as exc:
        self.ap.logger.warning(f'LangBot Box runtime unavailable, sandbox features disabled: {exc}')
        self._available = False
        self._connector_error = str(exc)
async def _on_runtime_disconnect(self, connector: BoxRuntimeConnector) -> None:
"""Called by the connector when the Box runtime connection drops.
Spawns a background reconnection loop so the caller is not blocked.
Skipped entirely when Box is disabled by config — that path should
never have connected in the first place.
"""
if not self._enabled:
return
if self._reconnecting:
return # Another reconnect loop is already running
self._reconnecting = True
self._available = False
self._connector_error = 'Disconnected from Box runtime'
self.ap.logger.warning('Box runtime disconnected, sandbox features temporarily disabled.')
asyncio.create_task(self._reconnect_loop(connector))
async def _reconnect_loop(self, connector: BoxRuntimeConnector) -> None:
    """Retry reconnection with exponential backoff (3s → 60s max).

    Loops until ``connector.initialize()`` succeeds. Each attempt first
    disposes the connector so stale tasks are cancelled. The
    ``_reconnecting`` flag is always cleared on exit, including on
    cancellation.

    Args:
        connector: Connector to dispose and re-initialize.
    """
    delay = 3
    max_delay = 60
    try:
        while True:
            self.ap.logger.info(f'Attempting to reconnect to Box runtime in {delay}s...')
            await asyncio.sleep(delay)
            try:
                # Tear down leftovers of the previous connection first.
                connector.dispose()
                await connector.initialize()
                self._available = True
                self._connector_error = ''
                self.ap.logger.info('Box runtime reconnected, sandbox features restored.')
                return
            except Exception as exc:
                self._connector_error = str(exc)
                self.ap.logger.warning(f'Box runtime reconnection failed: {exc}')
                # Double the wait, capped at max_delay.
                delay = min(delay * 2, max_delay)
    finally:
        # Allow a future disconnect to start a new loop.
        self._reconnecting = False
@property
def available(self) -> bool:
    """Whether the Box runtime is currently connected and usable.

    False before a successful ``initialize()``, when Box is disabled by
    config, and while a reconnect is in progress.
    """
    return self._available
@property
def shares_filesystem_with_box(self) -> bool:
    """Whether LangBot and the Box runtime share a filesystem view.

    This is True only when Box runs as a local stdio child process of
    LangBot (same container/host). In that case paths the Box runtime
    reports — notably skill ``package_root`` — resolve identically on the
    LangBot side, so LangBot may validate them against its own filesystem.

    It is False for every separated deployment (Docker Compose, k8s
    sidecar, ``--standalone-box``, or an explicit ``runtime.endpoint``),
    where the Box runtime owns its own filesystem and LangBot must trust
    the paths it reports rather than checking them locally.

    When Box is wired up with an injected client (tests, custom embeds)
    there is no connector to introspect; we conservatively report False so
    LangBot never wrongly drops Box-reported skills. An explicit override
    can be set via ``_shares_filesystem_with_box_override`` (used by tests
    and any embedder that knows the real topology).
    """
    override = self._shares_filesystem_with_box_override
    if override is not None:
        return override

    connector = self._runtime_connector
    if connector is None:
        return False
    # stdio transport <=> same filesystem; any WebSocket transport means
    # the runtime lives elsewhere.
    return not connector.uses_websocket()
async def execute_spec_payload(
    self,
    spec_payload: dict,
    query: pipeline_query.Query,
    *,
    skip_host_mount_validation: bool = False,
) -> dict:
    """Validate a raw spec payload, run it in the Box and return the result.

    Steps: build the spec, log it, enforce the workspace quota, execute,
    enforce the quota again (cleaning up the session when it is exceeded),
    log a summary and serialize the result. Every ``BoxError`` raised along
    the way is recorded via ``_record_error`` before being re-raised.

    Args:
        spec_payload: Raw fields for the Box spec.
        query: Pipeline query the execution belongs to.
        skip_host_mount_validation: Passed through to ``build_spec``.

    Raises:
        BoxError: When the runtime is unavailable, or when building the
            spec, a quota check, or the execution itself fails.
    """
    if not self._available:
        raise BoxError('Box runtime is not available. Install and start Docker to use sandbox features.')

    try:
        spec = self.build_spec(spec_payload, skip_host_mount_validation=skip_host_mount_validation)
    except BoxError as exc:
        self._record_error(exc, query)
        raise

    self.ap.logger.info(
        'LangBot Box request: '
        f'query_id={query.query_id} '
        f'spec={json.dumps(self._summarize_spec(spec), ensure_ascii=False)}'
    )

    # The pre-execution quota check and the execution share one failure
    # path: record the error, then let it propagate.
    try:
        await self._enforce_workspace_quota(spec, phase='before execution')
        result = await self.client.execute(spec)
    except BoxError as exc:
        self._record_error(exc, query)
        raise

    try:
        await self._enforce_workspace_quota(spec, phase='after execution')
    except BoxError as exc:
        # The command pushed the workspace over quota: clean up the session
        # before reporting the failure.
        await self._cleanup_exceeded_session(spec)
        self._record_error(exc, query)
        raise

    self.ap.logger.info(
        'LangBot Box result: '
        f'query_id={query.query_id} '
        f'summary={json.dumps(self._summarize_result(result), ensure_ascii=False)}'
    )
    return self._serialize_result(result)
def resolve_box_session_id(self, query: pipeline_query.Query) -> str:
    """Resolve the Box session_id from the pipeline's template and query variables.

    When ``system.limitation.force_box_session_id_template`` is set to a
    non-empty value, that template overrides whatever the pipeline
    configured. This is the authoritative SaaS guard: it runs on every
    ``exec`` call, so a tenant cannot escape a single shared sandbox even
    by editing the pipeline config directly through the API (which only
    gates the web UI).

    Placeholders are filled from the query variables, complemented by
    ``query_id``, ``launcher_type``, ``launcher_id``, ``sender_id`` and
    ``global``; unknown placeholders render as ``unknown``.
    """
    template = self._forced_box_session_id_template()
    if not template:
        pipeline_config = query.pipeline_config or {}
        agent_config = pipeline_config.get('ai', {}).get('local-agent', {})
        template = agent_config.get('box-session-id-template', '{launcher_type}_{launcher_id}')

    context = dict(query.variables or {})

    launcher_type = getattr(query, 'launcher_type', None)
    # Enum-like launcher types are reduced to their plain value.
    launcher_type = getattr(launcher_type, 'value', launcher_type)
    launcher_id = getattr(query, 'launcher_id', None)
    sender_id = getattr(query, 'sender_id', None)
    query_id = getattr(query, 'query_id', None)

    # Values already present in the query variables win over these
    # fallbacks; each fallback degrades towards a coarser identifier.
    fallbacks = (
        ('query_id', str(query_id or 'unknown')),
        ('launcher_type', str(launcher_type or 'query')),
        ('launcher_id', str(launcher_id or query_id or 'unknown')),
        ('sender_id', str(sender_id or launcher_id or query_id or 'unknown')),
        ('global', 'global'),
    )
    for key, value in fallbacks:
        context.setdefault(key, value)

    return template.format_map(collections.defaultdict(lambda: 'unknown', context))
def build_skill_extra_mounts(self, query: pipeline_query.Query) -> list[dict]:
    """Build extra_mounts entries for all pipeline-bound skills.

    Mounting every visible skill up front means a freshly created
    container already contains all skill packages, regardless of which
    skill is currently activated. Each skill is mounted read-write at
    ``/workspace/.skills/{skill_name}``.

    Path validation depends on the filesystem topology:

    - Shared filesystem (local stdio mode): a skill whose ``package_root``
      is missing or no longer a directory is skipped with a warning
      instead of being passed to the backend. Without that guard the
      backends behave inconsistently on a stale mount: nsjail refuses to
      start the sandbox (failing every exec in the session), Docker
      silently auto-creates a root-owned empty directory on the host, and
      E2B silently skips the upload — none of which surfaces an
      actionable error.
    - Separate Box process (Docker Compose, k8s sidecar,
      ``--standalone-box``, or a remote ``runtime.endpoint``): the
      ``package_root`` reported by ``list_skills`` is a path on the Box
      runtime's own filesystem and is NOT resolvable on the LangBot side.
      Validating it locally would wrongly drop every skill, so the path is
      trusted and left for the Box runtime to resolve.

    Returns:
        One mount dict per usable skill; empty when the application has no
        skill manager.
    """
    if getattr(self.ap, 'skill_mgr', None) is None:
        return []

    from ..provider.tools.loaders import skill as skill_loader

    check_on_disk = self.shares_filesystem_with_box
    visible = skill_loader.get_visible_skills(self.ap, query)

    mounts: list[dict] = []
    for skill_name, skill_data in visible.items():
        package_root = str(skill_data.get('package_root', '') or '').strip()
        if not package_root:
            continue

        stale = check_on_disk and not os.path.isdir(package_root)
        if stale:
            self.ap.logger.warning(
                f'Skill "{skill_name}" package_root missing on filesystem '
                f'({package_root}); skipping mount to prevent sandbox failures. '
                f'The skill cache may be stale — consider reloading skills.'
            )
            continue

        mount = {
            'host_path': package_root,
            'mount_path': f'/workspace/.skills/{skill_name}',
            'mode': 'rw',
        }
        mounts.append(mount)
    return mounts
async def execute_tool(self, parameters: dict, query: pipeline_query.Query) -> dict:
    """Execute an agent-facing ``exec`` tool call.

    Translates the agent-facing ``command`` field to the internal
    ``BoxSpec.cmd`` field, forwards the optional ``workdir``,
    ``timeout_sec`` and ``env`` fields, and adds the session id and skill
    mounts derived from *query*. Any other key in *parameters* is ignored.

    Raises:
        KeyError: If *parameters* has no ``command`` entry.
    """
    spec_payload: dict = {'cmd': parameters['command']}

    # Only this whitelist of agent-supplied fields is passed through.
    agent_fields = ('workdir', 'timeout_sec', 'env')
    spec_payload.update({name: parameters[name] for name in agent_fields if name in parameters})

    # Context the agent must not control: the session id comes from the
    # query, and all pipeline-bound skills are mounted into the container.
    spec_payload['session_id'] = self.resolve_box_session_id(query)
    spec_payload['extra_mounts'] = self.build_skill_extra_mounts(query)

    return await self.execute_spec_payload(spec_payload, query)
async def shutdown(self):
    """Ask the runtime client to shut down (delegates to ``client.shutdown()``)."""
    await self.client.shutdown()
def dispose(self):
    """Release the connector and schedule an asynchronous client shutdown.

    The connector (if any) is disposed synchronously. ``shutdown()`` is
    scheduled on the application's event loop, unless there is no open
    loop or a previously scheduled shutdown task is still running.
    """
    connector = self._runtime_connector
    if connector is not None:
        connector.dispose()

    loop = getattr(self.ap, 'event_loop', None)
    if loop is None or loop.is_closed():
        return

    pending = self._shutdown_task
    if pending is not None and not pending.done():
        # A shutdown is already in flight; do not schedule another one.
        return
    self._shutdown_task = loop.create_task(self.shutdown())
async def get_sessions(self) -> list[dict]:
    """Return the sessions reported by the Box runtime (best effort).

    Returns:
        The runtime's session list, or an empty list when the runtime is
        unavailable or the request fails. Failures are logged rather than
        raised so status views keep working.
    """
    if not self._available:
        return []
    try:
        return await self.client.get_sessions()
    except Exception as exc:
        # Deliberately best-effort, but leave a trace for diagnosis instead
        # of silently pretending there are no sessions.
        self.ap.logger.warning(f'Failed to list Box sessions: {exc}')
        return []
def build_spec(self, spec_payload: dict, skip_host_mount_validation: bool = False) -> BoxSpec:
    """Turn a raw payload dict into a validated ``BoxSpec``.

    Works on a shallow copy of *spec_payload*. Service-level defaults
    (workspace, quota, custom image) and the active profile are applied before
    model validation; the host mount is checked afterwards unless
    *skip_host_mount_validation* is true.

    Raises:
        BoxValidationError: the payload fails model validation (the message of
            the first validation error is used), or the host mount is rejected.
    """
    payload = dict(spec_payload)
    payload.setdefault('env', {})

    # Fill service-level defaults only where the caller left the field empty.
    service_defaults = (
        ('host_path', self.default_workspace),
        ('workspace_quota_mb', self.workspace_quota_mb),
    )
    for field_name, fallback in service_defaults:
        if fallback is not None and payload.get(field_name) in (None, ''):
            payload[field_name] = fallback

    # Global custom image overrides profile default (but not caller-specified image)
    if self.custom_image and 'image' not in payload:
        payload['image'] = self.custom_image

    self._apply_profile(payload)

    try:
        spec = BoxSpec.model_validate(payload)
    except pydantic.ValidationError as exc:
        message = exc.errors()[0].get('msg', 'invalid box arguments')
        raise BoxValidationError(message) from exc

    if skip_host_mount_validation:
        return spec
    self._validate_host_mount(spec)
    return spec
async def create_session(self, spec_payload: dict, *, skip_host_mount_validation: bool = False) -> dict:
    """Build a spec from *spec_payload* and ask the client to create its session."""
    validated_spec = self.build_spec(
        spec_payload,
        skip_host_mount_validation=skip_host_mount_validation,
    )
    return await self.client.create_session(validated_spec)
async def start_managed_process(self, session_id: str, process_payload: dict) -> BoxManagedProcessInfo:
    """Validate *process_payload* as a managed-process spec and start it in *session_id*."""
    validated_process = BoxManagedProcessSpec.model_validate(process_payload)
    process_info = await self.client.start_managed_process(session_id, validated_process)
    return process_info
async def get_managed_process(self, session_id: str, process_id: str = 'default') -> BoxManagedProcessInfo:
    """Fetch the managed process *process_id* of *session_id* from the client."""
    process_info = await self.client.get_managed_process(session_id, process_id)
    return process_info
async def stop_managed_process(self, session_id: str, process_id: str = 'default') -> None:
    """Ask the client to stop the managed process *process_id* of *session_id*."""
    outcome = await self.client.stop_managed_process(session_id, process_id)
    return outcome
def get_managed_process_websocket_url(self, session_id: str, process_id: str = 'default') -> str:
    """Return the websocket URL used to attach to a managed process.

    The URL is produced by the client's own ``get_managed_process_websocket_url``.
    The relay base URL comes from the runtime connector when one exists and
    falls back to ``http://127.0.0.1:5410`` otherwise.

    Raises:
        BoxValidationError: the client does not provide the URL builder.
    """
    url_builder = getattr(self.client, 'get_managed_process_websocket_url', None)
    if url_builder is None:
        raise BoxValidationError('box runtime client does not support managed process websocket attach')

    connector = self._runtime_connector
    if connector is None:
        relay_base_url = 'http://127.0.0.1:5410'
    else:
        relay_base_url = connector.ws_relay_base_url
    return url_builder(session_id, relay_base_url, process_id)
# Skill management: each method below forwards its arguments, positionally
# and unchanged, to the Box client method of the same name.

async def list_skills(self) -> list[dict]:
    """Forward to the client's ``list_skills``."""
    skills = await self.client.list_skills()
    return skills

async def get_skill(self, name: str) -> dict | None:
    """Forward to the client's ``get_skill`` for *name*."""
    skill = await self.client.get_skill(name)
    return skill

async def create_skill(self, skill: dict) -> dict:
    """Forward to the client's ``create_skill``."""
    created = await self.client.create_skill(skill)
    return created

async def update_skill(self, name: str, skill: dict) -> dict:
    """Forward to the client's ``update_skill`` for *name*."""
    updated = await self.client.update_skill(name, skill)
    return updated

async def delete_skill(self, name: str) -> None:
    """Forward to the client's ``delete_skill``; the client's result is discarded."""
    await self.client.delete_skill(name)

async def scan_skill_directory(self, path: str) -> dict:
    """Forward to the client's ``scan_skill_directory`` for *path*."""
    scan_result = await self.client.scan_skill_directory(path)
    return scan_result

async def list_skill_files(
    self,
    name: str,
    path: str = '.',
    include_hidden: bool = False,
    max_entries: int = 200,
) -> dict:
    """Forward to the client's ``list_skill_files``."""
    listing = await self.client.list_skill_files(name, path, include_hidden, max_entries)
    return listing

async def read_skill_file(self, name: str, path: str) -> dict:
    """Forward to the client's ``read_skill_file``."""
    file_payload = await self.client.read_skill_file(name, path)
    return file_payload

async def write_skill_file(self, name: str, path: str, content: str) -> dict:
    """Forward to the client's ``write_skill_file``."""
    write_result = await self.client.write_skill_file(name, path, content)
    return write_result

async def preview_skill_zip(
    self,
    file_bytes: bytes,
    filename: str,
    source_subdir: str = '',
    target_suffix: str = 'upload',
) -> list[dict]:
    """Forward to the client's ``preview_skill_zip``."""
    preview = await self.client.preview_skill_zip(file_bytes, filename, source_subdir, target_suffix)
    return preview

async def install_skill_zip(
    self,
    file_bytes: bytes,
    filename: str,
    source_paths: list[str] | None = None,
    source_path: str = '',
    source_subdir: str = '',
    target_suffix: str = 'upload',
) -> list[dict]:
    """Forward to the client's ``install_skill_zip``."""
    forwarded = (
        file_bytes,
        filename,
        source_paths,
        source_path,
        source_subdir,
        target_suffix,
    )
    installed = await self.client.install_skill_zip(*forwarded)
    return installed
def _serialize_result(self, result: BoxExecutionResult) -> dict:
    """Convert an execution result into a plain dict.

    ``stdout`` and ``stderr`` are passed through ``_truncate``; the matching
    ``*_truncated`` flags report whether each stream was shortened.
    """
    out_text, out_cut = self._truncate(result.stdout)
    err_text, err_cut = self._truncate(result.stderr)
    return dict(
        session_id=result.session_id,
        backend=result.backend_name,
        status=result.status.value,
        ok=result.ok,
        exit_code=result.exit_code,
        stdout=out_text,
        stderr=err_text,
        stdout_truncated=out_cut,
        stderr_truncated=err_cut,
        duration_ms=result.duration_ms,
    )
def _truncate(self, text: str) -> tuple[str, bool]:
    """Shorten *text* to at most ``output_limit_chars`` characters.

    Returns ``(text, truncated)``. Text within the limit is returned unchanged.
    Otherwise the result is the head of the text, a notice stating how many
    characters were dropped, and the tail; the head gets 60% of the space left
    after the notice and the tail gets the rest. When the notice alone does
    not fit, a prefix of the notice is returned.
    """
    limit = self.output_limit_chars
    if len(text) <= limit:
        return text, False
    if limit <= 0:
        return '', True

    keep_front = 0
    keep_back = 0
    marker = ''
    # The notice embeds the dropped-character count, whose digit count affects
    # how much room is left; re-derive the split a few times until it settles.
    for _attempt in range(4):
        dropped = max(len(text) - keep_front - keep_back, 0)
        marker = f'\n\n... [{dropped} characters truncated] ...\n\n'
        budget = limit - len(marker)
        if budget <= 0:
            return marker[:limit], True
        front = int(budget * 0.6)
        back = budget - front
        if (front, back) == (keep_front, keep_back):
            break
        keep_front, keep_back = front, back

    prefix = text[:keep_front]
    # text[-0:] would be the whole string, hence the explicit guard.
    suffix = text[-keep_back:] if keep_back else ''
    return (prefix + marker + suffix)[:limit], True
def _summarize_spec(self, spec: BoxSpec) -> dict:
    """Return a compact dict describing *spec*.

    The command is stripped and capped at 400 characters (397 plus ``...``).
    Only the sorted environment variable names are included, not their values.
    """
    command_preview = spec.cmd.strip()
    if len(command_preview) > 400:
        command_preview = command_preview[:397] + '...'

    summary = {
        'session_id': spec.session_id,
        'workdir': spec.workdir,
        'mount_path': spec.mount_path,
        'timeout_sec': spec.timeout_sec,
        'network': spec.network.value,
        'image': spec.image,
        'host_path': spec.host_path,
        'host_path_mode': spec.host_path_mode.value,
    }
    for resource_field in ('cpus', 'memory_mb', 'pids_limit', 'read_only_rootfs', 'workspace_quota_mb'):
        summary[resource_field] = getattr(spec, resource_field)
    summary['env_keys'] = sorted(spec.env.keys())
    summary['cmd'] = command_preview
    return summary
def _summarize_result(self, result: BoxExecutionResult) -> dict:
    """Return a compact dict describing *result*.

    ``stdout`` and ``stderr`` are reduced to their first 200 characters, with
    ``...`` appended when the stream was longer than that.
    """

    def _preview(stream: str) -> str:
        clipped = stream[:200]
        return f'{clipped}...' if len(stream) > 200 else clipped

    return {
        'session_id': result.session_id,
        'backend': result.backend_name,
        'status': result.status.value,
        'exit_code': result.exit_code,
        'duration_ms': result.duration_ms,
        'stdout_preview': _preview(result.stdout),
        'stderr_preview': _preview(result.stderr),
    }
def _local_config(self) -> dict:
    """Return a copy of the ``local`` section of the box config.

    Environment overrides are applied uniformly by
    ``LoadConfigStage._apply_env_overrides_to_config`` (e.g.
    ``BOX__LOCAL__HOST_ROOT``) before this is read, so no box-specific
    env parsing happens here. A missing or empty section yields ``{}``.
    """
    local_section = _get_box_config(self.ap).get('local')
    return dict(local_section or {})
def _load_allowed_mount_roots(self) -> list[str]:
    """Return the normalized list of directories that may be mounted.

    Reads ``allowed_mount_roots`` from the local box config. Each entry is
    stripped and resolved with ``os.path.realpath``; blank and null entries
    are dropped. When nothing usable is configured, ``self.host_root`` is used
    as the only root if it is set.
    """
    # An explicit null in the config arrives as None; treat it like "not set"
    # instead of failing on iteration.
    configured_roots = self._local_config().get('allowed_mount_roots') or []

    # The unified env-override mechanism stores a brand-new key as a raw
    # string when the key is absent from config.yaml. Accept a
    # comma-separated string as well as a list so that
    # ``BOX__LOCAL__ALLOWED_MOUNT_ROOTS="/a,/b"`` keeps working even when
    # the config file has no ``box.local.allowed_mount_roots`` entry.
    if isinstance(configured_roots, str):
        configured_roots = [item.strip() for item in configured_roots.split(',') if item.strip()]

    normalized_roots: list[str] = []
    for root in configured_roots:
        if root is None:
            # str(None) would otherwise become a root literally named "None".
            continue
        root_value = str(root).strip()
        if not root_value:
            continue
        normalized_roots.append(os.path.realpath(os.path.abspath(root_value)))

    if not normalized_roots and self.host_root is not None:
        normalized_roots.append(self.host_root)
    return normalized_roots
def _load_host_root(self) -> str | None:
    """Return the resolved ``host_root`` from the local box config.

    Returns ``None`` when the value is missing, null or blank.
    """
    # ``or ''`` keeps an explicit null from being stringified to "None".
    host_root = str(self._local_config().get('host_root') or '').strip()
    if not host_root:
        return None
    return os.path.realpath(os.path.abspath(host_root))
def _load_default_workspace(self) -> str | None:
    """Return the resolved default workspace directory.

    With no configured value (missing, null or blank) the workspace is
    ``<host_root>/default``, or ``None`` when ``host_root`` is unset. A
    relative configured value is resolved against ``host_root`` when that is
    set.
    """
    # ``or ''`` keeps an explicit null from being stringified to "None".
    default_workspace = str(self._local_config().get('default_workspace') or '').strip()
    if not default_workspace:
        if self.host_root is None:
            return None
        default_workspace = os.path.join(self.host_root, 'default')
    elif not os.path.isabs(default_workspace) and self.host_root is not None:
        default_workspace = os.path.join(self.host_root, default_workspace)
    return os.path.realpath(os.path.abspath(default_workspace))
def get_skills_root(self) -> str | None:
    """Return the resolved skills directory.

    Uses ``skills_root`` from the local box config and falls back to
    ``skills`` when it is missing or blank. A relative value is resolved
    against ``host_root`` when that is set.
    """
    configured = self._local_config().get('skills_root', '') or 'skills'
    skills_root = str(configured).strip() or 'skills'
    if self.host_root is not None and not os.path.isabs(skills_root):
        skills_root = os.path.join(self.host_root, skills_root)
    return os.path.realpath(os.path.abspath(skills_root))
def _load_enabled(self) -> bool:
    """Read the top-level ``enabled`` flag of the box config (not ``box.local.*``).

    Defaults to ``True`` when the key is absent, so disabling is opt-in.
    Booleans are returned as-is. Any other value is stringified, stripped and
    lower-cased, and counts as disabled only when it is one of ``false``,
    ``0``, ``no``, ``off`` or the empty string.
    """
    flag = _get_box_config(self.ap).get('enabled', True)
    if isinstance(flag, bool):
        return flag
    disabled_tokens = ('false', '0', 'no', 'off', '')
    normalized = str(flag).strip().lower()
    return normalized not in disabled_tokens
def _load_custom_image(self) -> str | None:
    """Return the stripped ``image`` from the local box config, or ``None`` when unset or blank."""
    configured = self._local_config().get('image', '') or ''
    image = str(configured).strip()
    return image if image else None
def _forced_box_session_id_template(self) -> str:
    """Return the SaaS-forced sandbox-scope template, or '' when unset.

    Read from ``system.limitation.force_box_session_id_template``. A
    non-empty value pins every pipeline to a single sandbox scope
    (e.g. ``'{global}'``) and cannot be overridden per-pipeline.

    Missing, null or non-mapping ``system`` / ``limitation`` sections are
    treated as "unset" instead of raising.
    """
    instance_config = getattr(self.ap, 'instance_config', None)
    if instance_config is None:
        return ''

    system_section = (instance_config.data or {}).get('system')
    if not isinstance(system_section, dict):
        return ''
    limitation = system_section.get('limitation')
    if not isinstance(limitation, dict):
        return ''
    return str(limitation.get('force_box_session_id_template', '') or '').strip()
def _load_workspace_quota_mb(self) -> int | None:
    """Return ``workspace_quota_mb`` from the local box config as an int.

    Returns ``None`` when the value is missing, null or an empty string.

    Raises:
        BoxValidationError: the value is not accepted by the integer adapter
            or is negative.
    """
    configured = self._local_config().get('workspace_quota_mb')
    if configured in (None, ''):
        return None

    try:
        quota_mb = _INT_ADAPTER.validate_python(configured)
    except pydantic.ValidationError as exc:
        raise BoxValidationError('workspace_quota_mb must be an integer greater than or equal to 0') from exc

    if quota_mb >= 0:
        return quota_mb
    raise BoxValidationError('workspace_quota_mb must be greater than or equal to 0')
def _ensure_default_workspace(self):
    """Create the default workspace directory when it does not exist yet.

    Does nothing when no default workspace is configured or it already is a
    directory. The directory is created only if it lies under one of
    ``allowed_mount_roots``.

    Raises:
        BoxValidationError: the path exists but is not a directory, no allowed
            roots are configured, or the path is outside every allowed root.
    """
    workspace = self.default_workspace
    if workspace is None or os.path.isdir(workspace):
        return
    if os.path.exists(workspace):
        raise BoxValidationError('box.local.default_workspace must point to a directory on the host')

    roots = self.allowed_mount_roots
    if not roots:
        raise BoxValidationError(
            'box.local.default_workspace cannot be created because no allowed_mount_roots are configured'
        )

    if any(_is_path_under(workspace, root) for root in roots):
        os.makedirs(workspace, exist_ok=True)
        return

    allowed_roots = ', '.join(roots)
    raise BoxValidationError(f'box.local.default_workspace is outside allowed_mount_roots: {allowed_roots}')
def _validate_host_mount(self, spec: BoxSpec):
    """Check that ``spec.host_path`` may be mounted.

    A spec without a host path passes. Otherwise the symlink-resolved path
    must be an existing directory located under one of ``allowed_mount_roots``.

    Raises:
        BoxValidationError: the path is not a directory, no allowed roots are
            configured, or the path is outside every allowed root.
    """
    if spec.host_path is None:
        return

    resolved = os.path.realpath(spec.host_path)
    if not os.path.isdir(resolved):
        raise BoxValidationError('host_path must point to an existing directory on the host')

    roots = self.allowed_mount_roots
    if not roots:
        raise BoxValidationError('host_path mounting is disabled because no allowed_mount_roots are configured')

    if any(_is_path_under(resolved, root) for root in roots):
        return

    allowed_roots = ', '.join(roots)
    raise BoxValidationError(f'host_path is outside allowed_mount_roots: {allowed_roots}')
def _load_profile(self) -> BoxProfile:
    """Return the built-in profile selected by ``profile`` in the local box config.

    A missing, null or blank value selects ``default``.

    Raises:
        BoxValidationError: the configured name is not a built-in profile.
    """
    # ``or 'default'`` keeps an explicit null from being stringified to "None"
    # and then rejected as an unknown profile.
    profile_name = str(self._local_config().get('profile') or 'default').strip() or 'default'
    profile = BUILTIN_PROFILES.get(profile_name)
    if profile is None:
        available = ', '.join(sorted(BUILTIN_PROFILES))
        raise BoxValidationError(f"unknown box profile '{profile_name}', available profiles: {available}")
    return profile
def _apply_profile(self, params: dict):
    """Merge the active profile into *params* in place.

    For each profile-controlled field, the profile value is written when the
    field is locked by the profile or absent from *params*; enum values are
    stored as their ``.value``. Afterwards ``timeout_sec`` is clamped to the
    profile's ``max_timeout_sec``. A timeout that cannot be parsed as an
    integer is left untouched here.
    """
    profile = self.profile
    controlled_fields = (
        'image',
        'network',
        'timeout_sec',
        'host_path_mode',
        'cpus',
        'memory_mb',
        'pids_limit',
        'read_only_rootfs',
        'workspace_quota_mb',
    )
    for name in controlled_fields:
        default = getattr(profile, name)
        if isinstance(default, enum.Enum):
            default = default.value
        if name in profile.locked or name not in params:
            params[name] = default

    try:
        requested_timeout = _INT_ADAPTER.validate_python(params.get('timeout_sec'))
    except pydantic.ValidationError:
        return
    if requested_timeout > profile.max_timeout_sec:
        params['timeout_sec'] = profile.max_timeout_sec
def _get_workspace_size_bytes(self, root: str) -> int:
    """Return the summed size in bytes of every non-directory entry under *root*.

    Symlinks are never followed: a link contributes the size of the link
    itself. Entries or directories that disappear during the scan are skipped.
    """
    total = 0
    pending = [root]
    while pending:
        current = pending.pop()
        try:
            with os.scandir(current) as entries:
                for entry in entries:
                    try:
                        if not entry.is_symlink() and entry.is_dir(follow_symlinks=False):
                            pending.append(entry.path)
                            continue
                        total += entry.stat(follow_symlinks=False).st_size
                    except FileNotFoundError:
                        continue
        except FileNotFoundError:
            continue
    return total
async def _enforce_workspace_quota(self, spec: BoxSpec, *, phase: str) -> None:
    """Raise when the workspace behind ``spec.host_path`` exceeds its quota.

    Nothing is checked when the spec has no host path, the quota is zero or
    negative, or the resolved host path is not a directory. *phase* only
    labels the error message.

    Raises:
        BoxValidationError: the measured size is larger than the quota.
    """
    if spec.host_path is None or spec.workspace_quota_mb <= 0:
        return

    workspace_dir = os.path.realpath(spec.host_path)
    if not os.path.isdir(workspace_dir):
        return

    # Walk the workspace off the event loop — this runs on every
    # quota-enforced exec, and a large tree would otherwise block the whole
    # asyncio runtime (all bots/pipelines) for the duration of the scan.
    used = await asyncio.to_thread(self._get_workspace_size_bytes, workspace_dir)
    limit = spec.workspace_quota_mb * _MIB
    if used > limit:
        raise BoxValidationError(
            f'workspace quota exceeded {phase}: '
            f'used={used} bytes limit={limit} bytes '
            f'host_path={workspace_dir} session_id={spec.session_id}'
        )
async def _cleanup_exceeded_session(self, spec: BoxSpec) -> None:
    """Delete the session of *spec* after a quota violation.

    Any failure of the delete call is logged as a warning and not re-raised.
    """
    session_id = spec.session_id
    try:
        await self.client.delete_session(session_id)
    except Exception as exc:
        message = (
            'Failed to clean up Box session after workspace quota was exceeded: '
            f'session_id={spec.session_id} error={exc}'
        )
        self.ap.logger.warning(message)
# ── Observability ─────────────────────────────────────────────────
def _record_error(self, exc: Exception, query: pipeline_query.Query):
    """Append a record of *exc* to the recent-errors collection.

    The record holds an ISO-formatted timestamp, the exception class name,
    its message and the query id as a string.
    """
    entry = {
        'timestamp': _dt.datetime.now(_UTC).isoformat(),
        'type': type(exc).__name__,
        'message': str(exc),
        'query_id': str(query.query_id),
    }
    self._recent_errors.append(entry)
def get_recent_errors(self) -> list[dict]:
    """Return the recorded errors as a new list, so callers cannot alter the internal collection."""
    return [*self._recent_errors]
def get_system_guidance(self) -> str:
    """Return LLM system-prompt guidance for the exec tool.

    All execution-specific prompt text lives here so that callers
    (e.g. LocalAgentRunner) stay free of box domain knowledge. The
    workspace paragraph is appended only when a default workspace is set.
    """
    sentences = [
        'When the exec tool is available, use it for exact calculations, statistics, '
        'structured data parsing, and code execution instead of estimating mentally.',
        ' If the user provides numbers, tables, CSV-like text, JSON, or other data and asks '
        'for a computed answer, prefer running a short Python script via exec and then '
        'answer from the tool result.',
        ' Unless the user explicitly asks for the script, code, or implementation details, '
        'do not include the generated script in the final answer; return the result and a '
        'brief explanation only.',
    ]
    if self.default_workspace:
        sentences.append(' A default workspace is mounted at /workspace for file tasks.')
        sentences.append(
            ' When the user asks to read, create, or modify local files in the working '
            'directory, use exec with /workspace paths directly; do not ask the user for '
            'directory parameters unless they explicitly need a different directory.'
        )
    return ''.join(sentences)
async def get_status(self) -> dict:
    """Return the status payload for the Box service.

    The payload always carries ``available``, ``enabled``, ``profile`` and
    ``recent_error_count``. When the service is marked unavailable, or the
    runtime status call fails, ``available`` is ``False`` and
    ``connector_error`` explains why. Otherwise the runtime's own status dict
    is merged in and ``available`` reflects the backend state.

    A runtime status that is not a dict, or a ``backend`` entry that is not a
    dict, is treated as "no backend information" rather than raising.
    """

    def _summary(available: bool) -> dict:
        # Fields shared by every variant of the payload.
        return {
            'available': available,
            'enabled': self._enabled,
            'profile': self.profile.name,
            'recent_error_count': len(self._recent_errors),
        }

    if not self._available:
        return {**_summary(False), 'connector_error': self._connector_error}

    try:
        runtime_status = await self.client.get_status()
    except Exception as exc:
        # RPC failed — the runtime likely just disconnected and the
        # heartbeat hasn't flipped _available yet.
        return {**_summary(False), 'connector_error': str(exc)}

    # Normalize malformed responses up front so neither the backend lookup
    # nor the dict merge below can fail on them.
    if not isinstance(runtime_status, dict):
        runtime_status = {}
    backend_info = runtime_status.get('backend')
    if not isinstance(backend_info, dict):
        backend_info = None

    # Backend state can be unavailable even when the connector is healthy
    # (operator selected nsjail but the binary is missing, Docker daemon
    # went down after the runtime started, E2B credentials wrong, ...).
    # Report the combined state in the top-level ``available`` so the
    # frontend banner / ``useBoxStatus`` hook / native-tool gate all
    # agree on "actually usable" rather than "connector alive". The
    # detailed ``backend`` object stays in the payload so the dialog
    # can still show which backend was tried.
    backend_ok = bool(backend_info and backend_info.get('available', False))
    payload = {**runtime_status, **_summary(backend_ok)}

    if not backend_ok and 'connector_error' not in payload:
        backend_name = backend_info.get('name') if backend_info else None
        if backend_name:
            payload['connector_error'] = f'Configured sandbox backend "{backend_name}" is unavailable'
        else:
            payload['connector_error'] = 'No supported sandbox backend (Docker / nsjail / E2B) is available'
    return payload
+413
View File
@@ -0,0 +1,413 @@
"""Reusable workspace/session helpers built on top of Box.
This module is the middle layer between the raw Box runtime primitives and
application-specific flows such as skills or MCP stdio.
It intentionally stays generic:
- path and virtualenv rewriting are workspace concerns
- Python project detection/bootstrap are workspace concerns
- session exec / managed-process helpers are workspace concerns
Higher layers add their own semantics on top, for example:
- skills choose a stable per-skill session id and use repeated exec
- MCP stdio chooses how to prepare dependencies and attaches to a managed process
"""
from __future__ import annotations
import os
import textwrap
from typing import Any
PYTHON_MANIFEST_FILES = (
'requirements.txt',
'pyproject.toml',
'setup.py',
'setup.cfg',
)
_VENV_DIRS = frozenset({'.venv', 'venv', 'env', '.env'})
_VENV_BIN_DIRS = frozenset({'bin', 'Scripts'})
def normalize_host_path(path: str | None) -> str:
    """Return *path* as an absolute, symlink-resolved path, or '' when it is ``None`` or blank."""
    text = '' if path is None else str(path).strip()
    if not text:
        return ''
    return os.path.realpath(os.path.abspath(text))
def rewrite_mounted_path(path: str, host_path: str | None, *, mount_path: str = '/workspace') -> str:
    """Translate a host path into the path visible inside the sandbox mount.

    Both *path* and *host_path* are symlink-resolved before comparison. A path
    equal to the host root maps to *mount_path*; a path below it maps to
    *mount_path* plus the relative remainder, always joined with ``/``. Any
    other path, or an empty *path* / *host_path*, is returned unchanged.

    Containment is decided with ``os.path.relpath`` rather than a ``'/'``
    prefix test, so it also works where the platform separator is not ``/``
    and when *host_path* is the filesystem root.
    """
    if not host_path or not path:
        return path

    normalized_host = os.path.realpath(host_path)
    normalized_path = os.path.realpath(path)
    if normalized_path == normalized_host:
        return mount_path

    try:
        relative = os.path.relpath(normalized_path, normalized_host)
    except ValueError:
        # No relative path exists (e.g. different drives on Windows).
        return path

    if relative == os.pardir or relative.startswith(os.pardir + os.sep):
        return path  # outside the mounted directory
    # Paths inside the sandbox are POSIX-style regardless of the host platform.
    return mount_path + '/' + relative.replace(os.sep, '/')
def unwrap_venv_path(directory: str) -> str:
    """Collapse ``.../.venv/bin`` style paths back to the project root.

    Scans the path components from the end for a virtualenv binary directory
    directly below a virtualenv directory and returns everything before that
    pair (``/`` when nothing precedes it). Backslashes are treated as
    separators. Without such a pair *directory* is returned unchanged.
    """
    segments = directory.replace('\\', '/').split('/')
    for idx in reversed(range(1, len(segments))):
        if segments[idx] not in _VENV_BIN_DIRS:
            continue
        if segments[idx - 1] not in _VENV_DIRS:
            continue
        return '/'.join(segments[: idx - 1]) or '/'
    return directory
def infer_workspace_host_path(command: str, args: list[str] | None = None) -> str | None:
    """Infer the project/workspace root from absolute command/arg paths.

    Only tokens that are absolute paths and exist on disk are considered.
    Each contributes its parent directory, with virtualenv binary directories
    collapsed to the project root. The result is the common path of those
    directories, or ``None`` when there are none or the common path is ``/``.
    """
    tokens = (command, *(args or ()))
    roots = [
        os.path.realpath(unwrap_venv_path(os.path.dirname(token)))
        for token in tokens
        if os.path.isabs(token) and os.path.exists(token)
    ]
    if not roots:
        return None
    shared = os.path.commonpath(roots)
    return None if shared == '/' else shared
def rewrite_venv_command(command: str, host_path: str | None, *, mount_path: str = '/workspace') -> str:
    """Rewrite host venv interpreters to plain ``python`` inside the sandbox.

    Once a project is mounted into the sandbox, host virtualenv paths are no
    longer valid. For those paths we intentionally drop down to ``python`` and
    let the sandbox-side environment/bootstrap decide what interpreter to use.

    Other commands located under *host_path* are translated to their mounted
    path; commands outside it, and empty inputs, are returned unchanged.

    The command's directory is symlink-resolved, but its final component is
    not: a virtualenv's ``bin/python`` is commonly a symlink to an interpreter
    outside the project, and resolving it would hide that the command lives
    in the project's virtualenv.
    """
    if not host_path or not command:
        return command

    normalized_host = os.path.realpath(host_path)
    lexical_command = os.path.abspath(command)
    located_command = os.path.join(
        os.path.realpath(os.path.dirname(lexical_command)),
        os.path.basename(lexical_command),
    )

    try:
        rel = os.path.relpath(located_command, normalized_host)
    except ValueError:
        # No relative path exists (e.g. different drives on Windows).
        return command
    parts = rel.replace('\\', '/').split('/')
    if rel == os.curdir or parts[0] == os.pardir:
        return command  # the host directory itself, or outside of it

    if len(parts) >= 3 and parts[0] in _VENV_DIRS and parts[1] in _VENV_BIN_DIRS and parts[2].startswith('python'):
        return 'python'

    rewritten = rewrite_mounted_path(located_command, host_path, mount_path=mount_path)
    # An unchanged path means the command resolves outside the mount after
    # all (e.g. a symlink leaving the project); hand back the caller's string.
    return command if rewritten == located_command else rewritten
def list_python_manifest_files(host_path: str | None) -> list[str]:
    """Return the Python manifest filenames that exist as files directly in *host_path*.

    The result follows the order of ``PYTHON_MANIFEST_FILES`` and is empty
    when *host_path* normalizes to an empty string.
    """
    root = normalize_host_path(host_path)
    if not root:
        return []
    present: list[str] = []
    for manifest_name in PYTHON_MANIFEST_FILES:
        if os.path.isfile(os.path.join(root, manifest_name)):
            present.append(manifest_name)
    return present
def classify_python_workspace(host_path: str | None) -> str | None:
    """Return the generic Python workspace shape, without app-specific policy.

    ``'package'`` when ``pyproject.toml``, ``setup.py`` or ``setup.cfg`` is
    present, ``'requirements'`` when only ``requirements.txt`` is, and
    ``None`` when no manifest file is found.
    """
    found = set(list_python_manifest_files(host_path))
    if not found:
        return None
    package_markers = {'pyproject.toml', 'setup.py', 'setup.cfg'}
    if not found.isdisjoint(package_markers):
        return 'package'
    return 'requirements' if 'requirements.txt' in found else None
def should_prepare_python_env(host_path: str | None) -> bool:
    """Return whether *host_path* contains a ``.venv`` directory or any Python manifest file.

    Returns ``False`` when *host_path* normalizes to an empty string.
    """
    root = normalize_host_path(host_path)
    if not root:
        return False
    has_venv_dir = os.path.isdir(os.path.join(root, '.venv'))
    return has_venv_dir or bool(list_python_manifest_files(root))
def wrap_python_command_with_env(command: str, *, mount_path: str = '/workspace') -> str:
    """Wrap a command with a reusable sandbox-local Python env bootstrap.

    This is the generic "workspace is a Python project" path used by mutable
    workspaces such as skills. Read-only installation strategies stay in the
    higher-level caller because they are application policy, not workspace
    semantics.

    The returned shell script rebuilds ``<mount_path>/.venv`` when it is
    missing or when the recorded interpreter/manifest fingerprint no longer
    matches, guarded by a lock directory, and then runs *command* with that
    virtualenv first on ``PATH``.
    """
    # The bootstrap is dedented on its own and the caller's command appended
    # afterwards. If the command were interpolated before dedenting, a
    # multi-line command with a line at column 0 would reduce the common
    # margin to nothing, leaving the script indented and the ``PY`` heredoc
    # terminator unrecognized by the shell.
    #
    # The heredoc body and its terminator must sit at the template's base
    # indentation so that they start at column 0 after dedenting.
    bootstrap = textwrap.dedent(
        f"""
        set -e
        _LB_VENV_DIR="{mount_path}/.venv"
        _LB_META_DIR="{mount_path}/.langbot"
        _LB_META_FILE="$_LB_META_DIR/python-env.json"
        _LB_LOCK_DIR="$_LB_META_DIR/python-env.lock"
        _LB_TMP_DIR="{mount_path}/.tmp"
        _LB_PIP_CACHE_DIR="{mount_path}/.cache/pip"
        mkdir -p "$_LB_META_DIR" "$_LB_TMP_DIR" "$_LB_PIP_CACHE_DIR"
        export TMPDIR="$_LB_TMP_DIR"
        export TEMP="$_LB_TMP_DIR"
        export TMP="$_LB_TMP_DIR"
        export PIP_CACHE_DIR="$_LB_PIP_CACHE_DIR"
        _lb_python_meta() {{
            python - <<'PY'
        import hashlib
        import json
        import os
        import sys
        root = "{mount_path}"
        digest = hashlib.sha256()
        manifest_files = []
        for rel in ("requirements.txt", "pyproject.toml", "setup.py", "setup.cfg"):
            path = os.path.join(root, rel)
            if not os.path.isfile(path):
                continue
            manifest_files.append(rel)
            with open(path, "rb") as handle:
                digest.update(rel.encode("utf-8"))
                digest.update(b"\\0")
                digest.update(handle.read())
                digest.update(b"\\0")
        print(
            json.dumps(
                {{
                    "python_executable": sys.executable,
                    "python_version": list(sys.version_info[:3]),
                    "manifest_files": manifest_files,
                    "manifest_sha256": digest.hexdigest(),
                }},
                sort_keys=True,
            )
        )
        PY
        }}
        _LB_CURRENT_META="$(_lb_python_meta)"
        _LB_NEEDS_BOOTSTRAP=0
        if [ ! -x "$_LB_VENV_DIR/bin/python" ]; then
            _LB_NEEDS_BOOTSTRAP=1
        elif [ ! -f "$_LB_META_FILE" ]; then
            _LB_NEEDS_BOOTSTRAP=1
        elif [ "$(cat "$_LB_META_FILE")" != "$_LB_CURRENT_META" ]; then
            _LB_NEEDS_BOOTSTRAP=1
        fi
        if [ "$_LB_NEEDS_BOOTSTRAP" -eq 1 ]; then
            _LB_LOCK_WAIT=0
            while ! mkdir "$_LB_LOCK_DIR" 2>/dev/null; do
                if [ "$_LB_LOCK_WAIT" -ge 120 ]; then
                    echo "Timed out waiting for Python environment lock: $_LB_LOCK_DIR" >&2
                    exit 1
                fi
                sleep 1
                _LB_LOCK_WAIT=$((_LB_LOCK_WAIT + 1))
            done
            _lb_cleanup_lock() {{
                rmdir "$_LB_LOCK_DIR" >/dev/null 2>&1 || true
            }}
            trap _lb_cleanup_lock EXIT INT TERM
            _LB_CURRENT_META="$(_lb_python_meta)"
            _LB_NEEDS_BOOTSTRAP=0
            if [ ! -x "$_LB_VENV_DIR/bin/python" ]; then
                _LB_NEEDS_BOOTSTRAP=1
            elif [ ! -f "$_LB_META_FILE" ]; then
                _LB_NEEDS_BOOTSTRAP=1
            elif [ "$(cat "$_LB_META_FILE")" != "$_LB_CURRENT_META" ]; then
                _LB_NEEDS_BOOTSTRAP=1
            fi
            if [ "$_LB_NEEDS_BOOTSTRAP" -eq 1 ]; then
                rm -rf "$_LB_VENV_DIR"
                python -m venv "$_LB_VENV_DIR"
                . "$_LB_VENV_DIR/bin/activate"
                python -m pip install --upgrade pip setuptools wheel
                if [ -f "{mount_path}/requirements.txt" ]; then
                    python -m pip install -r "{mount_path}/requirements.txt"
                elif [ -f "{mount_path}/pyproject.toml" ] || [ -f "{mount_path}/setup.py" ] || [ -f "{mount_path}/setup.cfg" ]; then
                    python -m pip install "{mount_path}"
                fi
                printf '%s' "$_LB_CURRENT_META" > "$_LB_META_FILE"
            fi
        fi
        export VIRTUAL_ENV="$_LB_VENV_DIR"
        export PATH="$_LB_VENV_DIR/bin:$PATH"
        """
    ).strip()
    return f'{bootstrap}\n{command}'.rstrip() + '\n'
class BoxWorkspaceSession:
    """High-level handle for one reusable workspace-backed Box session.

    The Box runtime already understands sessions and managed processes. This
    wrapper adds LangBot's workspace-centric view on top: a mounted host path,
    a stable ``session_id``, optional environment defaults, and convenience
    helpers for exec or long-running processes inside that workspace.
    """

    def __init__(
        self,
        box_service,
        session_id: str,
        *,
        host_path: str | None = None,
        host_path_mode: str = 'rw',
        workdir: str = '/workspace',
        env: dict[str, str] | None = None,
        mount_path: str = '/workspace',
        network: str | None = None,
        read_only_rootfs: bool | None = None,
        image: str | None = None,
        cpus: float | None = None,
        memory_mb: int | None = None,
        pids_limit: int | None = None,
        persistent: bool = False,
    ):
        # Identity and backing service.
        self.box_service = box_service
        self.session_id = session_id
        self.persistent = persistent

        # Workspace mount.
        self.host_path = host_path
        self.host_path_mode = host_path_mode
        self.mount_path = mount_path
        self.workdir = workdir

        # Session defaults; env is copied so later changes to the caller's
        # dict do not leak into this handle.
        self.env = dict(env) if env else {}
        self.network = network
        self.read_only_rootfs = read_only_rootfs

        # Optional overrides, only sent when not None.
        self.image = image
        self.cpus = cpus
        self.memory_mb = memory_mb
        self.pids_limit = pids_limit

    def rewrite_path(self, path: str) -> str:
        """Translate a host path into this workspace's mounted path."""
        return rewrite_mounted_path(path, self.host_path, mount_path=self.mount_path)

    def rewrite_venv_command(self, command: str) -> str:
        """Apply the module-level ``rewrite_venv_command`` for this workspace's mount."""
        return rewrite_venv_command(command, self.host_path, mount_path=self.mount_path)

    def build_session_payload(self) -> dict[str, Any]:
        """Return the session-level payload for this workspace.

        Optional settings that are ``None`` are left out, and the host path
        and its mode are included only when a host path is set.
        """
        # Keep this payload generic so callers can reuse the same workspace
        # handle for plain exec, file-producing tasks, or managed processes.
        payload: dict[str, Any] = dict(
            session_id=self.session_id,
            workdir=self.workdir,
            env=self.env,
            persistent=self.persistent,
        )
        for name in ('network', 'read_only_rootfs'):
            setting = getattr(self, name)
            if setting is not None:
                payload[name] = setting
        if self.host_path:
            payload.update(host_path=self.host_path, host_path_mode=self.host_path_mode)
        for name in ('image', 'cpus', 'memory_mb', 'pids_limit'):
            setting = getattr(self, name)
            if setting is not None:
                payload[name] = setting
        return payload

    def build_exec_payload(
        self,
        cmd: str,
        *,
        workdir: str | None = None,
        env: dict[str, str] | None = None,
        timeout_sec: int | None = None,
    ) -> dict[str, Any]:
        """Return the payload for one command execution in this workspace.

        A non-empty *env* replaces the session environment for this call.
        *workdir* falls back to the session workdir. ``env`` is dropped from
        the payload when the session environment is empty and no non-empty
        *env* was given.
        """
        # Exec payloads inherit the session-level workspace config, then layer
        # per-call command/workdir/env overrides on top.
        payload = self.build_session_payload()
        payload.update(cmd=cmd, workdir=workdir or self.workdir)
        if timeout_sec is not None:
            payload['timeout_sec'] = timeout_sec

        effective_env = env if env is not None else self.env
        if effective_env:
            payload['env'] = effective_env
        elif 'env' in payload and not payload['env']:
            del payload['env']
        return payload

    async def execute_raw(
        self,
        cmd: str,
        *,
        workdir: str | None = None,
        env: dict[str, str] | None = None,
        timeout_sec: int | None = None,
    ):
        """Build a spec for *cmd* and run it directly through the Box client."""
        exec_payload = self.build_exec_payload(cmd, workdir=workdir, env=env, timeout_sec=timeout_sec)
        spec = self.box_service.build_spec(exec_payload)
        return await self.box_service.client.execute(spec)

    async def execute_for_query(
        self,
        query,
        cmd: str,
        *,
        workdir: str | None = None,
        env: dict[str, str] | None = None,
        timeout_sec: int | None = None,
    ) -> dict:
        """Run *cmd* through the service's ``execute_spec_payload`` for *query*."""
        exec_payload = self.build_exec_payload(cmd, workdir=workdir, env=env, timeout_sec=timeout_sec)
        return await self.box_service.execute_spec_payload(exec_payload, query)

    async def create_session(self):
        """Create the Box session described by the session payload."""
        session_payload = self.build_session_payload()
        return await self.box_service.create_session(session_payload)

    def build_process_payload(
        self,
        command: str,
        args: list[str] | None = None,
        *,
        env: dict[str, str] | None = None,
        cwd: str = '/workspace',
    ) -> dict[str, Any]:
        """Return the payload for a managed process.

        When a host path is set, the command goes through venv rewriting and
        every argument and *cwd* through path rewriting; otherwise they are
        passed through as given. *env* and *args* are copied.
        """
        # Managed processes run inside the same workspace model as one-shot
        # execs, so path/venv rewriting is shared here.
        process_args = list(args or [])
        if not self.host_path:
            process_command, process_cwd = command, cwd
        else:
            process_command = self.rewrite_venv_command(command)
            process_args = [self.rewrite_path(arg) for arg in process_args]
            process_cwd = self.rewrite_path(cwd)
        return {
            'command': process_command,
            'args': process_args,
            'env': dict(env or {}),
            'cwd': process_cwd,
        }

    async def start_managed_process(
        self,
        command: str,
        args: list[str] | None = None,
        *,
        process_id: str = 'default',
        env: dict[str, str] | None = None,
        cwd: str = '/workspace',
    ):
        """Start a managed process in this session under *process_id*."""
        process_payload = self.build_process_payload(command, args, env=env, cwd=cwd)
        process_payload['process_id'] = process_id
        return await self.box_service.start_managed_process(self.session_id, process_payload)

    async def get_managed_process(self, process_id: str = 'default'):
        """Fetch the managed process *process_id* of this session."""
        return await self.box_service.get_managed_process(self.session_id, process_id)

    async def stop_managed_process(self, process_id: str = 'default') -> None:
        """Stop the managed process *process_id* of this session."""
        await self.box_service.stop_managed_process(self.session_id, process_id)

    def get_managed_process_websocket_url(self, process_id: str = 'default') -> str:
        """Return the websocket URL for attaching to *process_id* in this session."""
        return self.box_service.get_managed_process_websocket_url(self.session_id, process_id)

    async def cleanup(self) -> None:
        """Delete this session through the Box client."""
        await self.box_service.client.delete_session(self.session_id)
+12 -2
View File
@@ -9,6 +9,7 @@ from ..platform import botmgr as im_mgr
from ..platform.webhook_pusher import WebhookPusher
from ..provider.session import sessionmgr as llm_session_mgr
from ..provider.modelmgr import modelmgr as llm_model_mgr
from ..box import service as box_service_module
from langbot.pkg.provider.tools import toolmgr as llm_tool_mgr
from ..config import manager as config_mgr
@@ -31,8 +32,8 @@ from ..api.http.service import mcp as mcp_service
from ..api.http.service import apikey as apikey_service
from ..api.http.service import webhook as webhook_service
from ..api.http.service import monitoring as monitoring_service
from ..api.http.service import skill as skill_service
from ..api.http.service import maintenance as maintenance_service
from ..discover import engine as discover_engine
from ..storage import mgr as storagemgr
from ..utils import logcache
@@ -43,6 +44,7 @@ from ..rag.service import RAGRuntimeService
from ..vector import mgr as vectordb_mgr
from ..telemetry import telemetry as telemetry_module
from ..survey import manager as survey_module
from ..skill import manager as skill_mgr
class Application:
@@ -70,6 +72,7 @@ class Application:
# TODO move to pipeline
tool_mgr: llm_tool_mgr.ToolManager = None
box_service: box_service_module.BoxService = None
# ======= Config manager =======
@@ -156,6 +159,10 @@ class Application:
monitoring_service: monitoring_service.MonitoringService = None
skill_service: skill_service.SkillService = None
skill_mgr: skill_mgr.SkillManager = None
maintenance_service: maintenance_service.MaintenanceService = None
def __init__(self):
@@ -301,7 +308,10 @@ class Application:
return parsed
def dispose(self):
self.plugin_connector.dispose()
if self.plugin_connector is not None:
self.plugin_connector.dispose()
if self.box_service is not None:
self.box_service.dispose()
async def print_web_access_info(self):
"""Print access webui tips"""
+5 -1
View File
@@ -46,12 +46,14 @@ async def make_app(loop: asyncio.AbstractEventLoop) -> app.Application:
async def main(loop: asyncio.AbstractEventLoop):
app_inst: app.Application | None = None
try:
# Hang system signal processing
import signal
def signal_handler(sig, frame):
    """Handle a termination signal: dispose the app if present, then exit.

    ``app_inst`` comes from the enclosing scope and is still ``None``
    when the signal arrives before the application has been built, so
    it is only disposed once it has been assigned.
    """
    if app_inst is not None:
        app_inst.dispose()
    print('[Signal] Program exit.')
    # Immediate process exit; no further Python-level cleanup runs.
    os._exit(0)
@@ -60,4 +62,6 @@ async def main(loop: asyncio.AbstractEventLoop):
app_inst = await make_app(loop)
await app_inst.run()
except Exception:
if app_inst is not None:
app_inst.dispose()
traceback.print_exc()
+67 -7
View File
@@ -1,5 +1,6 @@
import logging
import logging.handlers
import os
import sys
import time
@@ -20,6 +21,66 @@ log_colors_config = {
LOG_FILE_MAX_BYTES = 10 * 1024 * 1024 # 10MB per file
LOG_FILE_BACKUP_COUNT = 5 # Keep 5 backup files (total ~50MB max)
LOG_DIR = 'data/logs'
class DailyGroupedRotatingFileHandler(logging.handlers.RotatingFileHandler):
    """File handler that writes to ``<log_dir>/langbot-YYYY-MM-DD.log``.

    It combines two rotation triggers:

    * **Size** — within a single day the file is rotated once it exceeds
      ``maxBytes``, producing numbered backups (``langbot-DATE.log.1`` etc.),
      exactly like :class:`~logging.handlers.RotatingFileHandler`.
    * **Date** — when the local date changes, logging switches to a fresh
      ``langbot-<new date>.log`` file. This happens even within a single
      long-running process, so a bot started on day N keeps writing to that
      day's file and rolls over to day N+1's file at midnight, instead of
      appending every subsequent day's logs to the start-day file.

    The on-disk naming stays compatible with the log-retention cleanup in
    ``api/http/service/maintenance.py`` (``LOG_FILE_PATTERN``).
    """

    def __init__(self, log_dir: str, max_bytes: int, backup_count: int, encoding: str = 'utf-8'):
        """Create the handler and open today's log file.

        Args:
            log_dir: Directory that holds the log files; created if missing.
            max_bytes: Size threshold that triggers a numbered rotation.
            backup_count: Number of numbered backups kept per day file.
            encoding: Text encoding used for the log files.
        """
        self.log_dir = log_dir
        # The base class opens the file immediately, which fails with
        # FileNotFoundError when the directory has not been created yet.
        os.makedirs(log_dir, exist_ok=True)
        self._current_date = self._today()
        super().__init__(
            self._build_path(self._current_date),
            maxBytes=max_bytes,
            backupCount=backup_count,
            encoding=encoding,
        )

    @staticmethod
    def _today() -> str:
        """Return the current local date formatted as ``YYYY-MM-DD``."""
        return time.strftime('%Y-%m-%d', time.localtime())

    def _build_path(self, date_str: str) -> str:
        """Return the log file path inside ``log_dir`` for ``date_str``."""
        return os.path.join(self.log_dir, 'langbot-%s.log' % date_str)

    def shouldRollover(self, record):
        """Return a truthy value when the date changed or the size limit is hit."""
        # Roll over when the day changes, regardless of file size.
        if self._today() != self._current_date:
            return True
        return super().shouldRollover(record)

    def doRollover(self):
        """Switch to the new day's file, or do a numbered size rotation."""
        today = self._today()
        if today == self._current_date:
            # Same day, file exceeded maxBytes: numbered rotation.
            super().doRollover()
            return

        # Date changed: point the handler at the new day's file.
        # This is a date switch, not a size-based numbered rotation.
        if self.stream:
            self.stream.close()
            self.stream = None
        self._current_date = today
        self.baseFilename = os.path.abspath(self._build_path(today))
        if not self.delay:
            self.stream = self._open()
async def init_logging(extra_handlers: list[logging.Handler] = None) -> logging.Logger:
# Remove all existing loggers
@@ -31,8 +92,6 @@ async def init_logging(extra_handlers: list[logging.Handler] = None) -> logging.
if constants.debug_mode:
level = logging.DEBUG
log_file_name = 'data/logs/langbot-%s.log' % time.strftime('%Y-%m-%d', time.localtime())
qcg_logger = logging.getLogger('langbot')
qcg_logger.setLevel(level)
@@ -48,12 +107,13 @@ async def init_logging(extra_handlers: list[logging.Handler] = None) -> logging.
# stream_handler.setFormatter(color_formatter)
stream_handler.stream = open(sys.stdout.fileno(), mode='w', encoding='utf-8', buffering=1)
# Use RotatingFileHandler to prevent unbounded log file growth
rotating_file_handler = logging.handlers.RotatingFileHandler(
log_file_name,
# Rotate by size within a day and switch files when the date changes,
# so long-running processes still produce a log file for the current day.
rotating_file_handler = DailyGroupedRotatingFileHandler(
LOG_DIR,
max_bytes=LOG_FILE_MAX_BYTES,
backup_count=LOG_FILE_BACKUP_COUNT,
encoding='utf-8',
maxBytes=LOG_FILE_MAX_BYTES,
backupCount=LOG_FILE_BACKUP_COUNT,
)
log_handlers: list[logging.Handler] = [
@@ -0,0 +1,27 @@
from __future__ import annotations
from .. import migration
@migration.migration_class('weknora-api-config', 42)
class WeKnoraAPICfgMigration(migration.Migration):
    """Migration that adds the default WeKnora API provider configuration."""

    async def need_migrate(self) -> bool:
        """Return True when the provider config has no ``weknora-api`` section."""
        provider_data = self.ap.provider_cfg.data
        return 'weknora-api' not in provider_data

    async def run(self):
        """Insert the default ``weknora-api`` section and dump the provider config."""
        provider_cfg = self.ap.provider_cfg
        defaults = {
            'base-url': 'http://localhost:8080/api/v1',
            'app-type': 'agent',
            'api-key': '',
            'agent-id': 'builtin-smart-reasoning',
            'knowledge-base-ids': [],
            'web-search-enabled': False,
            'timeout': 120,
            'base-prompt': '请回答用户的问题。',
        }
        provider_cfg.data['weknora-api'] = defaults

        await provider_cfg.dump_config()
@@ -0,0 +1,30 @@
from __future__ import annotations
from .. import migration
@migration.migration_class('deerflow-api-config', 43)
class DeerFlowAPICfgMigration(migration.Migration):
    """Migration that adds the default DeerFlow API provider configuration."""

    async def need_migrate(self) -> bool:
        """Return True when the provider config has no ``deerflow-api`` section."""
        provider_data = self.ap.provider_cfg.data
        return 'deerflow-api' not in provider_data

    async def run(self):
        """Insert the default ``deerflow-api`` section and dump the provider config."""
        provider_cfg = self.ap.provider_cfg
        defaults = {
            'api-base': 'http://127.0.0.1:2026',
            'api-key': '',
            'auth-header': '',
            'assistant-id': 'lead_agent',
            'model-name': '',
            'thinking-enabled': False,
            'plan-mode': False,
            'subagent-enabled': False,
            'max-concurrent-subagents': 3,
            'timeout': 300,
            'recursion-limit': 1000,
        }
        provider_cfg.data['deerflow-api'] = defaults

        await provider_cfg.dump_config()
+15
View File
@@ -6,6 +6,7 @@ from .. import stage, app
from ...utils import version, proxy
from ...pipeline import pool, controller, pipelinemgr
from ...pipeline import aggregator as message_aggregator
from ...box import service as box_service
from ...plugin import connector as plugin_connector
from ...command import cmdmgr
from ...provider.session import sessionmgr as llm_session_mgr
@@ -28,6 +29,8 @@ from ...api.http.service import mcp as mcp_service
from ...api.http.service import apikey as apikey_service
from ...api.http.service import webhook as webhook_service
from ...api.http.service import monitoring as monitoring_service
from ...api.http.service import skill as skill_service
from ...skill import manager as skill_mgr
from ...api.http.service import maintenance as maintenance_service
from ...discover import engine as discover_engine
from ...storage import mgr as storagemgr
@@ -86,6 +89,9 @@ class BuildAppStage(stage.BootingStage):
webhook_service_inst = webhook_service.WebhookService(ap)
ap.webhook_service = webhook_service_inst
skill_service_inst = skill_service.SkillService(ap)
ap.skill_service = skill_service_inst
proxy_mgr = proxy.ProxyManager(ap)
await proxy_mgr.initialize()
ap.proxy_mgr = proxy_mgr
@@ -129,6 +135,10 @@ class BuildAppStage(stage.BootingStage):
await llm_session_mgr_inst.initialize()
ap.sess_mgr = llm_session_mgr_inst
box_service_inst = box_service.BoxService(ap)
await box_service_inst.initialize()
ap.box_service = box_service_inst
llm_tool_mgr_inst = llm_tool_mgr.ToolManager(ap)
await llm_tool_mgr_inst.initialize()
ap.tool_mgr = llm_tool_mgr_inst
@@ -149,6 +159,11 @@ class BuildAppStage(stage.BootingStage):
msg_aggregator_inst = message_aggregator.MessageAggregator(ap)
ap.msg_aggregator = msg_aggregator_inst
# Initialize skill manager
skill_mgr_inst = skill_mgr.SkillManager(ap)
await skill_mgr_inst.initialize()
ap.skill_mgr = skill_mgr_inst
rag_mgr_inst = rag_mgr.RAGManager(ap)
await rag_mgr_inst.initialize()
ap.rag_mgr = rag_mgr_inst
@@ -11,6 +11,10 @@ class MCPServer(Base):
enable = sqlalchemy.Column(sqlalchemy.Boolean, nullable=False, default=False)
mode = sqlalchemy.Column(sqlalchemy.String(255), nullable=False) # stdio, sse, http
extra_args = sqlalchemy.Column(sqlalchemy.JSON, nullable=False, default={})
# Markdown documentation captured from LangBot Space at install time so the
# detail page can show docs even when the server is offline / has no tools.
# Empty string for manually-created servers that have no marketplace README.
readme = sqlalchemy.Column(sqlalchemy.Text, nullable=False, server_default='', default='')
created_at = sqlalchemy.Column(sqlalchemy.DateTime, nullable=False, server_default=sqlalchemy.func.now())
updated_at = sqlalchemy.Column(
sqlalchemy.DateTime,
@@ -0,0 +1,34 @@
"""add readme column to mcp_servers
Revision ID: 0004_add_mcp_readme
Revises: 0003_add_rerank_models
Create Date: 2026-06-06
"""
import sqlalchemy as sa
from alembic import op
revision = '0004_add_mcp_readme'
down_revision = '0003_add_rerank_models'
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Add the ``readme`` column to ``mcp_servers`` when it is missing.

    Nothing is done if the table does not exist, or if the column is
    already present (the table may have been created by ``create_all()``
    with the column included on fresh installs).
    """
    inspector = sa.inspect(op.get_bind())

    if 'mcp_servers' not in inspector.get_table_names():
        return

    already_present = any(col['name'] == 'readme' for col in inspector.get_columns('mcp_servers'))
    if already_present:
        return

    readme_column = sa.Column('readme', sa.Text(), nullable=False, server_default='')
    op.add_column('mcp_servers', readme_column)
def downgrade() -> None:
    """Drop the ``readme`` column from ``mcp_servers`` when it exists.

    Uses the same existence checks as the upgrade step, so the downgrade
    is a no-op when the table or the column is absent instead of failing.
    """
    conn = op.get_bind()
    inspector = sa.inspect(conn)
    if 'mcp_servers' not in inspector.get_table_names():
        return
    columns = {col['name'] for col in inspector.get_columns('mcp_servers')}
    if 'readme' in columns:
        op.drop_column('mcp_servers', 'readme')
+1
View File
@@ -275,6 +275,7 @@ class MessageAggregator:
message_chain=merged_chain,
adapter=base_msg.adapter,
pipeline_uuid=base_msg.pipeline_uuid,
routed_by_rule=any(msg.routed_by_rule for msg in messages),
)
async def flush_all(self) -> None:
@@ -76,6 +76,10 @@ class LongTextProcessStage(stage.PipelineStage):
self.ap.logger.debug('Long message processing strategy is not set, skip long message processing.')
return entities.StageProcessResult(result_type=entities.ResultType.CONTINUE, new_query=query)
if not query.resp_message_chain:
self.ap.logger.debug('Response message chain is empty, skip long message processing.')
return entities.StageProcessResult(result_type=entities.ResultType.CONTINUE, new_query=query)
# 检查是否包含非 Plain 组件
contains_non_plain = False
+1
View File
@@ -63,6 +63,7 @@ class QueryPool:
self.cached_queries[query_id] = query
self.query_id_counter += 1
self.condition.notify_all()
return query
async def __aenter__(self):
await self.pool_lock.acquire()
+76 -2
View File
@@ -32,6 +32,9 @@ class PreProcessor(stage.PipelineStage):
) -> entities.StageProcessResult:
"""Process"""
selected_runner = query.pipeline_config['ai']['runner']['runner']
include_skill_authoring = (
selected_runner == 'local-agent' and getattr(self.ap, 'skill_service', None) is not None
)
session = await self.ap.sess_mgr.get_session(query)
@@ -110,7 +113,11 @@ class PreProcessor(stage.PipelineStage):
# Get bound plugins and MCP servers for filtering tools
bound_plugins = query.variables.get('_pipeline_bound_plugins', None)
bound_mcp_servers = query.variables.get('_pipeline_bound_mcp_servers', None)
query.use_funcs = await self.ap.tool_mgr.get_all_tools(bound_plugins, bound_mcp_servers)
query.use_funcs = await self.ap.tool_mgr.get_all_tools(
bound_plugins,
bound_mcp_servers,
include_skill_authoring=include_skill_authoring,
)
self.ap.logger.debug(f'Bound plugins: {bound_plugins}')
self.ap.logger.debug(f'Bound MCP servers: {bound_mcp_servers}')
@@ -121,7 +128,11 @@ class PreProcessor(stage.PipelineStage):
if not query.use_funcs and query.variables.get('_fallback_model_uuids'):
bound_plugins = query.variables.get('_pipeline_bound_plugins', None)
bound_mcp_servers = query.variables.get('_pipeline_bound_mcp_servers', None)
query.use_funcs = await self.ap.tool_mgr.get_all_tools(bound_plugins, bound_mcp_servers)
query.use_funcs = await self.ap.tool_mgr.get_all_tools(
bound_plugins,
bound_mcp_servers,
include_skill_authoring=include_skill_authoring,
)
sender_name = ''
@@ -248,4 +259,67 @@ class PreProcessor(stage.PipelineStage):
query.prompt.messages = event_ctx.event.default_prompt
query.messages = event_ctx.event.prompt
# =========== Skill awareness for the local-agent runner ===========
# The actual activation goes through the ``activate`` Tool Call so the
# LLM doesn't see full SKILL.md instructions until it commits to a
# skill (Claude Code's progressive disclosure). But the LLM still has
# to KNOW which skills exist to make that choice, so we:
# 1. resolve the pipeline's bound skills and stash them in
# ``query.variables['_pipeline_bound_skills']`` for downstream
# visibility checks (skill loader, native exec workdir);
# 2. inject a short ``Available Skills`` index (name + description
# only) into the system prompt. The contributor's original PR
# relied on this injection; without it the LLM never discovers
# the skills are there and just calls native tools instead.
if selected_runner == 'local-agent' and self.ap.skill_mgr:
pipeline_data = await self.ap.pipeline_service.get_pipeline(query.pipeline_uuid)
extensions_prefs = (pipeline_data or {}).get('extensions_preferences', {})
enable_all_skills = extensions_prefs.get('enable_all_skills', True)
if enable_all_skills:
bound_skills = None # None = all loaded skills are visible
else:
bound_skills = extensions_prefs.get('skills', [])
query.variables['_pipeline_bound_skills'] = bound_skills
skill_addition = self.ap.skill_mgr.build_skill_aware_prompt_addition(
bound_skills=bound_skills,
)
if skill_addition:
# Append to the first system message; create one if the
# prompt has none. Handles both plain-string and
# content-element (list) message bodies.
if query.prompt.messages and query.prompt.messages[0].role == 'system':
head = query.prompt.messages[0]
if isinstance(head.content, str):
head.content = head.content + skill_addition
elif isinstance(head.content, list):
appended = False
for ce in head.content:
if getattr(ce, 'type', None) == 'text':
ce.text = (ce.text or '') + skill_addition
appended = True
break
if not appended:
head.content.append(provider_message.ContentElement(type='text', text=skill_addition))
else:
query.prompt.messages.insert(
0,
provider_message.Message(role='system', content=skill_addition.strip()),
)
self.ap.logger.debug(
f'Skill index injected into system prompt: '
f'pipeline={query.pipeline_uuid} '
f'bound_skills={bound_skills or "all"} '
f'loaded_skills={len(self.ap.skill_mgr.skills)}'
)
else:
self.ap.logger.debug(
f'No skills available for prompt injection: '
f'pipeline={query.pipeline_uuid} '
f'loaded_skills={len(self.ap.skill_mgr.skills)} '
f'bound_skills={bound_skills}'
)
return entities.StageProcessResult(result_type=entities.ResultType.CONTINUE, new_query=query)
@@ -5,6 +5,7 @@ import abc
from ...core import app
from .. import entities
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
import langbot_plugin.api.entities.builtin.provider.message as provider_message
class MessageHandler(metaclass=abc.ABCMeta):
@@ -31,3 +32,29 @@ class MessageHandler(metaclass=abc.ABCMeta):
if len(s0) > 20 or '\n' in s:
s0 = s0[:20] + '...'
return s0
def format_result_log(
    self,
    result: provider_message.Message | provider_message.MessageChunk,
) -> str | None:
    """Build a short log summary for a runner result.

    Returns ``None`` when there is nothing worth logging: the content is a
    blank string or an empty list and no tool calls were requested.
    Tool-call results are summarised by the requested tool names; string
    content from the ``tool`` role that starts with ``err:`` is reported as
    a tool error; everything else is the truncated readable form.
    """
    requested = result.tool_calls
    if requested:
        names = []
        for call in requested:
            fn = call.function
            if fn and fn.name:
                names.append(fn.name)
        if not names:
            return f'{result.role}: requested tool calls'
        return f'{result.role}: requested tools: {", ".join(names)}'

    body = result.content
    if isinstance(body, str):
        if not body.strip():
            return None
        if result.role == 'tool' and body.startswith('err:'):
            return f'tool error: {self.cut_str(body)}'
    elif isinstance(body, list) and not body:
        return None

    return self.cut_str(result.readable_str())
@@ -113,9 +113,11 @@ class ChatMessageHandler(handler.MessageHandler):
# This prevents memory overflow from thousands of log entries per conversation
# First chunk uses INFO level to confirm connection establishment
if chunk_count == 1:
self.ap.logger.info(
f'Conversation({query.query_id}) Streaming started: {self.cut_str(result.readable_str())}'
)
summary = self.format_result_log(result)
if summary is not None:
self.ap.logger.info(f'Conversation({query.query_id}) Streaming started: {summary}')
else:
self.ap.logger.info(f'Conversation({query.query_id}) Streaming started')
elif chunk_count % 10 == 0:
self.ap.logger.debug(
f'Conversation({query.query_id}) Streaming chunk {chunk_count}: {self.cut_str(result.readable_str())}'
@@ -135,9 +137,9 @@ class ChatMessageHandler(handler.MessageHandler):
async for result in runner.run(query):
query.resp_messages.append(result)
self.ap.logger.info(
f'Conversation({query.query_id}) Response: {self.cut_str(result.readable_str())}'
)
summary = self.format_result_log(result)
if summary is not None:
self.ap.logger.info(f'Conversation({query.query_id}) Response: {summary}')
if result.content is not None:
text_length += len(result.content)
@@ -3,6 +3,7 @@ import typing
import asyncio
import traceback
import datetime
import json
import aiocqhttp
import pydantic
@@ -293,6 +294,29 @@ class AiocqhttpMessageConverter(abstract_platform_adapter.AbstractMessageConvert
elif msg.type == 'dice':
face_id = msg.data['result']
yiri_msg_list.append(platform_message.Face(face_type='dice', face_id=int(face_id), face_name='骰子'))
elif msg.type == 'json':
try:
raw = msg.data.get('data', {})
if isinstance(raw, str):
raw = json.loads(raw)
if isinstance(raw, dict):
_meta = raw.get('meta', {}) or {}
if isinstance(_meta, dict):
_detail = _meta.get('detail_1') or _meta.get('music') or _meta.get('news') or {}
else:
_detail = {}
if isinstance(_detail, dict):
preview = _detail.get('preview', '')
title = _detail.get('desc', '') or _detail.get('title', '')
url = _detail.get('qqdocurl', '') or _detail.get('jumpUrl', '')
else:
preview = title = url = ''
text = ' '.join([f'[{raw.get("app", "")}]', preview, title, url]).strip()
yiri_msg_list.append(platform_message.Plain(text=text or '[收到一张JSON卡片]'))
else:
yiri_msg_list.append(platform_message.Plain(text=str(raw)))
except Exception:
yiri_msg_list.append(platform_message.Plain(text='[收到一张JSON卡片]'))
chain = platform_message.MessageChain(yiri_msg_list)
@@ -19,6 +19,18 @@ spec:
en: https://link.langbot.app/en/platforms/dingtalk
ja: https://link.langbot.app/ja/platforms/dingtalk
config:
- name: one-click-create
label:
en_US: One-Click Create App
zh_Hans: 一键创建应用
zh_Hant: 一鍵建立應用
description:
en_US: "Scan QR code with DingTalk to automatically create an app and fill in credentials. Note: Robot Code cannot be obtained automatically, you need to copy it from the DingTalk Developer Backend manually."
zh_Hans: "使用钉钉扫码自动创建应用并填写凭据。注意:机器人代码无法自动获取,需前往钉钉开发者后台手动复制。"
zh_Hant: "使用釘釘掃碼自動建立應用並填寫憑證。注意:機器人代碼無法自動取得,需前往釘釘開發者後台手動複製。"
type: qr-code-login
login_platform: dingtalk
required: false
- name: client_id
label:
en_US: Client ID
@@ -40,6 +52,10 @@ spec:
en_US: Robot Code
zh_Hans: 机器人代码
zh_Hant: 機器人代碼
description:
en_US: "Required for image recognition, file upload and other features. Get it from DingTalk Developer Backend > Robot Configuration."
zh_Hans: "识图、上传文件等功能必填。请前往钉钉开发者后台 > 机器人配置中获取。"
zh_Hant: "識圖、上傳檔案等功能必填。請前往釘釘開發者後台 > 機器人設定中取得。"
type: string
required: true
default: ""
+103 -4
View File
@@ -881,7 +881,8 @@ class LarkAdapter(abstract_platform_adapter.AbstractMessagePlatformAdapter):
bot_account_id = config['bot_name']
bot = lark_oapi.ws.Client(config['app_id'], config['app_secret'], event_handler=event_handler)
domain = self._resolve_domain(config)
bot = lark_oapi.ws.Client(config['app_id'], config['app_secret'], event_handler=event_handler, domain=domain)
api_client = self.build_api_client(config)
cipher = AESCipher(config.get('encrypt-key', ''))
self.request_app_ticket(api_client, config)
@@ -1014,18 +1015,116 @@ class LarkAdapter(abstract_platform_adapter.AbstractMessagePlatformAdapter):
return None
@staticmethod
def _resolve_domain(config) -> str:
    """Return the open-platform base URL selected by the adapter config.

    Args:
        config: Adapter configuration mapping. ``domain`` holds either a
            base URL or the literal ``'custom'``; in the latter case the
            URL is read from ``custom_domain``.

    Returns:
        The domain without trailing slashes.

    Raises:
        ValueError: if ``domain`` is ``'custom'`` but ``custom_domain`` is
            missing, empty or only whitespace.
    """
    # A missing, None or empty ``domain`` falls back to the Feishu default
    # instead of producing an empty base URL.
    domain = config.get('domain') or lark_oapi.FEISHU_DOMAIN
    if domain == 'custom':
        domain = (config.get('custom_domain') or '').strip()
        if not domain:
            raise ValueError('Custom domain is required when domain is set to "custom"')
    return domain.rstrip('/')
def build_api_client(self, config):
    """Build the Lark/Feishu API client for the given adapter config.

    Args:
        config: Adapter configuration mapping. Must contain ``app_id`` and
            ``app_secret``; ``app_type`` (default ``'self'``) selects the
            ISV app type when set to ``'isv'``; the domain is resolved by
            ``_resolve_domain``.

    Returns:
        The client produced by ``lark_oapi.Client.builder()``.
    """
    app_id = config['app_id']
    app_secret = config['app_secret']
    domain = self._resolve_domain(config)

    # Build the client exactly once; the ISV app type is the only
    # difference between the two app kinds.
    builder = lark_oapi.Client.builder().app_id(app_id).app_secret(app_secret)
    if config.get('app_type', 'self') == 'isv':
        builder = builder.app_type(lark_oapi.AppType.ISV)

    return builder.domain(domain).build()
async def send_message(self, target_type: str, target_id: str, message: platform_message.MessageChain):
    """Send a message chain to a user or chat.

    The chain is converted into text elements and media items. All text is
    sent first as a single message (``post`` when it contains an ``at``
    element, plain ``text`` otherwise); every media item is then sent as
    its own message.

    Args:
        target_type: ``'person'`` (mapped to ``open_id``), ``'group'``
            (mapped to ``chat_id``), or any other value, which is passed
            through unchanged as the receive-id type.
        target_id: Receiver id matching ``target_type``.
        message: Message chain to send.

    Raises:
        Exception: if the message-create API reports a failure.
    """
    text_elements, media_items = await self.message_converter.yiri2target(message, self.api_client)

    # Map standard target_type to Feishu receive_id_type
    receive_id_type = {'person': 'open_id', 'group': 'chat_id'}.get(target_type, target_type)

    # Send text message if there are text elements
    if text_elements:
        needs_post = any(ele['tag'] == 'at' for paragraph in text_elements for ele in paragraph)

        if needs_post:
            msg_type = 'post'
            final_content = json.dumps(
                {
                    'zh_Hans': {
                        'title': '',
                        'content': text_elements,
                    },
                }
            )
        else:
            msg_type = 'text'
            parts = []
            for paragraph in text_elements:
                para_text = ''.join(ele.get('text', '') for ele in paragraph)
                if para_text:
                    parts.append(para_text)
            final_content = json.dumps({'text': '\n\n'.join(parts)})

        self._create_im_message(
            receive_id_type,
            target_id,
            msg_type,
            final_content,
            'client.im.v1.message.create',
        )

    # Send media messages separately (image, audio, file, etc.)
    for media in media_items:
        self._create_im_message(
            receive_id_type,
            target_id,
            media['msg_type'],
            json.dumps(media['content']),
            f'client.im.v1.message.create ({media["msg_type"]})',
        )

def _create_im_message(self, receive_id_type: str, receive_id: str, msg_type: str, content: str, api_label: str):
    """Create one message through the IM API and raise if the call fails.

    ``api_label`` is only used as the prefix of the error message.
    """
    request: CreateMessageRequest = (
        CreateMessageRequest.builder()
        .receive_id_type(receive_id_type)
        .request_body(
            CreateMessageRequestBody.builder()
            .receive_id(receive_id)
            .content(content)
            .msg_type(msg_type)
            .uuid(str(uuid.uuid4()))
            .build()
        )
        .build()
    )

    app_access_token = self.get_app_access_token()
    req_opt: RequestOption = (
        RequestOption.builder().app_ticket(self.app_ticket).app_access_token(app_access_token).build()
    )

    response: CreateMessageResponse = self.api_client.im.v1.message.create(request, req_opt)

    if not response.success():
        raise Exception(
            f'{api_label} failed, code: {response.code}, msg: {response.msg}, log_id: {response.get_log_id()}, resp: \n{json.dumps(json.loads(response.raw.content), indent=4, ensure_ascii=False)}'
        )
async def is_stream_output_supported(self) -> bool:
is_stream = False
+69 -4
View File
@@ -23,6 +23,71 @@ spec:
en: https://link.langbot.app/en/platforms/lark
ja: https://link.langbot.app/ja/platforms/lark
config:
- name: domain
label:
en_US: Platform Domain
zh_Hans: 平台域名
zh_Hant: 平台域名
ja_JP: プラットフォームドメイン
description:
en_US: Select the open platform domain. Use Feishu for Chinese mainland, Lark for international
zh_Hans: 选择开放平台域名,国内使用飞书,海外使用 Lark
zh_Hant: 選擇開放平台域名,國內使用飛書,海外使用 Lark
ja_JP: オープンプラットフォームのドメインを選択。中国国内は飛書、海外は Lark を使用
type: select
options:
- name: https://open.feishu.cn
label:
en_US: Feishu (open.feishu.cn)
zh_Hans: 飞书 (open.feishu.cn)
zh_Hant: 飛書 (open.feishu.cn)
ja_JP: 飛書 (open.feishu.cn)
- name: https://open.larksuite.com
label:
en_US: Lark (open.larksuite.com)
zh_Hans: Lark (open.larksuite.com)
zh_Hant: Lark (open.larksuite.com)
ja_JP: Lark (open.larksuite.com)
- name: custom
label:
en_US: Custom
zh_Hans: 自定义
zh_Hant: 自定義
ja_JP: カスタム
required: false
default: https://open.feishu.cn
- name: custom_domain
label:
en_US: Custom Domain
zh_Hans: 自定义域名
zh_Hant: 自定義域名
ja_JP: カスタムドメイン
description:
en_US: "Enter the full domain URL, e.g. https://open.example.com"
zh_Hans: "输入完整的域名 URL,例如 https://open.example.com"
zh_Hant: "輸入完整的域名 URL,例如 https://open.example.com"
ja_JP: "完全なドメイン URL を入力(例: https://open.example.com)"
type: string
required: false
default: ""
show_if:
field: domain
operator: eq
value: custom
- name: one-click-create
label:
en_US: One-Click Create App
zh_Hans: 一键创建应用
zh_Hant: 一鍵建立應用
ja_JP: ワンクリックでアプリ作成
description:
en_US: Scan QR code to automatically create a Feishu app and fill in credentials
zh_Hans: 扫码自动创建飞书应用并填写凭据
zh_Hant: 掃碼自動建立飛書應用並填寫憑證
ja_JP: QRコードをスキャンしてFeishuアプリを自動作成し、認証情報を入力
type: qr-code-login
login_platform: feishu
required: false
- name: app_id
label:
en_US: App ID
@@ -126,10 +191,10 @@ spec:
zh_Hant: 應用類型
ja_JP: アプリタイプ
description:
en_US: Default to self-built application, refer to https://open.feishu.cn/document/platform-overveiw/overview
zh_Hans: 默认为企业自建应用,参考 https://open.feishu.cn/document/platform-overveiw/overview
zh_Hant: 預設為企業自建應用,參考 https://open.feishu.cn/document/platform-overveiw/overview
ja_JP: デフォルトはカスタムアプリです。詳細は https://open.feishu.cn/document/platform-overveiw/overview を参照してください
en_US: "Default to self-built application, refer to https://open.feishu.cn/document/platform-overveiw/overview"
zh_Hans: "默认为企业自建应用,参考 https://open.feishu.cn/document/platform-overveiw/overview"
zh_Hant: "預設為企業自建應用,參考 https://open.feishu.cn/document/platform-overveiw/overview"
ja_JP: "デフォルトはカスタムアプリです。詳細は https://open.feishu.cn/document/platform-overveiw/overview を参照してください"
type: select
options:
- name: self
@@ -31,6 +31,18 @@ spec:
type: webhook-url
required: false
default: ""
- name: __system.outbound_ips
label:
en_US: IP Whitelist
zh_Hans: IP 白名单
zh_Hant: IP 白名單
description:
en_US: Add these outbound IPs of the LangBot server to the IP whitelist in the "Basic Configuration" of the WeChat Official Account platform
zh_Hans: 请将这些 LangBot 服务器的出网 IP 添加到微信公众平台「基本配置」中的 IP 白名单
zh_Hant: 請將這些 LangBot 伺服器的出網 IP 加入微信公眾平台「基本配置」中的 IP 白名單
type: array[string]
required: false
default: []
- name: token
label:
en_US: Token
@@ -32,6 +32,20 @@ spec:
type: string
required: true
default: "https://ilinkai.weixin.qq.com"
- name: qr-login
label:
en_US: Scan QR Login
zh_Hans: 扫码登录
zh_Hant: 掃碼登入
ja_JP: QRコードでログイン
description:
en_US: Scan QR code with WeChat to authorize and automatically fill in the token
zh_Hans: 使用微信扫码授权,自动填写令牌
zh_Hant: 使用微信掃碼授權,自動填寫令牌
ja_JP: WeChatでQRコードをスキャンし、トークンを自動入力
type: qr-code-login
login_platform: weixin
required: false
- name: token
label:
en_US: Token
@@ -19,6 +19,18 @@ spec:
en: https://link.langbot.app/en/platforms/qqofficial
ja: https://link.langbot.app/ja/platforms/qqofficial
config:
- name: __system.outbound_ips
label:
en_US: IP Whitelist
zh_Hans: IP 白名单
zh_Hant: IP 白名單
description:
en_US: Add these outbound IPs of the LangBot server to the IP whitelist in the development settings of the QQ Open Platform
zh_Hans: 请将这些 LangBot 服务器的出网 IP 添加到 QQ 开放平台开发设置中的 IP 白名单
zh_Hant: 請將這些 LangBot 伺服器的出網 IP 加入 QQ 開放平台開發設定中的 IP 白名單
type: array[string]
required: false
default: []
- name: appid
label:
en_US: App ID
@@ -27,10 +27,7 @@ class WebPageBotAdapter(abstract_platform_adapter.AbstractMessagePlatformAdapter
listeners: dict = pydantic.Field(default_factory=dict, exclude=True)
_ws_adapter: typing.Any = None
# Pydantic v2 model configuration. The legacy nested ``class Config`` is
# dropped: pydantic v2 does not accept a model that defines both ``Config``
# and ``model_config``, and ``underscore_attrs_are_private`` no longer
# exists there (underscore-prefixed attributes are private by default).
model_config = pydantic.ConfigDict(arbitrary_types_allowed=True)
def __init__(self, config: dict, logger: abstract_platform_logger.AbstractEventLogger, **kwargs):
super().__init__(config=config, logger=logger, **kwargs)
@@ -32,6 +32,18 @@ spec:
type: webhook-url
required: false
default: ""
- name: __system.outbound_ips
label:
en_US: Trusted IPs
zh_Hans: 企业可信 IP
zh_Hant: 企業可信 IP
description:
en_US: Add these outbound IPs of the LangBot server to the "Trusted Enterprise IPs" of your app in the WeCom admin console
zh_Hans: 请将这些 LangBot 服务器的出网 IP 添加到企业微信管理后台应用详情页的「企业可信 IP」中
zh_Hant: 請將這些 LangBot 伺服器的出網 IP 加入企業微信管理後台應用詳情頁的「企業可信 IP」中
type: array[string]
required: false
default: []
- name: corpid
label:
en_US: Corpid
@@ -19,6 +19,18 @@ spec:
en: https://link.langbot.app/en/platforms/wecombot
ja: https://link.langbot.app/ja/platforms/wecombot
config:
- name: one-click-create
label:
en_US: One-Click Create Bot
zh_Hans: 一键创建机器人
zh_Hant: 一鍵建立機器人
description:
en_US: "Scan QR code with WeCom to automatically create a bot and fill in BotId and Secret. Note: Robot Name needs to be filled in manually."
zh_Hans: "使用企业微信扫码自动创建机器人并填写 BotId 和 Secret。注意:机器人名称需手动填写。"
zh_Hant: "使用企業微信掃碼自動建立機器人並填寫 BotId 和 Secret。注意:機器人名稱需手動填寫。"
type: qr-code-login
login_platform: wecombot
required: false
- name: BotId
label:
en_US: BotId
@@ -63,6 +75,18 @@ spec:
field: enable-webhook
operator: eq
value: true
- name: __system.outbound_ips
label:
en_US: Trusted IPs
zh_Hans: 企业可信 IP
zh_Hant: 企業可信 IP
description:
en_US: Add these outbound IPs of the LangBot server to the "Trusted Enterprise IPs" of the bot configuration in the WeCom admin console
zh_Hans: 请将这些 LangBot 服务器的出网 IP 添加到企业微信管理后台智能机器人配置的「企业可信 IP」中
zh_Hant: 請將這些 LangBot 伺服器的出網 IP 加入企業微信管理後台智慧機器人設定的「企業可信 IP」中
type: array[string]
required: false
default: []
- name: Secret
label:
en_US: Secret
@@ -31,6 +31,18 @@ spec:
type: webhook-url
required: false
default: ""
- name: __system.outbound_ips
label:
en_US: Trusted IPs
zh_Hans: 企业可信 IP
zh_Hant: 企業可信 IP
description:
en_US: Add these outbound IPs of the LangBot server to the "Trusted Enterprise IPs" of WeChat Customer Service in the WeCom admin console
zh_Hans: 请将这些 LangBot 服务器的出网 IP 添加到企业微信管理后台微信客服的「企业可信 IP」中
zh_Hant: 請將這些 LangBot 伺服器的出網 IP 加入企業微信管理後台微信客服的「企業可信 IP」中
type: array[string]
required: false
default: []
- name: corpid
label:
en_US: Corpid
+314 -46
View File
@@ -11,12 +11,14 @@ import os
import sys
import httpx
import sqlalchemy
import yaml
from async_lru import alru_cache
from langbot_plugin.api.entities.builtin.pipeline.query import provider_session
from ..core import app
from . import handler
from ..utils import platform
from ..utils.managed_runtime import ManagedRuntimeConnector
from langbot_plugin.runtime.io.controllers.stdio import (
client as stdio_client_controller,
)
@@ -34,10 +36,12 @@ from ..core import taskmgr
from ..entity.persistence import plugin as persistence_plugin
class PluginRuntimeConnector:
"""Plugin runtime connector"""
class PluginRuntimeNotConnectedError(RuntimeError):
    """Raised when plugin runtime operations are requested before connection.

    Subclasses ``RuntimeError``, so handlers that catch ``RuntimeError`` or
    ``Exception`` also catch it.
    """
ap: app.Application
class PluginRuntimeConnector(ManagedRuntimeConnector):
"""Plugin runtime connector"""
handler: handler.RuntimeConnectionHandler
@@ -49,10 +53,6 @@ class PluginRuntimeConnector:
ctrl: stdio_client_controller.StdioClientController | ws_client_controller.WebSocketClientController
runtime_subprocess_on_windows: asyncio.subprocess.Process | None = None
runtime_subprocess_on_windows_task: asyncio.Task | None = None
runtime_disconnect_callback: typing.Callable[
[PluginRuntimeConnector], typing.Coroutine[typing.Any, typing.Any, None]
]
@@ -67,7 +67,7 @@ class PluginRuntimeConnector:
[PluginRuntimeConnector], typing.Coroutine[typing.Any, typing.Any, None]
],
):
self.ap = ap
super().__init__(ap)
self.runtime_disconnect_callback = runtime_disconnect_callback
self.is_enable_plugin = self.ap.instance_config.data.get('plugin', {}).get('enable', True)
@@ -103,6 +103,16 @@ class PluginRuntimeConnector:
self.handler_task = asyncio.create_task(self.handler.run())
_ = await self.handler.ping()
# Push the configured marketplace (Space) URL to the runtime so it
# downloads plugins from the same Space LangBot is bound to, rather
# than relying on the runtime's own env/default.
space_url = self.ap.instance_config.data.get('space', {}).get('url', '').rstrip('/')
if space_url:
try:
await self.handler.set_runtime_config(cloud_service_url=space_url)
self.ap.logger.info(f'Pushed marketplace URL to plugin runtime: {space_url}')
except Exception as e:
self.ap.logger.warning(f'Failed to push runtime config: {e}')
self.ap.logger.info('Connected to plugin runtime.')
await self.handler_task
@@ -135,19 +145,7 @@ class PluginRuntimeConnector:
# We have to launch runtime via cmd but communicate via ws.
self.ap.logger.info('(windows) use cmd to launch plugin runtime and communicate via ws')
if self.runtime_subprocess_on_windows is None: # only launch once
python_path = sys.executable
env = os.environ.copy()
self.runtime_subprocess_on_windows = await asyncio.create_subprocess_exec(
python_path,
'-m',
'langbot_plugin.cli.__init__',
'rt',
env=env,
)
# hold the process
self.runtime_subprocess_on_windows_task = asyncio.create_task(self.runtime_subprocess_on_windows.wait())
await self._start_runtime_subprocess('-m', 'langbot_plugin.cli.__init__', 'rt')
ws_url = 'ws://localhost:5400/control/ws'
@@ -191,44 +189,304 @@ class PluginRuntimeConnector:
async def ping_plugin_runtime(self):
    """Ping the plugin runtime through the connection handler.

    Returns:
        Whatever ``self.handler.ping()`` resolves to.

    Raises:
        PluginRuntimeNotConnectedError: If this connector has no ``handler``
            attribute yet, i.e. the runtime connection was never set up.
    """
    if not hasattr(self, 'handler'):
        # Single raise site. The dedicated error subclasses RuntimeError, so
        # callers that catch ``Exception`` keep working unchanged.
        raise PluginRuntimeNotConnectedError('Plugin runtime is not connected')

    return await self.handler.ping()
def _extract_deps_metadata(
def _inspect_plugin_package(
self,
file_bytes: bytes,
task_context: taskmgr.TaskContext | None,
):
"""Extract dependency count from requirements.txt inside plugin zip."""
if task_context is None:
return
) -> tuple[str | None, str | None]:
"""Extract plugin identity and dependency metadata from a plugin package."""
plugin_author = None
plugin_name = None
try:
with zipfile.ZipFile(io.BytesIO(file_bytes)) as zf:
for name in zf.namelist():
if name.endswith('requirements.txt'):
content = zf.read(name).decode('utf-8', errors='ignore')
deps = [
line.strip()
for line in content.splitlines()
if line.strip() and not line.strip().startswith('#')
]
task_context.metadata['deps_total'] = len(deps)
task_context.metadata['deps_list'] = deps
break
try:
manifest = yaml.safe_load(zf.read('manifest.yaml').decode('utf-8', errors='ignore')) or {}
metadata = manifest.get('metadata', {})
plugin_author = metadata.get('author')
plugin_name = metadata.get('name')
except Exception:
pass
if task_context is not None:
for name in zf.namelist():
if name.endswith('requirements.txt'):
content = zf.read(name).decode('utf-8', errors='ignore')
deps = [
line.strip()
for line in content.splitlines()
if line.strip() and not line.strip().startswith('#')
]
task_context.metadata['deps_total'] = len(deps)
task_context.metadata['deps_list'] = deps
break
except Exception:
pass
return plugin_author, plugin_name
async def _install_mcp_from_marketplace(
    self,
    mcp_data: dict[str, Any],
    task_context: taskmgr.TaskContext | None = None,
):
    """Create and start a local MCP server from a marketplace record.

    The record's ``mode`` (``'stdio'`` when absent), ``extra_args`` and
    ``readme`` are stored as-is in a new ``MCPServer`` row named
    ``<author>__<name>``. If a row with that name already exists nothing is
    changed. After insertion the row is read back, serialized, and handed to
    the MCP tool loader (when one is available) to be hosted in a background
    task.

    Args:
        mcp_data: Marketplace MCP record.
        task_context: Accepted for parity with the other installers; not used
            by this method.
    """
    from ..entity.persistence import mcp as persistence_mcp
    import uuid

    server_model = persistence_mcp.MCPServer

    # "__" joins author and name because "/" causes URL routing issues.
    name = f'{mcp_data.get("author", "")}__{mcp_data.get("name", "")}'

    lookup = await self.ap.persistence_mgr.execute_async(
        sqlalchemy.select(server_model).where(server_model.name == name)
    )
    if lookup.scalar_one_or_none():
        self.ap.logger.info(f'MCP server {name} already exists, skipping installation')
        return

    server_uuid = str(uuid.uuid4())
    row = {
        'uuid': server_uuid,
        'name': name,
        'enable': True,
        'mode': mcp_data.get('mode') or 'stdio',
        'extra_args': mcp_data.get('extra_args') or {},
        # The rendered README is persisted with the server record.
        'readme': mcp_data.get('readme') or '',
    }
    await self.ap.persistence_mgr.execute_async(sqlalchemy.insert(server_model).values(row))

    # Read the row back so the loader receives the serialized stored form.
    fetched = await self.ap.persistence_mgr.execute_async(
        sqlalchemy.select(server_model).where(server_model.uuid == server_uuid)
    )
    entity = fetched.first()
    if entity:
        config = self.ap.persistence_mgr.serialize_model(server_model, entity)
        loader = self.ap.tool_mgr.mcp_tool_loader
        if loader:
            hosting_task = asyncio.create_task(loader.host_mcp_server(config))
            loader._hosted_mcp_tasks.append(hosting_task)

    self.ap.logger.info(f'Installed MCP server {name} from marketplace')
async def _install_skill_from_zip(
    self,
    file_bytes: bytes,
    filename: str,
    task_context: taskmgr.TaskContext | None = None,
):
    """Install a skill from a marketplace ZIP payload via ``SkillService``.

    Args:
        file_bytes: Raw ZIP bytes of the skill package.
        filename: Base name for the upload; ``.zip`` is appended.
        task_context: Accepted for parity with the other installers; not used
            by this method.
    """
    from ..api.http.service.skill import SkillService

    service = SkillService(self.ap)
    self.ap.logger.info(f'Installing skill from marketplace ZIP ({len(file_bytes)} bytes)')

    archive_name = f'{filename}.zip'
    result = await service.install_from_zip_upload(file_bytes=file_bytes, filename=archive_name)

    self.ap.logger.info(f'Skill installed successfully: {result}')
def _build_plugin_startup_failure_message(
self,
plugin_author: str,
plugin_name: str,
task_context: taskmgr.TaskContext | None,
) -> str:
dep_hint = ''
if task_context is not None:
current_dep = task_context.metadata.get('current_dep')
if current_dep:
dep_hint = f' Last dependency: {current_dep}.'
return (
f'Plugin {plugin_author}/{plugin_name} failed to start after installation. '
f'Dependency installation or plugin initialization may have failed.{dep_hint} '
f'Please check the plugin requirements and runtime logs.'
)
async def _wait_for_installed_plugin_ready(
self,
plugin_author: str | None,
plugin_name: str | None,
task_context: taskmgr.TaskContext | None,
timeout: float = 30,
):
"""Wait until the installed plugin is registered by the runtime.
The plugin runtime launches plugins asynchronously. If dependency installation
fails, the plugin process exits before registration; without this check the
install task can incorrectly finish successfully.
"""
if not plugin_author or not plugin_name:
return
deadline = time.time() + timeout
last_error: Exception | None = None
while time.time() < deadline:
try:
plugin = await self.get_plugin_info(plugin_author, plugin_name)
if plugin is not None:
status = plugin.get('status')
if status == 'initialized':
return
except Exception as e:
last_error = e
await asyncio.sleep(0.5)
message = self._build_plugin_startup_failure_message(plugin_author, plugin_name, task_context)
if last_error is not None:
message = f'{message} Last runtime error: {last_error}'
raise RuntimeError(message)
async def install_plugin(
self,
install_source: PluginInstallSource,
install_info: dict[str, Any],
task_context: taskmgr.TaskContext | None = None,
):
plugin_author = install_info.get('plugin_author')
plugin_name = install_info.get('plugin_name')
if install_source == PluginInstallSource.MARKETPLACE:
# Handle marketplace plugin/mcp/skill installation
plugin_author = install_info.get('plugin_author', '')
plugin_name = install_info.get('plugin_name', '')
space_url = (
self.ap.instance_config.data.get('space', {}).get('url', 'https://space.langbot.app').rstrip('/')
)
# Try MCP endpoint first
async with httpx.AsyncClient(trust_env=True, timeout=15) as client:
mcp_resp = await client.get(f'{space_url}/api/v1/marketplace/mcps/{plugin_author}/{plugin_name}')
if mcp_resp.status_code == 200:
mcp_data = mcp_resp.json().get('data', {}).get('mcp', {})
if mcp_data.get('mode'):
# It's an MCP - create server locally
self.ap.logger.info(f'Installing MCP from marketplace: {plugin_author}/{plugin_name}')
if task_context:
task_context.set_current_action('installing mcp server')
await self._install_mcp_from_marketplace(mcp_data, task_context)
# Best-effort install report (bumps marketplace install_count).
try:
await client.post(
f'{space_url}/api/v1/marketplace/mcps/{plugin_author}/{plugin_name}/install'
)
except Exception as report_err:
self.ap.logger.debug(f'Failed to report MCP install: {report_err}')
return
else:
raise Exception(f'MCP {plugin_author}/{plugin_name} has no mode')
elif mcp_resp.status_code == 404:
# Try skill endpoint - download ZIP and install
self.ap.logger.info(f'Trying skill endpoint for: {plugin_author}/{plugin_name}')
if task_context:
task_context.set_current_action('checking skill marketplace')
# Get skill detail to find version
skill_resp = await client.get(
f'{space_url}/api/v1/marketplace/skills/{plugin_author}/{plugin_name}'
)
if skill_resp.status_code == 200:
self.ap.logger.info(f'Installing skill from marketplace: {plugin_author}/{plugin_name}')
if task_context:
task_context.set_current_action('installing skill from marketplace')
# Download the skill ZIP (no version needed - uses latest)
if task_context:
task_context.set_current_action('downloading skill package')
download_resp = await client.get(
f'{space_url}/api/v1/marketplace/skills/download/{plugin_author}/{plugin_name}'
)
if download_resp.status_code != 200:
raise Exception(
f'Failed to download skill {plugin_author}/{plugin_name}: {download_resp.status_code}'
)
file_bytes = download_resp.content
file_size = len(file_bytes)
self.ap.logger.info(f'Downloaded skill ZIP ({file_size} bytes)')
# Install skill from ZIP using skill service
await self._install_skill_from_zip(file_bytes, f'{plugin_author}-{plugin_name}', task_context)
return
elif skill_resp.status_code == 404:
# Try plugin endpoint - get versions and download
self.ap.logger.info(f'Trying plugin endpoint for: {plugin_author}/{plugin_name}')
if task_context:
task_context.set_current_action('checking plugin marketplace')
# Get plugin versions to find latest
versions_resp = await client.get(
f'{space_url}/api/v1/marketplace/plugins/{plugin_author}/{plugin_name}/versions'
)
if versions_resp.status_code == 200:
versions_data = versions_resp.json().get('data', {}).get('versions', [])
if versions_data:
latest_version = versions_data[0].get('version', '')
if latest_version:
self.ap.logger.info(
f'Installing plugin from marketplace: {plugin_author}/{plugin_name} v{latest_version}'
)
if task_context:
task_context.set_current_action('downloading plugin package')
download_resp = await client.get(
f'{space_url}/api/v1/marketplace/plugins/download/{plugin_author}/{plugin_name}/{latest_version}'
)
if download_resp.status_code != 200:
raise Exception(
f'Failed to download plugin {plugin_author}/{plugin_name}: {download_resp.status_code}'
)
file_bytes = download_resp.content
self._inspect_plugin_package(file_bytes, task_context)
file_key = await self.handler.send_file(file_bytes, 'lbpkg')
install_info['plugin_file_key'] = file_key
self.ap.logger.info(f'Transfered file {file_key} to plugin runtime')
# Continue to install via runtime
else:
raise Exception(f'No version found for plugin {plugin_author}/{plugin_name}')
else:
raise Exception(f'Plugin {plugin_author}/{plugin_name} has no versions')
else:
raise Exception(f'Plugin {plugin_author}/{plugin_name} not found in marketplace')
else:
skill_resp.raise_for_status()
raise Exception(f'Failed to get skill {plugin_author}/{plugin_name}')
else:
mcp_resp.raise_for_status()
raise Exception(f'Failed to get MCP {plugin_author}/{plugin_name}')
if install_source == PluginInstallSource.LOCAL:
# transfer file before install
file_bytes = install_info['plugin_file']
self._extract_deps_metadata(file_bytes, task_context)
plugin_author, plugin_name = self._inspect_plugin_package(file_bytes, task_context)
if task_context is not None and plugin_author and plugin_name:
task_context.metadata['plugin_name'] = f'{plugin_author}/{plugin_name}'
file_key = await self.handler.send_file(file_bytes, 'lbpkg')
install_info['plugin_file_key'] = file_key
del install_info['plugin_file']
@@ -265,7 +523,9 @@ class PluginRuntimeConnector:
task_context.metadata['download_speed'] = downloaded / elapsed if elapsed > 0 else 0
file_bytes = b''.join(chunks)
self._extract_deps_metadata(file_bytes, task_context)
plugin_author, plugin_name = self._inspect_plugin_package(file_bytes, task_context)
if task_context is not None and plugin_author and plugin_name:
task_context.metadata['plugin_name'] = f'{plugin_author}/{plugin_name}'
file_key = await self.handler.send_file(file_bytes, 'lbpkg')
install_info['plugin_file_key'] = file_key
self.ap.logger.info(f'Transfered file {file_key} to plugin runtime')
@@ -289,6 +549,8 @@ class PluginRuntimeConnector:
if metadata is not None and task_context is not None:
task_context.metadata.update(metadata)
await self._wait_for_installed_plugin_ready(plugin_author, plugin_name, task_context)
async def upgrade_plugin(
self,
plugin_author: str,
@@ -534,13 +796,18 @@ class PluginRuntimeConnector:
return await self.handler.retrieve_knowledge(plugin_author, plugin_name, retriever_name, retrieval_context)
def dispose(self):
# No need to consider the shutdown on Windows
# for Windows can kill processes and subprocesses chainly
if self.is_enable_plugin and isinstance(self.ctrl, stdio_client_controller.StdioClientController):
# On non-Windows stdio mode, terminate via the controller's process handle.
# On Windows, the managed subprocess is cleaned up by the base class.
if (
self.is_enable_plugin
and hasattr(self, 'ctrl')
and isinstance(self.ctrl, stdio_client_controller.StdioClientController)
):
self.ap.logger.info('Terminating plugin runtime process...')
self.ctrl.process.terminate()
self._dispose_subprocess()
if self.heartbeat_task is not None:
self.heartbeat_task.cancel()
self.heartbeat_task = None
@@ -558,11 +825,12 @@ class PluginRuntimeConnector:
Raises:
ValueError: If plugin_id is not in the expected 'author/name' format.
"""
if '/' not in plugin_id:
segments = plugin_id.split('/')
if len(segments) != 2 or not all(segments):
raise ValueError(
f"Invalid plugin_id format: '{plugin_id}'. Expected 'author/name' format (e.g. 'langbot/rag-engine')."
)
return plugin_id.split('/', 1)
return segments[0], segments[1]
async def call_rag_ingest(self, plugin_id: str, context_data: dict[str, Any]) -> dict[str, Any]:
"""Call plugin to ingest document.
+10
View File
@@ -840,6 +840,16 @@ class RuntimeConnectionHandler(handler.Handler):
timeout=10,
)
async def set_runtime_config(self, cloud_service_url: str) -> dict[str, Any]:
    """Send runtime configuration to the connected runtime.

    Args:
        cloud_service_url: URL forwarded under the ``cloud_service_url`` key
            (e.g. the marketplace URL).

    Returns:
        The result of the ``SET_RUNTIME_CONFIG`` action call.
    """
    payload = {'cloud_service_url': cloud_service_url}
    response = await self.call_action(
        LangBotToRuntimeAction.SET_RUNTIME_CONFIG,
        payload,
        timeout=10,
    )
    return response
async def install_plugin(
self, install_source: str, install_info: dict[str, Any]
) -> typing.AsyncGenerator[dict[str, Any], None]:
+65 -31
View File
@@ -143,49 +143,83 @@ class ModelManager:
# get the latest models from space
space_models = await self.ap.space_service.get_models()
exists_llm_models_uuids = [m['uuid'] for m in await self.ap.llm_model_service.get_llm_models()]
exists_embedding_models_uuids = [
m['uuid'] for m in await self.ap.embedding_models_service.get_embedding_models()
]
# Index existing models by uuid. Space reuses a model's uuid across
# renames / re-specs (e.g. the uuid that used to be ``claude-opus-4-6``
# may later become ``claude-opus-4-7``). So for Space-managed models we
# upsert: create when the uuid is new, otherwise update name/abilities/
# ranking to track Space. Models owned by other providers are never
# touched, even on an (unexpected) uuid collision.
existing_llm_models = {m['uuid']: m for m in await self.ap.llm_model_service.get_llm_models()}
existing_embedding_models = {
m['uuid']: m for m in await self.ap.embedding_models_service.get_embedding_models()
}
created = 0
updated = 0
for space_model in space_models:
if space_model.category == 'chat':
uuid = space_model.uuid
if uuid in exists_llm_models_uuids:
continue
# model will be automatically loaded
await self.ap.llm_model_service.create_llm_model(
{
'uuid': space_model.uuid,
existing = existing_llm_models.get(space_model.uuid)
if existing is None:
# model will be automatically loaded
await self.ap.llm_model_service.create_llm_model(
{
'uuid': space_model.uuid,
'name': space_model.model_id,
'provider_uuid': space_model_provider.uuid,
'abilities': space_model.llm_abilities or [],
'extra_args': {},
'prefered_ranking': space_model.featured_order,
},
preserve_uuid=True,
auto_set_to_default_pipeline=False,
)
created += 1
elif existing.get('provider_uuid') == space_model_provider.uuid:
desired = {
'name': space_model.model_id,
'provider_uuid': space_model_provider.uuid,
'abilities': space_model.llm_abilities or [],
'extra_args': {},
'prefered_ranking': space_model.featured_order,
},
preserve_uuid=True,
auto_set_to_default_pipeline=False,
)
}
if (
existing.get('name') != desired['name']
or list(existing.get('abilities') or []) != list(desired['abilities'])
or existing.get('prefered_ranking') != desired['prefered_ranking']
):
await self.ap.llm_model_service.update_llm_model(space_model.uuid, dict(desired))
updated += 1
elif space_model.category == 'embedding':
uuid = space_model.uuid
if uuid in exists_embedding_models_uuids:
continue
# model will be automatically loaded
await self.ap.embedding_models_service.create_embedding_model(
{
'uuid': space_model.uuid,
existing = existing_embedding_models.get(space_model.uuid)
if existing is None:
# model will be automatically loaded
await self.ap.embedding_models_service.create_embedding_model(
{
'uuid': space_model.uuid,
'name': space_model.model_id,
'provider_uuid': space_model_provider.uuid,
'extra_args': {},
'prefered_ranking': space_model.featured_order,
},
preserve_uuid=True,
)
created += 1
elif existing.get('provider_uuid') == space_model_provider.uuid:
desired = {
'name': space_model.model_id,
'provider_uuid': space_model_provider.uuid,
'extra_args': {},
'prefered_ranking': space_model.featured_order,
},
preserve_uuid=True,
)
}
if (
existing.get('name') != desired['name']
or existing.get('prefered_ranking') != desired['prefered_ranking']
):
await self.ap.embedding_models_service.update_embedding_model(space_model.uuid, dict(desired))
updated += 1
if created or updated:
self.ap.logger.info(f'Synced models from LangBot Space: {created} added, {updated} updated.')
async def init_temporary_runtime_llm_model(
self,
@@ -340,6 +340,7 @@ class ProviderAPIRequester(metaclass=abc.ABCMeta):
"""Provider API请求器"""
name: str = None
init_api_key: str = 'langbot-init-placeholder'
ap: app.Application
@@ -25,7 +25,7 @@ class OpenAIChatCompletions(requester.ProviderAPIRequester):
async def initialize(self):
self.client = openai.AsyncClient(
api_key='',
api_key=self.init_api_key,
base_url=self.requester_cfg['base_url'].replace(' ', ''),
timeout=self.requester_cfg['timeout'],
http_client=httpx.AsyncClient(trust_env=True, timeout=self.requester_cfg['timeout']),
@@ -25,7 +25,7 @@ class ModelScopeChatCompletions(requester.ProviderAPIRequester):
async def initialize(self):
self.client = openai.AsyncClient(
api_key='',
api_key=self.init_api_key,
base_url=self.requester_cfg['base_url'],
timeout=self.requester_cfg['timeout'],
http_client=httpx.AsyncClient(trust_env=True, timeout=self.requester_cfg['timeout']),
+10 -1
View File
@@ -14,7 +14,14 @@ class TokenManager:
def __init__(self, name: str, tokens: list[str]):
self.name = name
self.tokens = tokens
self.tokens = []
seen_tokens = set()
for token in tokens:
normalized_token = token.strip() if isinstance(token, str) else ''
if not normalized_token or normalized_token in seen_tokens:
continue
self.tokens.append(normalized_token)
seen_tokens.add(normalized_token)
self.using_token_index = 0
def get_token(self) -> str:
@@ -23,4 +30,6 @@ class TokenManager:
return self.tokens[self.using_token_index]
def next_token(self):
    """Advance to the next token, wrapping around; do nothing when no tokens exist."""
    token_count = len(self.tokens)
    if token_count:
        self.using_token_index = (self.using_token_index + 1) % token_count
+7 -3
View File
@@ -2,8 +2,12 @@ from __future__ import annotations
import abc
import typing
from typing import TYPE_CHECKING
from ..core import app
if TYPE_CHECKING:
from ..core import app
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
import langbot_plugin.api.entities.builtin.provider.message as provider_message
preregistered_runners: list[typing.Type[RequestRunner]] = []
@@ -35,7 +39,7 @@ class RequestRunner(abc.ABC):
@abc.abstractmethod
async def run(
self, query: core_entities.Query
) -> typing.AsyncGenerator[llm_entities.Message | llm_entities.MessageChunk, None]:
self, query: pipeline_query.Query
) -> typing.AsyncGenerator[provider_message.Message | provider_message.MessageChunk, None]:
"""运行请求"""
pass
@@ -0,0 +1,511 @@
"""DeerFlow LangGraph API Runner
参考 astrbot deerflow_agent_runner 实现适配 LangBot Runner 接口
特点
- 使用 LangGraph HTTP API 接入 deer-flow 后端
- 自动管理 thread_id session 隔离
- 支持 SSE 流式响应解析
- 支持 streaming/非流式两种输出
- 处理 values / messages-tuple / custom 三种事件
"""
from __future__ import annotations
import asyncio
import hashlib
import json
import typing
from collections import deque
from dataclasses import dataclass, field
from langbot.pkg.provider import runner
from langbot.pkg.core import app
import langbot_plugin.api.entities.builtin.provider.message as provider_message
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
from langbot.libs.deerflow_api import client, errors, stream_utils
_MAX_VALUES_HISTORY = 200
@dataclass
class _StreamState:
    """Mutable bookkeeping for one DeerFlow stream run."""

    # Text accumulated from messages-tuple deltas.
    latest_text: str = ''
    # Not referenced in the visible part of this module.
    prev_text_for_streaming: str = ''
    # Most recent clarification text seen in values or message events.
    clarification_text: str = ''
    # Failure descriptions collected from ``custom`` events.
    task_failures: list[str] = field(default_factory=list)
    # Message ids already processed; bounded together with seen_message_order.
    seen_message_ids: set[str] = field(default_factory=set)
    # Insertion order of seen ids, used to evict the oldest beyond the history cap.
    seen_message_order: deque[str] = field(default_factory=deque)
    # For messages without an id: list index -> content fingerprint last seen there.
    no_id_message_fingerprints: dict[int, str] = field(default_factory=dict)
    # Set once the first ``values`` event has been processed.
    baseline_initialized: bool = False
    # Set once a ``values`` event has produced AI text.
    has_values_text: bool = False
    # Messages gathered from ``values`` events during this run (capped in length).
    run_values_messages: list[dict[str, typing.Any]] = field(default_factory=list)
    # Set when the stream ended because of a timeout.
    timed_out: bool = False
@runner.runner_class('deerflow-api')
class DeerFlowAPIRunner(runner.RequestRunner):
"""DeerFlow LangGraph API 对话请求器"""
deerflow_client: client.AsyncDeerFlowClient
def __init__(self, ap: app.Application, pipeline_config: dict):
    """Read the ``deerflow-api`` pipeline settings and create the API client.

    Args:
        ap: Application instance passed through to the base runner.
        pipeline_config: Pipeline configuration; the runner settings are read
            from ``pipeline_config['ai']['deerflow-api']``.

    Raises:
        errors.DeerFlowAPIError: If ``api-base`` is empty or does not start
            with ``http://`` or ``https://``.
    """
    super().__init__(ap, pipeline_config)

    options = self.pipeline_config['ai']['deerflow-api']

    base_url = options.get('api-base', '').strip()
    if not (base_url and base_url.startswith(('http://', 'https://'))):
        raise errors.DeerFlowAPIError(
            message='DeerFlow API Base URL 格式错误,必须以 http:// 或 https:// 开头',
        )
    self.api_base = base_url

    # Connection / identity settings.
    self.api_key = options.get('api-key', '')
    self.auth_header = options.get('auth-header', '')
    self.assistant_id = options.get('assistant-id', 'lead_agent')
    self.model_name = options.get('model-name', '')

    # Feature switches.
    self.thinking_enabled = bool(options.get('thinking-enabled', False))
    self.plan_mode = bool(options.get('plan-mode', False))
    self.subagent_enabled = bool(options.get('subagent-enabled', False))

    # Numeric limits.
    self.max_concurrent_subagents = int(options.get('max-concurrent-subagents', 3))
    self.timeout = int(options.get('timeout', 300))
    self.recursion_limit = int(options.get('recursion-limit', 1000))

    self.deerflow_client = client.AsyncDeerFlowClient(
        api_base=self.api_base,
        api_key=self.api_key,
        auth_header=self.auth_header,
    )
# ------------------------------------------------------------------
# 辅助方法
# ------------------------------------------------------------------
def _fingerprint_message(self, message: dict[str, typing.Any]) -> str:
try:
raw = json.dumps(message, sort_keys=True, ensure_ascii=False, default=str)
except (TypeError, ValueError):
raw = repr(message)
return hashlib.sha1(raw.encode('utf-8', errors='ignore')).hexdigest()
def _remember_seen_message_id(self, state: _StreamState, msg_id: str) -> None:
if not msg_id or msg_id in state.seen_message_ids:
return
state.seen_message_ids.add(msg_id)
state.seen_message_order.append(msg_id)
while len(state.seen_message_order) > _MAX_VALUES_HISTORY:
dropped = state.seen_message_order.popleft()
state.seen_message_ids.discard(dropped)
def _extract_new_messages_from_values(
self,
values_messages: list[typing.Any],
state: _StreamState,
) -> list[dict[str, typing.Any]]:
new_messages: list[dict[str, typing.Any]] = []
no_id_indexes_seen: set[int] = set()
for idx, msg in enumerate(values_messages):
if not isinstance(msg, dict):
continue
msg_id = stream_utils.get_message_id(msg)
if msg_id:
if msg_id in state.seen_message_ids:
continue
self._remember_seen_message_id(state, msg_id)
new_messages.append(msg)
continue
no_id_indexes_seen.add(idx)
fp = self._fingerprint_message(msg)
if state.no_id_message_fingerprints.get(idx) == fp:
continue
state.no_id_message_fingerprints[idx] = fp
new_messages.append(msg)
for idx in list(state.no_id_message_fingerprints.keys()):
if idx not in no_id_indexes_seen:
state.no_id_message_fingerprints.pop(idx, None)
return new_messages
# ------------------------------------------------------------------
# 用户输入处理
# ------------------------------------------------------------------
def _build_user_content(
self,
prompt: str,
image_urls: list[str],
) -> typing.Any:
"""构建 LangGraph 兼容的 user content(支持多模态)"""
if not image_urls:
return prompt
content: list[dict[str, typing.Any]] = []
if prompt:
content.append({'type': 'text', 'text': prompt})
for url in image_urls:
if not isinstance(url, str):
continue
url = url.strip()
if not url:
continue
if url.startswith(('http://', 'https://', 'data:')):
content.append({'type': 'image_url', 'image_url': {'url': url}})
return content if content else prompt
def _preprocess_user_message(
self,
query: pipeline_query.Query,
) -> tuple[str, list[str]]:
"""提取用户消息的纯文本与图片 URL 列表"""
plain_text = ''
image_urls: list[str] = []
if isinstance(query.user_message.content, str):
plain_text = query.user_message.content
elif isinstance(query.user_message.content, list):
for ce in query.user_message.content:
if ce.type == 'text':
plain_text += ce.text
elif ce.type == 'image_base64':
# 转换为 data URI 形式
b64 = getattr(ce, 'image_base64', '')
if b64:
if not b64.startswith('data:'):
b64 = f'data:image/png;base64,{b64}'
image_urls.append(b64)
elif ce.type == 'image_url':
url = getattr(ce, 'image_url', '')
if url:
image_urls.append(url)
return plain_text, image_urls
# ------------------------------------------------------------------
# 请求构造
# ------------------------------------------------------------------
def _build_messages(
self,
prompt: str,
image_urls: list[str],
system_prompt: str = '',
) -> list[dict[str, typing.Any]]:
messages: list[dict[str, typing.Any]] = []
if system_prompt:
messages.append({'role': 'system', 'content': system_prompt})
messages.append(
{
'role': 'user',
'content': self._build_user_content(prompt, image_urls),
}
)
return messages
def _build_runtime_configurable(self, thread_id: str) -> dict[str, typing.Any]:
cfg: dict[str, typing.Any] = {
'thread_id': thread_id,
'thinking_enabled': self.thinking_enabled,
'is_plan_mode': self.plan_mode,
'subagent_enabled': self.subagent_enabled,
}
if self.subagent_enabled:
cfg['max_concurrent_subagents'] = self.max_concurrent_subagents
if self.model_name:
cfg['model_name'] = self.model_name
return cfg
def _build_payload(
self,
thread_id: str,
prompt: str,
image_urls: list[str],
system_prompt: str = '',
) -> dict[str, typing.Any]:
runtime_configurable = self._build_runtime_configurable(thread_id)
return {
'assistant_id': self.assistant_id,
'input': {
'messages': self._build_messages(prompt, image_urls, system_prompt),
},
'stream_mode': ['values', 'messages-tuple', 'custom'],
# DeerFlow 2.0 从 config.configurable 读取运行时覆盖
# 同时保留 context 字段做向后兼容
'context': dict(runtime_configurable),
'config': {
'recursion_limit': self.recursion_limit,
'configurable': runtime_configurable,
},
}
# ------------------------------------------------------------------
# Session/Thread 管理
# ------------------------------------------------------------------
async def _ensure_thread_id(self, query: pipeline_query.Query) -> str:
"""从 query.session 取/创建 deerflow thread_id
LangBot 使用 `query.session.using_conversation.uuid` 持久化 conversation id
我们复用这个字段存储 deerflow thread_id Dify Runner 同样做法
"""
thread_id = query.session.using_conversation.uuid or ''
if thread_id:
return thread_id
thread = await self.deerflow_client.create_thread(timeout=min(30, self.timeout))
thread_id = thread.get('thread_id', '')
if not thread_id:
raise errors.DeerFlowAPIError(message=f'DeerFlow create thread 返回数据缺少 thread_id: {thread}')
query.session.using_conversation.uuid = thread_id
return thread_id
# ------------------------------------------------------------------
# 流式事件处理
# ------------------------------------------------------------------
def _handle_values_event(
    self,
    data: typing.Any,
    state: _StreamState,
) -> str | None:
    """Process a ``values`` event and return the latest full AI text.

    The returned text is cumulative (the whole text, not a delta). ``None``
    is returned when the event carries no messages or no AI text was found.

    Args:
        data: Payload of the ``values`` event.
        state: Stream state updated in place (seen ids, fingerprints,
            accumulated messages, clarification text).
    """
    values_messages = stream_utils.extract_messages_from_values_data(data)
    if not values_messages:
        return None
    new_messages: list[dict[str, typing.Any]] = []
    if not state.baseline_initialized:
        # First values event: every dict message is taken as new, and its id
        # (or, lacking one, its index fingerprint) is recorded for later diffs.
        state.baseline_initialized = True
        for idx, msg in enumerate(values_messages):
            if not isinstance(msg, dict):
                continue
            new_messages.append(msg)
            msg_id = stream_utils.get_message_id(msg)
            if msg_id:
                self._remember_seen_message_id(state, msg_id)
                continue
            state.no_id_message_fingerprints[idx] = self._fingerprint_message(msg)
    else:
        # Later events: only messages not seen before are kept.
        new_messages = self._extract_new_messages_from_values(values_messages, state)
    latest_text = ''
    if new_messages:
        state.run_values_messages.extend(new_messages)
        # Cap the accumulated history at the most recent entries.
        if len(state.run_values_messages) > _MAX_VALUES_HISTORY:
            state.run_values_messages = state.run_values_messages[-_MAX_VALUES_HISTORY:]
        latest_text = stream_utils.extract_latest_ai_text(state.run_values_messages)
        if latest_text:
            # From now on messages-tuple deltas are ignored in favor of values text.
            state.has_values_text = True
        latest_clarification = stream_utils.extract_latest_clarification_text(
            state.run_values_messages,
        )
        if latest_clarification:
            state.clarification_text = latest_clarification
    return latest_text or None
def _handle_message_event(
    self,
    data: typing.Any,
    state: _StreamState,
) -> str | None:
    """Process a messages-tuple event and return the incremental text.

    Deltas are ignored once ``values`` events have already supplied full
    text. When no delta is consumed, a clarification carried by the event (if
    any) is stored on the state and ``None`` is returned.
    """
    delta = stream_utils.extract_ai_delta_from_event_data(data)
    use_delta = bool(delta) and not state.has_values_text

    if not use_delta:
        clarification = stream_utils.extract_clarification_from_event_data(data)
        if clarification:
            state.clarification_text = clarification
        return None

    state.latest_text += delta
    return delta
def _build_final_text(self, state: _StreamState) -> str:
"""构建最终输出文本"""
if state.clarification_text:
return state.clarification_text
# 优先使用最后一条 AI message 的文本
latest_ai = stream_utils.extract_latest_ai_message(state.run_values_messages)
if latest_ai:
text = stream_utils.extract_text(latest_ai.get('content'))
if text:
if state.timed_out:
text += f'\n\nDeerFlow stream 在 {self.timeout}s 后超时,返回部分结果。'
return text
if state.latest_text:
text = state.latest_text
if state.timed_out:
text += f'\n\nDeerFlow stream 在 {self.timeout}s 后超时,返回部分结果。'
return text
# 提取任务失败信息作兜底
failure_text = stream_utils.build_task_failure_summary(state.task_failures)
if failure_text:
return failure_text
return 'DeerFlow 返回空响应'
# ------------------------------------------------------------------
# 主流程
# ------------------------------------------------------------------
async def _stream_messages_chunk(
    self,
    query: pipeline_query.Query,
) -> typing.AsyncGenerator[provider_message.MessageChunk, None]:
    """Stream a DeerFlow run as assistant message chunks.

    Every non-final chunk carries the cumulative text so far (not a delta).
    After the stream ends, or times out, one final chunk built by
    ``_build_final_text`` is emitted with ``is_final=True``.

    Args:
        query: The pipeline query whose user message is sent to DeerFlow.

    Raises:
        errors.DeerFlowAPIError: If the stream delivers an ``error`` event.
    """
    plain_text, image_urls = self._preprocess_user_message(query)

    # No system prompt is sent here. Per the original author's note, the
    # pipeline's prompt preprocessing is expected to have injected it already.
    thread_id = await self._ensure_thread_id(query)
    payload = self._build_payload(
        thread_id=thread_id,
        prompt=plain_text or 'continue',
        image_urls=image_urls,
        system_prompt='',
    )

    state = _StreamState()
    prev_text = ''

    try:
        async for event in self.deerflow_client.stream_run(
            thread_id=thread_id,
            payload=payload,
            timeout=self.timeout,
        ):
            event_type = event.get('event')
            data = event.get('data')

            if event_type == 'values':
                new_full = self._handle_values_event(data, state)
                # new_full is non-empty here, so any difference from the
                # previously emitted text is worth a chunk.
                if new_full and new_full != prev_text:
                    prev_text = new_full
                    yield provider_message.MessageChunk(
                        role='assistant',
                        content=new_full,
                        is_final=False,
                    )
                continue

            if event_type in {'messages-tuple', 'messages', 'message'}:
                delta = self._handle_message_event(data, state)
                if delta:
                    prev_text = state.latest_text
                    yield provider_message.MessageChunk(
                        role='assistant',
                        content=prev_text,
                        is_final=False,
                    )
                continue

            if event_type == 'custom':
                state.task_failures.extend(
                    stream_utils.extract_task_failures_from_custom_event(data),
                )
                continue

            if event_type == 'error':
                raise errors.DeerFlowAPIError(message=f'DeerFlow stream error event: {data}')

            if event_type == 'end':
                break
    except (asyncio.TimeoutError, TimeoutError):
        self.ap.logger.warning(f'DeerFlow stream timed out after {self.timeout}s for thread_id={thread_id}')
        state.timed_out = True

    # Final message.
    final_text = self._build_final_text(state)
    yield provider_message.MessageChunk(
        role='assistant',
        content=final_text,
        is_final=True,
    )
async def _messages(
    self,
    query: pipeline_query.Query,
) -> typing.AsyncGenerator[provider_message.Message, None]:
    """Run DeerFlow and yield one aggregated (non-streaming) assistant message.

    All stream events are folded into a ``_StreamState``; the single message
    yielded at the end carries the text built by ``_build_final_text``. A
    timeout is logged and recorded on the state instead of being raised.

    Raises:
        errors.DeerFlowAPIError: if the stream emits an ``error`` event.
    """
    user_text, images = self._preprocess_user_message(query)
    thread_id = await self._ensure_thread_id(query)
    request_payload = self._build_payload(
        thread_id=thread_id,
        prompt=user_text or 'continue',
        image_urls=images,
    )

    collected = _StreamState()

    try:
        async for event in self.deerflow_client.stream_run(
            thread_id=thread_id,
            payload=request_payload,
            timeout=self.timeout,
        ):
            kind = event.get('event')
            body = event.get('data')

            if kind == 'values':
                self._handle_values_event(body, collected)
            elif kind in {'messages-tuple', 'messages', 'message'}:
                self._handle_message_event(body, collected)
            elif kind == 'custom':
                collected.task_failures.extend(
                    stream_utils.extract_task_failures_from_custom_event(body),
                )
            elif kind == 'error':
                raise errors.DeerFlowAPIError(message=f'DeerFlow stream error event: {body}')
            elif kind == 'end':
                break
    except (asyncio.TimeoutError, TimeoutError):
        self.ap.logger.warning(f'DeerFlow stream timed out after {self.timeout}s for thread_id={thread_id}')
        collected.timed_out = True

    yield provider_message.Message(
        role='assistant',
        content=self._build_final_text(collected),
    )
async def run(
    self,
    query: pipeline_query.Query,
) -> typing.AsyncGenerator[provider_message.Message, None]:
    """Main entry point: stream if the adapter supports it, else aggregate.

    In streaming mode each yielded chunk gets a 1-based ``msg_sequence``.
    """
    if not await query.adapter.is_stream_output_supported():
        async for message in self._messages(query):
            yield message
        return

    sequence = 0
    async for chunk in self._stream_messages_chunk(query):
        sequence += 1
        chunk.msg_sequence = sequence
        yield chunk
+60 -3
View File
@@ -5,6 +5,7 @@ import copy
import typing
from .. import runner
from ..modelmgr import requester as modelmgr_requester
from ..tools.loaders.native import EXEC_TOOL_NAME
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
import langbot_plugin.api.entities.builtin.provider.message as provider_message
import langbot_plugin.api.entities.builtin.rag.context as rag_context
@@ -24,11 +25,44 @@ Respond in the same language as the user's input.
</user_message>
"""
SANDBOX_EXEC_TOOL_NAME = 'sandbox_exec'
SANDBOX_EXEC_SYSTEM_GUIDANCE = (
'When sandbox_exec is available, use it for exact calculations, statistics, structured data parsing, '
'and code execution instead of estimating mentally. If the user provides numbers, tables, CSV-like text, '
'JSON, or other data and asks for a computed answer, prefer running a short Python script in sandbox_exec '
'and then answer from the tool result.'
)
# Hard cap on tool-call rounds within a single agent turn. A looping or
# adversarial model can otherwise emit tool calls indefinitely (each potentially
# a sandbox exec), yielding a non-terminating request and runaway cost. Set
# generously so it never interrupts legitimate multi-step agentic workflows.
MAX_TOOL_CALL_ROUNDS = 128
@runner.runner_class('local-agent')
class LocalAgentRunner(runner.RequestRunner):
"""Local agent request runner"""
def _build_request_messages(
self,
query: pipeline_query.Query,
user_message: provider_message.Message,
) -> list[provider_message.Message]:
req_messages = query.prompt.messages.copy() + query.messages.copy()
if any(getattr(tool, 'name', None) == EXEC_TOOL_NAME for tool in query.use_funcs or []):
req_messages.append(
provider_message.Message(
role='system',
content=self.ap.box_service.get_system_guidance(),
)
)
req_messages.append(user_message)
return req_messages
async def _get_model_candidates(
self,
query: pipeline_query.Query,
@@ -131,6 +165,7 @@ class LocalAgentRunner(runner.RequestRunner):
) -> typing.AsyncGenerator[provider_message.Message | provider_message.MessageChunk, None]:
"""Run request"""
pending_tool_calls = []
initial_response_emitted = False
# Get knowledge bases list from query variables (set by PreProcessor,
# may have been modified by plugins during PromptPreProcessing)
@@ -236,7 +271,7 @@ class LocalAgentRunner(runner.RequestRunner):
ce.text = final_user_message_text
break
req_messages = query.prompt.messages.copy() + query.messages.copy() + [user_message]
req_messages = self._build_request_messages(query, user_message)
try:
is_stream = await query.adapter.is_stream_output_supported()
@@ -264,7 +299,6 @@ class LocalAgentRunner(runner.RequestRunner):
query.use_funcs,
remove_think,
)
yield msg
final_msg = msg
else:
# Streaming: invoke with fallback
@@ -312,6 +346,7 @@ class LocalAgentRunner(runner.RequestRunner):
is_final=msg.is_final,
msg_sequence=msg_sequence,
)
initial_response_emitted = True
final_msg = provider_message.MessageChunk(
role=last_role,
@@ -325,11 +360,25 @@ class LocalAgentRunner(runner.RequestRunner):
if isinstance(final_msg, provider_message.MessageChunk):
first_end_sequence = final_msg.msg_sequence
if not is_stream:
yield final_msg
elif not initial_response_emitted:
yield final_msg
initial_response_emitted = True
req_messages.append(final_msg)
# Once a model succeeds, commit to it for the tool call loop
# (no fallback mid-conversation — different models may interpret tool results differently)
tool_call_round = 0
while pending_tool_calls:
tool_call_round += 1
if tool_call_round > MAX_TOOL_CALL_ROUNDS:
self.ap.logger.warning(
f'Tool-call loop reached the {MAX_TOOL_CALL_ROUNDS}-round cap '
f'(query_id={query.query_id}); stopping to avoid a non-terminating request.'
)
break
for tool_call in pending_tool_calls:
try:
func = tool_call.function
@@ -369,7 +418,15 @@ class LocalAgentRunner(runner.RequestRunner):
req_messages.append(msg)
except Exception as e:
err_msg = provider_message.Message(role='tool', content=f'err: {e}', tool_call_id=tool_call.id)
if is_stream:
err_msg = provider_message.MessageChunk(
role='tool',
content=f'err: {e}',
tool_call_id=tool_call.id,
is_final=True,
)
else:
err_msg = provider_message.Message(role='tool', content=f'err: {e}', tool_call_id=tool_call.id)
yield err_msg
@@ -0,0 +1,351 @@
from __future__ import annotations
import typing
import json
from langbot.pkg.provider import runner
from langbot.pkg.core import app
import langbot_plugin.api.entities.builtin.provider.message as provider_message
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
from langbot.libs.weknora_api import client, errors
@runner.runner_class('weknora-api')
class WeKnoraAPIRunner(runner.RequestRunner):
"""WeKnora API 对话请求器"""
weknora_client: client.AsyncWeKnoraClient
def __init__(self, ap: app.Application, pipeline_config: dict):
    """Validate the WeKnora pipeline settings and create the API client.

    Raises:
        errors.WeKnoraAPIError: if ``app-type`` is not ``chat`` or ``agent``,
            or if the API key or base URL is missing or blank.
    """
    super().__init__(ap, pipeline_config)

    settings = self.pipeline_config['ai']['weknora-api']

    app_type = settings['app-type']
    if app_type not in ('chat', 'agent'):
        raise errors.WeKnoraAPIError(f'不支持的 WeKnora 应用类型: {app_type}')

    # ``or ''`` tolerates an explicit null in the config, which would
    # otherwise fail with AttributeError on ``.strip()`` instead of the
    # configuration error below.
    api_key = (settings.get('api-key') or '').strip()
    if not api_key:
        raise errors.WeKnoraAPIError(
            'WeKnora API Key 未配置,请在流水线的 WeKnora API 配置中填入 API Key '
            '(从 WeKnora 前端 设置 → API Keys 生成)'
        )

    base_url = (settings.get('base-url') or '').strip()
    if not base_url:
        raise errors.WeKnoraAPIError('WeKnora Base URL 未配置,请填入服务器地址,例如 http://localhost:8080/api/v1')

    self.weknora_client = client.AsyncWeKnoraClient(
        api_key=api_key,
        base_url=base_url,
    )
async def _extract_plain_text(self, query: pipeline_query.Query) -> str:
"""从用户消息中提取纯文本内容"""
plain_text = ''
if isinstance(query.user_message.content, str):
plain_text = query.user_message.content
elif isinstance(query.user_message.content, list):
for ce in query.user_message.content:
if ce.type == 'text':
plain_text += ce.text
if not plain_text:
plain_text = self.pipeline_config['ai']['weknora-api'].get('base-prompt', '')
return plain_text
async def _ensure_session(self, query: pipeline_query.Query) -> str:
"""确保会话存在,如果不存在则创建"""
session_id = query.session.using_conversation.uuid or ''
if not session_id:
user_tag = f'{query.session.launcher_type.value}_{query.session.launcher_id}'
session_id = await self.weknora_client.create_session(title=f'IM Chat - {user_tag}')
query.session.using_conversation.uuid = session_id
return session_id
async def _agent_chat_messages(
    self, query: pipeline_query.Query
) -> typing.AsyncGenerator[provider_message.Message, None]:
    """Call the agent chat API and yield aggregated (non-streaming) output.

    Each ``tool_call`` chunk that names a tool is yielded immediately as an
    assistant message carrying that tool call. ``answer`` chunks are
    concatenated and, if non-empty, yielded as one message at the end.

    Raises:
        errors.WeKnoraAPIError: on an ``error`` chunk, or when the API
            returns no chunk at all.
    """
    session_id = await self._ensure_session(query)
    plain_text = await self._extract_plain_text(query)
    user_tag = f'{query.session.launcher_type.value}_{query.session.launcher_id}'

    settings = self.pipeline_config['ai']['weknora-api']
    agent_id = settings.get('agent-id', 'builtin-smart-reasoning')
    knowledge_base_ids = settings.get('knowledge-base-ids', [])
    web_search_enabled = settings.get('web-search-enabled', False)
    timeout = settings.get('timeout', 120)

    answer = ''
    received_any = False

    async for chunk in self.weknora_client.agent_chat(
        session_id=session_id,
        query=plain_text,
        user=user_tag,
        agent_id=agent_id,
        knowledge_base_ids=knowledge_base_ids,
        web_search_enabled=web_search_enabled,
        timeout=timeout,
    ):
        received_any = True
        self.ap.logger.debug('weknora-agent-chunk: ' + str(chunk))

        response_type = chunk.get('response_type', '')
        content = chunk.get('content', '')

        if response_type == 'error':
            raise errors.WeKnoraAPIError(f'WeKnora 服务错误: {content}')

        if response_type == 'answer':
            if content:
                answer += content
            continue

        if response_type != 'tool_call':
            continue

        # Tool call: forward it only when a tool name is present.
        tool_data = chunk.get('data', {})
        tool_name = tool_data.get('tool_name', '')
        if not tool_name:
            continue
        call = provider_message.ToolCall(
            id=chunk.get('id', ''),
            type='function',
            function=provider_message.FunctionCall(
                name=tool_name,
                arguments=json.dumps(tool_data.get('arguments', {})),
            ),
        )
        yield provider_message.Message(role='assistant', tool_calls=[call])

    if not received_any:
        raise errors.WeKnoraAPIError('WeKnora API 没有返回任何响应,请检查网络连接和API配置')

    if answer:
        yield provider_message.Message(
            role='assistant',
            content=answer,
        )
async def _chat_messages(
    self, query: pipeline_query.Query
) -> typing.AsyncGenerator[provider_message.Message, None]:
    """Call the knowledge-base chat API and yield aggregated output.

    ``answer`` chunks are concatenated and, if non-empty, yielded as one
    assistant message after the stream ends.

    Raises:
        errors.WeKnoraAPIError: on an ``error`` chunk, or when the API
            returns no chunk at all.
    """
    session_id = await self._ensure_session(query)
    plain_text = await self._extract_plain_text(query)
    user_tag = f'{query.session.launcher_type.value}_{query.session.launcher_id}'

    settings = self.pipeline_config['ai']['weknora-api']
    agent_id = settings.get('agent-id', 'builtin-quick-answer')
    knowledge_base_ids = settings.get('knowledge-base-ids', [])
    timeout = settings.get('timeout', 120)

    answer = ''
    received_any = False

    async for chunk in self.weknora_client.knowledge_chat(
        session_id=session_id,
        query=plain_text,
        user=user_tag,
        agent_id=agent_id,
        knowledge_base_ids=knowledge_base_ids,
        timeout=timeout,
    ):
        received_any = True
        self.ap.logger.debug('weknora-chat-chunk: ' + str(chunk))

        response_type = chunk.get('response_type', '')
        content = chunk.get('content', '')

        if response_type == 'error':
            raise errors.WeKnoraAPIError(f'WeKnora 服务错误: {content}')
        if response_type == 'answer' and content:
            answer += content

    if not received_any:
        raise errors.WeKnoraAPIError('WeKnora API 没有返回任何响应,请检查网络连接和API配置')

    if answer:
        yield provider_message.Message(
            role='assistant',
            content=answer,
        )
async def _agent_chat_messages_chunk(
    self, query: pipeline_query.Query
) -> typing.AsyncGenerator[provider_message.MessageChunk, None]:
    """Call the agent chat API and yield streaming output.

    Tool calls that name a tool are forwarded as they arrive. Answer text is
    accumulated and the accumulated text is emitted whenever the shared
    chunk counter reaches a multiple of 8, or once a ``done`` chunk has been
    seen. If no final chunk was emitted and text is pending, a final chunk
    is sent after the stream ends.

    Raises:
        errors.WeKnoraAPIError: on an ``error`` chunk, or when the API
            returns no chunk at all.
    """
    session_id = await self._ensure_session(query)
    plain_text = await self._extract_plain_text(query)
    user_tag = f'{query.session.launcher_type.value}_{query.session.launcher_id}'

    settings = self.pipeline_config['ai']['weknora-api']
    agent_id = settings.get('agent-id', 'builtin-smart-reasoning')
    knowledge_base_ids = settings.get('knowledge-base-ids', [])
    web_search_enabled = settings.get('web-search-enabled', False)
    timeout = settings.get('timeout', 120)

    buffered = ''
    counter = 0
    finished = False
    received_any = False

    async for chunk in self.weknora_client.agent_chat(
        session_id=session_id,
        query=plain_text,
        user=user_tag,
        agent_id=agent_id,
        knowledge_base_ids=knowledge_base_ids,
        web_search_enabled=web_search_enabled,
        timeout=timeout,
    ):
        received_any = True
        self.ap.logger.debug('weknora-agent-chunk: ' + str(chunk))

        response_type = chunk.get('response_type', '')
        content = chunk.get('content', '')
        done = chunk.get('done', False)

        if response_type == 'error':
            raise errors.WeKnoraAPIError(f'WeKnora 服务错误: {content}')

        if response_type == 'tool_call':
            tool_data = chunk.get('data', {})
            tool_name = tool_data.get('tool_name', '')
            if not tool_name:
                continue
            counter += 1
            call = provider_message.ToolCall(
                id=chunk.get('id', ''),
                type='function',
                function=provider_message.FunctionCall(
                    name=tool_name,
                    arguments=json.dumps(tool_data.get('arguments', {})),
                ),
            )
            yield provider_message.MessageChunk(role='assistant', tool_calls=[call])
            continue

        if response_type != 'answer':
            continue

        counter += 1
        if content:
            buffered += content
        if done:
            finished = True

        # Emit once every 8 chunks, or when the final chunk has been seen.
        if counter % 8 == 0 or finished:
            yield provider_message.MessageChunk(
                role='assistant',
                content=buffered,
                is_final=finished,
            )

    if not received_any:
        raise errors.WeKnoraAPIError('WeKnora API 没有返回任何响应,请检查网络连接和API配置')

    # Make sure a final message is sent.
    if buffered and not finished:
        yield provider_message.MessageChunk(
            role='assistant',
            content=buffered,
            is_final=True,
        )
async def _chat_messages_chunk(
    self, query: pipeline_query.Query
) -> typing.AsyncGenerator[provider_message.MessageChunk, None]:
    """Call the knowledge-base chat API and yield streaming output.

    Answer text is accumulated and the accumulated text is emitted on every
    8th answer chunk, or once a ``done`` chunk has been seen. If no final
    chunk was emitted and text is pending, a final chunk is sent after the
    stream ends.

    Raises:
        errors.WeKnoraAPIError: on an ``error`` chunk, or when the API
            returns no chunk at all.
    """
    session_id = await self._ensure_session(query)
    plain_text = await self._extract_plain_text(query)
    user_tag = f'{query.session.launcher_type.value}_{query.session.launcher_id}'

    settings = self.pipeline_config['ai']['weknora-api']
    agent_id = settings.get('agent-id', 'builtin-quick-answer')
    knowledge_base_ids = settings.get('knowledge-base-ids', [])
    timeout = settings.get('timeout', 120)

    buffered = ''
    counter = 0
    finished = False
    received_any = False

    async for chunk in self.weknora_client.knowledge_chat(
        session_id=session_id,
        query=plain_text,
        user=user_tag,
        agent_id=agent_id,
        knowledge_base_ids=knowledge_base_ids,
        timeout=timeout,
    ):
        received_any = True
        self.ap.logger.debug('weknora-chat-chunk: ' + str(chunk))

        response_type = chunk.get('response_type', '')
        content = chunk.get('content', '')
        done = chunk.get('done', False)

        if response_type == 'error':
            raise errors.WeKnoraAPIError(f'WeKnora 服务错误: {content}')
        if response_type != 'answer':
            continue

        counter += 1
        if content:
            buffered += content
        if done:
            finished = True

        if counter % 8 == 0 or finished:
            yield provider_message.MessageChunk(
                role='assistant',
                content=buffered,
                is_final=finished,
            )

    if not received_any:
        raise errors.WeKnoraAPIError('WeKnora API 没有返回任何响应,请检查网络连接和API配置')

    if buffered and not finished:
        yield provider_message.MessageChunk(
            role='assistant',
            content=buffered,
            is_final=True,
        )
async def run(self, query: pipeline_query.Query) -> typing.AsyncGenerator[provider_message.Message, None]:
    """Run the request with the producer matching app type and stream support.

    In streaming mode each yielded chunk gets a 1-based ``msg_sequence``.

    Raises:
        errors.WeKnoraAPIError: if the configured app type is neither
            ``agent`` nor ``chat``.
    """
    app_type = self.pipeline_config['ai']['weknora-api']['app-type']
    streaming = await query.adapter.is_stream_output_supported()

    if app_type == 'agent':
        producer = self._agent_chat_messages_chunk if streaming else self._agent_chat_messages
    elif app_type == 'chat':
        producer = self._chat_messages_chunk if streaming else self._chat_messages
    else:
        raise errors.WeKnoraAPIError(f'不支持的 WeKnora 应用类型: {app_type}')

    sequence = 0
    async for msg in producer(query):
        if streaming:
            sequence += 1
            msg.msg_sequence = sequence
        yield msg
+4 -2
View File
@@ -2,12 +2,14 @@ from __future__ import annotations
import abc
import typing
from typing import TYPE_CHECKING
from langbot_plugin.api.entities.events import pipeline_query
from ...core import app
import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
if TYPE_CHECKING:
from ...core import app
preregistered_loaders: list[typing.Type[ToolLoader]] = []
+202 -18
View File
@@ -20,6 +20,7 @@ from ....core import app
import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
import langbot_plugin.api.entities.builtin.provider.message as provider_message
from ....entity.persistence import mcp as persistence_mcp
from .mcp_stdio import BoxStdioSessionRuntime, MCPServerBoxConfig, MCPSessionErrorPhase # noqa: F401
class MCPSessionStatus(enum.Enum):
@@ -58,6 +59,12 @@ class RuntimeMCPSession:
error_message: str | None = None
error_phase: MCPSessionErrorPhase | None = None
retry_count: int = 0
_box_stdio_runtime: BoxStdioSessionRuntime
def __init__(self, server_name: str, server_config: dict, enable: bool, ap: app.Application):
self.server_name = server_name
self.server_uuid = server_config.get('uuid', '')
@@ -66,6 +73,13 @@ class RuntimeMCPSession:
self.enable = enable
self.session = None
# Transient test sessions (created from the config page "test" button,
# which carry no persisted server UUID) must NOT share the live
# "mcp-shared" Box session. Otherwise a failing test churns the shared
# session and tears down healthy, already-connected servers. Callers
# flag these via server_config['_transient'] = True.
self.is_transient = bool(server_config.get('_transient', False))
self.exit_stack = AsyncExitStack()
self.functions = []
@@ -75,7 +89,33 @@ class RuntimeMCPSession:
self._shutdown_event = asyncio.Event()
self._ready_event = asyncio.Event()
self._box_stdio_runtime = BoxStdioSessionRuntime(self)
self.box_config = self._box_stdio_runtime.config
async def _init_stdio_python_server(self):
if self._uses_box_stdio():
await self._box_stdio_runtime.initialize()
return
# Box is configured (ap.box_service exists) but currently unavailable
# (disabled by config or connection failed). Refuse stdio MCP rather
# than silently falling through to host-stdio — the operator asked
# for the sandbox and the failure mode should be visible.
#
# Set ``error_phase = BOX_UNAVAILABLE`` BEFORE raising so the retry
# wrapper can short-circuit (retrying is pointless when Box is
# deliberately off) and the frontend can render a localized,
# actionable message instead of this raw RuntimeError. Keep the
# message itself short — the frontend ignores it for this phase.
box_service = getattr(self.ap, 'box_service', None)
if box_service is not None and not getattr(box_service, 'available', False):
self.error_phase = MCPSessionErrorPhase.BOX_UNAVAILABLE
if not getattr(box_service, 'enabled', True):
raise RuntimeError('box_disabled_in_config')
raise RuntimeError('box_unavailable')
# Legacy: no box_service installed at all (pre-Box dev mode). Fall
# through to host-stdio for backward compatibility.
server_params = StdioServerParameters(
command=self.server_config['command'],
args=self.server_config['args'],
@@ -90,6 +130,9 @@ class RuntimeMCPSession:
await self.session.initialize()
async def _init_box_stdio_server(self):
    """Initialize the server through the Box stdio runtime."""
    runtime = self._box_stdio_runtime
    await runtime.initialize()
async def _init_sse_server(self):
sse_transport = await self.exit_stack.enter_async_context(
sse_client(
@@ -124,8 +167,11 @@ class RuntimeMCPSession:
await self.session.initialize()
_MAX_RETRIES = 3
_RETRY_DELAYS = [2, 4, 8]
async def _lifecycle_loop(self):
"""在后台任务中管理整个MCP会话的生命周期"""
"""Manage the full MCP session lifecycle in a background task."""
try:
if self.server_config['mode'] == 'stdio':
await self._init_stdio_python_server()
@@ -134,49 +180,134 @@ class RuntimeMCPSession:
elif self.server_config['mode'] == 'http':
await self._init_streamable_http_server()
else:
raise ValueError(f'无法识别 MCP 服务器类型: {self.server_name}: {self.server_config}')
raise ValueError(f'Unknown MCP server mode: {self.server_name}: {self.server_config}')
await self.refresh()
self.status = MCPSessionStatus.CONNECTED
# 通知start()方法连接已建立
# Notify start() that connection is established
self._ready_event.set()
# 等待shutdown信号
await self._shutdown_event.wait()
# Wait for shutdown signal, with optional health monitoring for Box stdio
if self._uses_box_stdio():
monitor_task = asyncio.create_task(self._box_stdio_runtime.monitor_process_health())
shutdown_task = asyncio.create_task(self._shutdown_event.wait())
done, pending = await asyncio.wait(
[shutdown_task, monitor_task],
return_when=asyncio.FIRST_COMPLETED,
)
for task in pending:
task.cancel()
for task in done:
if task is monitor_task and not self._shutdown_event.is_set():
self.error_phase = MCPSessionErrorPhase.RUNTIME
raise Exception('Box managed process exited unexpectedly')
else:
await self._shutdown_event.wait()
except Exception as e:
self.status = MCPSessionStatus.ERROR
self.error_message = str(e)
self.ap.logger.error(f'Error in MCP session lifecycle {self.server_name}: {e}\n{traceback.format_exc()}')
# 即使出错也要设置ready事件,让start()方法知道初始化已完成
self._ready_event.set()
# Do NOT set _ready_event here — let _lifecycle_loop_with_retry
# handle retries first. It will set the event when all retries
# are exhausted or on success.
raise # Re-raise so _lifecycle_loop_with_retry can catch it
finally:
# 在同一个任务中清理所有资源
# Clean up all resources in the same task
try:
if self.exit_stack:
await self.exit_stack.aclose()
self.exit_stack = AsyncExitStack()
self.functions.clear()
self.session = None
except Exception as e:
self.ap.logger.error(f'Error cleaning up MCP session {self.server_name}: {e}\n{traceback.format_exc()}')
finally:
await self._cleanup_box_stdio_session()
async def _lifecycle_loop_with_retry(self):
    """Run ``_lifecycle_loop``, retrying failed attempts after a delay.

    The loop stops without retrying when the lifecycle returns normally,
    when shutdown has been requested, when the failure phase is
    ``BOX_UNAVAILABLE``, or once ``_MAX_RETRIES`` retries are used up. In
    the last two cases the session is marked ``ERROR`` and ``_ready_event``
    is set. Otherwise the Box stdio session is cleaned up, the status is
    reset to ``CONNECTING`` and the next attempt starts after
    ``_RETRY_DELAYS[attempt]`` seconds.
    """
    for attempt in range(self._MAX_RETRIES + 1):
        try:
            await self._lifecycle_loop()
        except Exception as exc:
            self.retry_count = attempt + 1

            # Shutdown requested: do not retry.
            if self._shutdown_event.is_set():
                return

            # BOX_UNAVAILABLE is a deliberate refusal, not a transient
            # failure; retrying would only produce log spam and a
            # misleading "Failed after N attempts" message.
            if self.error_phase == MCPSessionErrorPhase.BOX_UNAVAILABLE:
                self.status = MCPSessionStatus.ERROR
                self.error_message = str(exc)
                self._ready_event.set()
                return

            if attempt >= self._MAX_RETRIES:
                self.status = MCPSessionStatus.ERROR
                self.error_message = (
                    f'Failed after {self._MAX_RETRIES + 1} attempts: {self._describe_exception(exc)}'
                )
                self._ready_event.set()
                return

            delay = self._RETRY_DELAYS[attempt]
            self.ap.logger.warning(
                f'MCP session {self.server_name} failed (attempt {attempt + 1}), '
                f'retrying in {delay}s: {self._describe_exception(exc)}'
            )
            await self._cleanup_box_stdio_session()

            # Reset state before the next attempt.
            self.status = MCPSessionStatus.CONNECTING
            self.error_message = None
            self.error_phase = None
            await asyncio.sleep(delay)
        else:
            # Normal shutdown: nothing to retry.
            return
@staticmethod
def _describe_exception(exc: BaseException) -> str:
"""Flatten an exception into its underlying leaf messages.
anyio / the MCP client wrap real failures in a TaskGroup, whose own
message is the unhelpful "unhandled errors in a TaskGroup (N
sub-exception)". Recurse into ExceptionGroups so the actual cause
(e.g. ``httpx.HTTPStatusError: Client error '410 Gone'``) is surfaced.
"""
leaves: list[str] = []
def visit(e: BaseException) -> None:
sub = getattr(e, 'exceptions', None)
if sub: # ExceptionGroup / BaseExceptionGroup
for child in sub:
visit(child)
else:
leaves.append(f'{type(e).__name__}: {e}')
visit(exc)
seen: set[str] = set()
unique = [m for m in leaves if not (m in seen or seen.add(m))]
return '; '.join(unique) if unique else f'{type(exc).__name__}: {exc}'
_MONITOR_POLL_INTERVAL = 5
_MONITOR_MAX_CONSECUTIVE_ERRORS = 3
async def _monitor_box_process_health(self):
    """Delegate process-health monitoring to the Box stdio runtime."""
    runtime = self._box_stdio_runtime
    await runtime.monitor_process_health()
async def start(self):
if not self.enable:
return
# 创建后台任务来管理生命周期
self._lifecycle_task = asyncio.create_task(self._lifecycle_loop())
# Create background task for lifecycle management with retry
self._lifecycle_task = asyncio.create_task(self._lifecycle_loop_with_retry())
# 等待连接建立或失败(带超时)
# Wait for connection or failure (with timeout)
startup_timeout = (self.box_config.startup_timeout_sec + 30) if self._uses_box_stdio() else 30.0
try:
await asyncio.wait_for(self._ready_event.wait(), timeout=30.0)
await asyncio.wait_for(self._ready_event.wait(), timeout=startup_timeout)
except asyncio.TimeoutError:
self.status = MCPSessionStatus.ERROR
raise Exception('Connection timeout after 30 seconds')
raise Exception(f'Connection timeout after {startup_timeout} seconds')
# 检查是否有错误
# Check for errors
if self.status == MCPSessionStatus.ERROR:
raise Exception('Connection failed, please check URL')
@@ -232,18 +363,25 @@ class RuntimeMCPSession:
return self.functions
def get_runtime_info_dict(self) -> dict:
    """Return a dictionary describing the session's runtime state.

    The result always holds ``status``, ``error_message``, ``error_phase``,
    ``retry_count``, ``tool_count`` and ``tools`` (name, description and
    parameters of each tool). When the session uses Box stdio it also holds
    ``box_session_id`` and ``box_enabled``.
    """
    # Fetch the tool list once and reuse it for both the count and details.
    tools = self.get_tools()
    info = {
        'status': self.status.value,
        'error_message': self.error_message,
        'error_phase': self.error_phase.value if self.error_phase else None,
        'retry_count': self.retry_count,
        'tool_count': len(tools),
        'tools': [
            {
                'name': tool.name,
                'description': tool.description,
                'parameters': tool.parameters,
            }
            for tool in tools
        ],
    }
    if self._uses_box_stdio():
        info['box_session_id'] = self._build_box_session_id()
        info['box_enabled'] = True
    return info
async def shutdown(self):
"""关闭会话并清理资源"""
@@ -267,6 +405,46 @@ class RuntimeMCPSession:
except Exception as e:
self.ap.logger.error(f'Error shutting down MCP session {self.server_name}: {e}\n{traceback.format_exc()}')
def _uses_box_stdio(self) -> bool:
    """Return the Box stdio runtime's ``uses_box_stdio()`` result."""
    runtime = self._box_stdio_runtime
    return runtime.uses_box_stdio()
def _build_box_session_id(self) -> str:
# Transient test sessions get their own isolated Box session so a
# failing/short-lived test can never disturb the shared session that
# hosts live, already-connected MCP servers.
if self.is_transient:
return f'mcp-test-{self.server_uuid}'
return 'mcp-shared'
def _rewrite_path(self, path: str, host_path: str | None) -> str:
    """Delegate to the Box stdio runtime's ``rewrite_path``."""
    runtime = self._box_stdio_runtime
    return runtime.rewrite_path(path, host_path)
def _infer_host_path(self) -> str | None:
    """Delegate to the Box stdio runtime's ``infer_host_path``."""
    runtime = self._box_stdio_runtime
    return runtime.infer_host_path()
@staticmethod
def _unwrap_venv_path(directory: str) -> str:
    """Delegate to ``BoxStdioSessionRuntime.unwrap_venv_path``."""
    unwrapped = BoxStdioSessionRuntime.unwrap_venv_path(directory)
    return unwrapped
def _resolve_host_path(self) -> str | None:
    """Delegate to the Box stdio runtime's ``resolve_host_path``."""
    runtime = self._box_stdio_runtime
    return runtime.resolve_host_path()
@staticmethod
def _detect_install_command(host_path: str) -> str | None:
    """Delegate to ``BoxStdioSessionRuntime.detect_install_command``."""
    install_command = BoxStdioSessionRuntime.detect_install_command(host_path)
    return install_command
def _build_box_session_payload(self, session_id: str, host_path: str | None = None) -> dict:
    """Delegate to the Box stdio runtime's ``build_box_session_payload``."""
    runtime = self._box_stdio_runtime
    return runtime.build_box_session_payload(session_id, host_path)
def _build_box_process_payload(self, host_path: str | None = None) -> dict:
    """Delegate to the Box stdio runtime's ``build_box_process_payload``."""
    runtime = self._box_stdio_runtime
    return runtime.build_box_process_payload(host_path)
def _rewrite_venv_command(self, command: str, host_path: str) -> str:
    """Delegate to the Box stdio runtime's ``rewrite_venv_command``."""
    runtime = self._box_stdio_runtime
    return runtime.rewrite_venv_command(command, host_path)
async def _cleanup_box_stdio_session(self) -> None:
    """Delegate session cleanup to the Box stdio runtime."""
    runtime = self._box_stdio_runtime
    await runtime.cleanup_session()
# @loader.loader_class('mcp')
class MCPLoader(loader.ToolLoader):
@@ -332,15 +510,19 @@ class MCPLoader(loader.ToolLoader):
Args:
server_config: 服务器配置字典必须包含:
- name: 服务器名称
- mode: 连接模式 (stdio/sse)
- mode: 连接模式 (stdio/sse/http)
- enable: 是否启用
- extra_args: 额外的配置参数 (可选)
"""
uuid_ = server_config.get('uuid')
is_transient = False
if not uuid_:
self.ap.logger.warning('Server UUID is None for MCP server, maybe testing in the config page.')
uuid_ = str(uuid_module.uuid4())
server_config['uuid'] = uuid_
# No persisted UUID => this is a throwaway "test" session from the
# config page. Isolate it from the shared live Box session.
is_transient = True
name = server_config['name']
uuid = server_config['uuid']
@@ -353,6 +535,7 @@ class MCPLoader(loader.ToolLoader):
'uuid': uuid,
'mode': mode,
'enable': enable,
'_transient': is_transient,
**extra_args,
}
@@ -431,12 +614,13 @@ class MCPLoader(loader.ToolLoader):
"""获取所有服务器的信息"""
info = {}
for server_name, session in self.sessions.items():
tools = session.get_tools()
info[server_name] = {
'name': server_name,
'mode': session.server_config.get('mode'),
'enable': session.enable,
'tools_count': len(session.get_tools()),
'tool_names': [f.name for f in session.get_tools()],
'tools_count': len(tools),
'tool_names': [f.name for f in tools],
}
return info
@@ -0,0 +1,381 @@
from __future__ import annotations
import enum
import asyncio
import os
import shutil
import shlex
from typing import TYPE_CHECKING, Any
import pydantic
from mcp import ClientSession
from mcp.client.websocket import websocket_client
from ....box.workspace import (
BoxWorkspaceSession,
classify_python_workspace,
infer_workspace_host_path,
normalize_host_path,
rewrite_mounted_path,
rewrite_venv_command,
unwrap_venv_path,
)
if TYPE_CHECKING:
from .mcp import RuntimeMCPSession
class MCPSessionErrorPhase(enum.Enum):
    """Identifies the phase of the MCP lifecycle in which a failure occurred."""

    SESSION_CREATE = 'session_create'
    DEP_INSTALL = 'dep_install'
    PROCESS_START = 'process_start'
    RELAY_CONNECT = 'relay_connect'
    MCP_INIT = 'mcp_init'
    RUNTIME = 'runtime'
    TOOL_CALL = 'tool_call'

    # Stdio MCP was refused because Box is disabled in config or currently
    # unavailable. This is not transient, so retrying would be pointless.
    # The frontend uses this phase to render a localized, actionable
    # message instead of the raw RuntimeError text.
    BOX_UNAVAILABLE = 'box_unavailable'
class MCPServerBoxConfig(pydantic.BaseModel):
    """Structured settings for running an MCP server inside a Box container.

    Unknown keys in the input are ignored (``extra='ignore'``).
    """

    model_config = pydantic.ConfigDict(extra='ignore')

    image: str | None = None
    # Network defaults to 'on' because MCP servers need it to install
    # their dependencies.
    network: str = 'on'
    host_path: str | None = None
    # Defaults to 'ro'; a read-write mount has to be requested explicitly.
    host_path_mode: str = 'ro'
    env: dict[str, str] = pydantic.Field(default_factory=dict)
    # Longer default so dependency bootstrap has time to finish.
    startup_timeout_sec: int = 120
    cpus: float | None = None
    memory_mb: int | None = None
    pids_limit: int | None = None
    read_only_rootfs: bool | None = None
class BoxStdioSessionRuntime:
"""Encapsulate Box-backed stdio MCP session orchestration."""
def __init__(self, owner: RuntimeMCPSession):
    """Bind the runtime to its owning session and parse its Box settings.

    The settings are validated from the ``box`` entry of the owner's
    ``server_config`` (an empty dict when that entry is absent).
    """
    self.owner = owner
    box_settings = owner.server_config.get('box', {})
    self.config = MCPServerBoxConfig.model_validate(box_settings)
@property
def ap(self):
    """The application object held by the owning session."""
    owner = self.owner
    return owner.ap
@property
def server_name(self) -> str:
    """The owning session's server name."""
    owner = self.owner
    return owner.server_name
@property
def server_config(self) -> dict:
    """The owning session's server configuration dictionary."""
    owner = self.owner
    return owner.server_config
def _build_workspace(
    self,
    *,
    host_path: str | None | object = ...,
    workdir: str = '/workspace',
    mount_path: str = '/workspace',
) -> BoxWorkspaceSession:
    """Create a persistent ``BoxWorkspaceSession`` from this runtime's config.

    Args:
        host_path: Host path for the workspace. The ``...`` default means
            "not given", in which case ``resolve_host_path()`` is used;
            an explicit ``None`` is passed through unchanged.
        workdir: Working directory passed to the workspace session.
        mount_path: Mount path passed to the workspace session.
    """
    if host_path is ...:
        host_path = self.resolve_host_path()

    cfg = self.config
    # An unset read_only_rootfs is passed on as False.
    read_only_rootfs = False if cfg.read_only_rootfs is None else cfg.read_only_rootfs

    return BoxWorkspaceSession(
        self.ap.box_service,
        self.owner._build_box_session_id(),
        host_path=host_path,
        host_path_mode=cfg.host_path_mode,
        workdir=workdir,
        env=cfg.env,
        mount_path=mount_path,
        network=cfg.network,
        read_only_rootfs=read_only_rootfs,
        image=cfg.image,
        cpus=cfg.cpus,
        memory_mb=cfg.memory_mb,
        pids_limit=cfg.pids_limit,
        persistent=True,
    )
@property
def process_id(self) -> str:
"""Each MCP server gets a unique process_id within the shared session."""
return self.owner.server_uuid
def uses_box_stdio(self) -> bool:
if self.server_config.get('mode') != 'stdio':
return False
box_service = getattr(self.ap, 'box_service', None)
if box_service is None:
return False
# When Box is configured but currently unavailable (disabled or
# connection failed), do NOT silently fall through to host-stdio —
# that would bypass the sandbox the operator asked for. The caller
# is expected to refuse the stdio MCP server with a clear error.
return bool(getattr(box_service, 'available', False))
async def initialize(self) -> None:
await self._wait_for_box_runtime()
# All stdio MCP servers share one Box session. Per-server host paths
# are staged into the shared workspace instead of becoming session
# mounts, because an existing Docker container cannot add bind mounts.
workspace = self._build_workspace(host_path=None)
host_path = self.resolve_host_path()
process_cwd = '/workspace'
try:
await workspace.create_session()
except Exception:
self.owner.error_phase = MCPSessionErrorPhase.SESSION_CREATE
raise
if host_path:
process_cwd = await self._stage_host_path_to_shared_workspace(host_path)
install_cmd = self.detect_install_command(host_path, process_cwd)
if install_cmd:
self.ap.logger.info(
f'MCP server {self.server_name}: installing dependencies in Box with: {install_cmd}'
)
try:
result = await workspace.execute_raw(
install_cmd,
workdir=process_cwd,
timeout_sec=self.config.startup_timeout_sec or 120,
)
except Exception:
self.owner.error_phase = MCPSessionErrorPhase.DEP_INSTALL
raise
if not result.ok:
self.owner.error_phase = MCPSessionErrorPhase.DEP_INSTALL
stderr_preview = (result.stderr or '')[:500]
raise Exception(f'Dependency install failed (exit code {result.exit_code}): {stderr_preview}')
try:
process_workspace = (
self._build_workspace(host_path=host_path, workdir=process_cwd, mount_path=process_cwd)
if host_path
else workspace
)
payload = process_workspace.build_process_payload(
self.server_config['command'],
self.server_config.get('args', []),
env=self.server_config.get('env', {}),
cwd=process_cwd,
)
payload['process_id'] = self.process_id
await workspace.box_service.start_managed_process(workspace.session_id, payload)
except Exception:
self.owner.error_phase = MCPSessionErrorPhase.PROCESS_START
raise
try:
websocket_url = workspace.get_managed_process_websocket_url(self.process_id)
transport = await self.owner.exit_stack.enter_async_context(websocket_client(websocket_url))
read_stream, write_stream = transport
self.owner.session = await self.owner.exit_stack.enter_async_context(
ClientSession(read_stream, write_stream)
)
except Exception:
self.owner.error_phase = MCPSessionErrorPhase.RELAY_CONNECT
raise
try:
await self.owner.session.initialize()
except Exception:
self.owner.error_phase = MCPSessionErrorPhase.MCP_INIT
raise
async def monitor_process_health(self) -> None:
from langbot_plugin.box.models import BoxManagedProcessStatus
workspace = self._build_workspace()
consecutive_errors = 0
while not self.owner._shutdown_event.is_set():
try:
info = await workspace.get_managed_process(self.process_id)
if isinstance(info, dict):
status = info.get('status', '')
else:
status = getattr(info, 'status', '')
if status == BoxManagedProcessStatus.EXITED.value or status == BoxManagedProcessStatus.EXITED:
return
consecutive_errors = 0
except Exception as exc:
consecutive_errors += 1
self.ap.logger.warning(
f'MCP monitor for {self.server_name}: get_managed_process failed '
f'({consecutive_errors}/{self.owner._MONITOR_MAX_CONSECUTIVE_ERRORS}): '
f'{type(exc).__name__}: {exc}'
)
if consecutive_errors >= self.owner._MONITOR_MAX_CONSECUTIVE_ERRORS:
return
await asyncio.sleep(self.owner._MONITOR_POLL_INTERVAL)
async def _stage_host_path_to_shared_workspace(self, host_path: str) -> str:
source_path = normalize_host_path(host_path)
if not source_path:
return '/workspace'
if not os.path.isdir(source_path):
raise FileNotFoundError(f'MCP host_path does not exist or is not a directory: {host_path}')
self._validate_host_path(source_path)
shared_host_path = self._shared_workspace_host_path()
process_host_root = os.path.join(shared_host_path, '.mcp', self.process_id)
process_host_workspace = os.path.join(process_host_root, 'workspace')
await asyncio.to_thread(self._copy_workspace_tree, source_path, process_host_root, process_host_workspace)
return f'/workspace/.mcp/{self.process_id}/workspace'
def _validate_host_path(self, host_path: str) -> None:
self.ap.box_service.build_spec(
{
'session_id': f'mcp-validate-{self.process_id}',
'host_path': host_path,
'host_path_mode': self.config.host_path_mode,
'network': self.config.network,
'read_only_rootfs': self.config.read_only_rootfs if self.config.read_only_rootfs is not None else False,
}
)
def _shared_workspace_host_path(self) -> str:
default_workspace = getattr(self.ap.box_service, 'default_workspace', None)
if not default_workspace:
raise RuntimeError('Box default workspace is required for shared MCP host_path staging')
shared_host_path = normalize_host_path(default_workspace)
os.makedirs(shared_host_path, exist_ok=True)
return shared_host_path
@staticmethod
def _copy_workspace_tree(source_path: str, process_host_root: str, process_host_workspace: str) -> None:
shutil.rmtree(process_host_root, ignore_errors=True)
os.makedirs(process_host_root, exist_ok=True)
shutil.copytree(
source_path,
process_host_workspace,
symlinks=True,
ignore=shutil.ignore_patterns('.git', '__pycache__', '.pytest_cache', '.mypy_cache', '.ruff_cache'),
)
async def _cleanup_staged_workspace(self) -> None:
if not self.resolve_host_path():
return
try:
process_host_root = os.path.join(self._shared_workspace_host_path(), '.mcp', self.process_id)
await asyncio.to_thread(shutil.rmtree, process_host_root, True)
except Exception as exc:
self.ap.logger.warning(
f'MCP server {self.server_name}: failed to clean staged workspace '
f'process_id={self.process_id}: {type(exc).__name__}: {exc}'
)
async def _wait_for_box_runtime(self) -> None:
timeout_sec = max(float(self.config.startup_timeout_sec or 120), 1.0)
deadline = asyncio.get_running_loop().time() + timeout_sec
warned = False
while not getattr(self.ap.box_service, 'available', False):
if not warned:
self.ap.logger.warning(
f'MCP server {self.server_name}: waiting for Box runtime before starting stdio process'
)
warned = True
if asyncio.get_running_loop().time() >= deadline:
self.owner.error_phase = MCPSessionErrorPhase.SESSION_CREATE
raise Exception(f'Box runtime is not available after {int(timeout_sec)} seconds')
await asyncio.sleep(1)
async def cleanup_session(self) -> None:
if not self.uses_box_stdio():
return
workspace = self._build_workspace(host_path=None)
# Transient test sessions own their isolated Box session, so tear the
# whole session down rather than leaking it. This cannot affect live
# servers because they live in the separate shared session.
if getattr(self.owner, 'is_transient', False):
try:
await workspace.cleanup()
except Exception as exc:
self.ap.logger.warning(
f'MCP server {self.server_name}: failed to delete transient test session '
f'{self.owner._build_box_session_id()}: {type(exc).__name__}: {exc}'
)
await self._cleanup_staged_workspace()
return
# In the shared-session model, we do not delete the session itself.
# Stop only this MCP server's managed process; deleting the session
# would kill other MCP servers sharing the same container.
try:
await workspace.stop_managed_process(self.process_id)
except Exception as exc:
self.ap.logger.warning(
f'MCP server {self.server_name}: failed to stop managed process '
f'process_id={self.process_id}: {type(exc).__name__}: {exc}'
)
await self._cleanup_staged_workspace()
return
await self._cleanup_staged_workspace()
self.ap.logger.info(
f'MCP server {self.server_name}: stopped process_id={self.process_id} '
f'(shared session {self.owner._build_box_session_id()} kept alive)'
)
def rewrite_path(self, path: str, host_path: str | None) -> str:
return rewrite_mounted_path(path, host_path)
def infer_host_path(self) -> str | None:
return infer_workspace_host_path(self.server_config.get('command', ''), self.server_config.get('args', []))
@staticmethod
def unwrap_venv_path(directory: str) -> str:
return unwrap_venv_path(directory)
def resolve_host_path(self) -> str | None:
return self.config.host_path or self.infer_host_path()
@staticmethod
def detect_install_command(host_path: str, workspace_path: str = '/workspace') -> str | None:
workspace_kind = classify_python_workspace(host_path)
quoted_workspace_path = shlex.quote(workspace_path)
if workspace_kind == 'package':
return (
'mkdir -p /opt/_lb_src'
f' && tar -C {quoted_workspace_path}'
' --exclude=.venv --exclude=.git --exclude=__pycache__'
' --exclude=node_modules --exclude=.tox --exclude=.nox'
' --exclude="*.egg-info" --exclude=.uv-cache'
' -cf - .'
' | tar -C /opt/_lb_src -xf -'
' && pip install --no-cache-dir /opt/_lb_src'
' && rm -rf /opt/_lb_src'
)
if workspace_kind == 'requirements':
return f'pip install --no-cache-dir -r {quoted_workspace_path}/requirements.txt'
return None
def build_box_session_payload(self, session_id: str, host_path: str | None = None) -> dict[str, Any]:
workspace = self._build_workspace()
workspace.session_id = session_id
if host_path is not None:
workspace.host_path = host_path
return workspace.build_session_payload()
def build_box_process_payload(self, host_path: str | None = None) -> dict[str, Any]:
workspace = self._build_workspace()
if host_path is not None:
workspace.host_path = host_path
return workspace.build_process_payload(
self.server_config['command'],
self.server_config.get('args', []),
env=self.server_config.get('env', {}),
)
def rewrite_venv_command(self, command: str, host_path: str) -> str:
return rewrite_venv_command(command, host_path)
@@ -0,0 +1,846 @@
from __future__ import annotations
import json
import os
import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
from langbot_plugin.api.entities.events import pipeline_query
from .. import loader
from . import skill as skill_loader
# Public names of the native sandbox tools.
EXEC_TOOL_NAME = 'exec'
READ_TOOL_NAME = 'read'
WRITE_TOOL_NAME = 'write'
EDIT_TOOL_NAME = 'edit'
GLOB_TOOL_NAME = 'glob'
GREP_TOOL_NAME = 'grep'

# Every tool name this loader answers for.
_ALL_TOOL_NAMES = {
    EXEC_TOOL_NAME,
    READ_TOOL_NAME,
    WRITE_TOOL_NAME,
    EDIT_TOOL_NAME,
    GLOB_TOOL_NAME,
    GREP_TOOL_NAME,
}

# Directory names left out of glob/grep results to avoid noise.
_SKIP_DIRS = {
    '.git',
    'node_modules',
    '__pycache__',
    '.venv',
    'venv',
    '.tox',
    'dist',
    'build',
}
class NativeToolLoader(loader.ToolLoader):
def __init__(self, ap):
    """Set up the loader with empty caches; nothing is probed until ``initialize`` runs."""
    super().__init__(ap)
    # Outcome of the startup backend probe; stays None until ``initialize`` has run.
    self._backend_available: bool | None = None
    # Tool definitions are built lazily on the first ``get_tools`` call.
    self._tools: list[resource_tool.LLMTool] | None = None
async def initialize(self):
    """Probe the sandbox backend once at startup, cache the result and log it."""
    backend_ready = await self._check_backend_available()
    self._backend_available = backend_ready

    if not backend_ready:
        self.ap.logger.warning(
            'Native sandbox tools (exec/read/write/edit/glob/grep) are NOT available. '
            'No sandbox backend (Docker/nsjail/E2B) is ready. '
            'The LLM will not have access to code execution or file operation tools.'
        )
        return

    self.ap.logger.info('Native sandbox tools (exec/read/write/edit/glob/grep) are available.')
async def _check_backend_available(self) -> bool:
    """Report whether the Box backend itself, not only the Box runtime, is usable.

    Returns False when there is no Box service, when the service is not
    marked available, or when the status query fails for any reason.
    """
    service = getattr(self.ap, 'box_service', None)
    runtime_up = service is not None and getattr(service, 'available', False)
    if not runtime_up:
        return False

    # The runtime being reachable is not enough; ask it about its backend.
    try:
        report = await service.get_status()
        return report.get('backend', {}).get('available', False)
    except Exception:
        return False
async def get_tools(self, bound_plugins: list[str] | None = None) -> list[resource_tool.LLMTool]:
    """Return a fresh list of the native tool definitions, or ``[]`` without a sandbox.

    ``bound_plugins`` is accepted but not used by this loader.
    """
    if not self._is_sandbox_available():
        return []

    if self._tools is None:
        # Build the definitions once and reuse them on later calls.
        builders = (
            self._build_exec_tool,
            self._build_read_tool,
            self._build_write_tool,
            self._build_edit_tool,
            self._build_glob_tool,
            self._build_grep_tool,
        )
        self._tools = [build() for build in builders]

    # Hand out a copy so callers cannot mutate the cached list.
    return self._tools.copy()
async def has_tool(self, name: str) -> bool:
    """Return True when ``name`` is a native tool and the sandbox is available."""
    if name not in _ALL_TOOL_NAMES:
        return False
    return self._is_sandbox_available()
async def invoke_tool(self, name: str, parameters: dict, query: pipeline_query.Query):
    """Dispatch a tool call to the matching ``_invoke_*`` handler.

    ``exec`` calls are logged with a parameter summary before they run.

    Raises:
        ValueError: ``name`` is not one of the native tools.
    """
    if name == EXEC_TOOL_NAME:
        query_id = query.query_id
        summary = json.dumps(self._summarize_parameters(parameters), ensure_ascii=False)
        self.ap.logger.info(f'exec tool invoked: query_id={query_id} parameters={summary}')
        return await self._invoke_exec(parameters, query)

    routes = (
        (READ_TOOL_NAME, self._invoke_read),
        (WRITE_TOOL_NAME, self._invoke_write),
        (EDIT_TOOL_NAME, self._invoke_edit),
        (GLOB_TOOL_NAME, self._invoke_glob),
        (GREP_TOOL_NAME, self._invoke_grep),
    )
    for tool_name, handler in routes:
        if name == tool_name:
            return await handler(parameters, query)

    raise ValueError(f'未找到工具: {name}')
async def shutdown(self):
    """Do nothing; this loader has nothing to release on shutdown."""
    return None
async def _invoke_exec(self, parameters: dict, query: pipeline_query.Query) -> dict:
    """Run an ``exec`` call in the Box, enforcing the one-activated-skill rule.

    The target skill comes from ``workdir`` or, failing that, from the skill
    names referenced in the command. When the skill has a Python project, the
    command is wrapped with the skill's venv bootstrap. After execution the
    skill is refreshed from disk.

    Raises:
        ValueError: more than one skill is referenced, the referenced skill is
            not activated, or the activated skill has no ``package_root``.
    """
    command = str(parameters['command'])
    workdir = str(parameters.get('workdir', '/workspace') or '/workspace')

    # Validate that skill references target activated skills.
    skill, _ = skill_loader.resolve_virtual_skill_path(
        self.ap,
        query,
        workdir,
        include_visible=False,
        include_activated=True,
    )
    referenced = skill_loader.find_referenced_skill_names(command)

    if skill is None and referenced:
        if len(referenced) > 1:
            raise ValueError('exec can target at most one activated skill package per call.')
        first_reference = referenced[0]
        skill = skill_loader.get_activated_skill(query, first_reference)
        if skill is None:
            raise ValueError(f'Skill "{first_reference}" must be activated before exec can run in its package.')

    if skill is not None:
        skill_name = str(skill.get('name', '') or '')
        if referenced and not all(name == skill_name for name in referenced):
            raise ValueError('exec can reference files from only one activated skill package per call.')

        package_root = str(skill.get('package_root', '') or '').strip()
        if not package_root:
            raise ValueError(f'Activated skill "{skill_name}" has no package_root.')

        # Wrap command with Python venv bootstrap if the skill has a Python project.
        # The venv is created inside the skill's mount path.
        if skill_loader.should_prepare_skill_python_env(package_root):
            wrapped = skill_loader.wrap_skill_command_with_python_env(
                command, mount_path=f'/workspace/.skills/{skill_name}'
            )
            # Work on a copy so the caller's parameter mapping is left untouched.
            parameters = dict(parameters)
            parameters['command'] = wrapped

    # All exec calls (with or without skills) go through the same container
    # via execute_tool. Skills are mounted at /workspace/.skills/{name}/
    # via extra_mounts built by BoxService.
    outcome = await self.ap.box_service.execute_tool(parameters, query)

    if skill is not None:
        self._refresh_skill_from_disk(skill)
    return outcome
def _resolve_host_path(
    self,
    query: pipeline_query.Query,
    sandbox_path: str,
    *,
    include_visible: bool,
    include_activated: bool,
) -> tuple[str, dict | None]:
    """Map a sandbox path under ``/workspace`` to a real path on the host.

    The host root is the selected skill's ``package_root`` when the path
    addresses a skill, otherwise the Box service's ``default_workspace``.

    Returns:
        ``(host_path, selected_skill)``; ``selected_skill`` is ``None`` for
        plain workspace paths.

    Raises:
        ValueError: no host root is configured, the path is not
            ``/workspace`` or below it, or the resolved path leaves the root.
    """
    selected_skill, rewritten_path = skill_loader.resolve_virtual_skill_path(
        self.ap,
        query,
        sandbox_path,
        include_visible=include_visible,
        include_activated=include_activated,
    )

    box_service = self.ap.box_service
    host_root = selected_skill.get('package_root') if selected_skill is not None else box_service.default_workspace
    if not host_root:
        raise ValueError('No host workspace configured for file operations.')

    mount_path = '/workspace'
    # Match on a path-component boundary: a bare prefix test would accept
    # '/workspace-x/f' and silently map it to '<root>/-x/f'.
    if rewritten_path != mount_path and not rewritten_path.startswith(mount_path + '/'):
        raise ValueError(f'Path must be under {mount_path}.')

    relative = rewritten_path[len(mount_path) :].lstrip('/')
    host_path = os.path.realpath(os.path.join(host_root, relative))
    host_root = os.path.realpath(host_root)
    # realpath has resolved '..' and symlinks, so this catches any escape.
    if not (host_path == host_root or host_path.startswith(host_root + os.sep)):
        raise ValueError('Path escapes the workspace boundary.')
    return host_path, selected_skill
def _resolve_skill_relative_path(
    self,
    query: pipeline_query.Query,
    sandbox_path: str,
    *,
    include_visible: bool,
    include_activated: bool,
) -> tuple[dict, str] | None:
    """Resolve a sandbox path to ``(skill, path relative to the skill root)``.

    Returns ``None`` when the path does not address a skill. The relative
    path is ``'.'`` when the path points at the skill root itself.

    Raises:
        ValueError: the rewritten path is not ``/workspace`` or below it.
    """
    selected_skill, rewritten_path = skill_loader.resolve_virtual_skill_path(
        self.ap,
        query,
        sandbox_path,
        include_visible=include_visible,
        include_activated=include_activated,
    )
    if selected_skill is None:
        return None

    mount_path = '/workspace'
    # Match on a path-component boundary: a bare prefix test would accept
    # '/workspace-x/f' and treat it as '-x/f' inside the skill.
    if rewritten_path != mount_path and not rewritten_path.startswith(mount_path + '/'):
        raise ValueError(f'Path must be under {mount_path}.')

    relative = rewritten_path[len(mount_path) :].lstrip('/') or '.'
    return selected_skill, relative
def _should_use_box_workspace_files(self, selected_skill: dict | None) -> bool:
    """Decide whether plain workspace file operations must run inside the Box.

    True only when no skill is selected, the Box service can execute tools,
    and its default workspace is set but is not a directory on this host.
    """
    if selected_skill is not None:
        return False

    service = getattr(self.ap, 'box_service', None)
    if service is None or not hasattr(service, 'execute_tool'):
        return False

    workspace_root = getattr(service, 'default_workspace', None)
    if not workspace_root:
        return False
    return not os.path.isdir(os.path.realpath(workspace_root))
async def _run_workspace_file_script(self, script: str, query: pipeline_query.Query) -> dict:
    """Run ``script`` with ``python`` inside the Box and decode its JSON result.

    The script is fed through a quoted heredoc and must print its result as
    JSON on its last output line. A failed execution or undecodable output
    yields ``{'ok': False, 'error': ...}``.
    """
    request = {
        'command': f"python - <<'PY'\n{script}\nPY",
        'timeout_sec': 30,
    }
    outcome = await self.ap.box_service.execute_tool(request, query)

    if not outcome.get('ok'):
        failure = outcome.get('stderr') or outcome.get('stdout') or 'Box execution failed'
        return {'ok': False, 'error': failure}

    printed = str(outcome.get('stdout') or '').strip()
    try:
        # Only the last line carries the result; earlier lines are ignored.
        last_line = printed.splitlines()[-1]
        return json.loads(last_line)
    except Exception:
        return {'ok': False, 'error': printed or 'Box file operation returned no result'}
async def _read_workspace_via_box(self, path: str, query: pipeline_query.Query) -> dict:
    """Read a file, or list a directory, under ``/workspace`` from inside the Box.

    Returns the dict printed by the generated script: ``{'ok': True,
    'content': ...}`` (plus ``'is_directory': True`` for directories) or
    ``{'ok': False, 'error': ...}``.
    """
    # Values are embedded as a JSON document that the script decodes itself.
    # Pasting json.dumps() output in as a Python literal would turn non-BMP
    # characters into lone surrogate pairs.
    script = f"""
import json, os
path = json.loads({json.dumps(json.dumps(path))})
normalized = os.path.normpath(path)
if normalized != '/workspace' and not normalized.startswith('/workspace/'):
    print(json.dumps({{'ok': False, 'error': 'Path must be under /workspace.'}}))
elif not os.path.exists(path):
    print(json.dumps({{'ok': False, 'error': f'File not found: {{path}}'}}))
elif os.path.isdir(path):
    print(json.dumps({{'ok': True, 'content': '\\n'.join(sorted(os.listdir(path))), 'is_directory': True}}))
else:
    with open(path, 'r', encoding='utf-8', errors='replace') as f:
        print(json.dumps({{'ok': True, 'content': f.read()}}))
""".strip()
    return await self._run_workspace_file_script(script, query)
async def _write_workspace_via_box(self, path: str, content: str, query: pipeline_query.Query) -> dict:
    """Create or overwrite a UTF-8 file under ``/workspace`` from inside the Box.

    Missing parent directories are created. Returns the dict printed by the
    generated script: ``{'ok': True, 'path': path}`` or
    ``{'ok': False, 'error': ...}``.
    """
    # Values are embedded as a JSON document that the script decodes itself.
    # Pasting json.dumps() output in as a Python literal would turn non-BMP
    # characters (e.g. emoji) into lone surrogates that cannot be written as
    # UTF-8.
    script = f"""
import json, os
path = json.loads({json.dumps(json.dumps(path))})
content = json.loads({json.dumps(json.dumps(content))})
normalized = os.path.normpath(path)
if normalized != '/workspace' and not normalized.startswith('/workspace/'):
    print(json.dumps({{'ok': False, 'error': 'Path must be under /workspace.'}}))
else:
    os.makedirs(os.path.dirname(path) or '/workspace', exist_ok=True)
    with open(path, 'w', encoding='utf-8') as f:
        f.write(content)
    print(json.dumps({{'ok': True, 'path': path}}))
""".strip()
    return await self._run_workspace_file_script(script, query)
async def _edit_workspace_via_box(
    self,
    path: str,
    old_string: str,
    new_string: str,
    query: pipeline_query.Query,
) -> dict:
    """Replace the single occurrence of ``old_string`` in a Box file under ``/workspace``.

    The edit is refused when ``old_string`` occurs zero times or more than
    once. Returns the dict printed by the generated script.
    """
    # Values are embedded as a JSON document that the script decodes itself.
    # Pasting json.dumps() output in as a Python literal would turn non-BMP
    # characters into lone surrogates, so such an old_string could never match.
    script = f"""
import json, os
path = json.loads({json.dumps(json.dumps(path))})
old_string = json.loads({json.dumps(json.dumps(old_string))})
new_string = json.loads({json.dumps(json.dumps(new_string))})
normalized = os.path.normpath(path)
if normalized != '/workspace' and not normalized.startswith('/workspace/'):
    print(json.dumps({{'ok': False, 'error': 'Path must be under /workspace.'}}))
elif not os.path.isfile(path):
    print(json.dumps({{'ok': False, 'error': f'File not found: {{path}}'}}))
else:
    with open(path, 'r', encoding='utf-8', errors='replace') as f:
        content = f.read()
    count = content.count(old_string)
    if count == 0:
        print(json.dumps({{'ok': False, 'error': 'old_string not found in file.'}}))
    elif count > 1:
        print(json.dumps({{'ok': False, 'error': f'old_string matches {{count}} locations; provide a more unique string.'}}))
    else:
        with open(path, 'w', encoding='utf-8') as f:
            f.write(content.replace(old_string, new_string, 1))
        print(json.dumps({{'ok': True, 'path': path}}))
""".strip()
    return await self._run_workspace_file_script(script, query)
async def _glob_workspace_via_box(self, path: str, pattern: str, query: pipeline_query.Query) -> dict:
    """Recursively glob ``pattern`` below a Box directory under ``/workspace``.

    Hits inside skipped directories are dropped. The rest are sorted newest
    first and capped at 100. Returns the dict printed by the generated
    script: ``matches``, ``total`` and ``truncated`` on success.
    """
    # Values are embedded as a JSON document that the script decodes itself.
    # Pasting json.dumps() output in as a Python literal would turn non-BMP
    # characters into lone surrogate pairs. The skip filter looks only at
    # components below the search root, so searching inside e.g.
    # /workspace/build still returns results.
    script = f"""
import json, os
from pathlib import Path
path = json.loads({json.dumps(json.dumps(path))})
pattern = json.loads({json.dumps(json.dumps(pattern))})
skip_dirs = {json.dumps(sorted(_SKIP_DIRS))}
normalized = os.path.normpath(path)
if normalized != '/workspace' and not normalized.startswith('/workspace/'):
    print(json.dumps({{'ok': False, 'error': 'Path must be under /workspace.'}}))
elif not os.path.isdir(path):
    print(json.dumps({{'ok': False, 'error': f'Path is not a directory: {{path}}'}}))
else:
    base = Path(path)
    hits = [
        item for item in base.rglob(pattern)
        if not any(part in skip_dirs for part in item.relative_to(base).parts)
    ]
    hits.sort(key=lambda item: item.stat().st_mtime if item.exists() else 0, reverse=True)
    shown = hits[:100]
    matches = []
    for item in shown:
        rel = os.path.relpath(str(item), path)
        matches.append(os.path.join(path, rel).replace(os.sep, '/'))
    print(json.dumps({{'ok': True, 'matches': matches, 'total': len(hits), 'truncated': len(hits) > 100}}))
""".strip()
    return await self._run_workspace_file_script(script, query)
async def _grep_workspace_via_box(
    self,
    path: str,
    pattern: str,
    include: str | None,
    query: pipeline_query.Query,
) -> dict:
    """Regex-search a Box file or directory tree under ``/workspace``.

    ``include`` optionally restricts the walk to files matching a glob. At
    most 5000 files are scanned and at most 200 matching lines returned.
    Returns the dict printed by the generated script: ``matches`` (file,
    line, content), ``total`` and ``truncated`` on success.
    """
    # Values are embedded as a JSON document that the script decodes itself.
    # Pasting json.dumps() output in as a Python literal renders
    # include=None as `null` (a NameError in the script) and turns non-BMP
    # characters into lone surrogate pairs. The skip filter looks only at
    # components below the search root.
    script = f"""
import json, os, re
from pathlib import Path
path = json.loads({json.dumps(json.dumps(path))})
pattern = json.loads({json.dumps(json.dumps(pattern))})
include = json.loads({json.dumps(json.dumps(include))})
skip_dirs = {json.dumps(sorted(_SKIP_DIRS))}
try:
    regex = re.compile(pattern)
except re.error as exc:
    print(json.dumps({{'ok': False, 'error': f'Invalid regex: {{exc}}'}}))
else:
    normalized = os.path.normpath(path)
    if normalized != '/workspace' and not normalized.startswith('/workspace/'):
        print(json.dumps({{'ok': False, 'error': 'Path must be under /workspace.'}}))
    elif not os.path.exists(path):
        print(json.dumps({{'ok': False, 'error': f'Path not found: {{path}}'}}))
    else:
        base = Path(path)
        if base.is_file():
            files = [base]
        else:
            files = []
            for item in base.rglob(include or '*'):
                if any(part in skip_dirs for part in item.relative_to(base).parts):
                    continue
                if item.is_file():
                    files.append(item)
                    if len(files) >= 5000:
                        break
        matches = []
        for fp in files:
            try:
                text = fp.read_text(errors='ignore')
            except OSError:
                continue
            for lineno, line in enumerate(text.splitlines(), 1):
                if regex.search(line):
                    if base.is_file():
                        file_path = path
                    else:
                        rel = os.path.relpath(str(fp), path)
                        file_path = os.path.join(path, rel).replace(os.sep, '/')
                    matches.append({{'file': file_path, 'line': lineno, 'content': line.rstrip()}})
                    if len(matches) >= 200:
                        break
            if len(matches) >= 200:
                break
        print(json.dumps({{'ok': True, 'matches': matches, 'total': len(matches), 'truncated': len(matches) >= 200}}))
""".strip()
    return await self._run_workspace_file_script(script, query)
async def _invoke_read(self, parameters: dict, query: pipeline_query.Query) -> dict:
    """Handle the ``read`` tool: return a file's text or a directory listing.

    Skill paths go through the Box service's skill-file API when it exists.
    Otherwise the path is resolved on the host, or read inside the Box when
    the workspace is not a local directory. Directory results carry
    ``'is_directory': True`` and a newline-joined, sorted listing.
    """
    path = parameters['path']
    self.ap.logger.info(f'read tool invoked: query_id={query.query_id} path={path}')

    skill_request = self._resolve_skill_relative_path(
        query,
        path,
        include_visible=True,
        include_activated=True,
    )
    if skill_request is not None and hasattr(self.ap.box_service, 'read_skill_file'):
        selected_skill, relative = skill_request
        try:
            result = await self.ap.box_service.read_skill_file(selected_skill['name'], relative)
            return {'ok': True, 'content': result.get('content', '')}
        except Exception:
            # Reading failed; the path may be a directory, so try listing it.
            try:
                result = await self.ap.box_service.list_skill_files(selected_skill['name'], relative)
                entries = [entry['name'] for entry in result.get('entries', [])]
                return {'ok': True, 'content': '\n'.join(sorted(entries)), 'is_directory': True}
            except Exception as exc:
                return {'ok': False, 'error': str(exc)}

    host_path, selected_skill = self._resolve_host_path(
        query,
        path,
        include_visible=True,
        include_activated=True,
    )
    if self._should_use_box_workspace_files(selected_skill):
        return await self._read_workspace_via_box(path, query)

    if not os.path.exists(host_path):
        return {'ok': False, 'error': f'File not found: {path}'}

    if os.path.isdir(host_path):
        entries = os.listdir(host_path)
        return {'ok': True, 'content': '\n'.join(sorted(entries)), 'is_directory': True}

    # Decode as UTF-8 like the write and edit tools do; without an explicit
    # encoding the result would depend on the host locale.
    with open(host_path, 'r', encoding='utf-8', errors='replace') as f:
        content = f.read()
    return {'ok': True, 'content': content}
async def _invoke_write(self, parameters: dict, query: pipeline_query.Query) -> dict:
    """Handle the ``write`` tool: create or overwrite a file with the given content.

    Only activated skills can be written through a skill path. Skill writes
    go through the Box service's skill-file API when it exists and trigger a
    skill reload. Other writes land on the host, or inside the Box when the
    workspace is not a local directory.
    """
    path, content = parameters['path'], parameters['content']
    self.ap.logger.info(f'write tool invoked: query_id={query.query_id} path={path} length={len(content)}')

    skill_target = self._resolve_skill_relative_path(
        query,
        path,
        include_visible=False,
        include_activated=True,
    )
    if skill_target is not None and hasattr(self.ap.box_service, 'write_skill_file'):
        skill, relative = skill_target
        await self.ap.box_service.write_skill_file(skill['name'], relative, content)
        await self.ap.skill_mgr.reload_skills()
        return {'ok': True, 'path': path}

    host_path, skill = self._resolve_host_path(
        query,
        path,
        include_visible=False,
        include_activated=True,
    )
    if self._should_use_box_workspace_files(skill):
        return await self._write_workspace_via_box(path, content, query)

    parent_dir = os.path.dirname(host_path)
    os.makedirs(parent_dir, exist_ok=True)
    with open(host_path, 'w', encoding='utf-8') as handle:
        handle.write(content)

    self._refresh_skill_from_disk(skill)
    return {'ok': True, 'path': path}
async def _invoke_edit(self, parameters: dict, query: pipeline_query.Query) -> dict:
    """Handle the ``edit`` tool: replace the one occurrence of ``old_string`` in a file.

    The edit is refused when ``old_string`` occurs zero times or more than
    once. Skill files are edited through the Box service's skill-file API
    when it exists, followed by a skill reload. Other files are edited on the
    host, or inside the Box when the workspace is not a local directory.
    """
    path = parameters['path']
    old_string = parameters['old_string']
    new_string = parameters['new_string']
    self.ap.logger.info(
        f'edit tool invoked: query_id={query.query_id} path={path} '
        f'old_len={len(old_string)} new_len={len(new_string)}'
    )

    def _replace_once(text):
        """Return ``(new_text, None)`` on success or ``(None, error_result)``."""
        occurrences = text.count(old_string)
        if occurrences == 0:
            return None, {'ok': False, 'error': 'old_string not found in file.'}
        if occurrences > 1:
            return None, {
                'ok': False,
                'error': f'old_string matches {occurrences} locations; provide a more unique string.',
            }
        return text.replace(old_string, new_string, 1), None

    skill_target = self._resolve_skill_relative_path(
        query,
        path,
        include_visible=False,
        include_activated=True,
    )
    box_service = self.ap.box_service
    skill_api_present = hasattr(box_service, 'read_skill_file') and hasattr(box_service, 'write_skill_file')
    if skill_target is not None and skill_api_present:
        skill, relative = skill_target
        try:
            loaded = await self.ap.box_service.read_skill_file(skill['name'], relative)
        except Exception:
            return {'ok': False, 'error': f'File not found: {path}'}

        updated, failure = _replace_once(loaded.get('content', ''))
        if failure is not None:
            return failure
        await self.ap.box_service.write_skill_file(skill['name'], relative, updated)
        await self.ap.skill_mgr.reload_skills()
        return {'ok': True, 'path': path}

    host_path, skill = self._resolve_host_path(
        query,
        path,
        include_visible=False,
        include_activated=True,
    )
    if self._should_use_box_workspace_files(skill):
        return await self._edit_workspace_via_box(path, old_string, new_string, query)

    if not os.path.isfile(host_path):
        return {'ok': False, 'error': f'File not found: {path}'}

    with open(host_path, 'r', encoding='utf-8', errors='replace') as handle:
        current = handle.read()

    updated, failure = _replace_once(current)
    if failure is not None:
        return failure

    with open(host_path, 'w', encoding='utf-8') as handle:
        handle.write(updated)

    self._refresh_skill_from_disk(skill)
    return {'ok': True, 'path': path}
def _refresh_skill_from_disk(self, selected_skill: dict | None) -> None:
    """Ask the skill manager, if it supports it, to re-read the given skill from disk.

    Does nothing when no skill is given, when the app has no ``skill_mgr``,
    or when the manager has no callable ``refresh_skill_from_disk``.
    """
    if selected_skill is None:
        return

    manager = getattr(self.ap, 'skill_mgr', None)
    refresher = getattr(manager, 'refresh_skill_from_disk', None) if manager is not None else None
    if not callable(refresher):
        return
    refresher(selected_skill.get('name', ''))
def _is_sandbox_available(self) -> bool:
    """Return the backend availability cached by ``initialize``.

    This is the result of the startup probe, not a live check; before
    ``initialize`` has run the cached value is ``None`` and this returns
    False.
    """
    return True if self._backend_available else False
def _build_exec_tool(self) -> resource_tool.LLMTool:
    """Build the ``exec`` tool definition: run a shell command in the sandbox."""
    description = (
        'Run shell commands in an isolated execution environment. '
        'Use this tool for bash commands, Python execution, and exact calculations over '
        'user-provided data. Activated skill packages are addressable under '
        '/workspace/.skills/<skill-name>; when running inside one, set workdir to that path. '
        'To create a new skill package, prepare it under /workspace first, then use register_skill.'
    )
    properties = {
        'command': {
            'type': 'string',
            'description': 'Shell command to execute.',
        },
        'workdir': {
            'type': 'string',
            'description': 'Working directory for the command. Defaults to /workspace.',
            'default': '/workspace',
        },
        'timeout_sec': {
            'type': 'integer',
            'description': 'Execution timeout in seconds. Defaults to 30.',
            'default': 30,
            'minimum': 1,
        },
        'env': {
            'type': 'object',
            'description': 'Optional environment variables for the execution.',
            'additionalProperties': {'type': 'string'},
            'default': {},
        },
        'description': {
            'type': 'string',
            'description': 'Brief description of what this command does, for logging and audit.',
        },
    }
    schema = {
        'type': 'object',
        'properties': properties,
        'required': ['command'],
        'additionalProperties': False,
    }
    return resource_tool.LLMTool(
        name=EXEC_TOOL_NAME,
        human_desc='Execute a command in an isolated environment',
        description=description,
        parameters=schema,
        # The callable just echoes its parameters back.
        func=lambda parameters: parameters,
    )
def _build_read_tool(self) -> resource_tool.LLMTool:
    """Build the ``read`` tool definition: read a file under ``/workspace``."""
    description = (
        'Read the contents of a file at the given path under /workspace. '
        'Visible skill packages can be inspected through /workspace/.skills/<skill-name>/... .'
    )
    properties = {
        'path': {
            'type': 'string',
            'description': 'Absolute path to the file (must be under /workspace).',
        },
    }
    schema = {
        'type': 'object',
        'properties': properties,
        'required': ['path'],
        'additionalProperties': False,
    }
    return resource_tool.LLMTool(
        name=READ_TOOL_NAME,
        human_desc='Read a file from the workspace',
        description=description,
        parameters=schema,
        # The callable just echoes its parameters back.
        func=lambda parameters: parameters,
    )
def _build_write_tool(self) -> resource_tool.LLMTool:
    """Build the ``write`` tool definition: create or overwrite a file under ``/workspace``."""
    description = (
        'Create or overwrite a file at the given path under /workspace with the provided content. '
        'Activated skill packages can be modified through /workspace/.skills/<skill-name>/... . '
        'For new skills, write files under /workspace and then call register_skill.'
    )
    properties = {
        'path': {
            'type': 'string',
            'description': 'Absolute path to the file (must be under /workspace).',
        },
        'content': {
            'type': 'string',
            'description': 'Content to write to the file.',
        },
    }
    schema = {
        'type': 'object',
        'properties': properties,
        'required': ['path', 'content'],
        'additionalProperties': False,
    }
    return resource_tool.LLMTool(
        name=WRITE_TOOL_NAME,
        human_desc='Write a file to the workspace',
        description=description,
        parameters=schema,
        # The callable just echoes its parameters back.
        func=lambda parameters: parameters,
    )
def _build_edit_tool(self) -> resource_tool.LLMTool:
    """Build the ``edit`` tool definition: exact single-occurrence string replacement."""
    description = (
        'Perform an exact string replacement in a file under /workspace. '
        'The old_string must appear exactly once in the file. Activated skill packages '
        'can be edited through /workspace/.skills/<skill-name>/... . '
        'For new skills, edit files under /workspace and then call register_skill.'
    )
    properties = {
        'path': {
            'type': 'string',
            'description': 'Absolute path to the file (must be under /workspace).',
        },
        'old_string': {
            'type': 'string',
            'description': 'The exact string to find and replace.',
        },
        'new_string': {
            'type': 'string',
            'description': 'The replacement string.',
        },
    }
    schema = {
        'type': 'object',
        'properties': properties,
        'required': ['path', 'old_string', 'new_string'],
        'additionalProperties': False,
    }
    return resource_tool.LLMTool(
        name=EDIT_TOOL_NAME,
        human_desc='Edit a file in the workspace',
        description=description,
        parameters=schema,
        # The callable just echoes its parameters back.
        func=lambda parameters: parameters,
    )
def _build_glob_tool(self) -> resource_tool.LLMTool:
    """Build the ``glob`` tool definition: find files by glob pattern under ``/workspace``."""
    description = (
        'Find files matching a glob pattern under /workspace. '
        'Supports ** for recursive matching (e.g. **/*.py). '
        'Results are sorted by modification time (newest first). '
        'Visible and activated skill packages can be searched through /workspace/.skills/<skill-name>/...'
    )
    properties = {
        'pattern': {
            'type': 'string',
            'description': 'Glob pattern, e.g. **/*.py or src/**/*.ts',
        },
        'path': {
            'type': 'string',
            'description': 'Directory to search in (must be under /workspace, default: /workspace)',
            'default': '/workspace',
        },
    }
    schema = {
        'type': 'object',
        'properties': properties,
        'required': ['pattern'],
        'additionalProperties': False,
    }
    return resource_tool.LLMTool(
        name=GLOB_TOOL_NAME,
        human_desc='Find files matching a glob pattern',
        description=description,
        parameters=schema,
        # The callable just echoes its parameters back.
        func=lambda parameters: parameters,
    )
def _build_grep_tool(self) -> resource_tool.LLMTool:
    """Build the tool descriptor for regex search over workspace files.

    Only ``pattern`` is required; ``path`` declares ``/workspace`` as its
    default and ``include`` is an optional glob filter. ``func`` returns its
    argument as is.
    """
    tool_description = (
        'Search file contents with regex pattern under /workspace. '
        'Returns matching lines with file path and line number. '
        'Visible and activated skill packages can be searched through /workspace/.skills/<skill-name>/...'
    )
    properties = {
        'pattern': {
            'type': 'string',
            'description': 'Regex pattern to search for',
        },
        'path': {
            'type': 'string',
            'description': 'File or directory to search (must be under /workspace, default: /workspace)',
            'default': '/workspace',
        },
        'include': {
            'type': 'string',
            'description': 'Only search files matching this glob (e.g. *.py)',
        },
    }
    schema = {
        'type': 'object',
        'properties': properties,
        'required': ['pattern'],
        'additionalProperties': False,
    }
    return resource_tool.LLMTool(
        name=GREP_TOOL_NAME,
        human_desc='Search file contents with regex',
        description=tool_description,
        parameters=schema,
        func=lambda parameters: parameters,
    )
async def _invoke_glob(self, parameters: dict, query: pipeline_query.Query) -> dict:
    """Find files under a sandbox directory that match a glob pattern.

    Args:
        parameters: Tool arguments; ``pattern`` is required and ``path``
            falls back to ``/workspace`` when missing or empty.
        query: Pipeline query used to resolve the sandbox path.

    Returns:
        ``{'ok': True, 'matches', 'total', 'truncated'}`` holding at most 100
        sandbox paths sorted newest-first, or ``{'ok': False, 'error'}`` when
        the path is not a directory or pathlib rejects the pattern. When the
        box workspace is in use, the result of ``_glob_workspace_via_box`` is
        returned unchanged.
    """
    max_results = 100
    pattern = parameters['pattern']
    path = str(parameters.get('path', '/workspace') or '/workspace')
    self.ap.logger.info(f'glob tool invoked: query_id={query.query_id} pattern={pattern} path={path}')

    host_path, selected_skill = self._resolve_host_path(
        query,
        path,
        include_visible=True,
        include_activated=True,
    )
    if self._should_use_box_workspace_files(selected_skill):
        return await self._glob_workspace_via_box(path, pattern, query)
    if not os.path.isdir(host_path):
        return {'ok': False, 'error': f'Path is not a directory: {path}'}

    from pathlib import Path

    base = Path(host_path)
    try:
        # Skip-dir names are tested against the part of the path below the
        # search root only, so a matching name in the host location of the
        # workspace itself cannot hide every result.
        hits = [
            hit
            for hit in base.rglob(pattern)
            if not any(skip in hit.relative_to(base).parts for skip in _SKIP_DIRS)
        ]
    except (ValueError, NotImplementedError) as e:
        # pathlib raises these for empty or non-relative patterns.
        return {'ok': False, 'error': f'Invalid glob pattern: {e}'}

    def _mtime(candidate) -> float:
        # A file may vanish between listing and stat; sort such entries last.
        try:
            return candidate.stat().st_mtime
        except OSError:
            return 0

    # Sort by mtime, newest first
    hits.sort(key=_mtime, reverse=True)
    total = len(hits)

    # Convert back to sandbox paths
    sandbox_paths = [os.path.join(path, os.path.relpath(str(hit), host_path)) for hit in hits[:max_results]]
    return {
        'ok': True,
        'matches': sandbox_paths,
        'total': total,
        'truncated': total > max_results,
    }
async def _invoke_grep(self, parameters: dict, query: pipeline_query.Query) -> dict:
    """Search file contents under a sandbox path with a regular expression.

    Args:
        parameters: Tool arguments; ``pattern`` is required, ``path`` falls
            back to ``/workspace`` and ``include`` is an optional glob filter.
        query: Pipeline query used to resolve the sandbox path.

    Returns:
        ``{'ok': True, 'matches', 'total', 'truncated'}`` with at most 200
        ``{'file', 'line', 'content'}`` entries, or ``{'ok': False, 'error'}``
        for an invalid regex, an invalid include glob or a missing path. When
        the box workspace is in use, the result of ``_grep_workspace_via_box``
        is returned unchanged.
    """
    max_matches = 200
    pattern = parameters['pattern']
    path = str(parameters.get('path', '/workspace') or '/workspace')
    include = parameters.get('include')
    self.ap.logger.info(f'grep tool invoked: query_id={query.query_id} pattern={pattern} path={path}')

    import re
    from pathlib import Path

    try:
        regex = re.compile(pattern)
    except re.error as e:
        return {'ok': False, 'error': f'Invalid regex: {e}'}

    host_path, selected_skill = self._resolve_host_path(
        query,
        path,
        include_visible=True,
        include_activated=True,
    )
    if self._should_use_box_workspace_files(selected_skill):
        return await self._grep_workspace_via_box(path, pattern, include, query)
    if not os.path.exists(host_path):
        return {'ok': False, 'error': f'Path not found: {path}'}

    base = Path(host_path)
    single_file = base.is_file()
    if single_file:
        files = [base]
    else:
        try:
            files = self._grep_walk(base, include)
        except (ValueError, NotImplementedError) as e:
            # pathlib raises these for non-relative include globs.
            return {'ok': False, 'error': f'Invalid include pattern: {e}'}

    matches = []
    for fp in files:
        try:
            text = fp.read_text(encoding='utf-8', errors='ignore')
        except OSError:
            continue
        if single_file:
            # relpath of a file against itself is '.', which would yield
            # '<path>/.'; the requested path already names the file.
            sandbox_path = path
        else:
            sandbox_path = os.path.join(path, os.path.relpath(str(fp), host_path))
        for lineno, line in enumerate(text.splitlines(), 1):
            if not regex.search(line):
                continue
            matches.append(
                {
                    'file': sandbox_path,
                    'line': lineno,
                    'content': line.rstrip(),
                }
            )
            if len(matches) >= max_matches:
                break
        if len(matches) >= max_matches:
            break

    return {
        'ok': True,
        'matches': matches,
        'total': len(matches),
        'truncated': len(matches) >= max_matches,
    }
@staticmethod
def _grep_walk(root, include: str | None) -> list:
    """Collect up to 5000 regular files below *root* for grep.

    Args:
        root: Directory to walk; must offer ``rglob`` and be the parent of
            every yielded path (a ``pathlib.Path`` in the visible caller).
        include: Optional glob passed to ``rglob``; ``'*'`` when falsy.

    Returns:
        Files whose path below *root* contains no ``_SKIP_DIRS`` entry.
    """
    results = []
    for item in root.rglob(include or '*'):
        # Only components below the search root are checked, so a skip name
        # in the root's own host location does not filter out everything.
        if any(skip in item.relative_to(root).parts for skip in _SKIP_DIRS):
            continue
        if item.is_file():
            results.append(item)
            if len(results) >= 5000:
                break
    return results
def _summarize_parameters(self, parameters: dict) -> dict:
    """Return a shallow copy of *parameters* condensed for logging.

    ``command`` is always present in the result, stripped and cut to 400
    characters (397 plus ``...``). A dict-valued ``env`` is replaced by
    ``env_keys``, the sorted stringified key names.
    """
    condensed = dict(parameters)

    command_text = str(condensed.get('command', '')).strip()
    condensed['command'] = command_text if len(command_text) <= 400 else command_text[:397] + '...'

    env_value = condensed.get('env')
    if not isinstance(env_value, dict):
        return condensed

    condensed['env_keys'] = sorted(map(str, env_value))
    condensed.pop('env')
    return condensed
@@ -0,0 +1,157 @@
from __future__ import annotations
import re
import typing
from ....box import workspace as box_workspace
if typing.TYPE_CHECKING:
from ....core import app
from langbot_plugin.api.entities.events import pipeline_query
# Key in ``query.variables`` under which the dict of activated skills is kept.
ACTIVATED_SKILLS_KEY = '_activated_skills'
# Key in ``query.variables`` under which the list of pipeline-bound skill names is kept.
PIPELINE_BOUND_SKILLS_KEY = '_pipeline_bound_skills'
# Virtual sandbox directory below which skill packages are addressed.
SKILL_MOUNT_PREFIX = '/workspace/.skills'
# Matches a skill mount path and captures the skill-name segment.
_SKILL_MOUNT_PATTERN = re.compile(r'/workspace/\.skills/([A-Za-z0-9_-]+)')
def get_virtual_skill_mount_path(skill_name: str) -> str:
    """Return the virtual sandbox path of *skill_name* below the skill mount prefix."""
    return '{}/{}'.format(SKILL_MOUNT_PREFIX, skill_name)
def get_bound_skill_names(query: pipeline_query.Query) -> list[str] | None:
    """Return the pipeline-bound skill names stored on *query*.

    Returns ``None`` when the query has no variables, the key is absent, or
    the stored value is not a list; otherwise the items converted to ``str``.
    """
    variables = query.variables
    if variables is None:
        return None

    raw_value = variables.get(PIPELINE_BOUND_SKILLS_KEY)
    if not isinstance(raw_value, list):
        return None
    return list(map(str, raw_value))
def get_visible_skills(ap: app.Application, query: pipeline_query.Query) -> dict[str, dict]:
    """Return the skills this query may see, keyed by skill name.

    Yields an empty dict when the application has no ``skill_mgr``. Without a
    bound-skill list the manager's full ``skills`` mapping is returned as is;
    otherwise a new dict restricted to the bound names.
    """
    manager = getattr(ap, 'skill_mgr', None)
    if manager is None:
        return {}

    all_skills = getattr(manager, 'skills', {})
    allowed_names = get_bound_skill_names(query)
    if allowed_names is None:
        return all_skills

    filtered = {}
    for name, data in all_skills.items():
        if name in allowed_names:
            filtered[name] = data
    return filtered
def get_visible_skill(ap: app.Application, query: pipeline_query.Query, skill_name: str) -> dict | None:
    """Return the visible skill named *skill_name*, or ``None`` if it is not visible."""
    visible = get_visible_skills(ap, query)
    return visible.get(skill_name)
def get_activated_skills(query: pipeline_query.Query) -> dict[str, dict]:
    """Return the activated-skills dict stored on *query*.

    An empty dict is returned when the query has no variables or the stored
    value is not a dict.
    """
    variables = query.variables
    if variables is None:
        return {}

    candidate = variables.get(ACTIVATED_SKILLS_KEY, {})
    return candidate if isinstance(candidate, dict) else {}
def get_activated_skill(query: pipeline_query.Query, skill_name: str) -> dict | None:
    """Return the activated skill named *skill_name*, or ``None`` if it is not activated."""
    activated = get_activated_skills(query)
    return activated.get(skill_name)
def register_activated_skill(query: pipeline_query.Query, skill_data: dict) -> None:
    """Record *skill_data* as activated on *query*, keyed by its stripped name.

    Creates ``query.variables`` and the activated-skills entry when missing.
    Nothing is stored when the name is empty or already registered.
    """
    if query.variables is None:
        query.variables = {}
    registry = query.variables.setdefault(ACTIVATED_SKILLS_KEY, {})

    name = str(skill_data.get('name', '') or '').strip()
    if not name or name in registry:
        return
    registry[name] = skill_data
def parse_skill_mount_path(sandbox_path: str) -> tuple[str | None, str]:
    """Split a sandbox path into a skill name and a ``/workspace``-rooted path.

    Returns ``(None, path)`` for paths outside the skill mount prefix, with
    an empty path treated as ``/workspace``. For paths inside it, returns the
    skill name and the remainder re-rooted at ``/workspace``.

    Raises:
        ValueError: If the path is the mount prefix itself or names no skill.
    """
    cleaned = str(sandbox_path or '/workspace').strip() or '/workspace'
    missing_name_message = f'Path must include a skill name under {SKILL_MOUNT_PREFIX}/<skill-name>.'

    if cleaned == SKILL_MOUNT_PREFIX:
        raise ValueError(missing_name_message)

    mount_root = f'{SKILL_MOUNT_PREFIX}/'
    if not cleaned.startswith(mount_root):
        return None, cleaned

    skill_name, slash, rest = cleaned[len(mount_root) :].partition('/')
    if not skill_name:
        raise ValueError(missing_name_message)

    return skill_name, (f'/workspace/{rest}' if slash else '/workspace')
def resolve_virtual_skill_path(
    ap: app.Application,
    query: pipeline_query.Query,
    sandbox_path: str,
    *,
    include_visible: bool,
    include_activated: bool,
) -> tuple[dict | None, str]:
    """Resolve a sandbox path to the skill it addresses and a rewritten path.

    Returns ``(None, path)`` for paths outside the skill mount. Otherwise the
    activated skills are consulted first (if enabled), then the visible
    skills (if enabled), and the first hit is returned with the rewritten
    path.

    Raises:
        ValueError: If the path names a skill that none of the enabled
            lookups can find, or if ``parse_skill_mount_path`` rejects it.
    """
    skill_name, rewritten_path = parse_skill_mount_path(sandbox_path)
    if skill_name is None:
        return None, rewritten_path

    # Lookups stay lazy so the visible-skill query only runs when needed.
    lookups = []
    if include_activated:
        lookups.append(lambda: get_activated_skill(query, skill_name))
    if include_visible:
        lookups.append(lambda: get_visible_skill(ap, query, skill_name))

    for lookup in lookups:
        found = lookup()
        if found is not None:
            return found, rewritten_path

    activated_names = ', '.join(sorted(get_activated_skills(query).keys())) or 'none'
    visible_names = ', '.join(sorted(get_visible_skills(ap, query).keys())) or 'none'
    raise ValueError(
        f'Skill "{skill_name}" is not available at this path. '
        f'Activated skills: {activated_names}. Visible skills: {visible_names}.'
    )
def find_referenced_skill_names(text: str) -> list[str]:
    """Return the skill names mentioned as mount paths in *text*.

    Names appear once each, in order of first occurrence; empty text gives
    an empty list.
    """
    if not text:
        return []
    # dict.fromkeys keeps first-seen order while dropping repeats.
    return list(dict.fromkeys(_SKILL_MOUNT_PATTERN.findall(text)))
def rewrite_command_for_skill_mount(command: str, skill_name: str) -> str:
    """Rewrite references to a skill's virtual mount so they point at ``/workspace``.

    Args:
        command: Shell command text that may mention the skill's mount path.
        skill_name: Skill whose virtual mount path is replaced.

    Returns:
        *command* with every occurrence of the skill's mount path replaced by
        ``/workspace``. Paths of other skills whose names merely start with
        *skill_name* (``pdf`` vs ``pdf-tools``) are left untouched.
    """
    virtual_root = get_virtual_skill_mount_path(skill_name)
    # The lookahead stops the match from ending in the middle of a longer
    # skill name; the character class mirrors the one in _SKILL_MOUNT_PATTERN.
    mount_pattern = re.escape(virtual_root) + r'(?![A-Za-z0-9_-])'
    return re.sub(mount_pattern, lambda _match: '/workspace', command)
def build_skill_session_id(skill_data: dict, query: pipeline_query.Query) -> str:
    """Build the session id used for a skill within the scope of *query*.

    The id is scoped by ``launcher_type`` and ``launcher_id`` when both are
    set on the query, and by ``query_id`` otherwise. Missing values fall back
    to ``'unknown'``.
    """
    skill_label = str(skill_data.get('name', 'unknown') or 'unknown')
    launcher_type = getattr(query, 'launcher_type', None)
    launcher_id = getattr(query, 'launcher_id', None)
    query_id = getattr(query, 'query_id', 'unknown')

    if launcher_type is None or launcher_id is None:
        scope = f'{query_id}'
    else:
        scope = f'{launcher_type}_{launcher_id}'
    return f'skill-{scope}-{skill_label}'
def should_prepare_skill_python_env(package_root: str | None) -> bool:
    """Delegate to ``box_workspace.should_prepare_python_env`` for *package_root*."""
    decision = box_workspace.should_prepare_python_env(package_root)
    return decision
def wrap_skill_command_with_python_env(command: str, *, mount_path: str = '/workspace') -> str:
    """Wrap *command* via ``box_workspace.wrap_python_command_with_env``.

    Trailing whitespace is stripped from the wrapped command.
    """
    wrapped = box_workspace.wrap_python_command_with_env(command, mount_path=mount_path)
    return wrapped.rstrip()
@@ -0,0 +1,304 @@
from __future__ import annotations
import os
import typing
import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
from .. import loader
# Align with Claude Code's Skill tool design:
# - activate: Activate a skill via Tool Call, returns SKILL.md content
# - register_skill: Register a skill from sandbox directory to data/skills/
# - This protects KV Cache and follows industry standard
# Name of the tool that activates a skill.
ACTIVATE_SKILL_TOOL_NAME = 'activate'
# Name of the tool that registers a sandbox directory as a skill.
REGISTER_SKILL_TOOL_NAME = 'register_skill'
# Every tool name this loader answers for in ``has_tool``.
SKILL_TOOL_NAMES = {
ACTIVATE_SKILL_TOOL_NAME,
REGISTER_SKILL_TOOL_NAME,
}
class SkillToolLoader(loader.ToolLoader):
    """Skill tools aligned with Claude Code's design.

    Serves the ``activate`` and ``register_skill`` tools. Both are offered
    only while a skill manager exists and the sandbox backend reported itself
    available during ``initialize``.
    """

    def __init__(self, ap):
        super().__init__(ap)
        self._tools: list[resource_tool.LLMTool] = []
        self._sandbox_available: bool = False

    async def initialize(self):
        """Probe the sandbox backend and build the tool descriptors if it is ready.

        NOTE(review): the ``activate`` description embeds the skill list as it
        is at this moment; confirm whether it needs rebuilding after skills
        are registered later.
        """
        # Check if sandbox backend is available (same check as native tools)
        self._sandbox_available = await self._check_sandbox_available()

        if not self._sandbox_available:
            self.ap.logger.info(
                'Skill tools (activate/register_skill) are NOT available. '
                'No sandbox backend (Docker/nsjail/E2B) is ready.'
            )
            return

        self._tools = [
            self._build_activate_skill_tool(),
            self._build_register_skill_tool(),
        ]

    async def _check_sandbox_available(self) -> bool:
        """Check if the box backend is truly available (not just the runtime)."""
        box_service = getattr(self.ap, 'box_service', None)
        if box_service is None:
            return False
        if not getattr(box_service, 'available', False):
            return False

        # Check if backend is truly available via get_status
        try:
            status = await box_service.get_status()
            backend_info = status.get('backend', {})
            return bool(backend_info.get('available', False))
        except Exception as exc:
            # Best-effort probe: treat any failure as "not available", but
            # leave a trace so a broken backend is diagnosable.
            self.ap.logger.warning(f'Failed to query sandbox backend status: {exc}')
            return False

    async def get_tools(self, bound_plugins: list[str] | None = None) -> list[resource_tool.LLMTool]:
        """Return a copy of the tool list, or an empty list when unavailable."""
        if not self._is_available():
            return []
        return list(self._tools)

    async def has_tool(self, name: str) -> bool:
        """Tell whether *name* is a skill tool that is currently available."""
        return self._is_available() and name in SKILL_TOOL_NAMES

    def _is_available(self) -> bool:
        """Check if skill tools should be available.

        Skill tools require both a skill manager and a sandbox backend.
        """
        return self._has_skill_manager() and self._sandbox_available

    async def invoke_tool(self, name: str, parameters: dict, query) -> typing.Any:
        """Dispatch a tool call by name.

        Raises:
            ValueError: If *name* is not one of the skill tools.
        """
        if name == ACTIVATE_SKILL_TOOL_NAME:
            return await self._invoke_activate_skill(parameters, query)
        if name == REGISTER_SKILL_TOOL_NAME:
            return await self._invoke_register_skill(parameters)
        raise ValueError(f'Unknown skill tool: {name}')

    async def shutdown(self):
        """Nothing to release."""
        pass

    def _has_skill_manager(self) -> bool:
        return getattr(self.ap, 'skill_mgr', None) is not None

    async def _invoke_activate_skill(self, parameters: dict, query) -> typing.Any:
        """Activate a skill and return SKILL.md content via Tool Result.

        Raises:
            ValueError: If ``skill_name`` is missing, unknown, or not among
                the skills bound to the query's pipeline.
        """
        from . import skill as skill_loader

        skill_name = str(parameters.get('skill_name', '') or '').strip()
        if not skill_name:
            raise ValueError('skill_name is required')

        skill_mgr = self.ap.skill_mgr
        skill_data = skill_mgr.get_skill_by_name(skill_name)

        # Honour the pipeline's bound-skill list, as get_visible_skills does:
        # an activated skill becomes reachable through its mount path, so an
        # unbound skill must not be activatable.
        bound_names = skill_loader.get_bound_skill_names(query)
        if skill_data is not None and bound_names is not None and skill_name not in bound_names:
            skill_data = None

        if skill_data is None:
            visible_skills = skill_loader.get_visible_skills(self.ap, query)
            available_names = ', '.join(sorted(visible_skills.keys())) or 'none'
            raise ValueError(f'Skill "{skill_name}" not found. Available skills: {available_names}')

        # Register activated skill for sandbox mount path resolution
        skill_loader.register_activated_skill(query, skill_data)

        # Return SKILL.md content as Tool Result (injects into context)
        instructions = skill_data.get('instructions', '')
        package_root = skill_data.get('package_root', '')
        mount_path = skill_loader.get_virtual_skill_mount_path(skill_name)

        # Build Tool Result content
        result_content = ''.join(
            [
                f'<command-message>The "{skill_name}" skill is activated</command-message>\n',
                '<skill-activation>\n',
                f'<skill-name>{skill_name}</skill-name>\n',
                f'<mount-path>{mount_path}</mount-path>\n',
                f'<package-root>{package_root}</package-root>\n',
                f'\n## Instructions\n{instructions}\n',
                '\n## Runtime Context\n',
                f'The skill package is mounted at {mount_path}. Use the standard tools to interact with it:\n',
                f'- Use `read` to inspect files under {mount_path}\n',
                f'- Use `exec` with workdir set to {mount_path} to run commands in that package\n',
                '- Use `write` and `edit` on that path when the instructions require updating files\n',
                '</skill-activation>\n',
            ]
        )

        return {
            'activated': True,
            'skill_name': skill_name,
            'mount_path': mount_path,
            'content': result_content,
        }

    async def _invoke_register_skill(self, parameters: dict) -> typing.Any:
        """Register a skill from sandbox directory to data/skills/.

        Raises:
            ValueError: If ``path`` is missing or invalid, the skill service
                is absent, or no skill name can be determined.
        """
        sandbox_path = str(parameters.get('path', '') or '').strip()
        if not sandbox_path:
            raise ValueError('path is required')

        # Resolve sandbox path to host path
        host_path = self._resolve_workspace_directory(sandbox_path)

        skill_service = getattr(self.ap, 'skill_service', None)
        if skill_service is None:
            raise ValueError('Skill service not available')

        # Scan the directory for skill metadata
        scanned = await skill_service.scan_directory_async(host_path)

        def _pick(key: str) -> str:
            # An explicit override wins; a missing or None scanned value
            # becomes '' rather than the literal string 'None'.
            return str(parameters.get(key) or scanned.get(key) or '')

        skill_name = _pick('name').strip()
        if not skill_name:
            raise ValueError('skill name is required')

        # Create the skill
        created = await skill_service.create_skill(
            {
                'name': skill_name,
                'display_name': _pick('display_name').strip(),
                'description': _pick('description').strip(),
                'instructions': _pick('instructions'),
                'package_root': host_path,
            }
        )

        return {
            'registered': True,
            'skill_name': skill_name,
            'source_path': sandbox_path,
            'skill': created,
        }

    def _resolve_workspace_directory(self, sandbox_path: str) -> str:
        """Resolve sandbox path to host filesystem path.

        Raises:
            ValueError: If no default workspace is configured, the path is
                not ``/workspace`` or below it, it escapes the workspace, or
                (when the box service is not available) it is not an
                existing directory.
        """
        box_service = getattr(self.ap, 'box_service', None)
        workspace_root = getattr(box_service, 'default_workspace', None)
        if not workspace_root:
            raise ValueError('No default workspace configured')

        normalized_path = str(sandbox_path).strip() or '/workspace'
        # A bare startswith('/workspace') would also accept siblings such as
        # '/workspace2/foo'; require the exact root or a '/'-separated child.
        if normalized_path != '/workspace' and not normalized_path.startswith('/workspace/'):
            raise ValueError('path must be under /workspace')

        relative = normalized_path[len('/workspace') :].lstrip('/')
        host_root = os.path.realpath(workspace_root)
        host_path = os.path.realpath(os.path.join(host_root, relative))

        # Security check: ensure path doesn't escape workspace
        if not (host_path == host_root or host_path.startswith(host_root + os.sep)):
            raise ValueError('path escapes the workspace boundary')

        # With an available box service the directory check is skipped.
        if getattr(box_service, 'available', False):
            return host_path

        if not os.path.isdir(host_path):
            raise ValueError(f'Directory does not exist: {sandbox_path}')
        return host_path

    def _build_activate_skill_tool(self) -> resource_tool.LLMTool:
        """Build the ``activate`` tool descriptor."""
        return resource_tool.LLMTool(
            name=ACTIVATE_SKILL_TOOL_NAME,
            human_desc='Activate a skill',
            description=self._build_activate_tool_description(),
            parameters={
                'type': 'object',
                'properties': {
                    'skill_name': {
                        'type': 'string',
                        'description': 'The skill name to activate (no arguments). E.g., "pdf" or "data-analysis"',
                    },
                },
                'required': ['skill_name'],
                'additionalProperties': False,
            },
            func=lambda parameters: parameters,
        )

    def _build_register_skill_tool(self) -> resource_tool.LLMTool:
        """Build the ``register_skill`` tool descriptor."""
        return resource_tool.LLMTool(
            name=REGISTER_SKILL_TOOL_NAME,
            human_desc='Register a skill from sandbox',
            description=(
                "Register a skill package from a directory under /workspace into LangBot's skill store. "
                'Use this after creating or preparing a skill in the sandbox with exec/read/write/edit. '
                'The directory must contain a SKILL.md file. '
                'After registration, the skill can be activated with the activate tool.'
            ),
            parameters={
                'type': 'object',
                'properties': {
                    'path': {
                        'type': 'string',
                        'description': 'Directory path under /workspace containing the skill package (must have SKILL.md)',
                    },
                    'name': {
                        'type': 'string',
                        'description': 'Optional skill name override. Defaults to the name in SKILL.md or directory name.',
                    },
                    'display_name': {
                        'type': 'string',
                        'description': 'Optional display name override.',
                    },
                    'description': {
                        'type': 'string',
                        'description': 'Optional description override.',
                    },
                    'instructions': {
                        'type': 'string',
                        'description': 'Optional instructions override.',
                    },
                },
                'required': ['path'],
                'additionalProperties': False,
            },
            func=lambda parameters: parameters,
        )

    def _build_activate_tool_description(self) -> str:
        """Build tool description with embedded available_skills list."""
        skill_mgr = getattr(self.ap, 'skill_mgr', None)
        if skill_mgr is None:
            return 'Activate a skill. No skills are currently available.'

        skills = getattr(skill_mgr, 'skills', {})
        if not skills:
            return 'Activate a skill. No skills are currently available.'

        # Build <available_skills> section
        available_skills_lines = ['<available_skills>']
        for skill_name, skill_data in sorted(skills.items()):
            description = skill_data.get('description', '')
            available_skills_lines.append('<skill>')
            available_skills_lines.append(f'<name>{skill_name}</name>')
            available_skills_lines.append(f'<description>{description}</description>')
            available_skills_lines.append('</skill>')
        available_skills_lines.append('</available_skills>')
        available_skills_block = '\n'.join(available_skills_lines)

        return f"""Activate a skill within the main conversation.
<skills_instructions>
When users ask you to perform tasks, check if any of the available skills
below can help complete the task more effectively. Skills provide specialized
capabilities and domain knowledge.
How to use skills:
- Invoke skills using this tool with the skill name only (no arguments)
- When you invoke a skill, you will see <command-message>
The skill is activated
</command-message>
- The skill's instructions will be provided in the tool result
- Examples:
- skill_name: "pdf" - invoke the pdf skill
- skill_name: "data-analysis" - invoke the data-analysis skill
Important:
- Only use skills listed in <available_skills> below
- Do not invoke a skill that is already running
- To create a new skill: prepare it in /workspace, then use register_skill tool
</skills_instructions>
{available_skills_block}"""
+42 -36
View File
@@ -1,15 +1,19 @@
from __future__ import annotations
import typing
from typing import TYPE_CHECKING
from ...core import app
from langbot.pkg.utils import importutil
from langbot.pkg.provider.tools import loaders
from langbot.pkg.provider.tools.loaders import mcp as mcp_loader, plugin as plugin_loader
import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
from langbot_plugin.api.entities.events import pipeline_query
importutil.import_modules_in_pkg(loaders)
if TYPE_CHECKING:
from ...core import app
from langbot.pkg.provider.tools.loaders import (
mcp as mcp_loader,
native as native_loader,
plugin as plugin_loader,
skill_authoring as skill_authoring_loader,
)
class ToolManager:
@@ -17,31 +21,53 @@ class ToolManager:
ap: app.Application
native_tool_loader: native_loader.NativeToolLoader
plugin_tool_loader: plugin_loader.PluginToolLoader
mcp_tool_loader: mcp_loader.MCPLoader
skill_tool_loader: skill_authoring_loader.SkillToolLoader
def __init__(self, ap: app.Application):
self.ap = ap
async def initialize(self):
from langbot.pkg.utils import importutil
from langbot.pkg.provider.tools import loaders
from langbot.pkg.provider.tools.loaders import (
mcp as mcp_loader,
native as native_loader,
plugin as plugin_loader,
skill_authoring as skill_authoring_loader,
)
importutil.import_modules_in_pkg(loaders)
self.native_tool_loader = native_loader.NativeToolLoader(self.ap)
await self.native_tool_loader.initialize()
self.plugin_tool_loader = plugin_loader.PluginToolLoader(self.ap)
await self.plugin_tool_loader.initialize()
self.mcp_tool_loader = mcp_loader.MCPLoader(self.ap)
await self.mcp_tool_loader.initialize()
self.skill_tool_loader = skill_authoring_loader.SkillToolLoader(self.ap)
await self.skill_tool_loader.initialize()
async def get_all_tools(
self, bound_plugins: list[str] | None = None, bound_mcp_servers: list[str] | None = None
self,
bound_plugins: list[str] | None = None,
bound_mcp_servers: list[str] | None = None,
include_skill_authoring: bool = False,
) -> list[resource_tool.LLMTool]:
"""获取所有函数"""
all_functions: list[resource_tool.LLMTool] = []
all_functions.extend(await self.native_tool_loader.get_tools())
if include_skill_authoring:
all_functions.extend(await self.skill_tool_loader.get_tools())
all_functions.extend(await self.plugin_tool_loader.get_tools(bound_plugins))
all_functions.extend(await self.mcp_tool_loader.get_tools(bound_mcp_servers))
return all_functions
async def generate_tools_for_openai(self, use_funcs: list[resource_tool.LLMTool]) -> list:
"""生成函数列表"""
tools = []
for function in use_funcs:
@@ -58,28 +84,6 @@ class ToolManager:
return tools
async def generate_tools_for_anthropic(self, use_funcs: list[resource_tool.LLMTool]) -> list:
"""为anthropic生成函数列表
e.g.
[
{
"name": "get_stock_price",
"description": "Get the current stock price for a given ticker symbol.",
"input_schema": {
"type": "object",
"properties": {
"ticker": {
"type": "string",
"description": "The stock ticker symbol, e.g. AAPL for Apple Inc."
}
},
"required": ["ticker"]
}
}
]
"""
tools = []
for function in use_funcs:
@@ -93,16 +97,18 @@ class ToolManager:
return tools
async def execute_func_call(self, name: str, parameters: dict, query: pipeline_query.Query) -> typing.Any:
"""执行函数调用"""
if await self.native_tool_loader.has_tool(name):
return await self.native_tool_loader.invoke_tool(name, parameters, query)
if await self.plugin_tool_loader.has_tool(name):
return await self.plugin_tool_loader.invoke_tool(name, parameters, query)
elif await self.mcp_tool_loader.has_tool(name):
if await self.mcp_tool_loader.has_tool(name):
return await self.mcp_tool_loader.invoke_tool(name, parameters, query)
else:
raise ValueError(f'未找到工具: {name}')
if await self.skill_tool_loader.has_tool(name):
return await self.skill_tool_loader.invoke_tool(name, parameters, query)
raise ValueError(f'未找到工具: {name}')
async def shutdown(self):
"""关闭所有工具"""
await self.native_tool_loader.shutdown()
await self.plugin_tool_loader.shutdown()
await self.mcp_tool_loader.shutdown()
await self.skill_tool_loader.shutdown()
+17 -4
View File
@@ -1,8 +1,12 @@
from __future__ import annotations
import posixpath
from typing import Any
from langbot.pkg.core import app
import re
from typing import TYPE_CHECKING, Any
from urllib.parse import unquote
if TYPE_CHECKING:
from langbot.pkg.core import app
class RAGRuntimeService:
@@ -109,8 +113,17 @@ class RAGRuntimeService:
regardless of the underlying storage provider.
"""
# Validate storage_path to prevent path traversal
normalized = posixpath.normpath(storage_path)
if normalized.startswith('/') or '..' in normalized.split('/'):
decoded_path = unquote(storage_path).replace('\\', '/')
decoded_segments = decoded_path.split('/')
normalized = posixpath.normpath(decoded_path)
if (
not storage_path
or '\x00' in decoded_path
or normalized.startswith('/')
or '..' in decoded_segments
or '..' in normalized.split('/')
or re.match(r'^[A-Za-z]:/', normalized)
):
raise ValueError('Invalid storage path')
content_bytes = await self.ap.storage_mgr.storage_provider.load(normalized)
return content_bytes if content_bytes else b''
+3
View File
@@ -0,0 +1,3 @@
from .manager import SkillManager
__all__ = ['SkillManager']
+35
View File
@@ -0,0 +1,35 @@
from __future__ import annotations
import typing
from ..provider.tools.loaders import skill as skill_loader
if typing.TYPE_CHECKING:
from ..core import app
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
# Skill activation is now handled through Tool Call mechanism (activate tool).
# This file is kept for potential future extensions but the text marker
# detection mechanism has been removed.
def register_activated_skill(
    ap: app.Application,
    query: pipeline_query.Query,
    skill_name: str,
) -> bool:
    """Look up *skill_name* and record it as activated on *query*.

    Returns ``False`` when the application has no ``skill_mgr`` or the
    manager does not know the skill; otherwise registers it through
    ``skill_loader.register_activated_skill`` and returns ``True``.
    """
    manager = getattr(ap, 'skill_mgr', None)
    skill_data = None if manager is None else manager.get_skill_by_name(skill_name)
    if skill_data is None:
        return False

    skill_loader.register_activated_skill(query, skill_data)
    return True
+142
View File
@@ -0,0 +1,142 @@
from __future__ import annotations
import os
import typing
from ..core import app
if typing.TYPE_CHECKING:
pass
class SkillManager:
    """In-memory cache of the skills reported by the Box runtime.

    The cache is filled by ``reload_skills`` and is empty whenever the Box
    service is missing or unavailable. Skills are activated through the
    ``activate`` tool (Tool Call mechanism); this class only supplies the
    lookup and the prompt text that advertises them.
    """

    ap: app.Application
    skills: dict[str, dict]

    def __init__(self, ap: app.Application):
        self.ap = ap
        self.skills = {}

    async def initialize(self):
        """Fill the cache for the first time."""
        await self.reload_skills()

    async def reload_skills(self):
        """Rebuild the cache from ``box_service.list_skills()``.

        The cache is cleared first. If the Box service is absent or not
        available it stays empty. Entries without a name are ignored. When
        the service reports ``shares_filesystem_with_box``, entries whose
        non-empty ``package_root`` is not a local directory are dropped with
        a warning; otherwise the reported paths are accepted unchecked. Any
        exception is logged as a warning and not re-raised.
        """
        self.skills = {}

        box_service = getattr(self.ap, 'box_service', None)
        box_ready = box_service is not None and getattr(box_service, 'available', False)
        if not box_ready:
            self.ap.logger.info('Box runtime unavailable; skill cache is empty.')
            return

        # Paths can only be checked here when both sides see the same files.
        validate_locally = bool(getattr(box_service, 'shares_filesystem_with_box', False))

        try:
            dropped = 0
            reported = await box_service.list_skills()
            for entry in reported:
                name = entry.get('name')
                if not name:
                    continue

                root = str(entry.get('package_root', '') or '').strip()
                if validate_locally and root and not os.path.isdir(root):
                    self.ap.logger.warning(
                        f'Skill "{name}" reported by Box runtime but package_root missing on '
                        f'LangBot filesystem ({root}); dropping from in-memory cache.'
                    )
                    dropped += 1
                    continue

                self.skills[name] = entry

            if dropped:
                self.ap.logger.warning(
                    f'Loaded {len(self.skills)} skills from Box runtime '
                    f'({dropped} dropped due to missing package_root).'
                )
            else:
                self.ap.logger.info(f'Loaded {len(self.skills)} skills from Box runtime')
        except Exception as exc:
            self.ap.logger.warning(f'Failed to load skills from Box runtime: {exc}')

    def refresh_skill_from_disk(self, skill_name: str) -> bool:
        """Report whether *skill_name* is in the cache.

        No reload happens here; a falsy name always yields ``False``.
        """
        return bool(skill_name) and skill_name in self.skills

    def get_skill_by_name(self, name: str) -> dict | None:
        """Return the cached skill called *name*, or ``None``."""
        return self.skills.get(name)

    def get_skill_index(self, bound_skills: list[str] | None = None) -> str:
        """Render the cached skills as an ``Available Skills:`` bullet list.

        ``bound_skills=None`` lists every named skill; a list restricts the
        output to the names it contains. Newlines inside descriptions are
        replaced by spaces. Returns ``''`` when nothing qualifies.
        """
        entries: list[str] = []
        for skill in self.skills.values():
            name = skill.get('name')
            if not name or (bound_skills is not None and name not in bound_skills):
                continue
            label = skill.get('display_name') or name
            summary = (skill.get('description') or '').strip().replace('\n', ' ')
            entries.append(f'- {name} ({label}): {summary}')

        return ('Available Skills:\n' + '\n'.join(entries)) if entries else ''

    def build_skill_aware_prompt_addition(self, bound_skills: list[str] | None = None) -> str:
        """Build the system-prompt suffix that advertises the visible skills.

        The suffix holds the skill index followed by guidance to call the
        ``activate`` tool. Returns ``''`` when the index is empty.
        """
        skill_index = self.get_skill_index(bound_skills)
        if not skill_index:
            return ''

        guidance = (
            "When the user's request clearly matches one or more skills "
            'based on their descriptions above, call the `activate` tool with '
            'the skill name to load its full instructions. Only the name and '
            'description are visible here; the actual instructions arrive as '
            'the tool result. If no skill is a clear match, respond normally '
            'without activating any skill.'
        )
        return f'\n\n{skill_index}\n\n{guidance}'
+37
View File
@@ -0,0 +1,37 @@
"""Shared utilities for skill file parsing."""
import yaml
def parse_frontmatter(content: str) -> tuple[dict, str]:
    """Parse YAML frontmatter from markdown content.

    Expects format:
    ---
    name: my-skill
    description: Does something
    ---
    # Actual instructions...

    Both delimiters must be a ``---`` line of their own (trailing whitespace
    is tolerated), so a ``---`` inside a metadata value does not end the
    frontmatter early.

    Returns:
        Tuple of (metadata dict, remaining content). When the content has no
        opening or no closing delimiter line, ``({}, content)`` is returned
        unchanged. Invalid YAML or a non-mapping document gives empty
        metadata with the stripped remainder.
    """
    lines = content.split('\n')
    if lines[0].rstrip() != '---':
        return {}, content

    closing_index = next(
        (index for index, line in enumerate(lines[1:], 1) if line.rstrip() == '---'),
        None,
    )
    if closing_index is None:
        return {}, content

    frontmatter_str = '\n'.join(lines[1:closing_index]).strip()
    instructions = '\n'.join(lines[closing_index + 1 :]).strip()

    try:
        metadata = yaml.safe_load(frontmatter_str) or {}
    except yaml.YAMLError:
        metadata = {}

    if not isinstance(metadata, dict):
        metadata = {}

    return metadata, instructions
+1 -2
View File
@@ -13,12 +13,11 @@ class TelemetryManager:
await telemetry.send({ ... })
"""
send_tasks: list[asyncio.Task] = []
def __init__(self, ap: core_app.Application):
self.ap = ap
self.telemetry_config = {}
self.send_tasks: list[asyncio.Task] = []
async def initialize(self):
self.telemetry_config = self.ap.instance_config.data.get('space', {})
+1 -1
View File
@@ -83,7 +83,7 @@ def get_func_schema(function: typing.Callable) -> dict:
parameters['properties'][param.name] = {
'type': param_type,
'description': args_doc[param.name],
'description': args_doc.get(param.name, ''),
}
# add schema for array
+2 -1
View File
@@ -145,7 +145,8 @@ def get_qq_image_downloadable_url(image_url: str) -> tuple[str, dict]:
"""获取QQ图片的下载链接"""
parsed = urlparse(image_url)
query = parse_qs(parsed.query)
return f'http://{parsed.netloc}{parsed.path}', query
scheme = parsed.scheme or 'http'
return f'{scheme}://{parsed.netloc}{parsed.path}', query
async def get_qq_image_bytes(image_url: str, query: dict = {}) -> tuple[bytes, str]:
+88
View File
@@ -0,0 +1,88 @@
"""Base class for connectors that may manage a local runtime subprocess."""
from __future__ import annotations
import asyncio
import os
import sys
from typing import TYPE_CHECKING, Awaitable, Callable
if TYPE_CHECKING:
from ..core import app as core_app
class ManagedRuntimeConnector:
    """Base class for connectors that may manage a local runtime subprocess.

    Provides shared lifecycle helpers: subprocess launch, health-check retry,
    and graceful termination. Concrete connectors (plugin, box, ...) inherit
    this and add their own protocol-specific logic.
    """

    # Application object; this class only uses ``ap.logger``.
    ap: 'core_app.Application'
    # Handle of the launched runtime process, or None if none was started.
    runtime_subprocess: asyncio.subprocess.Process | None
    # Background task awaiting ``runtime_subprocess.wait()``.
    runtime_subprocess_task: asyncio.Task | None

    def __init__(self, ap: 'core_app.Application'):
        self.ap = ap
        self.runtime_subprocess = None
        self.runtime_subprocess_task = None

    async def _start_runtime_subprocess(self, *args: str) -> None:
        """Launch a local runtime as a subprocess of the current Python interpreter.

        If a subprocess is already running (no *returncode* yet), this is a no-op.

        Args:
            *args: Command-line arguments passed to ``sys.executable``.
        """
        if self.runtime_subprocess is not None and self.runtime_subprocess.returncode is None:
            return

        python_path = sys.executable
        env = os.environ.copy()
        self.runtime_subprocess = await asyncio.create_subprocess_exec(
            python_path,
            *args,
            env=env,
        )
        # Keep a task waiting on the child so its exit is collected in the background.
        self.runtime_subprocess_task = asyncio.create_task(self.runtime_subprocess.wait())

    async def _wait_until_ready(
        self,
        check: Callable[[], Awaitable[None]],
        retries: int = 40,
        interval: float = 0.25,
        runtime_name: str = 'runtime',
    ) -> None:
        """Repeatedly call *check* until it succeeds or retries are exhausted.

        Between attempts the method sleeps for *interval* seconds; there is no
        sleep after the final attempt. If the managed subprocess exits before
        readiness is confirmed, a ``RuntimeError`` is raised immediately.

        Args:
            check: Awaitable probe; returning normally means "ready", raising
                means "not ready yet".
            retries: Maximum number of probe attempts.
            interval: Seconds to sleep between two attempts.
            runtime_name: Human-readable name used in error messages.

        Raises:
            RuntimeError: The subprocess exited early, or no attempt was made
                (``retries`` <= 0).
            Exception: The exception raised by the last failed *check* call.
        """
        last_exc: Exception | None = None

        for attempt in range(retries):
            # Fast-fail if the process already died.
            if self.runtime_subprocess is not None and self.runtime_subprocess.returncode is not None:
                raise RuntimeError(
                    f'local {runtime_name} exited before becoming ready (code {self.runtime_subprocess.returncode})'
                )

            try:
                await check()
                return
            except Exception as exc:
                last_exc = exc

            # Sleeping after the last attempt would only delay the error.
            if attempt < retries - 1:
                await asyncio.sleep(interval)

        if last_exc is not None:
            raise last_exc
        raise RuntimeError(f'local {runtime_name} did not become ready')

    def _dispose_subprocess(self) -> None:
        """Terminate the managed subprocess and cancel its wait task."""
        process = self.runtime_subprocess
        if process is not None and process.returncode is None:
            self.ap.logger.info('Terminating managed runtime process...')
            try:
                process.terminate()
            except ProcessLookupError:
                # The process is already gone; still clean up the wait task below.
                pass

        if self.runtime_subprocess_task is not None:
            self.runtime_subprocess_task.cancel()
            self.runtime_subprocess_task = None
+55 -19
View File
@@ -1,37 +1,70 @@
"""Utility functions for finding package resources"""
"""Utility functions for finding package resources and runtime data roots."""
import os
from pathlib import Path
_is_source_install = None
_source_root = None
def _find_source_root() -> Path | None:
    """Return the LangBot repository root when running from a source checkout.

    Walks up from this file's resolved location and picks the first ancestor
    directory that contains both ``pyproject.toml`` and ``main.py``. A
    successful lookup is cached in the module-level ``_source_root``; a miss
    is not cached, so the scan is repeated on the next call.
    """
    global _source_root

    if _source_root is None:
        here = Path(__file__).resolve()
        _source_root = next(
            (
                candidate
                for candidate in here.parents
                if (candidate / 'pyproject.toml').exists() and (candidate / 'main.py').exists()
            ),
            None,
        )

    return _source_root
def _check_if_source_install() -> bool:
"""
Check if we're running from source directory or an installed package.
Cached to avoid repeated file I/O.
Check if we're running from the LangBot source tree.
Cached to avoid repeated filesystem scans.
"""
global _is_source_install
if _is_source_install is not None:
return _is_source_install
# Check if main.py exists in current directory with LangBot marker
if os.path.exists('main.py'):
try:
with open('main.py', 'r', encoding='utf-8') as f:
# Only read first 500 chars to check for marker
content = f.read(500)
if 'LangBot/main.py' in content:
_is_source_install = True
return True
except (IOError, OSError, UnicodeDecodeError):
# If we can't read the file, assume not a source install
pass
_is_source_install = _find_source_root() is not None
return _is_source_install
_is_source_install = False
return False
def get_data_root() -> str:
    """Resolve the directory that holds runtime data.

    Lookup order:
    1. The ``LANGBOT_DATA_ROOT`` environment variable, if set to a non-blank value
    2. ``<source root>/data`` when running from a source checkout
    3. ``<current working directory>/data`` for installed-package usage

    Returns:
        The resolved absolute path as a string.
    """
    override = os.environ.get('LANGBOT_DATA_ROOT', '').strip()
    if override:
        base = Path(override).expanduser()
    else:
        checkout = _find_source_root()
        base = (Path.cwd() if checkout is None else checkout) / 'data'

    return str(base.resolve())
def get_data_path(*parts: str) -> str:
    """Build a path below the resolved data root.

    With no *parts* the data root itself is returned; otherwise the segments
    are joined onto the root and the result is resolved.
    """
    root = Path(get_data_root())
    target = root.joinpath(*parts).resolve() if parts else root
    return str(target)
def get_frontend_path() -> str:
@@ -76,8 +109,11 @@ def get_resource_path(resource: str) -> str:
Absolute path to the resource
"""
# First, check if resource exists in current directory (source install)
if _check_if_source_install() and os.path.exists(resource):
return resource
source_root = _find_source_root()
if source_root is not None:
source_resource = source_root / resource
if source_resource.exists():
return str(source_resource)
# Second, check current directory anyway
if os.path.exists(resource):
+4 -1
View File
@@ -23,7 +23,10 @@ def run_pip(params: list):
pipmain(params)
def install_requirements(file, extra_params: list = []):
def install_requirements(file, extra_params: list | None = None):
if extra_params is None:
extra_params = []
pipmain(
[
'install',
+7
View File
@@ -16,7 +16,14 @@ def get_platform() -> str:
standalone_runtime = False
standalone_box = False
def use_websocket_to_connect_plugin_runtime() -> bool:
    """Whether to use WebSocket to connect to the plugin runtime.

    Returns the module-level ``standalone_runtime`` flag.
    """
    return standalone_runtime
def use_websocket_to_connect_box_runtime() -> bool:
    """Whether to use WebSocket to connect to an external box runtime.

    Returns the module-level ``standalone_box`` flag.
    """
    return standalone_box
+42 -3
View File
@@ -1,5 +1,7 @@
from __future__ import annotations
import ipaddress
import re
from urllib.parse import urlparse
@@ -44,6 +46,40 @@ LOCAL_PATTERNS = [
'172.31.',
]
HOST_LABEL_PATTERN = re.compile(r'^[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?$')
def _is_valid_hostname(host: str) -> bool:
if host == 'localhost':
return True
try:
ipaddress.ip_address(host)
return True
except ValueError:
pass
if not host or len(host) > 253 or any(char.isspace() for char in host):
return False
host = host.rstrip('.')
if not host:
return False
return all(HOST_LABEL_PATTERN.match(label) for label in host.split('.'))
def _is_local_host(host: str) -> bool:
if host == 'localhost':
return True
try:
ip_address = ipaddress.ip_address(host)
except ValueError:
return False
return ip_address.is_private or ip_address.is_loopback or ip_address.is_unspecified
def get_runner_category(runner_name: str, runner_url: str) -> str:
if not runner_url:
@@ -52,12 +88,15 @@ def get_runner_category(runner_name: str, runner_url: str) -> str:
try:
parsed_url = urlparse(runner_url)
host = parsed_url.hostname.lower() if parsed_url.hostname else ''
_ = parsed_url.port
except Exception:
return RunnerCategory.UNKNOWN
for pattern in LOCAL_PATTERNS:
if host.startswith(pattern):
return RunnerCategory.LOCAL
if not parsed_url.scheme or not host or not _is_valid_hostname(host):
return RunnerCategory.UNKNOWN
if _is_local_host(host):
return RunnerCategory.LOCAL
for domain in CLOUD_DOMAINS:
if host.endswith(domain):
+32 -155
View File
@@ -1,6 +1,5 @@
from __future__ import annotations
import os
import typing
import logging
@@ -11,7 +10,7 @@ from . import constants
class VersionManager:
"""版本管理器"""
"""Version manager"""
ap: app.Application
@@ -22,190 +21,68 @@ class VersionManager:
pass
def get_current_version(self) -> str:
current_tag = constants.semantic_version
return current_tag
return constants.semantic_version
async def get_release_list(self) -> list:
"""获取发行列表"""
"""Fetch release list from Space API (cached GitHub releases)."""
try:
rls_list_resp = requests.get(
url='https://api.github.com/repos/langbot-app/LangBot/releases',
url='https://space.langbot.app/api/v1/dist/info/releases',
proxies=self.ap.proxy_mgr.get_forward_proxies(),
timeout=5,
timeout=10,
)
rls_list_resp.raise_for_status() # 检查请求是否成功
rls_list = rls_list_resp.json()
return rls_list
rls_list_resp.raise_for_status()
resp_json = rls_list_resp.json()
if resp_json.get('code') == 0 and isinstance(resp_json.get('data'), list):
return resp_json['data']
self.ap.logger.warning(f'Failed to fetch release list: unexpected response: {resp_json.get("msg", "")}')
return []
except Exception as e:
self.ap.logger.warning(f'获取发行列表失败: {e}')
pass
self.ap.logger.warning(f'Failed to fetch release list: {e}')
return []
async def update_all(self):
"""检查更新并下载源码"""
current_tag = self.get_current_version()
rls_list = await self.get_release_list()
latest_rls = {}
rls_notes = []
latest_tag_name = ''
for rls in rls_list:
rls_notes.append(rls['name']) # 使用发行名称作为note
if latest_tag_name == '':
latest_tag_name = rls['tag_name']
if rls['tag_name'] == current_tag:
break
if latest_rls == {}:
latest_rls = rls
self.ap.logger.info('更新日志: {}'.format(rls_notes))
if latest_rls == {} and not self.is_newer(latest_tag_name, current_tag): # 没有新版本
return False
# 下载最新版本的zip到temp目录
self.ap.logger.info('开始下载最新版本: {}'.format(latest_rls['zipball_url']))
zip_url = latest_rls['zipball_url']
zip_resp = requests.get(url=zip_url, proxies=self.ap.proxy_mgr.get_forward_proxies())
zip_data = zip_resp.content
# 检查temp/updater目录
if not os.path.exists('temp'):
os.mkdir('temp')
if not os.path.exists('temp/updater'):
os.mkdir('temp/updater')
with open('temp/updater/{}.zip'.format(latest_rls['tag_name']), 'wb') as f:
f.write(zip_data)
self.ap.logger.info('下载最新版本完成: {}'.format('temp/updater/{}.zip'.format(latest_rls['tag_name'])))
# 解压zip到temp/updater/<tag_name>/
import zipfile
# 检查目标文件夹
if os.path.exists('temp/updater/{}'.format(latest_rls['tag_name'])):
import shutil
shutil.rmtree('temp/updater/{}'.format(latest_rls['tag_name']))
os.mkdir('temp/updater/{}'.format(latest_rls['tag_name']))
with zipfile.ZipFile('temp/updater/{}.zip'.format(latest_rls['tag_name']), 'r') as zip_ref:
zip_ref.extractall('temp/updater/{}'.format(latest_rls['tag_name']))
# 覆盖源码
source_root = ''
# 找到temp/updater/<tag_name>/中的第一个子目录路径
for root, dirs, files in os.walk('temp/updater/{}'.format(latest_rls['tag_name'])):
if root != 'temp/updater/{}'.format(latest_rls['tag_name']):
source_root = root
break
# 覆盖源码
import shutil
for root, dirs, files in os.walk(source_root):
# 覆盖所有子文件子目录
for file in files:
src = os.path.join(root, file)
dst = src.replace(source_root, '.')
if os.path.exists(dst):
os.remove(dst)
# 检查目标文件夹是否存在
if not os.path.exists(os.path.dirname(dst)):
os.makedirs(os.path.dirname(dst))
# 检查目标文件是否存在
if not os.path.exists(dst):
# 创建目标文件
open(dst, 'w').close()
shutil.copy(src, dst)
# 把current_tag写入文件
current_tag = latest_rls['tag_name']
with open('current_tag', 'w') as f:
f.write(current_tag)
# TODO statistics
async def is_new_version_available(self) -> bool:
"""检查是否有新版本"""
# 从github获取release列表
"""Check whether a newer version is available."""
rls_list = await self.get_release_list()
if rls_list is None:
if not rls_list:
return False
# 获取当前版本
current_tag = self.get_current_version()
# 检查是否有新版本
latest_tag_name = ''
for rls in rls_list:
if latest_tag_name == '':
latest_tag_name = rls['tag_name']
break
latest_tag_name = rls.get('tag_name', '')
break
return self.is_newer(latest_tag_name, current_tag)
return self._is_newer(latest_tag_name, current_tag)
def is_newer(self, new_tag: str, old_tag: str):
"""判断版本是否更新,忽略第四位版本和第一位版本"""
if new_tag == old_tag:
def _is_newer(self, new_tag: str, old_tag: str) -> bool:
"""Check if new_tag is a newer version than old_tag.
Compares the first three segments (major.minor.patch) only.
Returns False if the major version differs (breaking change boundary).
"""
if not new_tag or not old_tag or new_tag == old_tag:
return False
new_tag = new_tag.split('.')
old_tag = old_tag.split('.')
new_parts = new_tag.split('.')
old_parts = old_tag.split('.')
# 判断主版本是否相同
if new_tag[0] != old_tag[0]:
# Different major version — not considered an upgrade
if new_parts[0] != old_parts[0]:
return False
if len(new_tag) < 4:
if len(new_parts) < 4:
return True
# 合成前三段,判断是否相同
new_tag = '.'.join(new_tag[:3])
old_tag = '.'.join(old_tag[:3])
return new_tag != old_tag
def compare_version_str(v0: str, v1: str) -> int:
"""比较两个版本号"""
# 删除版本号前的v
if v0.startswith('v'):
v0 = v0[1:]
if v1.startswith('v'):
v1 = v1[1:]
v0: list = v0.split('.')
v1: list = v1.split('.')
# 如果两个版本号节数不同,把短的后面用0补齐
if len(v0) < len(v1):
v0.extend(['0'] * (len(v1) - len(v0)))
elif len(v0) > len(v1):
v1.extend(['0'] * (len(v0) - len(v1)))
# 从高位向低位比较
for i in range(len(v0)):
if int(v0[i]) > int(v1[i]):
return 1
elif int(v0[i]) < int(v1[i]):
return -1
return 0
return '.'.join(new_parts[:3]) != '.'.join(old_parts[:3])
async def show_version_update(self) -> typing.Tuple[str, int]:
try:
if await self.ap.ver_mgr.is_new_version_available():
if await self.is_new_version_available():
return (
'New version available:\n有新版本可用,根据文档更新: \nhttps://link.langbot.app/zh/docs/update',
'New version available. Update guide: https://link.langbot.app/en/docs/update',
logging.INFO,
)
except Exception as e:
return f'Error checking version update: {e}', logging.WARNING