chore: bump beta version

fix(ci): resolve langbot-plugin from PyPI and clear lint failures
CI on feat/sandbox failed across Unit Tests, Lint and Build Dev Image. Root causes and fixes: - pyproject.toml had a [tool.uv.sources] editable override pinning langbot-plugin to ../langbot-plugin-sdk. That path only exists in a paired local checkout, so `uv sync` failed on every CI runner ("Distribution not found"). Remove the override and regenerate uv.lock so langbot-plugin==0.4.0b1 resolves from PyPI, matching master. - tests/integration/api/test_pipelines.py: the pipeline extensions endpoint now calls ap.skill_service.list_skills(); add the missing skill_service mock to the fake_pipeline_app fixture (the test came from master, the endpoint change from feat/sandbox). - Apply ruff format to three src files and prettier to three web files that had committed formatting drift, failing `ruff format --check` and `pnpm lint`.
2026-06-02 20:14:36 +00:00 · 2026-05-21 14:01:45 +08:00 · 2026-05-21 13:38:27 +08:00 · 2026-05-21 13:25:40 +08:00 · 2026-05-21 13:21:03 +08:00 · 2026-05-21 00:04:34 +08:00
334 changed files with 61457 additions and 8211 deletions
--- a/.github/workflows/run-tests.yml
+++ b/.github/workflows/run-tests.yml
@@ -4,25 +4,25 @@ on:
  pull_request:
    types: [opened, ready_for_review, synchronize]
    paths:
-      - 'pkg/**'
+      - 'src/langbot/**'
      - 'tests/**'
      - '.github/workflows/run-tests.yml'
      - 'pyproject.toml'
+      - 'uv.lock'
      - 'run_tests.sh'
+      - 'scripts/test-*.sh'
  push:
    branches:
      - master
      - develop
-    paths:
-      - 'pkg/**'
-      - 'tests/**'
-      - '.github/workflows/run-tests.yml'
-      - 'pyproject.toml'
-      - 'run_tests.sh'
+      - 'feat/**'
+    # No path filter on push: every push to the branches above runs the
+    # full unit-test suite. feat/** branches in particular must be tested
+    # on every push (they accumulate large changes before a PR exists).

 jobs:
  test:
-    name: Run Unit Tests
+    name: Unit Tests
    runs-on: ubuntu-latest
    strategy:
      matrix:
@@ -39,28 +39,13 @@ jobs:
          python-version: ${{ matrix.python-version }}

      - name: Install uv
-        run: |
-          curl -LsSf https://astral.sh/uv/install.sh | sh
-          echo "$HOME/.cargo/bin" >> $GITHUB_PATH
+        uses: astral-sh/setup-uv@v4

      - name: Install dependencies
-        run: |
-          uv sync --dev
+        run: uv sync --dev

-      - name: Run unit tests
-        run: |
-          bash run_tests.sh
-
-      - name: Upload coverage to Codecov
-        if: matrix.python-version == '3.12'
-        uses: codecov/codecov-action@v5
-        with:
-          files: ./coverage.xml
-          flags: unit-tests
-          name: unit-tests-coverage
-          fail_ci_if_error: false
-        env:
-          CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
+      - name: Run unit + smoke tests
+        run: uv run pytest tests/unit_tests/ tests/smoke/ -q --tb=short

      - name: Test Summary
        if: always()
@@ -69,3 +54,79 @@ jobs:
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "Python Version: ${{ matrix.python-version }}" >> $GITHUB_STEP_SUMMARY
          echo "Test Status: ${{ job.status }}" >> $GITHUB_STEP_SUMMARY
+
+  integration:
+    name: Fast Integration Tests
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+
+      - name: Install dependencies
+        run: uv sync --dev
+
+      - name: Run fast integration tests
+        run: uv run pytest tests/integration/ -m "not slow" -q --tb=short
+
+      - name: Integration Test Summary
+        if: always()
+        run: |
+          echo "## Integration Tests Results" >> $GITHUB_STEP_SUMMARY
+          echo "" >> $GITHUB_STEP_SUMMARY
+          echo "Test Status: ${{ job.status }}" >> $GITHUB_STEP_SUMMARY
+
+  coverage:
+    name: Coverage Gate
+    runs-on: ubuntu-latest
+    needs: [test, integration]
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+
+      - name: Install dependencies
+        run: uv sync --dev
+
+      - name: Run coverage (unit + smoke)
+        run: |
+          uv run pytest tests/unit_tests/ tests/smoke/ \
+            --cov=langbot \
+            --cov-report=xml \
+            --cov-report=term-missing \
+            --cov-fail-under=18 \
+            -q --tb=short
+
+      - name: Upload coverage to Codecov
+        uses: codecov/codecov-action@v5
+        with:
+          files: ./coverage.xml
+          flags: unit-tests
+          name: coverage-report
+          fail_ci_if_error: false
+        env:
+          CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
+
+      - name: Coverage Summary
+        if: always()
+        run: |
+          echo "## Coverage Results" >> $GITHUB_STEP_SUMMARY
+          echo "" >> $GITHUB_STEP_SUMMARY
+          echo "Threshold: 18%" >> $GITHUB_STEP_SUMMARY
+          echo "Status: ${{ job.status }}" >> $GITHUB_STEP_SUMMARY
--- a/.github/workflows/test-migrations.yml
+++ b/.github/workflows/test-migrations.yml
@@ -9,11 +9,13 @@ on:
    paths:
      - 'src/langbot/pkg/persistence/**'
      - 'src/langbot/pkg/entity/persistence/**'
+      - 'tests/integration/persistence/**'
  pull_request:
    types: [opened, synchronize, reopened, ready_for_review]
    paths:
      - 'src/langbot/pkg/persistence/**'
      - 'src/langbot/pkg/entity/persistence/**'
+      - 'tests/integration/persistence/**'

 jobs:
  test-migrations-sqlite:
@@ -34,52 +36,8 @@ jobs:
      - name: Install dependencies
        run: uv sync --dev

-      - name: Test Alembic upgrade (SQLite)
-        run: |
-          uv run python -c "
-          import asyncio
-          from sqlalchemy.ext.asyncio import create_async_engine
-          from langbot.pkg.entity.persistence.base import Base
-          from langbot.pkg.persistence.alembic_runner import run_alembic_upgrade, run_alembic_stamp, get_alembic_current
-
-          async def main():
-              engine = create_async_engine('sqlite+aiosqlite:///test_migrations.db')
-
-              # Create all tables (simulates existing DB)
-              async with engine.begin() as conn:
-                  await conn.run_sync(Base.metadata.create_all)
-
-              # Stamp baseline
-              await run_alembic_stamp(engine, '0001_baseline')
-              rev = await get_alembic_current(engine)
-              assert rev == '0001_baseline', f'Expected 0001_baseline, got {rev}'
-              print(f'Stamped: {rev}')
-
-              # Upgrade to head
-              await run_alembic_upgrade(engine, 'head')
-              rev = await get_alembic_current(engine)
-              print(f'After upgrade: {rev}')
-              assert rev is not None, 'Expected a revision after upgrade'
-
-              # Verify idempotent
-              await run_alembic_upgrade(engine, 'head')
-              rev2 = await get_alembic_current(engine)
-              assert rev2 == rev, f'Expected {rev}, got {rev2}'
-              print(f'Idempotent check passed: {rev2}')
-
-              # Fresh DB: upgrade from scratch
-              engine2 = create_async_engine('sqlite+aiosqlite:///test_migrations_fresh.db')
-              async with engine2.begin() as conn:
-                  await conn.run_sync(Base.metadata.create_all)
-              await run_alembic_upgrade(engine2, 'head')
-              rev3 = await get_alembic_current(engine2)
-              print(f'Fresh DB upgrade: {rev3}')
-              assert rev3 is not None
-
-              print('All SQLite migration tests passed!')
-
-          asyncio.run(main())
-          "
+      - name: Run SQLite migration tests
+        run: uv run pytest tests/integration/persistence/test_migrations.py -q --tb=short

  test-migrations-postgres:
    name: Migrations (PostgreSQL)
@@ -114,58 +72,7 @@ jobs:
      - name: Install dependencies
        run: uv sync --dev

-      - name: Test Alembic upgrade (PostgreSQL)
-        run: |
-          uv run python -c "
-          import asyncio
-          from sqlalchemy.ext.asyncio import create_async_engine
-          from langbot.pkg.entity.persistence.base import Base
-          from langbot.pkg.persistence.alembic_runner import run_alembic_upgrade, run_alembic_stamp, get_alembic_current
-
-          DB_URL = 'postgresql+asyncpg://langbot:langbot@localhost:5432/langbot_test'
-
-          async def main():
-              engine = create_async_engine(DB_URL)
-
-              # Create all tables
-              async with engine.begin() as conn:
-                  await conn.run_sync(Base.metadata.create_all)
-
-              # Stamp baseline
-              await run_alembic_stamp(engine, '0001_baseline')
-              rev = await get_alembic_current(engine)
-              assert rev == '0001_baseline', f'Expected 0001_baseline, got {rev}'
-              print(f'Stamped: {rev}')
-
-              # Upgrade to head
-              await run_alembic_upgrade(engine, 'head')
-              rev = await get_alembic_current(engine)
-              print(f'After upgrade: {rev}')
-              assert rev is not None
-
-              # Verify idempotent
-              await run_alembic_upgrade(engine, 'head')
-              rev2 = await get_alembic_current(engine)
-              assert rev2 == rev, f'Expected {rev}, got {rev2}'
-              print(f'Idempotent check passed: {rev2}')
-
-              # Fresh DB: drop all and upgrade from scratch
-              engine2 = create_async_engine(DB_URL.replace('langbot_test', 'langbot_fresh'))
-
-              # Create fresh database
-              from sqlalchemy import text
-              async with engine.connect() as conn:
-                  await conn.execute(text('COMMIT'))
-                  await conn.execute(text('CREATE DATABASE langbot_fresh'))
-
-              async with engine2.begin() as conn:
-                  await conn.run_sync(Base.metadata.create_all)
-              await run_alembic_upgrade(engine2, 'head')
-              rev3 = await get_alembic_current(engine2)
-              print(f'Fresh DB upgrade: {rev3}')
-              assert rev3 is not None
-
-              print('All PostgreSQL migration tests passed!')
-
-          asyncio.run(main())
-          "
+      - name: Run PostgreSQL migration tests
+        env:
+          TEST_POSTGRES_URL: postgresql+asyncpg://langbot:langbot@localhost:5432/langbot_test
+        run: uv run pytest tests/integration/persistence/test_migrations_postgres.py -q --tb=short
--- a/36
+++ b/36
@@ -0,0 +1,36 @@
+# LangBot Makefile
+# Quick developer commands
+
+.PHONY: test test-quick test-integration-fast test-coverage test-all-local lint lint-fix
+
+# Run all tests (full suite with coverage)
+test:
+	bash run_tests.sh
+
+# Quick self-test for developers (lint + unit + smoke, no real credentials needed)
+test-quick:
+	bash scripts/test-quick.sh
+
+# Fast integration tests (SQLite/API/Pipeline, no external services)
+test-integration-fast:
+	bash scripts/test-integration-fast.sh
+
+# Coverage gate (all tests, enforces minimum threshold)
+test-coverage:
+	bash scripts/test-coverage.sh
+
+# Full local quality gate (quick + integration + coverage)
+test-all-local:
+	bash scripts/test-quick.sh
+	bash scripts/test-integration-fast.sh
+	bash scripts/test-coverage.sh
+
+# Run linting only
+lint:
+	ruff check src/langbot/ tests/
+	ruff format --check src/langbot/ tests/
+
+# Fix linting issues
+lint-fix:
+	ruff check --fix src/langbot/ tests/
+	ruff format src/langbot/ tests/
--- a/README.md
+++ b/README.md
@@ -47,6 +47,8 @@ LangBot is an **open-source, production-grade platform** for building AI-powered

 [→ Learn more about all features](https://link.langbot.app/en/docs/features)

+📍 Practical guides: [deploy a multi-platform AI bot in 5 minutes](https://blog.langbot.app/en/blog/deploy-ai-bot-in-5-minutes/), [connect DeepSeek to WeChat, Discord, and Telegram](https://blog.langbot.app/en/blog/connect-deepseek-to-wechat/), [run a Dify Agent in Discord, Telegram, and Slack](https://blog.langbot.app/en/blog/dify-agent-discord-telegram-slack/), and [build an n8n-powered chatbot](https://blog.langbot.app/en/blog/n8n-multi-platform-ai-chatbot/).
+
 ---

 ## Quick Start
--- a/README_CN.md
+++ b/README_CN.md
@@ -25,7 +25,7 @@
 <a href="https://link.langbot.app/zh/docs/guide">文档</a> ｜
 <a href="https://link.langbot.app/zh/docs/api">API</a> ｜
 <a href="https://space.langbot.app/cloud">Cloud</a> ｜
-<a href="https://space.langbot.app">插件市场</a> ｜
+<a href="https://space.langbot.app">扩展市场</a> ｜
 <a href="https://langbot.featurebase.app/roadmap">路线图</a>

 </div>
@@ -47,6 +47,8 @@ LangBot 是一个**开源的生产级平台**，用于构建 AI 驱动的即时

 [→ 了解更多功能特性](https://link.langbot.app/zh/docs/features)

+📍 实践指南：[5 分钟部署多平台 AI 机器人](https://blog.langbot.app/zh/blog/deploy-ai-bot-in-5-minutes/)、[将 DeepSeek 接入微信、企业微信与 Discord](https://blog.langbot.app/zh/blog/connect-deepseek-to-wechat/)、[让 Dify Agent 跑在 Discord、Telegram 和 Slack 上](https://blog.langbot.app/zh/blog/dify-agent-discord-telegram-slack/)，以及[用 n8n 构建多平台 AI 聊天机器人](https://blog.langbot.app/zh/blog/n8n-multi-platform-ai-chatbot/)。
+
 ---

 ## 快速开始
--- a/README_ES.md
+++ b/README_ES.md
@@ -46,6 +46,8 @@ LangBot es una **plataforma de código abierto y grado de producción** para con

 [→ Conocer más sobre todas las funcionalidades](https://link.langbot.app/en/docs/features)

+📍 Guías prácticas: [desplegar un bot de IA multiplataforma en 5 minutos](https://blog.langbot.app/en/blog/deploy-ai-bot-in-5-minutes/), [conectar DeepSeek a WeChat, Discord y Telegram](https://blog.langbot.app/en/blog/connect-deepseek-to-wechat/), [ejecutar un Dify Agent en Discord, Telegram y Slack](https://blog.langbot.app/en/blog/dify-agent-discord-telegram-slack/) y [crear un chatbot con n8n](https://blog.langbot.app/en/blog/n8n-multi-platform-ai-chatbot/).
+
 ---

 ## Inicio Rápido
--- a/README_FR.md
+++ b/README_FR.md
@@ -46,6 +46,8 @@ LangBot est une **plateforme open-source de niveau production** pour créer des

 [→ En savoir plus sur toutes les fonctionnalités](https://link.langbot.app/en/docs/features)

+📍 Guides pratiques : [déployer un bot IA multiplateforme en 5 minutes](https://blog.langbot.app/en/blog/deploy-ai-bot-in-5-minutes/), [connecter DeepSeek à WeChat, Discord et Telegram](https://blog.langbot.app/en/blog/connect-deepseek-to-wechat/), [exécuter un Dify Agent dans Discord, Telegram et Slack](https://blog.langbot.app/en/blog/dify-agent-discord-telegram-slack/) et [créer un chatbot avec n8n](https://blog.langbot.app/en/blog/n8n-multi-platform-ai-chatbot/).
+
 ---

 ## Démarrage Rapide
--- a/README_JP.md
+++ b/README_JP.md
@@ -46,6 +46,8 @@ LangBot は、AI搭載のインスタントメッセージングボットを構

 [→ すべての機能について詳しく見る](https://link.langbot.app/ja/docs/features)

+📍 実践ガイド: [5分でマルチプラットフォームAIボットをデプロイ](https://blog.langbot.app/en/blog/deploy-ai-bot-in-5-minutes/)、[DeepSeekをWeChat・Discord・Telegramに接続](https://blog.langbot.app/en/blog/connect-deepseek-to-wechat/)、[Dify AgentをDiscord・Telegram・Slackで動かす](https://blog.langbot.app/en/blog/dify-agent-discord-telegram-slack/)、[n8n連携チャットボットを構築](https://blog.langbot.app/en/blog/n8n-multi-platform-ai-chatbot/)。
+
 ---

 ## クイックスタート
--- a/README_KO.md
+++ b/README_KO.md
@@ -46,6 +46,8 @@ LangBot은 AI 기반 인스턴트 메시징 봇을 구축하기 위한 **오픈

 [→ 모든 기능 자세히 보기](https://link.langbot.app/en/docs/features)

+📍 실전 가이드: [5분 만에 멀티 플랫폼 AI 봇 배포하기](https://blog.langbot.app/en/blog/deploy-ai-bot-in-5-minutes/), [DeepSeek를 WeChat, Discord, Telegram에 연결하기](https://blog.langbot.app/en/blog/connect-deepseek-to-wechat/), [Dify Agent를 Discord, Telegram, Slack에서 실행하기](https://blog.langbot.app/en/blog/dify-agent-discord-telegram-slack/), [n8n 기반 챗봇 만들기](https://blog.langbot.app/en/blog/n8n-multi-platform-ai-chatbot/).
+
 ---

 ## 빠른 시작
--- a/README_RU.md
+++ b/README_RU.md
@@ -46,6 +46,8 @@ LangBot — это **платформа с открытым исходным к

 [→ Подробнее обо всех возможностях](https://link.langbot.app/en/docs/features)

+📍 Практические руководства: [развернуть мультиплатформенного ИИ-бота за 5 минут](https://blog.langbot.app/en/blog/deploy-ai-bot-in-5-minutes/), [подключить DeepSeek к WeChat, Discord и Telegram](https://blog.langbot.app/en/blog/connect-deepseek-to-wechat/), [запустить Dify Agent в Discord, Telegram и Slack](https://blog.langbot.app/en/blog/dify-agent-discord-telegram-slack/) и [создать чат-бота на n8n](https://blog.langbot.app/en/blog/n8n-multi-platform-ai-chatbot/).
+
 ---

 ## Быстрый старт
--- a/README_TW.md
+++ b/README_TW.md
@@ -48,6 +48,8 @@ LangBot 是一個**開源的生產級平台**，用於建構 AI 驅動的即時

 [→ 了解更多功能特性](https://link.langbot.app/zh/docs/features)

+📍 實踐指南：[5 分鐘部署多平台 AI 機器人](https://blog.langbot.app/zh/blog/deploy-ai-bot-in-5-minutes/)、[將 DeepSeek 接入微信、企業微信與 Discord](https://blog.langbot.app/zh/blog/connect-deepseek-to-wechat/)、[讓 Dify Agent 跑在 Discord、Telegram 和 Slack 上](https://blog.langbot.app/zh/blog/dify-agent-discord-telegram-slack/)，以及[用 n8n 建構多平台 AI 聊天機器人](https://blog.langbot.app/zh/blog/n8n-multi-platform-ai-chatbot/)。
+
 ---

 ## 快速開始
--- a/README_VI.md
+++ b/README_VI.md
@@ -46,6 +46,8 @@ LangBot là một **nền tảng mã nguồn mở, cấp sản xuất** để x

 [→ Tìm hiểu thêm về tất cả tính năng](https://link.langbot.app/en/docs/features)

+📍 Hướng dẫn thực hành: [triển khai bot AI đa nền tảng trong 5 phút](https://blog.langbot.app/en/blog/deploy-ai-bot-in-5-minutes/), [kết nối DeepSeek với WeChat, Discord và Telegram](https://blog.langbot.app/en/blog/connect-deepseek-to-wechat/), [chạy Dify Agent trên Discord, Telegram và Slack](https://blog.langbot.app/en/blog/dify-agent-discord-telegram-slack/) và [xây dựng chatbot với n8n](https://blog.langbot.app/en/blog/n8n-multi-platform-ai-chatbot/).
+
 ---

 ## Bắt đầu nhanh
--- a/docker/docker-compose.yaml
+++ b/docker/docker-compose.yaml
@@ -18,6 +18,40 @@ services:
    networks:
      - langbot_network

+  # The Box sandbox runtime is optional. It is only started when you run
+  # ``docker compose --profile box up`` (or ``docker compose --profile all
+  # up``). With Box off, LangBot keeps the dashboard / skills list visible
+  # (read-only) but disables sandbox tools, skill add/edit and stdio MCP —
+  # set ``box.enabled: false`` in ``data/config.yaml`` (or
+  # ``BOX__ENABLED=false`` in the langbot service env below) to match.
+  langbot_box:
+    image: rockchin/langbot:latest
+    container_name: langbot_box
+    profiles: ["box", "all"]
+    volumes:
+      # Keep the source and target path identical because langbot_box uses the
+      # host Docker socket to create sandbox containers. Override
+      # LANGBOT_BOX_ROOT with an absolute path if you do not want the default.
+      - ${LANGBOT_BOX_ROOT:-${PWD}/data/box}:${LANGBOT_BOX_ROOT:-${PWD}/data/box}
+      # Mount container runtime socket for Box sandbox backend.
+      # Docker is mounted by default; for Podman, comment out the Docker line and uncomment the Podman one:
+      # - /var/run/podman/podman.sock:/var/run/podman/podman.sock   # Podman
+      - /var/run/docker.sock:/var/run/docker.sock                   # Docker
+    restart: on-failure
+    environment:
+      - TZ=Asia/Shanghai
+      # The Box runtime does NOT read box.local.* from config.yaml or env; it
+      # receives its configuration from LangBot via the INIT RPC action.
+      # Do not add LANGBOT_BOX_* / BOX__* here — they would be silently ignored.
+    # Launched through the same CLI entry point as the plugin runtime
+    # (`langbot_plugin.cli.__init__ <subcommand>`). WebSocket is the default
+    # control transport — mirrors `rt`, which also runs with no flag. Pass
+    # `-s` / `--stdio-control` only for the stdio mode LangBot uses outside
+    # containers.
+    command: ["uv", "run", "--no-sync", "-m", "langbot_plugin.cli.__init__", "box"]
+    networks:
+      - langbot_network
+
  langbot:
    image: rockchin/langbot:latest
    container_name: langbot
@@ -26,6 +60,13 @@ services:
    restart: on-failure
    environment:
      - TZ=Asia/Shanghai
+      # Unified env-override convention: SECTION__SUBSECTION__KEY overrides the
+      # matching config.yaml field (see LoadConfigStage). These map onto
+      # box.local.* and are forwarded to the Box runtime via INIT RPC.
+      - BOX__LOCAL__HOST_ROOT=${LANGBOT_BOX_ROOT:-${PWD}/data/box}
+      - BOX__LOCAL__DEFAULT_WORKSPACE=default
+      - BOX__LOCAL__SKILLS_ROOT=skills
+      - BOX__LOCAL__ALLOWED_MOUNT_ROOTS=${LANGBOT_BOX_ROOT:-${PWD}/data/box}
    ports:
      - 5300:5300  # For web ui and webhook callback
      - 2280-2285:2280-2285  # For platform reverse connection
@@ -34,4 +75,4 @@ services:

 networks:
  langbot_network:
-    driver: bridge
+    driver: bridge
--- a/docs/review/box-architecture.md
+++ b/docs/review/box-architecture.md
@@ -0,0 +1,594 @@
+# Box 系统架构深度分析
+
+> 更新日期: 2026-05-19
+> 分支: `feat/sandbox` (LangBot + langbot-plugin-sdk)
+> 相关文档: [问题清单](./box-issues.md) | [Session 作用域](./box-session-scope.md) | [Runtime 对比](./box-vs-plugin-runtime.md) | [测试覆盖](./box-test-coverage.md) | [toB 分析](./box-tob-analysis.md)
+
+---
+
+## 1. 全局架构
+
+```
+┌──────────────────────────────────────────────────────────────────┐
+│                       LangBot 主进程                              │
+│                                                                   │
+│  LocalAgentRunner ──> ToolManager ──> NativeToolLoader            │
+│       │                    │              │                       │
+│       │                    │      exec / read / write / edit      │
+│       │                    │              glob / grep             │
+│       │                    │                                      │
+│       │                    ├──> MCPLoader ──> BoxStdioSession     │
+│       │                    │       (shared 容器, 多 process)       │
+│       │                    │                                      │
+│       │                    ├──> SkillToolLoader (activate 工具)    │
+│       │                    │                                      │
+│       │                    ├──> SkillAuthoringToolLoader          │
+│       │                    │                                      │
+│       │                    └──> PluginToolLoader                  │
+│       │                                                           │
+│  BoxService (门面)                                                 │
+│    ├─ Profile 管理 (locked 字段)                                   │
+│    ├─ Host mount 校验 (allowed_mount_roots)                        │
+│    ├─ Workspace quota 检查                                         │
+│    ├─ 输出截断 (head+tail)                                         │
+│    ├─ Session ID 模板解析 (resolve_box_session_id)                 │
+│    ├─ 技能挂载组装 (build_skill_extra_mounts)                      │
+│    ├─ 重连循环 (_reconnect_loop, 指数退避)                          │
+│    └─ BoxRuntimeConnector                                          │
+│         ├─ 心跳 loop (20s ping)                                    │
+│         └─ ActionRPCBoxClient                                      │
+│              │  Action RPC (stdio 或 WebSocket)                    │
+│                                                                    │
+│  SkillManager (skill_mgr)                                          │
+│    └─ 从 Box runtime 拉取 skills, 不可用时回落 data/skills          │
+└──────────────────────────────────────────────────────────────────┘
+               │
+               ▼
+┌──────────────────────────────────────────────────────────────────┐
+│              Box Runtime 进程 (SDK 侧)                            │
+│                                                                   │
+│  BoxServerHandler (Action RPC 处理, INIT 配置注入)                  │
+│       │                                                           │
+│  BoxRuntime (session 管理 / 进程生命周期 / TTL reaper)              │
+│       │       └─ session.managed_processes: dict[pid, _ManagedProcess]
+│       │                                                           │
+│  Backend (启动时根据 box.backend 配置选择):                          │
+│    DockerBackend ──┐                                              │
+│    PodmanBackend ──┤── CLISandboxBackend                          │
+│    NsjailBackend ──┘  (本地 CLI 或 fallback 到容器内 CLI)            │
+│    E2BBackend         (云沙箱, 需要 E2B_API_KEY)                    │
+│                                                                   │
+│  BoxSkillStore                                                    │
+│    ├─ list / get / create / update / delete                       │
+│    ├─ scan_skill_directory / read_skill_file / write_skill_file   │
+│    └─ preview_skill_zip / install_skill_zip (zip 或 GitHub)        │
+│                                                                   │
+│  aiohttp 单端口服务 (默认 :5410):                                    │
+│    /rpc/ws                                       — Action RPC      │
+│    /v1/sessions/{id}/managed-process/ws          — 默认 process     │
+│    /v1/sessions/{id}/managed-process/{pid}/ws    — 指定 process     │
+└──────────────────────────────────────────────────────────────────┘
+               │
+               ▼
+┌──────────────────────────────────────────────────────────────────┐
+│  容器 / 沙箱 (Docker/Podman 容器, nsjail sandbox, 或 E2B 远程沙箱)  │
+│  - 隔离文件系统 / 网络 / PID 命名空间                                │
+│  - 资源限制 (CPU, 内存, PID 数, 可选 workspace 配额)                 │
+│  - 主挂载 (host_path → mount_path) + 任意条 extra_mounts             │
+│      └─ Skills 通过 extra_mounts 挂在 /workspace/.skills/<name>     │
+│  - exec: 用户命令在此执行                                            │
+│  - managed process: 多个长驻进程并存 (MCP Server / 自定义服务)        │
+└──────────────────────────────────────────────────────────────────┘
+```
+
+**核心设计原则**:
+- Box Runtime 作为独立进程运行，通过 Action RPC 与 LangBot 主进程通信，两者复用 SDK 的 IO 层（Handler → Connection → Controller）
+- 一个 session_id 对应一个容器/沙箱实例。同一 session 内可并存多条 mount 与多个 managed process
+- Skill / 默认 exec / MCP Server 共享同一个 session 容器（详见 [box-session-scope.md](./box-session-scope.md)）
+
+---
+
+## 2. LangBot 侧模块
+
+### 2.1 BoxService (`pkg/box/service.py`, 722 行)
+
+应用层门面，协调 Profile、安全校验、配额、连接、Skill 挂载与 Session 模板：
+
+主要公开方法（按定义顺序）：
+
+```
+BoxService
+  ├─ initialize()                              连接 Box Runtime + 默认 workspace 准备
+  ├─ _on_runtime_disconnect(connector)         触发重连
+  ├─ _reconnect_loop(connector)                指数退避重连
+  ├─ available (property)                      连接状态
+  │
+  ├─ resolve_box_session_id(query)             从 pipeline 模板解析 session_id
+  ├─ build_skill_extra_mounts(query)           组装 pipeline-bound skill 的挂载列表
+  │
+  ├─ execute_tool(parameters, query)           Agent 调用 exec 时的入口
+  │    ├─ _apply_profile / build_spec
+  │    ├─ _validate_host_mount
+  │    ├─ _enforce_workspace_quota (phase=pre)
+  │    ├─ client.execute(spec)
+  │    ├─ _enforce_workspace_quota (phase=post)
+  │    └─ _truncate (stdout/stderr)
+  │
+  ├─ execute_spec_payload(spec_payload, ...)   内部入口（其他 loader 调用）
+  ├─ create_session(spec_payload, ...)         显式创建 session
+  ├─ start_managed_process(session_id, ...)    启动 managed process
+  ├─ get_managed_process(session_id, pid)      查询进程状态（pid 默认 'default'）
+  ├─ stop_managed_process(session_id, pid)     单独停止某个 managed process
+  ├─ get_managed_process_websocket_url(...)    返回 WS attach URL
+  │
+  ├─ list_skills() / get_skill(name)           Skill 元数据
+  ├─ create_skill / update_skill / delete_skill  Skill CRUD
+  ├─ scan_skill_directory(path)                扫描目录
+  ├─ list_skill_files / read_skill_file / write_skill_file
+  ├─ preview_skill_zip / install_skill_zip     zip / GitHub 安装
+  │
+  ├─ shutdown() / dispose()                    清理：RPC SHUTDOWN + 进程终止
+  ├─ get_status() / get_sessions() / get_recent_errors()
+  └─ get_system_guidance()                     LLM 系统提示
+```
+
+**Profile 系统**: 4 个内置 Profile（`default` / `offline_readonly` / `network_basic` / `network_extended`），`locked` frozenset 字段不可被 LLM 覆盖。参数合并顺序：Profile defaults → LLM 请求参数 → locked 强制值。
+
+**输出截断**: 默认 4000 字符上限，保留前 60% + 后 40%，中间插入 `[...truncated...]`。
+
+**Skill 挂载合并**: `execute_tool()` 调用时，`build_skill_extra_mounts(query)` 会把当前 pipeline-bound 的所有 skill 的 `package_root` 作为 `extra_mounts` 加入 BoxSpec，挂在 `/workspace/.skills/<name>`。LLM 通过 `activate` 工具显式激活某个 skill 后，工具调用才允许引用这个 skill 的虚拟路径。
+
+### 2.2 BoxRuntimeConnector (`pkg/box/connector.py`, 357 行)
+
+管理与 Box Runtime 的通信连接：
+
+- **本地 stdio**: Unix/macOS 默认路径，fork `python -m langbot_plugin.cli.__init__ box -s --ws-control-port {port}` 子进程（与 plugin runtime 统一走 `lbp` CLI 入口）
+- **本地 subprocess + WS**: Windows 本地（asyncio ProactorEventLoop 不支持 stdio pipe）
+- **远程 WebSocket**: Docker 部署 / `box.runtime.endpoint` 显式配置时，连接 `ws://{host}:{port}/rpc/ws`
+- **同步等待**: `asyncio.Event` + `wait_for(timeout=30s)` 模式确认连接
+- **心跳**: `_heartbeat_loop()` 每 20s 调用 `ping()`，失败仅 DEBUG 日志（断开检测靠 connection close）
+- **重连**: `runtime_disconnect_callback` 由 BoxService 提供，触发 `_reconnect_loop`
+- **INIT 注入**: 连接建立后立即下发当前 `box.*` 配置子树（剔除 `runtime` 私有字段），Runtime 据此初始化 backend
+
+> **历史改进**: 本文档 2026-04-16 版本曾将「Box 无心跳 / 无重连」列为 P0 问题，现已修复（commit `2dfd9d5d`、`c6882cf`、`5029d9c` 等）。
+
+### 2.3 BoxWorkspaceSession 工具 (`pkg/box/workspace.py`, 413 行)
+
+此文件目前提供两类能力：
+
+1. **路径与命令重写工具函数** — `normalize_host_path` / `rewrite_mounted_path` / `unwrap_venv_path` / `rewrite_venv_command` / `infer_workspace_host_path`，被 MCP loader 与 Skill 路径解析共用。
+2. **`BoxWorkspaceSession`** — 围绕 BoxService 的轻量包装，专供 MCP-in-Box 场景使用（管理一个共享 session 的 session_id、构建挂载 payload、stage host 文件到共享 workspace）。
+
+**变化点**: 早期 Skill exec 会为每个 skill 创建独立 BoxWorkspaceSession（独占 session）；当前实现已转为 `extra_mounts` 模式，Skill 不再独占容器，只追加挂载。这部分 wrapping 逻辑已从 native loader 移除。
+
+### 2.4 policy.py (`pkg/box/policy.py`, 98 行) — 仍是死代码
+
+三层安全策略设计（`SandboxPolicy` / `ToolPolicy` / `ElevatedPolicy`），全项目无任何导入或调用。详见 [问题清单 #1](./box-issues.md)。
+
+### 2.5 SkillManager (`pkg/skill/manager.py`, 186 行)
+
+```
+SkillManager
+  ├─ initialize()                  调用 reload_skills()
+  ├─ reload_skills()               先从 Box runtime list_skills()，
+  │                                 不可用则回落 data/skills/ 扫描
+  ├─ refresh_skill_from_disk()     单 skill 重新加载
+  ├─ get_skill_by_name(name)
+  └─ get_managed_skills_root()     返回 Box 视角的 skills_root 路径
+```
+
+skill 元数据通过 `parse_frontmatter` 解析 `SKILL.md` 头部（`name` / `description` / `instructions`），不再顾虑整体扫描的代价（skill 数量典型 < 50 个）。
+
+### 2.6 Skill activation (`pkg/skill/activation.py`, 33 行) + Skill loader 辅助
+
+历史上 skill 通过 LLM 在文本中输出 `[ACTIVATE_SKILL:name]` 标记激活；当前已改为 **Tool Call 机制**：
+
+- `SkillToolLoader` (`pkg/provider/tools/loaders/skill.py`, 157 行) 暴露 `activate` 工具，参数为 skill 名
+- 工具实现调用 `register_activated_skill(query, skill_data)`，将激活态写入 `query.variables['_activated_skills']`
+- 这种 KV-cache-friendly 模式对齐 Claude Code 设计；详见 [box-session-scope.md §4.3](./box-session-scope.md) 的 Tool Call 描述
+
+`activation.py` 现仅保留对外辅助函数（pipeline 层调用 loader 的 `register_activated_skill`）。
+
+---
+
+## 3. SDK 侧模块
+
+### 3.1 BoxRuntime (`box/runtime.py`, 599 行)
+
+核心编排器，管理 session 生命周期与 backend 调度：
+
+```
+Session 生命周期:
+
+  Client EXEC / CREATE_SESSION
+       │
+       ▼
+  _get_or_create_session(spec)
+    ├─ _reap_expired_sessions_locked()   清理 TTL 过期 session
+    ├─ 已存在? → _assert_session_compatible() → 复用
+    ├─ Backend session 失踪? → 重建 (commit c6882cf)
+    └─ 新建? → backend.start_session(spec) → 创建容器
+       │       └─ 应用 spec.extra_mounts （多挂载）
+       ▼
+  execute(spec)
+    ├─ 获取 session lock (每 session 独立)
+    ├─ backend.exec(session, spec)       在容器中执行命令
+    ├─ 更新 last_used_at
+    └─ 超时? → 销毁 session
+       │
+       ▼
+  Session 保持存活直到:
+    ├─ TTL 过期 (默认 300s，下次操作时清理)
+    ├─ 执行超时 (自动销毁)
+    ├─ 客户端 DELETE_SESSION
+    └─ SHUTDOWN
+```
+
+**关键设计**:
+- 每 session 有独立 `asyncio.Lock`，同一 session 内的命令串行执行
+- 每 session 维护 `managed_processes: dict[process_id, _ManagedProcess]`，支持多个长驻进程并存（MCP / 自定义）
+- 全局 `_lock` 保护 `_sessions` dict 的读写
+- 兼容性检查：比较核心 spec 字段，`image` 字段对不支持自定义镜像的 backend（nsjail/E2B）会跳过
+
+**Backend 选择 (`_select_backend`)**: 优先级
+1. 显式 `box.backend` 配置（`docker` / `nsjail` / `e2b`）
+2. `local` (默认) → Docker / Podman / nsjail CLI 顺序探测
+3. `get_status` 调用时若当前 backend 不可用，会尝试重新选择 (commit `e5617c7`)
+
+### 3.2 Backend 系统
+
+#### CLISandboxBackend (`box/backend.py`, 411 行)
+
+Docker / Podman 公共基类：
+
+```
+start_session(spec):
+  1. validate_sandbox_security(spec)
+  2. docker/podman run -d --rm --name <name>
+     --network none (可选)
+     --cpus/--memory/--pids-limit
+     --read-only + --tmpfs /tmp
+     -v <host>:<mount>:<mode>          主挂载
+     -v <extra.host>:<extra.mount>:..  额外挂载 (extra_mounts)
+     <image> sh -lc 'while true; do sleep 3600; done'
+  3. 返回 BoxSessionInfo
+
+exec(session, spec):
+  docker/podman exec -e KEY=VAL <container>
+    sh -lc 'mkdir -p <workdir> && cd <workdir> && <cmd>'
+
+start_managed_process(session, spec):
+  docker/podman exec -i <container>
+    sh -lc 'mkdir -p <cwd> && cd <cwd> && exec <command> <args>'
+  返回 asyncio.subprocess.Process (stdin/stdout PIPE)
+```
+
+容器以 idle 进程启动，实际命令通过 `docker exec` 执行。`--rm` 确保容器退出时自动清理。
+
+**Windows 支持**: backend 内对 Windows 路径处理与 subprocess 调用做了适配（commit `120817a`）。
+
+**孤儿清理**: 启动时枚举 `langbot.box=true` 标签的容器，instance_id 不匹配的强制删除。
+
+#### NsjailBackend (`box/nsjail_backend.py`, 552 行)
+
+轻量级 Linux 沙箱（无容器引擎依赖）：
+
+- 使用 namespace 隔离（user/mount/pid/ipc/uts/cgroup/net）
+- 挂载宿主 `/usr`/`/lib`/`/bin`/`/sbin` 只读 + 选定 `/etc` 条目
+- 每 session 创建独立目录（workspace/tmp/home）
+- 资源限制: cgroup v2 优先，fallback 到 rlimit
+- **CLI 兼容**: 通过 `shutil.which(self._nsjail_bin)` 检测系统安装版 nsjail；不存在时再尝试容器内 nsjail（commit `686fcc0`、`feed530`）
+- **无自定义镜像**: 使用宿主 OS，`image` 字段固定为 `'host'`，兼容性检查跳过 image
+
+#### E2BBackend (`box/e2b_backend.py`, 429 行)
+
+云沙箱后端（commit `75b547f` 引入）：
+
+- 通过 `e2b` SDK 与 E2B 平台通信
+- 配置：`box.e2b.api_key` / `api_url` / `template`
+- 支持 `extra_mounts`（commit `0fea9b1` 同步上传文件）
+- 无本地容器引擎依赖，适合无 Docker 的部署或 SaaS 多租户场景
+- 不支持自定义 image 字段，由 template 控制
+
+### 3.3 Server (`box/server.py`, 508 行)
+
+单端口 aiohttp 服务（默认 5410），通过路径区分（commit `8c71ec5` 合并端口）：
+
+1. **Action RPC** (`/rpc/ws`): `BoxServerHandler` 处理所有 action，包括 `INIT` 配置注入、skill store 操作等
+2. **WS Relay** (`/v1/sessions/{id}/managed-process/ws` 与 `/v1/sessions/{id}/managed-process/{pid}/ws`): 双向桥接 WebSocket ↔ 指定 managed process stdin/stdout
+
+stdio 模式同样会在 5410 启动 aiohttp，专门承担 managed process attach；Action RPC 走 stdin/stdout。
+
+### 3.4 Client (`box/client.py`, 377 行)
+
+`ActionRPCBoxClient` 封装 `Handler.call_action()` 调用：
+
+- 25+ 方法对应 25+ 个 RPC action（exec / session / managed-process / skill / status / shutdown）
+- 错误还原: `_translate_action_error()` 通过字符串前缀匹配还原 SDK 侧异常类型
+- `execute()` timeout = 300s，其他默认 15s
+- `BoxRuntimeClient` 是 ABC，供后续可能的非 RPC 实现复用
+
+包级别 `__init__.py` 显式导出：`BoxRuntimeClient`、`ActionRPCBoxClient`（commit `df9c722`）。
+
+### 3.5 Actions (`box/actions.py`, 34 行)
+
+`LangBotToBoxAction` 枚举共定义 **25 个** action（下表仅列出 24 个，数量待与 `box/actions.py` 核对）：
+
+| 类别 | Actions |
+|------|---------|
+| 控制 | `INIT`、`HEALTH`、`STATUS`、`GET_BACKEND_INFO`、`SHUTDOWN` |
+| 执行 | `EXEC` |
+| Session | `CREATE_SESSION` / `GET_SESSION` / `GET_SESSIONS` / `DELETE_SESSION` |
+| Managed Process | `START_MANAGED_PROCESS` / `GET_MANAGED_PROCESS` / `STOP_MANAGED_PROCESS` |
+| Skill | `LIST_SKILLS` / `GET_SKILL` / `CREATE_SKILL` / `UPDATE_SKILL` / `DELETE_SKILL` / `SCAN_SKILL_DIRECTORY` / `LIST_SKILL_FILES` / `READ_SKILL_FILE` / `WRITE_SKILL_FILE` / `PREVIEW_SKILL_ZIP` / `INSTALL_SKILL_ZIP` |
+
+### 3.6 Models (`box/models.py`, 331 行)
+
+核心数据模型：
+
+| 模型 | 用途 |
+|------|------|
+| `BoxNetworkMode` | `OFF` / `ON` |
+| `BoxExecutionStatus` | `COMPLETED` / `TIMED_OUT` |
+| `BoxHostMountMode` | `NONE` / `READ_ONLY` / `READ_WRITE` |
+| `BoxManagedProcessStatus` | `RUNNING` / `EXITED` |
+| `BoxMountSpec` | 单条挂载（host_path/mount_path/mode）— **新增** |
+| `BoxSpec` | 执行请求；新增 `extra_mounts: list[BoxMountSpec]`、`persistent`、`workspace_quota_mb` |
+| `BoxProfile` | 4 个内置 Profile + `locked` frozenset |
+| `BoxSessionInfo` | Session 状态（含 backend_name/created_at/last_used_at） |
+| `BoxManagedProcessSpec` | 长驻进程参数（process_id/command/args/env/cwd） |
+| `BoxManagedProcessInfo` | 进程状态（status/exit_code/stderr_preview/attached） |
+| `BoxExecutionResult` | 执行结果（status/exit_code/stdout/stderr/duration_ms） |
+
+`BoxSpec` 校验器: `workdir` 默认继承 `mount_path`；`host_path` 支持 POSIX 和 Windows 路径；设置 `host_path` 时 `workdir` 必须在 `mount_path` 下。
+
+### 3.7 BoxSkillStore (`box/skill_store.py`, 647 行)
+
+新增模块（commit `4ab3502`），把 skill 持久化收归 Box runtime：
+
+```
+BoxSkillStore
+  ├─ list_skills() / get_skill(name)
+  ├─ create_skill(data) / update_skill(name, data) / delete_skill(name)
+  ├─ scan_skill_directory(path)            扫描目录返回候选 skill 包列表
+  ├─ list_skill_files(name, path)          浏览 skill 内文件树
+  ├─ read_skill_file(name, path) / write_skill_file(name, path, content)
+  ├─ preview_skill_zip(zip_bytes, ...)     不落盘预览 zip 内容
+  └─ install_skill_zip(zip_bytes, ...)     解压、校验、复制到 skills_root
+     └─ 支持 source_subdir / target_suffix（commit 1aa043f）
+```
+
+GitHub 安装路径：HTTP 层（`api/http/service/skill.py`）先 `git clone` 拉取，再走 `install_skill_zip` 或 directory 路径。Skill 文件存放于 `box.local.skills_root`（默认 `skills`，相对 `host_root`），容器内对应 `/workspace/.skills/`。
+
+### 3.8 Security (`box/security.py`, 52 行)
+
+`validate_sandbox_security()`: 黑名单校验 host_path，阻止挂载 `/etc`/`/proc`/`/sys`/`/dev`/`/root`/`/boot` 及 Docker/Podman socket。
+
+**已知缺陷**: 根路径 `/` 未拦截，用户 home 目录未拦截，是 denylist 而非 allowlist 策略。详见 [问题清单 #3](./box-issues.md)。
+
+### 3.9 Errors (`box/errors.py`, 33 行)
+
+| 异常类型 | 含义 |
+|----------|------|
+| `BoxError` | 基类 |
+| `BoxValidationError` | spec/参数校验失败 |
+| `BoxBackendUnavailableError` | 无可用 backend |
+| `BoxRuntimeUnavailableError` | Runtime 服务不可用 |
+| `BoxSessionConflictError` | session 已存在但 spec 不兼容 |
+| `BoxSessionNotFoundError` | session 不存在 |
+| `BoxManagedProcessConflictError` | session 已有同名 process |
+| `BoxManagedProcessNotFoundError` | process 不存在 |
+
+---
+
+## 4. 工具系统集成
+
+### 4.1 ToolManager 编排 (`toolmgr.py`)
+
+```
+ToolManager.initialize()
+  ├─ NativeToolLoader      (exec / read / write / edit / glob / grep)
+  ├─ PluginToolLoader      (插件工具)
+  ├─ MCPLoader             (MCP Server 工具)
+  ├─ SkillToolLoader       (activate 工具 — Tool Call 激活)
+  └─ SkillAuthoringToolLoader  (Skill CRUD)
+
+工具调用优先级: native → plugin → mcp → skill → skill_authoring
+```
+
+### 4.2 Native Tools (`native.py`, 846 行)
+
+| 工具 | 是否在 Box 中执行 | 是否访问宿主文件系统 |
+|------|:---:|:---:|
+| `exec`  | 是 | 否 |
+| `read`  | **否** | **是** — 直接 `open()` 宿主文件 |
+| `write` | **否** | **是** — 直接 `open()` 宿主文件 |
+| `edit`  | **否** | **是** — 直接 `open()` 宿主文件 |
+| `glob`  | **否** | **是** — 直接遍历宿主目录 |
+| `grep`  | **否** | **是** — 直接读宿主文件 |
+
+**沙箱边界不对称**: 这是刻意的设计权衡 — `read`/`write`/`edit`/`glob`/`grep` 绕过沙箱以获得性能（避免容器 I/O 开销与跨进程拷贝），但意味着 LLM 可以直接读写 `allowed_mount_roots` 下任何文件。Skill 路径经 `_resolve_host_path()` 重写，禁止穿越 `package_root`。
+
+**exec 的 Skill 分支**: 命令中引用 `/workspace/.skills/<name>` 的 skill 时：
+1. 验证 skill 已激活
+2. 单次 exec 只能引用一个 skill 包
+3. 若 skill 是 Python 项目（有 `requirements.txt` 或 `pyproject.toml`），命令会被 venv bootstrap 包裹（在 skill 挂载点内创建 `.venv`）
+4. 调用 `box_service.execute_tool()` → 走默认 session_id 与已组装好的 `extra_mounts`，**不再为每 skill 起独立 session**
+
+### 4.3 MCP-in-Box (`mcp_stdio.py`, 354 行)
+
+`BoxStdioSessionRuntime` 让 MCP stdio 服务器在 Box 容器中运行，**共享 session、多 process**模式（commit `529088e`）：
+
+```
+initialize()
+  1. 复用/创建共享 session (session_id = _build_box_session_id())
+     - persistent=True，长期保持
+  2. workspace.execute_raw(install_cmd) 安装依赖 (可选)
+  3. 将每个 MCP server 文件 stage 到 /workspace/.mcp/<process_id>/
+  4. workspace.start_managed_process(process_id=<server>)
+  5. websocket_client(ws_url) 通过 WS relay 连接
+  6. ClientSession.initialize() MCP 协议握手
+```
+
+配置 (`MCPServerBoxConfig`): `network='on'` (MCP 服务器通常需要网络)，`host_path_mode='ro'` (默认只读)，`startup_timeout_sec=120` (留时间给 pip install)。
+
+每条 MCP server 是同一 session 中的一个 managed process，独立的 `process_id`、独立 attach URL，互不阻塞。
+
+---
+
+## 5. 启动与生命周期
+
+### 5.1 启动顺序 (`build_app.py`)
+
+```
+BuildAppStage.run(ap)
+  ├─ ... (persistence, models, sessions) ...
+  │
+  ├─ BoxService(ap)
+  ├─ box_service.initialize()
+  │    └─ connector.initialize()
+  │         ├─ [stdio] fork box subprocess
+  │         ├─ [subprocess+WS] Windows 本地
+  │         └─ [remote WS] connect URL
+  │    └─ 启动心跳 _heartbeat_task
+  ├─ ap.box_service = box_service
+  │
+  ├─ ToolManager(ap)
+  ├─ tool_mgr.initialize()
+  │    ├─ NativeToolLoader   (检查 box_service.available)
+  │    ├─ PluginToolLoader
+  │    ├─ MCPLoader          (Box 可用时，stdio MCP 走沙箱)
+  │    ├─ SkillToolLoader    (activate 工具)
+  │    └─ SkillAuthoringToolLoader
+  ├─ ap.tool_mgr = tool_mgr
+  │
+  ├─ ... (platform, pipeline) ...
+  ├─ SkillManager.initialize()    (从 Box runtime 加载 skill 列表)
+  └─ ... (RAG, HTTP, plugins) ...
+```
+
+BoxService 在 ToolManager **之前**初始化。ToolManager 创建 loader 时检查 `box_service.available`。
+
+### 5.2 初始化失败处理
+
+```python
+try:
+    await self._runtime_connector.initialize()
+    self._available = True
+except Exception as e:
+    self._available = False
+    logger.warning(f"Box runtime unavailable: {e}")
+```
+
+**静默降级**: Box 初始化失败不会阻止应用启动，仅导致 6 个 native tool、所有 Skill 工具和 MCP-in-Box 工具不暴露给 LLM。与 Plugin 的行为不同（Plugin 失败会抛异常）。
+
+### 5.3 销毁流程
+
+```
+app.dispose()
+  └─ box_service.dispose()
+       ├─ connector.dispose()
+       │    ├─ cancel _heartbeat_task
+       │    ├─ cancel _handler_task / _ctrl_task
+       │    └─ terminate subprocess (SIGTERM)
+       └─ loop.create_task(client.shutdown())
+            └─ RPC SHUTDOWN → Box Runtime 清理所有容器
+```
+
+Box 额外做了 RPC SHUTDOWN 通知 Runtime 主动清理容器，比 Plugin 的直接杀进程更安全。
+
+---
+
+## 6. 配置
+
+### config.yaml (重构后)
+
+```yaml
+box:
+    enabled: true         # 整个 Box 子系统的总开关。设为 false 时：
+                          #  - 不连接远程 Box runtime，不 fork 本地 stdio 子进程
+                          #  - sandbox 工具 (exec/read/write/edit/glob/grep) 不暴露给 LLM
+                          #  - skill 添加/编辑 / GitHub 安装 / 文件写入全部拒绝
+                          #  - stdio 模式的 MCP server 启动时报错（http/sse 模式不受影响）
+                          #  - skill 列表/读取保持只读可用
+                          # BOX__ENABLED 环境变量可覆盖（统一约定）
+    backend: 'local'      # 'local' (探测) / 'docker' / 'nsjail' / 'e2b'
+                          # BOX_BACKEND 环境变量优先级更高
+    runtime:
+        endpoint: ''      # 外部 Runtime 的 WS 基地址 'ws://host:5410'
+                          # 留空 = 本地自管 Runtime
+    local:
+        profile: 'default'
+        image: ''                       # 覆盖 profile 默认 image
+        host_root: './data/box'         # 工作区挂载根，Docker 部署需绝对路径
+        default_workspace: ''           # 默认 '<host_root>/default'
+        skills_root: 'skills'           # Box 管理的 skill 包目录（相对 host_root）
+        allowed_mount_roots:            # 默认 ['<host_root>']
+            - './data/box'
+            - '/tmp'
+        workspace_quota_mb: null        # 配额覆盖，null = 走 profile
+    e2b:
+        api_key: ''                     # 也可走 E2B_API_KEY 环境变量
+        api_url: ''                     # 自托管 E2B 时填写
+        template: ''                    # 默认 template ID
+```
+
+> **重大变更**: 较 2026-04-16 文档，配置结构完全重组（commit `eefdea4`）。原字段 `box.profile` / `box.runtime_url` / `box.shared_host_root` / `box.allowed_host_mount_roots` 全部迁入 `box.local.*` 子表，新增 `box.backend` 与 `box.e2b.*` 配置组。
+
+### docker-compose.yaml
+
+`langbot_box` 服务受 compose profile 控制，默认 `docker compose up` **不会**启动它。需要 sandbox 时：
+
+```bash
+docker compose --profile box up        # 启动 langbot + langbot_box + plugin runtime
+docker compose --profile all up        # 同上
+docker compose up                       # 只起 langbot + plugin runtime (box 关闭)
+```
+
+若不启动 `langbot_box`，需要同步在 `data/config.yaml` 中设 `box.enabled: false`（或在 langbot 容器的环境变量中加 `BOX__ENABLED=false`），否则 LangBot 会一直尝试连接不存在的 Box runtime 并报错。
+
+```yaml
+# langbot_box 的关键 volume
+volumes:
+  - ${LANGBOT_BOX_ROOT}:${LANGBOT_BOX_ROOT}         # 工作区挂载(源/目标同路径)
+  - /var/run/docker.sock:/var/run/docker.sock       # Docker backend 复用宿主 docker
+```
+
+### 关闭/连接失败时的行为矩阵
+
+`box.enabled = false` 与"启用但连接失败"在用户可观察行为上**完全一致**——都通过 `BoxService.available = False` 表达，只是 `get_status` 多返回 `enabled` 字段供前端区分文案。
+
+| 消费方 | Box 可用 | Box 不可用(disabled 或 failed) |
+|---|---|---|
+| native exec/read/write/edit/glob/grep 工具 | 暴露给 LLM | **不暴露** |
+| `activate` / `register_skill` 工具 | 暴露给 LLM | **不暴露** |
+| stdio MCP server | 在 Box 内启动 | **`_init_stdio_python_server` 抛 RuntimeError** 拒绝;不退化到宿主 stdio |
+| http/sse MCP server | 正常 | 正常(不依赖 Box) |
+| Skill 列表/读取 (`list_skills`/`get_skill`/`read_skill_file`) | 走 Box runtime | 走 LangBot 本地 `data/skills/` 只读 fallback |
+| Skill 创建/编辑/安装/写文件 | 走 Box runtime | **HTTP 400** + 明确错误信息(`_require_box_for_write`) |
+| Pipeline AI 配置中 `box-session-id-template` | 正常生效 | **前端 banner** 提示字段无效 |
+| Pipeline 扩展页 `enable_all_skills` / 绑定 skill | 可编辑 | **前端禁用** + banner |
+| 仪表盘 Box 状态卡片 | 绿点 / "已连接" | 灰点 / "已禁用"(disabled) 或 红点 / "已断开"(failed) |
+
+> 后端拒写的边界条件：如果 `ap.box_service` **完全没装**（老式 dev mode，没经过 BuildAppStage），`_require_box_for_write` 视作 no-op，保留 `data/skills/` 本地路径——以兼容历史测试与最小化设置。生产环境总会装 `ap.box_service`，因此该 fallback 不会被触发。
+
+### Pipeline 配置 (templates/metadata/pipeline/ai.yaml)
+
+`local-agent.config.box-session-id-template` 控制 session 作用域，预设：
+
+- `{launcher_type}_{launcher_id}` — 每个会话 (推荐，默认)
+- `{launcher_type}_{launcher_id}_{sender_id}` — 群聊每个用户
+- `{launcher_type}_{launcher_id}_{conversation_id}` — 每个对话上下文
+- `{query_id}` — 每条消息（完全隔离）
+
+详见 [box-session-scope.md](./box-session-scope.md)。
+
+### REST API
+
+| 端点 | 方法 | 说明 | 前端 |
+|------|------|------|:---:|
+| `/api/v1/box/status` | GET | 可用性、Profile、后端信息 | ✅ 监控页 |
+| `/api/v1/box/sessions` | GET | 活跃 session 列表 | ❌ |
+| `/api/v1/box/errors` | GET | 最近 50 条错误 | ❌ |
+| `/api/v1/skills` 等 | GET/POST/PUT/DELETE | Skill CRUD、文件浏览、zip/GitHub 安装、preview | ✅ Skill 管理页 |
+
+前端 `web/src/app/home/monitoring/components/overview-cards/SystemStatusCards.tsx` 已接入 `/api/v1/box/status`，展示 backend 名称、profile 与活跃 session 数。Sessions 与 errors API 仍未接入。
--- a/docs/review/box-issues.md
+++ b/docs/review/box-issues.md
@@ -0,0 +1,157 @@
+# Box 系统架构问题清单
+
+> 更新日期: 2026-05-19
+> 分支: `feat/sandbox` (LangBot + langbot-plugin-sdk)
+
+---
+
+## 已解决（自上一轮 review）
+
+下列原 P0/P1 项在最新分支已被修复，仅作记录：
+
+| 原编号 | 问题 | 处理 commit / 说明 |
+|--------|------|---------------------|
+| #3 | Box 无重连机制 | `_make_connection_callback` 已接入 `runtime_disconnect_callback`；`BoxService._reconnect_loop()` 实现指数退避重连 (`2dfd9d5d`、`c6882cf`) |
+| #4 | Box 无心跳 | `BoxRuntimeConnector._heartbeat_loop()`，间隔 20s（沿用 Plugin 模式） |
+| #10 | Windows 兼容 | connector 增加 Windows 分支 (subprocess + WS)，backend 适配 Windows Docker (`120817a`、`fafb7a4`) |
+| #12 | nsjail image 字段冲突 | `_assert_session_compatible()` 在不支持自定义镜像的 backend 跳过 image 字段 |
+| #22 | 前端无 Box UI | 监控页 `SystemStatusCards.tsx` 已接入 `/api/v1/box/status`；Skill 管理页接入了全部 skill API（sessions/errors API 仍未接入） |
+
+---
+
+## P0 — 合并前建议修复
+
+### 1. policy.py 是死代码
+
+- **位置**: `pkg/box/policy.py` (98 行)
+- **现状**: `SandboxPolicy`、`ToolPolicy`、`ElevatedPolicy` 三个类已定义，但全项目无任何导入或调用
+- **影响**: 三层安全策略（沙箱模式 / 工具白名单 / 权限提升）完全未生效。当前实际策略仍是"Box 可用就暴露全部 6 个 native tool，不可用就全部隐藏"
+- **建议**: 要么删除死代码，要么接入 NativeToolLoader 的工具暴露 / exec 调用链。如果短期不会接入，至少在 `pkg/box/__init__.py` 显式标注其状态
+
+### 2. WebSocket relay 无认证
+
+- **位置**: SDK `box/server.py` — Action RPC 路径 `/rpc/ws` 与 managed-process relay `/v1/sessions/{id}/managed-process/{pid}/ws`
+- **现状**: 任何能访问 5410 端口的客户端都可以连接，attach 任意 session 的 managed process stdin/stdout，或直接发起 EXEC
+- **影响**: 容器化 / Docker compose 部署中，若 Box runtime 端口对外暴露，网络内的攻击者可直接控制沙箱
+- **建议**: 至少加 token 认证（INIT 时下发，WS 连接 query string 或 header 校验）；多 process 后 attach 面更大，更不能裸奔
+
+### 3. security.py 根路径未拦截
+
+- **位置**: SDK `box/security.py` `BLOCKED_HOST_PATHS_POSIX`
+- **现状**: 黑名单中没有 `/`，`host_path="/"` 可通过校验并挂载整个主机文件系统；用户 home 目录、`/var` 等也未拦截
+- **建议**: 将 `/` 加入黑名单，或改用白名单策略，并结合 LangBot 侧 `allowed_mount_roots` 做二次拦截
+
+### 4. INIT 与 backend 初始化的竞态
+
+- **位置**: SDK `box/runtime.py` `init()` 在握手后才下发实际配置；`backend` 在 INIT 之前可能已经按默认值实例化
+- **现状**: commit `5029d9c` 修复了 "init config before backend reuse" 的部分场景，但 backend 重新实例化时若有正在执行的 session，可能命中旧 backend
+- **建议**: 整理 init/handshake 顺序——要么 INIT 完成前不接受任何业务 action，要么允许 backend 配置变更时显式清理现有 session
+
+---
+
+## P1 — 合并后优先跟进
+
+### 5. Session 数量无上限
+
+- **位置**: SDK `box/runtime.py` `_get_or_create_session()`
+- **现状**: `_sessions` dict 无容量限制，恶意或异常调用可创建无限 session
+- **建议**: 加 `max_sessions` 配置项，达到上限时拒绝新建或按 LRU 清理
+
+### 6. Quota 检查存在 TOCTOU
+
+- **位置**: `pkg/box/service.py` `_enforce_workspace_quota()`
+- **现状**: 应用层先读磁盘大小再执行命令，两步之间有竞态窗口
+- **建议**: 短期用 Docker `--storage-opt size=` 做内核级限制；长期用 Redis 原子计数器做预留式配额
+
+### 7. 全局锁持有期间执行慢操作
+
+- **位置**: SDK `box/runtime.py` `_get_or_create_session()` — `self._lock` 下调用 `backend.start_session()` (即 `docker run` / `nsjail` 进程启动 / E2B `Sandbox.create`)
+- **影响**: `docker run` 可能耗时数秒（含镜像拉取）、E2B 冷启动通常 > 1s，期间阻塞所有并发请求
+- **建议**: 在 `_lock` 下仅做状态检查和 session 注册，容器创建在锁外执行
+
+### 8. Session 清理是机会性的
+
+- **位置**: SDK `box/runtime.py` `_reap_expired_sessions_locked()` — 仅在 `_get_or_create_session()` 时调用
+- **影响**: 如果长时间无新 session 请求，过期 session（含容器）不会被清理
+- **建议**: 加一个独立的 `asyncio.create_task` 定时清理（如每 60s 一次）
+
+### 9. server.py 直接访问 runtime 私有字段
+
+- **位置**: SDK `box/server.py` — managed-process WS handler 直接读 `runtime._sessions`
+- **影响**: 绕过锁和封装，在并发场景下可能读到不一致状态
+- **建议**: 在 BoxRuntime 上增加公共方法（如 `get_session_managed_process(session_id, process_id)`）
+
+### 10. workspace quota 检查阻塞事件循环
+
+- **位置**: `pkg/box/service.py` `_get_workspace_size_bytes()` — 使用同步 `os.scandir` 递归遍历
+- **影响**: 大工作区可能阻塞 asyncio event loop
+- **建议**: 用 `asyncio.to_thread()` 包装，或用 `aiofiles` 异步扫描
+
+### 11. extra_mounts 一旦容器创建即固定
+
+- **位置**: SDK `box/runtime.py` 的兼容性检查；`pkg/box/service.py:build_skill_extra_mounts()`
+- **现状**: Skill 挂载在容器创建时一次性写入；同一 session 后续 pipeline 切换 skill 列表时，新挂载不会生效（除非销毁重建）
+- **影响**: 用户长时间共享 session 的场景下，新激活的 skill 可能挂不上
+- **建议**: 要么在创建时把 pipeline 绑定的所有 skill 都挂上（实际现状）+ 写入文档；要么变更挂载时强制销毁 session 重建（已被 commit `5029d9c` 部分覆盖，需校验）
+
+---
+
+## P2 — 后续迭代
+
+### 12. 重复的 `_is_path_under` 函数
+
+- **位置**: `pkg/box/service.py` 行 30 附近 — 同名函数定义两次
+- **建议**: 删除重复定义
+
+### 13. localagent.py 工具循环无迭代上限
+
+- **位置**: `pkg/provider/runners/localagent.py` `while pending_tool_calls` 循环
+- **影响**: 恶意或混乱的 LLM 可无限产生 tool call，消耗资源
+- **建议**: 加 `max_tool_iterations` 配置项（如默认 50 次）
+
+### 14. localagent.py 中的死代码
+
+- **位置**: `pkg/provider/runners/localagent.py:29-35` 附近 — 旧命名 `SANDBOX_EXEC_TOOL_NAME` 和 `SANDBOX_EXEC_SYSTEM_GUIDANCE`
+- **现状**: 旧命名方案的遗留常量，从未被引用（实际使用 `EXEC_TOOL_NAME` from native.py）
+- **建议**: 删除
+
+### 15. @loader_class 装饰器未使用
+
+- **位置**: `pkg/provider/tools/loader.py` — `preregistered_loaders` 列表和 `@loader_class` 装饰器
+- **现状**: 各 loader 的 `@loader_class` 多数被注释掉，ToolManager 手动实例化所有 loader
+- **建议**: 要么启用装饰器自动注册，要么删除未用的机制
+
+### 16. 工具名冲突风险
+
+- **位置**: `pkg/provider/tools/toolmgr.py` `execute_func_call()` — 按优先级 native → plugin → mcp → skill → skill_authoring 分发
+- **影响**: 如果 plugin 或 MCP 有名为 `exec`/`read`/`write`/`edit`/`glob`/`grep`/`activate` 的工具，会被前序 loader 静默遮蔽
+- **建议**: 加命名空间前缀或冲突检测告警
+
+### 17. client.py 反序列化不一致
+
+- **位置**: SDK `box/client.py` — `execute()` 与其他方法对返回值的反序列化方式不统一（部分手动构造 model，部分用 `model_validate`）
+- **建议**: 统一使用 `model_validate`
+
+### 18. 错误类型还原基于字符串前缀匹配
+
+- **位置**: SDK `box/client.py` `_translate_action_error()`
+- **影响**: 如果 server 端错误消息格式变化，client 会回退到通用 `BoxError`，丢失类型信息
+- **建议**: 在 ActionResponse 中增加结构化的错误类型字段（如 `error_code` 枚举）
+
+### 19. 前端只用到了 status
+
+- **位置**: `web/src/app/home/monitoring/...` 已接入 `/api/v1/box/status`
+- **现状**: `/api/v1/box/sessions` 与 `/api/v1/box/errors` 后端可用、前端未消费
+- **建议**: 在监控页或独立 Box 详情页展示活跃 session 列表与最近错误，提升运维体感
+
+### 20. skill_store 测试覆盖偏薄
+
+- **位置**: SDK `tests/box/test_skill_store.py` 仅 88 行
+- **现状**: 相对 `skill_store.py` 的 647 行实现，单测覆盖度不够；GitHub 安装路径、`source_subdir` / `target_suffix` 组合、损坏 zip 的错误处理等场景未覆盖
+- **建议**: 至少补到核心 path 覆盖（preview/install/list/file CRUD 各 2~3 个 case）
+
+### 21. 集成测试未进 CI
+
+- **位置**: LangBot `tests/integration_tests/box/test_box_integration.py`、`test_box_mcp_integration.py`，SDK 端的 E2B 真机测试
+- **现状**: 容器实际执行、E2B 真实 sandbox、Managed process WS attach 均仅本地能跑
+- **建议**: 加一个可选的 Docker-in-Docker CI stage，或在合并前手动跑 checklist
--- a/docs/review/box-session-scope.md
+++ b/docs/review/box-session-scope.md
@@ -0,0 +1,401 @@
+# Box Session Scope Design
+
+> Date: 2026-04-18 (last reviewed 2026-05-19)
+> Branch: `feat/sandbox` (LangBot + langbot-plugin-sdk)
+> Related: [Box Architecture](./box-architecture.md) | [Box vs Plugin Runtime](./box-vs-plugin-runtime.md)
+
+---
+
+## 0. Implementation Status (2026-05-19)
+
+This document was authored as a design proposal. The current `feat/sandbox` branch
+has shipped the design largely as written:
+
+| Item | Status | Notes |
+|------|--------|-------|
+| `BoxMountSpec` + `BoxSpec.extra_mounts` | ✅ Shipped | SDK `box/models.py` |
+| Docker / nsjail / E2B backends apply extra mounts | ✅ Shipped | Last gap closed by SDK commit `0fea9b1` (E2B) |
+| `box-session-id-template` in `local-agent` pipeline config | ✅ Shipped | `templates/metadata/pipeline/ai.yaml`, default `{launcher_type}_{launcher_id}` |
+| `BoxService.resolve_box_session_id(query)` | ✅ Shipped | `pkg/box/service.py:166` |
+| `BoxService.build_skill_extra_mounts(query)` | ✅ Shipped | `pkg/box/service.py:189` |
+| Skill exec uses unified container + extra mounts | ✅ Shipped | `pkg/provider/tools/loaders/native.py` skill branch |
+| MCP-in-Box uses shared persistent session, multi-process | ✅ Shipped (earlier than originally scoped) | SDK commit `529088e`, LangBot `mcp_stdio.py:_build_box_session_id` |
+| `BoxManagedProcessSpec.process_id` + multi-process per session | ✅ Shipped | `BoxRuntime` keeps `managed_processes: dict[pid, _ManagedProcess]` |
+| Per-tenant / quota integration with templates | ❌ Not started | See [box-tob-analysis.md](./box-tob-analysis.md) |
+
+The "Phase 2 deferred" note in §10 is **out of date** — MCP unification shipped as
+part of the same work. A pipeline-scoped (not user-scoped) MCP container is the realized
+behavior: each pipeline's MCP servers share one `mcp-<pipeline>` session, and
+user exec sessions use the template-derived id.
+
+The remaining open work is multi-tenant overlays (tenant_id in session_id,
+quota counters keyed by tenant), tracked in the toB analysis doc rather than here.
+
+---
+
+## 1. Problems
+
+### 1.1 Default exec: per-message containers
+
+Currently, `BoxService.execute_tool()` sets `session_id = str(query.query_id)` — an
+auto-incrementing integer per incoming message. Every user message creates a new sandbox
+container. Dependencies installed and in-container state are lost between messages.
+
+### 1.2 Three isolated container pools
+
+Default exec, skills, and MCP servers each manage their own containers with
+independent session IDs:
+
+| Path         | Session ID                                    | Container   |
+|--------------|-----------------------------------------------|-------------|
+| Default exec | `str(query_id)` (per message)                 | Ephemeral   |
+| Skill exec   | `skill-{launcher}_{id}-{skill_name}`          | Per skill   |
+| MCP stdio    | `mcp-{server_uuid}`                           | Per server  |
+
+This means a single logical user interaction can spawn 3+ containers that cannot
+share state, see each other's files, or reuse installed dependencies.
+
+### 1.3 Single bind mount limitation
+
+`BoxSpec` currently supports only **one** `host_path` → `mount_path` bind mount.
+This prevents mounting both a default workspace and skill directories into the
+same container.
+
+---
+
+## 2. Concept Model
+
+```
+Platform Message
+  → Query (query_id: int, auto-increment, per message)
+    → Session (launcher_type + launcher_id, per chat window)
+      → Conversation (uuid, per dialogue context within a Session)
+```
+
+| Concept       | Key                                 | Example                    | Scope                        |
+|---------------|-------------------------------------|----------------------------|------------------------------|
+| Query         | `query_id`                          | `42`                       | Single message               |
+| Session       | `launcher_type` + `launcher_id`     | `group_123456`             | Chat window (group or PM)    |
+| Conversation  | `conversation_id` (UUID)            | `a1b2c3d4-...`             | Dialogue context within a Session |
+| Sender        | `sender_id`                         | `789`                      | Individual user              |
+
+Note: in a **group chat**, all users share the same Session (keyed by `group_id`). The
+individual sender is tracked as `sender_id` but does not affect Session/Conversation routing.
+
+---
+
+## 3. Target Scenarios
+
+| #  | Scenario                       | Box Granularity                          | Desired `session_id`                                   |
+|----|--------------------------------|------------------------------------------|---------------------------------------------------------|
+| 1  | Personal assistant             | 1 Box per user, long-lived               | `{launcher_type}_{launcher_id}`                          |
+| 2  | Customer service               | 1 Box per customer, cross-pipeline       | `{launcher_type}_{launcher_id}`                          |
+| 3  | Internal employee tool         | 1 Box per employee                       | `{launcher_type}_{launcher_id}`                          |
+| 4  | Group chat shared assistant    | 1 Box per group                          | `{launcher_type}_{launcher_id}`                          |
+| 5  | Group chat isolated per user   | 1 Box per user within a group            | `{launcher_type}_{launcher_id}_{sender_id}`              |
+| 6  | Teaching (cross-channel)       | 1 Box per student across groups/PMs      | `{sender_id}`                                           |
+| 7  | One-off execution              | 1 Box per message (current behavior)     | `{query_id}`                                            |
+| 8  | Multi-project development      | 1 Box per conversation context           | `{launcher_type}_{launcher_id}_{conversation_id}`        |
+
+No single fixed granularity covers all scenarios. A template-based approach is needed.
+
+---
+
+## 4. Design Overview
+
+Two key changes:
+
+1. **Unified container**: exec, skills, and MCP all share the same container per
+   session scope. No more separate container pools.
+2. **Configurable session scope**: `session_id` is generated from a template with
+   pipeline variables, configurable per pipeline.
+
+### 4.1 Unified Container with Multiple Mounts
+
+A single container per session scope is created on first use. It has:
+
+- **Primary mount**: default workspace at `/workspace` (from `default_host_workspace`)
+- **Skill mounts**: each pipeline-bound skill's `package_root` mounted at
+  `/workspace/.skills/{skill_name}/`
+- **MCP servers**: run as managed processes inside the same container
+
+```
+Container (session_id = "group_123456")
+  /workspace/                          ← default workspace (bind mount, rw)
+  /workspace/.skills/web-search/       ← skill package (bind mount, rw)
+  /workspace/.skills/data-analysis/    ← skill package (bind mount, rw)
+  [managed process: mcp-server-a]      ← MCP server running inside
+  [managed process: mcp-server-b]      ← MCP server running inside
+```
+
+This requires extending `BoxSpec` to support multiple mounts (see §5).
+
+### 4.2 Session ID Template
+
+A new field `box-session-id-template` in the `local-agent` pipeline runner config
+controls the session scope:
+
+```yaml
+# templates/metadata/pipeline/ai.yaml (under local-agent.config)
+- name: box-session-id-template
+  label:
+    en_US: Sandbox Scope
+    zh_Hans: 沙箱作用域
+  description:
+    en_US: >-
+      Determines how sandbox environments are shared. Use variables to
+      control isolation granularity.
+    zh_Hans: >-
+      决定沙箱环境的共享方式。使用变量控制隔离粒度。
+  type: select
+  required: false
+  default: "{launcher_type}_{launcher_id}"
+  options:
+    - value: "{launcher_type}_{launcher_id}"
+      label:
+        en_US: Per chat (Recommended)
+        zh_Hans: 每个会话（推荐）
+    - value: "{launcher_type}_{launcher_id}_{sender_id}"
+      label:
+        en_US: Per user in chat
+        zh_Hans: 会话中每个用户
+    - value: "{launcher_type}_{launcher_id}_{conversation_id}"
+      label:
+        en_US: Per conversation context
+        zh_Hans: 每个对话上下文
+    - value: "{query_id}"
+      label:
+        en_US: Per message (isolated)
+        zh_Hans: 每条消息（完全隔离）
+```
+
+Available template variables (populated by PreProcessor in `query.variables`):
+
+| Variable            | Source                          | Example              |
+|---------------------|---------------------------------|----------------------|
+| `{launcher_type}`   | `query.session.launcher_type`   | `person` / `group`   |
+| `{launcher_id}`     | `query.session.launcher_id`     | `123456`             |
+| `{sender_id}`       | `query.sender_id`               | `789`                |
+| `{conversation_id}` | `conversation.uuid`             | `a1b2c3d4-...`       |
+| `{query_id}`        | `query.query_id`                | `42`                 |
+
+Default `{launcher_type}_{launcher_id}` covers scenarios 1–4 out of the box.
+
+---
+
+## 5. SDK Changes: Multi-Mount BoxSpec
+
+### 5.1 Model Extension
+
+```python
+# box/models.py
+
+class BoxMountSpec(pydantic.BaseModel):
+    """A single bind mount specification."""
+    host_path: str
+    mount_path: str
+    mode: BoxHostMountMode = BoxHostMountMode.READ_WRITE
+
+class BoxSpec(pydantic.BaseModel):
+    # ... existing fields ...
+    host_path: str | None = None              # Primary mount (backward compat)
+    host_path_mode: BoxHostMountMode = BoxHostMountMode.READ_WRITE
+    mount_path: str = DEFAULT_BOX_MOUNT_PATH
+    extra_mounts: list[BoxMountSpec] = []     # NEW: additional mounts
+```
+
+`extra_mounts` is additive — the existing `host_path` / `mount_path` pair remains
+the primary mount for backward compatibility.
+
+### 5.2 Backend: Apply Extra Mounts
+
+```python
+# box/backend.py — CLISandboxBackend.start_session()
+
+# Primary mount (unchanged)
+if spec.host_path is not None and spec.host_path_mode != BoxHostMountMode.NONE:
+    args.extend(['-v', f'{spec.host_path}:{spec.mount_path}:{spec.host_path_mode.value}'])
+
+# Extra mounts (NEW)
+for mount in spec.extra_mounts:
+    if mount.mode != BoxHostMountMode.NONE:
+        args.extend(['-v', f'{mount.host_path}:{mount.mount_path}:{mount.mode.value}'])
+```
+
+Same pattern for nsjail backend.
+
+---
+
+## 6. LangBot Changes
+
+### 6.1 Session ID Resolution
+
+In `BoxService.execute_tool()`:
+
+```python
+# Before:
+spec_payload.setdefault('session_id', str(query.query_id))
+
+# After:
+template = (query.pipeline_config or {}).get('ai', {}) \
+    .get('local-agent', {}).get('box-session-id-template',
+         '{launcher_type}_{launcher_id}')
+variables = query.variables or {}
+session_id = template.format_map(collections.defaultdict(
+    lambda: 'unknown', variables
+))
+spec_payload.setdefault('session_id', session_id)
+```
+
+### 6.2 Skill Exec: Use Same Container
+
+Currently `native.py:_invoke_exec` creates a separate `BoxWorkspaceSession` per
+skill with `host_path=package_root`. Instead:
+
+1. Use the **same session_id** as default exec (from the template).
+2. Pass the skill's `package_root` as an **extra mount** at
+   `/workspace/.skills/{skill_name}/` instead of replacing `/workspace`.
+3. The container already has the default workspace at `/workspace`.
+
+```python
+# native.py — _invoke_exec, skill branch (REVISED)
+
+# Same session_id as default exec
+session_id = resolve_box_session_id(query)
+
+spec_payload = {
+    'cmd': rewritten_command,
+    'workdir': rewritten_workdir,
+    'session_id': session_id,
+    'extra_mounts': [{
+        'host_path': package_root,
+        'mount_path': f'/workspace/.skills/{selected_skill_name}',
+        'mode': 'rw',
+    }],
+}
+result = await self.ap.box_service.execute_spec_payload(spec_payload, query)
+```
+
+The virtual path `/workspace/.skills/{name}` no longer needs rewriting at the
+command level — it maps directly to the bind mount path inside the container.
+
+### 6.3 MCP: Use Same Container
+
+MCP servers should run inside the same container as exec and skills. Changes:
+
+1. `BoxStdioSessionRuntime` uses the pipeline's session_id template instead of
+   `mcp-{server_uuid}`.
+2. MCP server's working directory is a subdirectory (e.g. `/workspace/.mcp/{name}/`).
+3. MCP server's dependencies are mounted or installed into that subdirectory.
+4. The MCP server runs as a managed process inside the shared container.
+
+Since MCP servers start at LangBot boot (not per-query), the session must be
+created eagerly. The container will be kept alive by the managed process
+exemption in TTL reaping (`runtime.py:259`).
+
+**Note**: MCP sessions are pipeline-scoped (not per-launcher), so their session_id
+should be a **fixed identifier per pipeline** rather than the user-facing template.
+This means one shared MCP container per pipeline, with user exec sessions separate.
+
+Alternatively, in a future iteration, MCP managed processes could be launched
+lazily into the user's container on first MCP tool call. This is more complex
+but maximizes sharing. For V1, keeping MCP containers at pipeline scope is
+simpler and more predictable.
+
+---
+
+## 7. Mount Layout Summary
+
+### Default exec (no skills activated)
+
+```
+Container (session_id from template)
+  /workspace/          ← default_host_workspace (rw)
+```
+
+### Exec with activated skills
+
+```
+Container (same session_id)
+  /workspace/                          ← default_host_workspace (rw)
+  /workspace/.skills/web-search/       ← skill package_root (rw)
+  /workspace/.skills/data-analysis/    ← skill package_root (rw)
+```
+
+Extra mounts are **additive**, but they can only be applied when the
+container is created: Docker bind mounts are fixed at container creation
+time, so a skill that is first referenced by a later exec cannot be mounted
+into an already-running container.
+
+**Resolution**: When creating a container, inject `extra_mounts` for **all
+pipeline-bound skills** (from `extensions_preferences`), not just the
+currently activated one. This way any skill can be activated later without
+recreating the container.
+
+### MCP servers (V1: pipeline-scoped)
+
+```
+Container (session_id = "mcp-pipeline-{pipeline_uuid}")
+  /workspace/                    ← MCP shared workspace
+  /workspace/.mcp/server-a/      ← MCP server A files
+  /workspace/.mcp/server-b/      ← MCP server B files
+  [managed process: server-a]
+  [managed process: server-b]
+```
+
+---
+
+## 8. Data Migration
+
+Existing pipelines do not have `box-session-id-template`. The backend uses
+`.get(..., default)` so missing keys fall back to `{launcher_type}_{launcher_id}`.
+This changes behavior from per-message to per-launcher for existing pipelines.
+
+Recommendation: **accept the behavior change** — per-launcher is the more
+intuitive default, and the old per-message behavior was rarely desired.
+
+---
+
+## 9. Cloud Quota Implications
+
+| Scope                                         | Typical concurrent containers |
+|-----------------------------------------------|-------------------------------|
+| `{query_id}` (per message)                    | Many, short-lived             |
+| `{launcher_type}_{launcher_id}` (per chat)    | = active chat count           |
+| `{sender_id}` (per user)                      | = active user count           |
+| `{conversation_id}` (per conversation)        | Between per-chat and per-msg  |
+
+With the unified container model, each scope value maps to exactly **one**
+container (instead of potentially 3+ per-message). This significantly reduces
+resource usage.
+
+Quota enforcement point: `BoxRuntime._get_or_create_session()` in the SDK.
+
+---
+
+## 10. Implementation Phases
+
+### Phase 1: Session scope + skill unification (this PR)
+
+1. **SDK**: Extend `BoxSpec` with `extra_mounts: list[BoxMountSpec]`.
+2. **SDK**: Update Docker/nsjail backends to apply extra mounts.
+3. **LangBot**: Add `box-session-id-template` to `local-agent` YAML metadata
+   and default pipeline config JSON.
+4. **LangBot**: Update `BoxService.execute_tool()` to use template interpolation.
+5. **LangBot**: Update `native.py:_invoke_exec` skill branch to use same
+   session_id + extra mounts instead of separate `BoxWorkspaceSession`.
+6. **LangBot**: On container creation, inject extra mounts for all
+   pipeline-bound skills.
+7. **Frontend**: No code change — `DynamicFormComponent` renders `select` fields.
+8. **Tests**: Unit tests for template interpolation and multi-mount specs.
+
+### Phase 2: MCP unification (future)
+
+1. Refactor `BoxStdioSessionRuntime` to use pipeline-scoped shared container.
+2. MCP servers become managed processes in the shared container.
+3. Support multiple concurrent managed processes per container.
+
+MCP unification is deferred because it requires changes to the managed process
+model (currently 1 managed process per session) and has startup ordering
+concerns (MCP servers start at boot, before any user query determines
+a session_id).
--- a/docs/review/box-test-coverage.md
+++ b/docs/review/box-test-coverage.md
@@ -0,0 +1,121 @@
+# Box 系统测试覆盖分析
+
+> 更新日期: 2026-05-19
+> 分支: `feat/sandbox` (LangBot + langbot-plugin-sdk)
+
+---
+
+## 1. 测试文件清单
+
+### LangBot 仓库
+
+| 文件 | 行数 | CI 运行 | 覆盖范围 |
+|------|------|---------|---------|
+| `tests/unit_tests/box/test_box_connector.py` | 106 | 是 | Connector 传输决策、WS relay URL、dispose、心跳/重连 |
+| `tests/unit_tests/box/test_box_service.py` | 1224 | 是 | Service 核心逻辑（最全面） |
+| `tests/unit_tests/box/test_workspace.py` | 147 | 是 | WorkspaceSession 路径重写、payload 构建 |
+| `tests/unit_tests/provider/test_mcp_box_integration.py` | 707 | 是 | MCP Box 配置、路径重写、payload、shared-session/multi-process、runtime info |
+| `tests/unit_tests/provider/test_localagent_sandbox_exec.py` | 444 | 是 | LocalAgent exec 流程、流式、Skill 激活 (Tool Call) |
+| `tests/unit_tests/provider/test_tool_manager_native.py` | 249 | 是 | ToolManager 路由、native tool CRUD、路径穿越、6 工具暴露 |
+| `tests/unit_tests/provider/test_skill_tools.py` | 582 | 是 | Skill 管理、Tool Call 激活、路径、authoring CRUD |
+| `tests/unit_tests/test_skill_service.py` | 396 | 是 | HTTP service：skill CRUD、zip/GitHub install、文件浏览 |
+| `tests/unit_tests/test_paths.py` | 23 | 是 | paths 工具 |
+| `tests/unit_tests/test_preproc.py` | 134 | 是 | PreProcessor 注入 session 变量、bound skill 解析 |
+| `tests/unit_tests/pipeline/test_chat_handler_logging.py` | 78 | 是 | Chat handler 日志相关回归 |
+| `tests/integration_tests/box/test_box_integration.py` | 329 | **否** | 真实容器执行、超时、网络隔离 |
+| `tests/integration_tests/box/test_box_mcp_integration.py` | 368 | **否** | Managed process、WS attach、shared-session 清理 |
+
+### SDK 仓库
+
+| 文件 | 行数 | CI 运行 | 覆盖范围 |
+|------|------|---------|---------|
+| `tests/box/test_backend_selection.py` | 255 | 是 | 显式 backend / local 模式探测顺序 / 配置变更触发 reselect |
+| `tests/box/test_nsjail_backend.py` | 452 | 是 | nsjail 可用性、安装版 CLI vs 容器内 CLI、session、arg 构建、资源限制 |
+| `tests/box/test_e2b_backend.py` | 482 | 是 | E2B SDK mock、session 生命周期、extra_mounts 同步 |
+| `tests/box/test_skill_store.py` | 88 | 是 | zip preview/install、基础 file CRUD |
+
+**总计**: 17 个测试文件, ~6,100 行测试代码; 其中 2 个集成测试（约 700 行）在 CI 中不运行。
+
+> 较 2026-04-16 版增加：`test_skill_service.py`、`test_paths.py`、`test_preproc.py`、`test_chat_handler_logging.py` (LangBot)，`test_backend_selection.py`、`test_e2b_backend.py`、`test_skill_store.py` (SDK)。`test_nsjail_backend.py` 增加 CLI 兼容性 case (commit `feed530`)。
+
+---
+
+## 2. 覆盖良好的区域
+
+| 区域 | 质量 | 说明 |
+|------|------|------|
+| BoxRuntime session 管理 | 优秀 | session 复用、冲突检测、TTL 配置、消失 session 重建 |
+| BoxService Profile 系统 | 优秀 | 4 个内置 Profile、locked/unlocked 字段、timeout clamp |
+| BoxService host mount 安全 | 优秀 | allowed_mount_roots、disallowed_roots、shared host root |
+| BoxService workspace quota | 优秀 | 前置/后置配额检查、超额清理 |
+| BoxService 输出截断 | 优秀 | 短/精确边界/长输出、独立 stderr |
+| BoxService 可观测性 | 优秀 | 状态报告、error ring buffer、buffer 上限 |
+| BoxService session 模板 | 良好 | `resolve_box_session_id` + `build_skill_extra_mounts` 在 service / native / mcp 三处都有覆盖 |
+| RPC client/server 协议 | 优秀 | execute/get_sessions/delete/create/conflict error |
+| BoxRuntimeConnector | 良好 | local/remote 模式、Docker 平台、relay URL、心跳与重连回调 |
+| BoxWorkspaceSession | 良好 | payload 构建、managed process 路径重写、stage host file |
+| BoxHostMountMode.NONE | 良好 | 枚举校验、workdir 约束 |
+| NsjailBackend | 良好 | 可用性、安装版 vs 容器内、session 生命周期、arg 构建、资源限制 |
+| E2BBackend | 良好 | mock SDK、session/extra_mounts 同步 |
+| Backend selection | 良好 | 显式 backend 优先级、local 探测顺序、配置变更触发 reselect |
+| MCP Box 集成 | 良好 | config model、路径重写、payload、shared-session 多 process |
+| Native tool loader | 良好 | 6 工具（exec/read/write/edit/glob/grep）、路径穿越拦截 |
+| LocalAgent exec 流程 | 良好 | 完整 tool call 循环、流式、system prompt 注入、Tool Call 激活 |
+| Skill 系统 | 良好 | 加载、Tool Call 激活、marker、路径解析、authoring CRUD、HTTP service |
+
+---
+
+## 3. 覆盖缺失的区域
+
+### 3.1 零测试 / 严重不足
+
+| 区域 | 源文件 | 影响 |
+|------|--------|------|
+| **`security.py`** | SDK `box/security.py` (52 行) | `validate_sandbox_security()` 无任何测试。阻止 `/etc`/`/proc`/Docker socket 等危险挂载的安全函数从未被验证 |
+| **`policy.py`** | `pkg/box/policy.py` (98 行) | 三层安全策略无测试（也是死代码） |
+| **`skill_store.py` 边缘场景** | SDK `box/skill_store.py` (647 行) vs 测试 88 行 | GitHub 安装路径、`source_subdir` / `target_suffix` 组合、损坏 zip、文件冲突等场景未覆盖 |
+
+### 3.2 未测试的关键路径
+
+| 区域 | 说明 |
+|------|------|
+| **Session TTL 过期** | 测试配置了 `session_ttl_sec` 但从未推进时间验证过期清理 |
+| **并发 session 访问** | 无并发 exec / 并发创建 / race condition 测试 |
+| **Container backend (Docker)** | 仅通过集成测试覆盖（CI 不运行），单元测试全用 FakeBackend |
+| **E2B 真实 sandbox** | 单测全是 mock，未对接真实 E2B API |
+| **BoxRuntime shutdown()** | 在 test cleanup 中调用但未验证行为 |
+| **BoxServerHandler 错误路径** | 畸形请求、未知 action 类型 |
+| **WS relay** | 仅在集成测试中覆盖（CI 不运行） |
+| **NsjailBackend managed process** | 完全未测试 |
+| **MCP stdio 完整生命周期** | 依赖安装 → 进程启动 → 健康检查 → 多 process 并发 → 重试 |
+| **BoxService start/stop_managed_process** | 单 process 流转有单测，多 process 互不阻塞主要靠集成测试 |
+| **重连指数退避** | connector 单测覆盖回调接线，未实际跑完整重连周期 |
+
+### 3.3 边缘情况缺失
+
+| 区域 | 说明 |
+|------|------|
+| BoxSpec 校验 | 无效 session_id 格式、超长命令、env 特殊字符 |
+| BoxSpec.extra_mounts | 重复 mount_path、与 host_path 冲突、绝对 vs 相对路径 |
+| BoxExecutionResult | 仅 COMPLETED 和 TIMED_OUT，无 ERROR 状态测试 |
+| 多后端 fallback | local 模式探测顺序仅靠 mock，无真实 Docker 不可用 → nsjail 真机 fallback 测试 |
+| Profile YAML 加载 | 测试用硬编码字符串，未从真实 config.yaml 加载 |
+| INIT 配置变更触发 backend 重建 | 单测仅在初始化场景验证 |
+
+---
+
+## 4. 集成测试 vs CI 的差距
+
+CI 仅运行 `tests/unit_tests/`，以下场景**从未在自动化中验证**:
+
+- 真实容器的创建/执行/销毁
+- 容器网络隔离（`--network none`）
+- 容器资源限制生效（cpus/memory/pids_limit）
+- Managed process 的 WS 双向 I/O
+- 多 process 同 session 并发 I/O
+- 孤儿容器清理
+- Session 删除清理容器
+- 进程退出检测
+- E2B 真实 sandbox 行为
+
+**建议**: 在 CI 中加一个可选的 Docker-in-Docker 集成测试 stage，至少覆盖核心执行路径（exec / MCP attach / session 销毁）。
--- a/docs/review/box-tob-analysis.md
+++ b/docs/review/box-tob-analysis.md
@@ -0,0 +1,166 @@
+# Box 系统 toB 商业化分析
+
+> 更新日期: 2026-05-19
+> 分支: `feat/sandbox` (LangBot + langbot-plugin-sdk)
+
+---
+
+## 1. 现有优势
+
+| 能力 | toB 价值 | 代码位置 |
+|------|---------|---------|
+| **沙箱隔离执行** | 企业安全运行不受信代码的基础能力 | SDK `box/backend.py` |
+| **多后端支持** | 适配不同企业容器基础设施 (Podman/Docker/nsjail/E2B) | SDK `box/runtime.py` `_select_backend()` |
+| **E2B 云沙箱** | SaaS / 无 Docker 部署的兜底执行环境 | SDK `box/e2b_backend.py` |
+| **连接自愈** | 心跳 + 自动重连，单点 Box runtime 故障可恢复 | `pkg/box/connector.py` `_heartbeat_loop`, `pkg/box/service.py` `_reconnect_loop` |
+| **Profile + locked 字段** | 运维锁定安全边界，LLM/用户无法绕过 | `pkg/box/service.py`, SDK `box/models.py` |
+| **资源限制** | CPU/内存/PID 数限制防止资源滥用 | SDK `backend.py` `--cpus/--memory/--pids-limit` |
+| **Workspace quota** | 磁盘用量控制 | `pkg/box/service.py` `_enforce_workspace_quota` |
+| **静默降级** | Box 不可用不影响其他功能，降低部署门槛 | `pkg/box/service.py:78` `_available=False` |
+| **孤儿容器清理** | 防止泄漏的容器持续占用资源 | SDK `backend.py` `cleanup_orphaned_containers` |
+| **网络隔离** | `--network none` 防止数据外泄 | SDK `backend.py` start_session |
+| **只读根文件系统** | `--read-only` 防止容器被持久篡改 | SDK `backend.py` start_session |
+| **Host path 白名单** | `allowed_host_mount_roots` 限制可挂载目录 | `pkg/box/service.py` `_validate_host_mount` |
+
+---
+
+## 2. toB 差距分析
+
+### 2.1 安全与合规
+
+| 维度 | 现状 | toB 要求 | 优先级 |
+|------|------|---------|--------|
+| **WS relay 认证** | 无认证，任何人可 attach | 至少 token 认证 | **P0** |
+| **安全策略** | policy.py 是死代码，实际无细粒度控制 | 工具级 allow/deny、沙箱模式控制 | **P0** |
+| **审计日志** | 仅内存中 50 条 `_recent_errors` | 持久化审计：谁何时执行了什么、结果如何 | **P0** |
+| **Host path 校验** | 黑名单策略，`/` 未拦截 | 白名单策略，默认拒绝 | **P1** |
+| **数据驻留** | 无控制 | GDPR / 等保要求的数据隔离 | **P2** |
+
+### 2.2 多租户
+
+| 维度 | 现状 | toB 要求 | 优先级 |
+|------|------|---------|--------|
+| **租户隔离** | 无租户概念 | BoxSpec/Profile 绑定 tenant_id | **P0** |
+| **RBAC** | 仅 token 认证 | admin/operator/viewer 角色权限 | **P0** |
+| **资源配额** | 单一 workspace quota | 每租户 CPU 时间/内存/并发/执行次数配额 | **P1** |
+| **Session 隔离** | 所有 session 共享 dict | 按租户分区，互不可见 | **P1** |
+
+### 2.3 可靠性
+
+| 维度 | 现状 | toB 要求 | 优先级 |
+|------|------|---------|--------|
+| **连接恢复** | 已实现：20s 心跳 + `_reconnect_loop` 指数退避 | 已满足基本要求 | 已有 |
+| **Session 清理** | 机会性（仅新建时触发） | 定时清理 + 独立 reaper | **P1** |
+| **水平扩展** | 单 Box Runtime 实例 | 多实例负载均衡（按 tenant 路由） | **P1** |
+| **优雅降级** | 已有（_available=False） | 已满足基本要求 | 已有 |
+| **Backend 自愈** | 已实现：`get_status` 时若 backend 不可用会重新选择 | 已满足基本要求 | 已有 |
+
+### 2.4 可观测性
+
+| 维度 | 现状 | toB 要求 | 优先级 |
+|------|------|---------|--------|
+| **监控指标** | 无 Prometheus metrics | session 数/执行延迟/资源用量/错误率 | **P1** |
+| **结构化日志** | Python logging, 无结构化 | JSON 格式日志，含 trace_id/tenant_id | **P1** |
+| **前端面板** | 监控页接入 `/api/v1/box/status`（backend 名 + 活跃 session 数）；`sessions` / `errors` 仍未接入 | 完整状态面板 + 历史错误/审计列表 | **P2** |
+
+---
+
+## 3. SaaS 部署架构建议
+
+### 3.1 方案 A: 共享 Box Runtime Pool (快速上线)
+
+```
+LangBot Instance ──> Box Runtime (共享)
+                       ├─ tenant_id 标签隔离
+                       ├─ Redis 配额计数器
+                       └─ Container labels: langbot.tenant_id=xxx
+```
+
+- **优点**: 改动最小，加 tenant_id 到 BoxSpec/labels 即可
+- **缺点**: 容器引擎共享，安全隔离弱
+
+### 3.2 方案 B: 每租户 K8s Namespace + gVisor (推荐中期)
+
+```
+LangBot ──> K8s API
+              ├─ namespace: tenant-xxx
+              │    ├─ RuntimeClass: gVisor (runsc)
+              │    ├─ ResourceQuota
+              │    └─ NetworkPolicy
+              └─ namespace: tenant-yyy
+                   └─ ...
+```
+
+- **优点**: 强隔离（namespace + gVisor），原生 K8s 配额
+- **缺点**: 需要重写 backend 为 K8s Job，部署复杂度高
+
+### 3.3 方案 C: K8s Job 直接编排 (长期)
+
+```
+LangBot ──> K8s Job per execution
+              ├─ 每次执行创建 Job
+              ├─ Pod Security Standards
+              ├─ 自动调度和资源分配
+              └─ Job TTL Controller 自动清理
+```
+
+- **优点**: 最强隔离，天然水平扩展
+- **缺点**: 冷启动延迟，架构重写
+
+**推荐演进路径**: A → B → C
+
+---
+
+## 4. 配额体系建议
+
+### 三层配额
+
+| 层 | 实现 | 作用 |
+|----|------|------|
+| **内核层** | Docker `--cpus`/`--memory`/`--storage-opt` | 硬性资源上限，不可绕过 |
+| **应用层** | Redis 原子计数器 | 并发 session 数/执行次数/CPU 时间预算 |
+| **计费层** | 月度聚合 | 按租户计费（session-hours/execution-count） |
+
+### Profile 与套餐映射
+
+| 套餐 | Profile | locked 字段 | 配额 |
+|------|---------|------------|------|
+| Free | `offline_readonly` | network, host_path_mode, rootfs | 10 exec/天, 0.5 CPU, 256MB |
+| Pro | `default` | (无) | 100 exec/天, 1 CPU, 512MB |
+| Enterprise | `network_extended` | (按需) | 无限, 2 CPU, 1GB, 自定义镜像 |
+
+### TOCTOU 配额修复
+
+当前 `_enforce_workspace_quota` 的 TOCTOU 问题可通过两种方式解决:
+
+1. **预留式配额** (应用层): Redis `INCRBY` 预扣额度 → 执行 → 成功则确认本次扣减，失败则回滚（归还预扣额度）
+2. **内核级限制** (Docker): `--storage-opt size=500m` 直接限制容器可写层大小
+
+---
+
+## 5. 优先实施路线
+
+### Phase 1 (2-4 周): 安全基线
+
+- [ ] WS relay 加 token 认证
+- [ ] 接入或删除 policy.py
+- [x] ~~Box 加重连和心跳~~（已完成，见 [box-issues.md 已解决](./box-issues.md)）
+- [ ] 审计日志持久化（至少写文件/数据库）
+- [ ] `security.py` 加 `/` 拦截，考虑白名单
+- [ ] INIT 与 backend 初始化顺序整理（避免 backend 在配置到达前实例化）
+
+### Phase 2 (4-8 周): 多租户基础
+
+- [ ] BoxSpec 加 `tenant_id` 字段
+- [ ] 容器 labels 加 tenant 标识
+- [ ] Redis 配额计数器（并发/执行次数/时间）
+- [ ] RBAC 基础框架
+- [ ] 定时 session reaper
+
+### Phase 3 (8-16 周): 生产就绪
+
+- [ ] Prometheus metrics exporter
+- [ ] 前端 Box 状态面板
+- [ ] K8s backend 支持 (方案 B)
+- [ ] 结构化日志 (JSON, trace_id)
+- [ ] 水平扩展支持
--- a/docs/review/box-vs-plugin-runtime.md
+++ b/docs/review/box-vs-plugin-runtime.md
@@ -0,0 +1,221 @@
+# Box Runtime vs Plugin Runtime: 连接架构对比
+
+> 更新日期: 2026-05-19
+> 分支: `feat/sandbox` (LangBot + langbot-plugin-sdk)
+
+---
+
+## 1. 总体差异
+
+| 维度 | Plugin Runtime | Box Runtime |
+|------|---------------|-------------|
+| **继承关系** | `PluginRuntimeConnector(ManagedRuntimeConnector)` | `BoxRuntimeConnector`（独立类） |
+| **传输分支** | 3 条 (Docker/WS, Win32/subprocess+WS, Unix/stdio) | 3 条 (本地 stdio, Win32/subprocess+WS, 远程 WS) |
+| **心跳** | 20s ping loop | 20s ping loop（`_heartbeat_loop`） |
+| **重连** | WS 模式: sleep 3s → re-initialize | 由 BoxService `_reconnect_loop` 处理，指数退避 |
+| **Handler 类型** | `RuntimeConnectionHandler` (1132 行, 25+ action) | 基础 `Handler` + `BoxServerHandler`（SDK 端 25 action） |
+| **Client 抽象** | Handler 即 API | 独立 `ActionRPCBoxClient` 封装 Handler |
+| **启用/禁用** | `is_enable_plugin` 开关 | 无开关（可用/不可用由初始化结果决定） |
+| **初始化失败** | 异常上抛 | 静默降级 `_available=False` |
+| **Shutdown** | 直接杀进程 | RPC SHUTDOWN → 清理容器 → 再杀进程 |
+
+---
+
+## 2. 传输决策
+
+### Plugin: 3-路决策
+
+```python
+# pkg/plugin/connector.py:106-165
+if get_platform() == 'docker' or use_websocket_to_connect_plugin_runtime():
+    # Docker/WS → ws://langbot_plugin_runtime:5400/control/ws
+elif get_platform() == 'win32':
+    # Windows → 起子进程(无 pipe) + ws://localhost:5400/control/ws
+else:
+    # Unix/Mac → StdioClientController(python -m langbot_plugin.cli rt -s)
+```
+
+### Box: 3-路决策
+
+```python
+# pkg/box/connector.py
+if self._uses_websocket():
+    if platform.get_platform() == 'win32' and not self.configured_runtime_url:
+        await self._start_subprocess_then_ws()  # subprocess + ws://localhost:5410/rpc/ws
+    else:
+        await self._connect_remote_ws()         # ws://{host}:5410/rpc/ws
+else:
+    await self._start_local_stdio()             # StdioClientController
+```
+
+> 历史：2026-04-16 版本本文档曾把 Box 描述为 2 路决策（缺 Windows 分支）。现已对齐 Plugin 的 3 路设计。
+
+### 决策矩阵
+
+| 环境 | Plugin | Box |
+|------|--------|-----|
+| Docker | WS → `:5400` | WS → `:5410/rpc/ws` |
+| `--standalone-box` | N/A | WS → `localhost:5410/rpc/ws` |
+| Windows 非 Docker | subprocess + WS (`:5400`) | subprocess + WS (`localhost:5410/rpc/ws`) |
+| Unix/Mac 非 Docker | stdio | stdio |
+| 手动配置 URL | 通过配置项 | WS → 用户配置的 URL |
+
+---
+
+## 3. 连接建立
+
+### 同步模式差异
+
+**Plugin**: `new_connection_callback` 内直接 ping + await handler_task，`initialize()` 通过 `create_task()` 异步启动，不阻塞等待连接。
+
+**Box**: 使用 `asyncio.Event` + `wait_for(timeout=30s)` 模式，`initialize()` 同步等待连接成功或超时。
+
+### Box stdio 路径
+
+```
+connector._start_local_stdio()
+  ├─ connected = asyncio.Event()
+  ├─ ctrl = StdioClientController(python, ['-m', 'langbot_plugin.cli.__init__', 'box', '-s', '--ws-control-port', N])
+  ├─ _ctrl_task = create_task(ctrl.run(callback))
+  │    callback:
+  │      handler = Handler(connection)          ← 基础 Handler, 无 disconnect_callback
+  │      client.set_handler(handler)
+  │      _handler_task = create_task(handler.run())
+  │      call_action(PING, {})                  ← 握手, timeout=15s
+  │      connected.set()                        ← 通知外层
+  │      await _handler_task                    ← 阻塞直到断开
+  └─ await wait_for(connected.wait(), 30s)      ← 同步等待
+```
+
+### Plugin stdio 路径
+
+```
+connector.initialize()
+  ├─ ctrl = StdioClientController(python, ['-m', 'langbot_plugin.cli', 'rt', '-s'])
+  ├─ task = ctrl.run(callback)
+  │    callback:
+  │      disconnect_callback:
+  │        [WS] → runtime_disconnect_callback → 重连
+  │        [stdio] → 仅日志, 不重连
+  │      handler = RuntimeConnectionHandler(conn, disconnect_cb, ap)
+  │      create_task(handler.run())
+  │      handler.ping()                         ← 握手, timeout=10s
+  │      await handler_task                     ← 阻塞直到断开
+  ├─ create_task(heartbeat_loop())              ← 20s ping loop
+  └─ create_task(task)                          ← 不等待连接
+```
+
+---
+
+## 4. 心跳与重连
+
+### 心跳
+
+| 维度 | Plugin | Box |
+|------|--------|-----|
+| 有心跳? | 是 | 是（`connector.py` `_heartbeat_loop`） |
+| 间隔 | 20s | 20s |
+| 失败处理 | 仅 DEBUG 日志，不触发重连 | 仅 DEBUG 日志，依赖 connection close 触发重连 |
+| 生命周期 | 整个应用生命周期 | 连接建立后启动；`dispose()` 时 cancel |
+
+### 重连
+
+| 维度 | Plugin | Box |
+|------|--------|-----|
+| Docker/WS 断开 | `runtime_disconnect_callback` → sleep 3s → re-initialize | `runtime_disconnect_callback` → `BoxService._reconnect_loop()`（指数退避） |
+| WS 连接失败 | 同上 | 同上；初次失败时 `_available=False`，重连成功后恢复 |
+| stdio 断开 | 仅日志，不重连 | 接同样回调；stdio 重连需重新 fork 子进程 |
+| 重连退避 | 固定 3s，无 backoff | 指数退避 |
+
+> 历史：2026-04-16 版本本文档曾把心跳与重连标记为 Box 缺失。这两项已在 commit `2dfd9d5d` / `c6882cf` / `5029d9c` 等修复（详见 [box-issues.md 已解决](./box-issues.md)）。
+
+---
+
+## 5. 共享 IO 层
+
+两者复用同一套 SDK IO 基础设施：
+
+```
+Handler ← ABC                              (runtime/io/handler.py)
+  ├── RuntimeConnectionHandler              (Plugin 用, LangBot 侧)
+  ├── ControlConnectionHandler              (Plugin 用, SDK 侧)
+  ├── BoxServerHandler                      (Box 用, SDK 侧)
+  └── 匿名 Handler 实例                     (Box 用, LangBot 侧)
+
+Connection ← ABC
+  ├── StdioConnection    (stdio: 16KB chunks, 应用层分帧协议)
+  └── WebSocketConnection (WS: 64KB chunks, 原生 WS 分帧)
+
+Controller ← ABC
+  ├── StdioClientController    (fork 子进程, pipe stdin/stdout)
+  ├── StdioServerController    (接管当前进程 stdin/stdout)
+  ├── WebSocketClientController (连接 WS 服务端)
+  └── WebSocketServerController (监听 WS 端口)
+```
+
+共享的核心机制：
+- `call_action()` / `call_action_generator()` — RPC 调用/流式调用
+- `ActionRequest` / `ActionResponse` — 请求/响应协议
+- `seq_id` 关联 — 并发请求复用单连接
+- `CommonAction.PING` — 两者都用于初始握手
+- 文件传输 (`send_file`) — Plugin 用，Box 不用
+
+---
+
+## 6. 端口方案
+
+| 服务 | Plugin | Box |
+|------|--------|-----|
+| Action RPC (stdio) | stdin/stdout | stdin/stdout |
+| Action RPC (WS) | `:5400` | `:5410/rpc/ws` |
+| 辅助服务 | debug WS `:5401` | managed process WS relay `:5410/v1/sessions/{id}/managed-process/ws` |
+
+**Box 特点**: 单端口 aiohttp 服务（默认 5410），通过路径区分 Action RPC 和 managed process relay。即使在 stdio 模式，也在 `:5410` 启动 aiohttp 用于 managed process attach。Plugin 在 stdio 模式不开额外端口。
+
+---
+
+## 7. 销毁对比
+
+### Plugin
+
+```python
+dispose():
+  if stdio: ctrl.process.terminate()
+  _dispose_subprocess()         # Windows 子进程
+  heartbeat_task.cancel()
+```
+
+### Box
+
+```python
+connector.dispose():
+  _handler_task.cancel()
+  _ctrl_task.cancel()
+  _subprocess.terminate()
+
+service.dispose():
+  connector.dispose()
+  loop.create_task(client.shutdown())   # RPC SHUTDOWN → 清理所有容器
+```
+
+Box 的 RPC SHUTDOWN 确保容器被正确停止，不会成为孤儿。Plugin 直接杀进程。
+
+---
+
+## 8. 改进建议
+
+### P0
+
+1. **两者都加 WS 认证**: 至少 token 认证（INIT 时下发，连接时校验）
+
+### P1
+
+2. **考虑 Box 继承 ManagedRuntimeConnector**: 复用 `_start_runtime_subprocess` / `_wait_until_ready` / `_dispose_subprocess`，减少重复代码
+3. **Plugin 重连加退避**: 固定 3s 无 backoff 可能造成日志洪水，建议向 Box 的指数退避看齐
+4. **统一连接管理模式**: Event-based (Box) vs direct-await (Plugin)，考虑收敛为一种
+
+### 已完成（自上一轮）
+
+- ~~Box 加重连~~（commit `2dfd9d5d`）
+- ~~Box 加心跳~~（20s loop 与 Plugin 一致）
+- ~~Box 加 Windows 支持~~（commit `120817a` / `fafb7a4`）
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "langbot"
-version = "4.9.6"
+version = "4.10.0-beta.1"
 description = "Production-grade platform for building agentic IM bots"
 readme = "README.md"
 license-files = ["LICENSE"]
@@ -22,7 +22,7 @@ dependencies = [
    "discord-py>=2.5.2",
    "pynacl>=1.5.0", # Required for Discord voice support
    "gewechat-client>=0.1.5",
-    "lark-oapi>=1.4.15",
+    "lark-oapi>=1.5.5",
    "mcp>=1.25.0",
    "nakuru-project-idk>=0.0.2.1",
    "ollama>=0.4.8",
@@ -35,6 +35,7 @@ dependencies = [
    "python-telegram-bot>=22.0",
    "pyyaml>=6.0.2",
    "qq-botpy-rc>=1.2.1.6",
+    "qrcode>=7.4",
    "quart>=0.20.0",
    "quart-cors>=0.8.0",
    "requests>=2.32.3",
@@ -69,7 +70,7 @@ dependencies = [
    "chromadb>=1.0.0,<2.0.0",
    "qdrant-client (>=1.15.1,<2.0.0)",
    "pyseekdb==1.1.0.post3",
-    "langbot-plugin==0.3.10",
+    "langbot-plugin==0.4.0b1",
    "asyncpg>=0.30.0",
    "line-bot-sdk>=3.19.0",
    "matrix-nio>=0.25.2",
@@ -121,6 +122,7 @@ package-data = { "langbot" = ["templates/**", "pkg/provider/modelmgr/requesters/

 [dependency-groups]
 dev = [
+    "moto>=5.2.1",
    "pre-commit>=4.2.0",
    "pytest>=9.0.3",
    "pytest-asyncio>=1.0.0",
@@ -221,4 +223,3 @@ skip-magic-trailing-comma = false

 # Like Black, automatically detect the appropriate line ending.
 line-ending = "auto"
-
--- a/pytest.ini
+++ b/pytest.ini
@@ -4,6 +4,9 @@ python_files = test_*.py
 python_classes = Test*
 python_functions = test_*

+# Python path for imports
+pythonpath = . tests
+
 # Test paths
 testpaths = tests

@@ -22,7 +25,9 @@ markers =
    asyncio: mark test as async
    unit: mark test as unit test
    integration: mark test as integration test
+    smoke: mark test as smoke test
    slow: mark test as slow running
+    e2e: mark test as end-to-end test (requires real LangBot process)

 # Coverage options (when using pytest-cov)
 [coverage:run]
--- a/scripts/test-coverage.sh
+++ b/scripts/test-coverage.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+
+# Coverage gate script
+# Runs unit + smoke and fast integration tests under coverage, merges the
+# results, and fails when total coverage drops below the threshold.
+# Uses separate pytest invocations to avoid sys.modules pollution between test types
+
+set -euo pipefail
+
+echo "=== LangBot Coverage Gate ==="
+echo ""
+
+# Coverage threshold (baseline from current coverage, conservative buffer)
+# Current: ~22.14%, threshold: 18%
+COVERAGE_THRESHOLD=18
+
+# Temporary directory for the per-run coverage data files
+COV_DIR=$(mktemp -d)
+# Single quotes: expand $COV_DIR when the trap fires, and keep the path quoted.
+trap 'rm -rf "$COV_DIR"' EXIT
+
+# Each pytest run writes its own data file, selected through COVERAGE_FILE.
+# `coverage combine` merges these data files; it cannot merge JSON/XML reports.
+UNIT_DATA="$COV_DIR/.coverage.unit"
+INTEGRATION_DATA="$COV_DIR/.coverage.integration"
+COMBINED_DATA="$COV_DIR/.coverage"
+
+echo "[1/3] Running unit + smoke tests with coverage..."
+COVERAGE_FILE="$UNIT_DATA" uv run pytest tests/unit_tests/ tests/smoke/ \
+    --cov=langbot \
+    --cov-report=term-missing \
+    -q --tb=short
+echo ""
+
+echo "[2/3] Running fast integration tests with coverage..."
+COVERAGE_FILE="$INTEGRATION_DATA" uv run pytest tests/integration/ -m "not slow" \
+    --cov=langbot \
+    --cov-report=term-missing \
+    -q --tb=short
+echo ""
+
+echo "[3/3] Combining coverage reports..."
+# Run coverage through uv so the project environment's copy is used
+uv run coverage combine --data-file="$COMBINED_DATA" "$UNIT_DATA" "$INTEGRATION_DATA"
+
+# XML report for CI, generated from the combined data (no third test run)
+uv run coverage xml --data-file="$COMBINED_DATA" -o coverage.xml
+
+# The gate itself: a non-zero exit here aborts the script via `set -e`
+echo ""
+echo "Coverage threshold: $COVERAGE_THRESHOLD%"
+uv run coverage report --data-file="$COMBINED_DATA" --fail-under="$COVERAGE_THRESHOLD"
+
+echo ""
+echo "=== Coverage Gate Complete ==="
+echo ""
+echo "Coverage baseline: $COVERAGE_THRESHOLD%"
+echo "Coverage report saved to coverage.xml"
--- a/scripts/test-integration-fast.sh
+++ b/scripts/test-integration-fast.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+
+# Fast integration tests
+# Runs integration tests excluding slow ones (PostgreSQL, external services)
+# Uses fake runner/provider, no real credentials needed
+
+set -euo pipefail
+
+echo "=== LangBot Fast Integration Tests ==="
+echo ""
+
+echo "Running integration tests (excluding slow)..."
+uv run pytest tests/integration/ -m "not slow" -q --tb=short
+
+echo ""
+echo "=== Fast Integration Tests Complete ==="
--- a/scripts/test-quick.sh
+++ b/scripts/test-quick.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+
+# Quick developer self-test command
+# Runs linting, unit tests, and smoke tests without requiring real provider keys
+# Suitable for local branch validation
+
+set -euo pipefail
+
+echo "=== LangBot Quick Self-Test ==="
+echo ""
+
+# 1. Ruff check
+echo "[1/3] Running ruff check..."
+uv run ruff check src/langbot/ tests/ --output-format=concise || {
+    echo ""
+    echo "⚠ Ruff check found issues. Run 'uv run ruff check --fix' to auto-fix."
+    exit 1
+}
+echo "✓ Ruff check passed"
+echo ""
+
+# 2. Unit tests
+echo "[2/3] Running unit tests..."
+uv run pytest tests/unit_tests/ -q --tb=short
+echo ""
+
+# 3. Smoke tests (if exists)
+echo "[3/3] Running smoke tests..."
+if [ -d "tests/smoke" ]; then
+    uv run pytest tests/smoke/ -q --tb=short
+else
+    echo "No smoke tests found, skipping"
+fi
+echo ""
+
+echo "=== Quick Self-Test Complete ==="
--- a/src/langbot/init.py
+++ b/src/langbot/init.py
@@ -1,3 +1,3 @@
 """LangBot - Production-grade platform for building agentic IM bots"""

-__version__ = '4.9.6'
+__version__ = '4.10.0-beta.1'
--- a/src/langbot/main.py
+++ b/src/langbot/main.py
@@ -5,6 +5,8 @@ import argparse
 import sys
 import os

+from langbot.pkg.utils import paths
+
 # ASCII art banner
 asciiart = r"""
 _                   ___      _   
@@ -27,6 +29,12 @@ async def main_entry(loop: asyncio.AbstractEventLoop):
        help='Use standalone plugin runtime / 使用独立插件运行时',
        default=False,
    )
+    parser.add_argument(
+        '--standalone-box',
+        action='store_true',
+        help='Use standalone box runtime / 使用独立 Box 运行时',
+        default=False,
+    )
    parser.add_argument('--debug', action='store_true', help='Debug mode / 调试模式', default=False)
    args = parser.parse_args()

@@ -35,6 +43,11 @@ async def main_entry(loop: asyncio.AbstractEventLoop):

        platform.standalone_runtime = True

+    if args.standalone_box:
+        from langbot.pkg.utils import platform
+
+        platform.standalone_box = True
+
    if args.debug:
        from langbot.pkg.utils import constants

@@ -87,7 +100,7 @@ def main():
    # Set up the working directory
    # When installed as a package, we need to handle the working directory differently
    # We'll create data directory in current working directory if not exists
-    os.makedirs('data', exist_ok=True)
+    os.makedirs(paths.get_data_root(), exist_ok=True)

    loop = asyncio.new_event_loop()

--- a/src/langbot/pkg/api/http/controller/groups/box.py
+++ b/src/langbot/pkg/api/http/controller/groups/box.py
@@ -0,0 +1,22 @@
+from __future__ import annotations
+
+from .. import group
+
+
+@group.group_class('box', '/api/v1/box')
+class BoxRouterGroup(group.RouterGroup):
+    async def initialize(self) -> None:
+        @self.route('/status', methods=['GET'], auth_type=group.AuthType.USER_TOKEN)
+        async def _() -> str:
+            status = await self.ap.box_service.get_status()
+            return self.success(data=status)
+
+        @self.route('/sessions', methods=['GET'], auth_type=group.AuthType.USER_TOKEN)
+        async def _() -> str:
+            sessions = await self.ap.box_service.get_sessions()
+            return self.success(data=sessions)
+
+        @self.route('/errors', methods=['GET'], auth_type=group.AuthType.USER_TOKEN)
+        async def _() -> str:
+            errors = self.ap.box_service.get_recent_errors()
+            return self.success(data=errors)
--- a/src/langbot/pkg/api/http/controller/groups/extensions.py
+++ b/src/langbot/pkg/api/http/controller/groups/extensions.py
@@ -0,0 +1,52 @@
+from __future__ import annotations
+
+import asyncio
+import quart
+
+from .. import group
+
+
+@group.group_class('extensions', '/api/v1/extensions')
+class ExtensionsRouterGroup(group.RouterGroup):
+    """Unified API for installed extensions (plugins, MCP servers, skills)."""
+
+    async def initialize(self) -> None:
+        @self.route('', methods=['GET'], auth_type=group.AuthType.USER_TOKEN_OR_API_KEY)
+        async def _() -> quart.Response:
+            # The three sources are queried concurrently. With return_exceptions=True
+            # a failing source yields its exception object instead of a list, and
+            # the isinstance() filter below then leaves that source out.
+            plugins, mcp_servers, skills = await asyncio.gather(
+                self.ap.plugin_connector.list_plugins(),
+                self.ap.mcp_service.get_mcp_servers(contain_runtime_info=True),
+                self.ap.skill_service.list_skills(),
+                return_exceptions=True,
+            )
+
+            def _sort_key(item: dict) -> str:
+                """Return the lower-cased name that orders ``item`` in the listing."""
+                if item['type'] == 'plugin':
+                    # Descend manifest -> manifest -> metadata, treating a missing
+                    # or null level as empty so one odd plugin cannot fail the sort.
+                    node = item['plugin']
+                    for level in ('manifest', 'manifest', 'metadata'):
+                        node = node.get(level) or {}
+                    return (node.get('name') or '').lower()
+                if item['type'] == 'mcp':
+                    return (item['server'].get('name') or '').lower()
+                if item['type'] == 'skill':
+                    return (item['skill'].get('display_name') or item['skill'].get('name') or '').lower()
+                return ''
+
+            extensions: list[dict] = []
+            for kind, field, found in (
+                ('plugin', 'plugin', plugins),
+                ('mcp', 'server', mcp_servers),
+                ('skill', 'skill', skills),
+            ):
+                if isinstance(found, list):
+                    extensions.extend({'type': kind, field: entry} for entry in found)
+
+            extensions.sort(key=_sort_key)
+
+            return self.success(data={'extensions': extensions})
--- a/src/langbot/pkg/api/http/controller/groups/pipelines/pipelines.py
+++ b/src/langbot/pkg/api/http/controller/groups/pipelines/pipelines.py
@@ -73,15 +73,21 @@ class PipelinesRouterGroup(group.RouterGroup):
                plugins = await self.ap.plugin_connector.list_plugins(component_kinds=pipeline_component_kinds)
                mcp_servers = await self.ap.mcp_service.get_mcp_servers(contain_runtime_info=True)

+                # Get available skills
+                available_skills = await self.ap.skill_service.list_skills()
+
                extensions_prefs = pipeline.get('extensions_preferences', {})
                return self.success(
                    data={
                        'enable_all_plugins': extensions_prefs.get('enable_all_plugins', True),
                        'enable_all_mcp_servers': extensions_prefs.get('enable_all_mcp_servers', True),
+                        'enable_all_skills': extensions_prefs.get('enable_all_skills', True),
                        'bound_plugins': extensions_prefs.get('plugins', []),
                        'available_plugins': plugins,
                        'bound_mcp_servers': extensions_prefs.get('mcp_servers', []),
                        'available_mcp_servers': mcp_servers,
+                        'bound_skills': extensions_prefs.get('skills', []),
+                        'available_skills': available_skills,
                    }
                )
            elif quart.request.method == 'PUT':
@@ -89,11 +95,19 @@ class PipelinesRouterGroup(group.RouterGroup):
                json_data = await quart.request.json
                enable_all_plugins = json_data.get('enable_all_plugins', True)
                enable_all_mcp_servers = json_data.get('enable_all_mcp_servers', True)
+                enable_all_skills = json_data.get('enable_all_skills', True)
                bound_plugins = json_data.get('bound_plugins', [])
                bound_mcp_servers = json_data.get('bound_mcp_servers', [])
+                bound_skills = json_data.get('bound_skills', [])

                await self.ap.pipeline_service.update_pipeline_extensions(
-                    pipeline_uuid, bound_plugins, bound_mcp_servers, enable_all_plugins, enable_all_mcp_servers
+                    pipeline_uuid,
+                    bound_plugins,
+                    bound_mcp_servers,
+                    enable_all_plugins,
+                    enable_all_mcp_servers,
+                    bound_skills=bound_skills,
+                    enable_all_skills=enable_all_skills,
                )

                return self.success()
--- a/src/langbot/pkg/api/http/controller/groups/platform/adapters.py
+++ b/src/langbot/pkg/api/http/controller/groups/platform/adapters.py
@@ -1,5 +1,6 @@
 import quart
 import mimetypes
+import asyncio
 from ... import group
 from langbot.pkg.utils import importutil

@@ -35,3 +36,640 @@ class AdaptersRouterGroup(group.RouterGroup):
            return quart.Response(
                importutil.read_resource_file_bytes(icon_path), mimetype=mimetypes.guess_type(icon_path)[0]
            )
+
+        # In-memory session store for active registrations
+        _create_app_sessions: dict = {}
+        _SESSION_TTL = 900  # 15 minutes
+
+        def _cleanup_expired_sessions():
+            """Evict Lark registration sessions older than _SESSION_TTL and cancel their unfinished tasks."""
+            import time
+
+            now = time.time()
+            for sid, entry in list(_create_app_sessions.items()):
+                if now - entry.get('created_at', 0) > _SESSION_TTL:
+                    worker = (_create_app_sessions.pop(sid, None) or {}).get('task')
+                    if worker and not worker.done():
+                        worker.cancel()
+
+        @self.route('/lark/create-app', methods=['POST'])
+        async def _() -> str:
+            """Start Feishu one-click app registration. Returns session_id + QR code URL."""
+            import uuid
+            import time
+            import lark_oapi as lark
+            from lark_oapi.scene.registration.errors import AppAccessDeniedError, AppExpiredError
+
+            _cleanup_expired_sessions()
+
+            session_id = str(uuid.uuid4())
+            loop = asyncio.get_running_loop()
+
+            session = {
+                'status': 'pending',
+                'qr_url': None,
+                'expire_at': None,
+                'app_id': None,
+                'app_secret': None,
+                'error': None,
+                'created_at': time.time(),
+            }
+            _create_app_sessions[session_id] = session
+
+            def on_qr_code(info):
+                # May be called from a background thread by the SDK;
+                # use call_soon_threadsafe to safely update session state.
+                def _update():
+                    session['qr_url'] = info['url']
+                    session['expire_at'] = time.time() + 600  # 10 minutes
+                    session['status'] = 'waiting'
+
+                loop.call_soon_threadsafe(_update)
+
+            async def run_registration():
+                try:
+                    result = await lark.aregister_app(on_qr_code=on_qr_code, source='langbot')
+                    session['status'] = 'success'
+                    session['app_id'] = result['client_id']
+                    session['app_secret'] = result['client_secret']
+                except AppAccessDeniedError:
+                    session['status'] = 'error'
+                    session['error'] = 'User denied authorization'
+                except AppExpiredError:
+                    session['status'] = 'error'
+                    session['error'] = 'QR code expired'
+                except Exception as e:
+                    session['status'] = 'error'
+                    session['error'] = str(e) or type(e).__name__  # never leave the message empty
+
+            task = asyncio.create_task(run_registration())
+            session['task'] = task
+
+            # Wait for the QR code, or for an early failure (max 10 seconds)
+            for _ in range(20):
+                if session['qr_url'] or session['error']:
+                    break
+                await asyncio.sleep(0.5)
+
+            if not session['qr_url']:
+                # The client never learns this session id, so drop the session
+                # right away instead of keeping it until the next TTL sweep.
+                task.cancel()
+                _create_app_sessions.pop(session_id, None)
+                if session['error']:
+                    return self.http_status(502, -1, session['error'])
+                return self.http_status(504, -1, 'Timeout waiting for QR code')
+
+            return self.success(
+                data={
+                    'session_id': session_id,
+                    'qr_url': session['qr_url'],
+                    'expire_at': session['expire_at'],
+                }
+            )
+
+        @self.route('/lark/create-app/status/<session_id>', methods=['GET'])
+        async def _(session_id: str) -> str:
+            """Poll registration status; a finished session is reported once, then forgotten."""
+            entry = _create_app_sessions.get(session_id)
+            if not entry:
+                return self.http_status(404, -1, 'Session not found')
+
+            state = entry['status']
+            payload = {'status': state}
+
+            # Session fields copied into the response for each terminal state.
+            reported = {'success': ('app_id', 'app_secret'), 'error': ('error',)}.get(state)
+            if reported is not None:
+                payload.update((field, entry[field]) for field in reported)
+                # Terminal results are delivered a single time.
+                _create_app_sessions.pop(session_id, None)
+
+            return self.success(data=payload)
+
+        @self.route('/lark/create-app/<session_id>', methods=['DELETE'])
+        async def _(session_id: str) -> str:
+            """Forget a registration session and cancel its task if one is still running."""
+            worker = (_create_app_sessions.pop(session_id, None) or {}).get('task')
+            if worker and not worker.done():
+                worker.cancel()
+            return self.success(data={})
+
+        # -----------------------------------------------------------------------
+        # WeChat QR Code Login
+        # -----------------------------------------------------------------------
+
+        _weixin_login_sessions: dict = {}
+        _WEIXIN_SESSION_TTL = 600  # 10 minutes (3 retries × 3 min QR validity)
+
+        def _cleanup_expired_weixin_sessions():
+            """Evict WeChat login sessions older than _WEIXIN_SESSION_TTL and cancel their unfinished tasks."""
+            import time
+
+            now = time.time()
+            for sid, entry in list(_weixin_login_sessions.items()):
+                if now - entry.get('created_at', 0) <= _WEIXIN_SESSION_TTL:
+                    continue  # still within its TTL
+                worker = (_weixin_login_sessions.pop(sid, None) or {}).get('task')
+                if worker and not worker.done():
+                    worker.cancel()
+
+        @self.route('/weixin/login', methods=['POST'])
+        async def _() -> str:
+            """Start WeChat QR code login. Returns session_id + QR code data URL."""
+            import uuid
+            import time
+            import io
+            import base64
+
+            from langbot.libs.openclaw_weixin_api.client import OpenClawWeixinClient, DEFAULT_BASE_URL
+
+            _cleanup_expired_weixin_sessions()
+
+            session_id = str(uuid.uuid4())
+            loop = asyncio.get_running_loop()
+
+            session = {
+                'status': 'pending',
+                'qr_data_url': None,
+                'expire_at': None,
+                'token': None,
+                'base_url': None,
+                'account_id': None,
+                'error': None,
+                'created_at': time.time(),
+            }
+            _weixin_login_sessions[session_id] = session
+
+            client = OpenClawWeixinClient(base_url=DEFAULT_BASE_URL, token='')
+
+            async def run_login():
+                try:
+                    import qrcode as qr_lib
+
+                    for _attempt in range(3):
+                        qr_resp = await client.fetch_qrcode()
+                        if not qr_resp.qrcode or not qr_resp.qrcode_img_content:
+                            raise Exception('Failed to get QR code from server')
+
+                        # Generate QR code image locally
+                        qr = qr_lib.QRCode(error_correction=qr_lib.constants.ERROR_CORRECT_L)
+                        qr.add_data(qr_resp.qrcode_img_content)
+                        qr.make(fit=True)
+                        img = qr.make_image(fill_color='black', back_color='white')
+                        buf = io.BytesIO()
+                        img.save(buf, format='PNG')
+                        b64 = base64.b64encode(buf.getvalue()).decode('utf-8')
+                        data_url = f'data:image/png;base64,{b64}'
+
+                        def _update_qr():
+                            session['qr_data_url'] = data_url
+                            session['expire_at'] = time.time() + 480  # 8 minutes
+                            session['status'] = 'waiting'
+
+                        loop.call_soon_threadsafe(_update_qr)
+
+                        # Poll for scan status
+                        deadline = loop.time() + 180
+                        while loop.time() < deadline:
+                            try:
+                                status_resp = await client.poll_qrcode_status(qr_resp.qrcode)
+                            except Exception:  # best effort: a failed poll is retried after a pause
+                                await asyncio.sleep(2)
+                                continue
+
+                            if status_resp.status == 'confirmed' and status_resp.bot_token:
+                                session['status'] = 'success'
+                                session['token'] = status_resp.bot_token
+                                session['base_url'] = status_resp.baseurl or client.base_url
+                                session['account_id'] = status_resp.ilink_bot_id or ''
+                                return
+
+                            if status_resp.status == 'expired':
+                                break  # retry with new QR code
+
+                            await asyncio.sleep(1)
+                        # Falling out of the loop (QR expired or the 180 s window
+                        # elapsed) moves on to the next attempt with a fresh QR code.
+
+                    # All retries exhausted
+                    session['status'] = 'error'
+                    session['error'] = 'QR code login failed: max retries exceeded'
+
+                except Exception as e:
+                    session['status'] = 'error'
+                    session['error'] = str(e) or type(e).__name__  # never leave the message empty
+                finally:
+                    await client.close()
+
+            task = asyncio.create_task(run_login())
+            session['task'] = task
+
+            # Wait for the QR code, or for an early failure (max 10 seconds)
+            for _ in range(20):
+                if session['qr_data_url'] or session['error']:
+                    break
+                await asyncio.sleep(0.5)
+
+            if not session['qr_data_url']:
+                # The client never learns this session id, so drop the session
+                # right away instead of keeping it until the next TTL sweep.
+                task.cancel()
+                _weixin_login_sessions.pop(session_id, None)
+                if session['error']:
+                    return self.http_status(502, -1, session['error'])
+                return self.http_status(504, -1, 'Timeout waiting for QR code')
+
+            return self.success(
+                data={
+                    'session_id': session_id,
+                    'qr_data_url': session['qr_data_url'],
+                    'expire_at': session['expire_at'],
+                }
+            )
+
+        @self.route('/weixin/login/status/<session_id>', methods=['GET'])
+        async def _(session_id: str) -> str:
+            """Poll WeChat login status; a finished session is reported once, then forgotten."""
+            entry = _weixin_login_sessions.get(session_id)
+            if not entry:
+                return self.http_status(404, -1, 'Session not found')
+
+            state = entry['status']
+            payload = {'status': state}
+
+            # Session fields copied into the response for each terminal state.
+            reported = {'success': ('token', 'base_url', 'account_id'), 'error': ('error',)}.get(state)
+            if reported is not None:
+                payload.update((field, entry[field]) for field in reported)
+                # Terminal results are delivered a single time: the session
+                # is removed as soon as it has been reported.
+                _weixin_login_sessions.pop(session_id, None)
+
+            return self.success(data=payload)
+
+        @self.route('/weixin/login/<session_id>', methods=['DELETE'])
+        async def _(session_id: str) -> str:
+            """Forget a WeChat login session and cancel its task if one is still running."""
+            worker = (_weixin_login_sessions.pop(session_id, None) or {}).get('task')
+            if worker and not worker.done():
+                worker.cancel()
+            return self.success(data={})
+
+        # -----------------------------------------------------------------------
+        # DingTalk Device Flow QR Code Login
+        # -----------------------------------------------------------------------
+
+        _dingtalk_sessions: dict = {}
+        _DINGTALK_SESSION_TTL = 600  # 10 minutes (QR code validity window)
+
+        def _cleanup_expired_dingtalk_sessions():
+            """Evict DingTalk sessions older than _DINGTALK_SESSION_TTL and cancel their unfinished tasks."""
+            import time
+
+            now = time.time()
+            for sid, entry in list(_dingtalk_sessions.items()):
+                if now - entry.get('created_at', 0) <= _DINGTALK_SESSION_TTL:
+                    continue  # still within its TTL
+                worker = (_dingtalk_sessions.pop(sid, None) or {}).get('task')
+                if worker and not worker.done():
+                    worker.cancel()
+
+        @self.route('/dingtalk/create-app', methods=['POST'])
+        async def _() -> str:
+            """Start DingTalk one-click app creation via Device Flow. Returns session_id + QR code URL."""
+            import uuid
+            import time
+            import aiohttp
+
+            DINGTALK_BASE_URL = 'https://oapi.dingtalk.com'
+
+            _cleanup_expired_dingtalk_sessions()
+
+            session_id = str(uuid.uuid4())
+
+            session = {
+                'status': 'pending',
+                'qr_url': None,
+                'expire_at': None,
+                'client_id': None,
+                'client_secret': None,
+                'error': None,
+                'created_at': time.time(),
+                'device_code': None,
+                'interval': 5,
+            }
+            _dingtalk_sessions[session_id] = session
+
+            async def run_device_flow():
+                try:
+                    timeout = aiohttp.ClientTimeout(total=10)
+                    async with aiohttp.ClientSession(timeout=timeout) as http:
+                        # Step 1: Init — get nonce
+                        async with http.post(
+                            f'{DINGTALK_BASE_URL}/app/registration/init',
+                            json={'source': 'langbot'},
+                        ) as resp:
+                            try:
+                                data = await resp.json()
+                            except (aiohttp.ContentTypeError, ValueError):
+                                session['status'] = 'error'
+                                session['error'] = 'Invalid response from DingTalk service'
+                                return
+                            if data.get('errcode', -1) != 0:
+                                session['status'] = 'error'
+                                session['error'] = data.get('errmsg') or 'Failed to init'
+                                return
+                            nonce = data['nonce']
+
+                        # Step 2: Begin — get device_code + QR URL
+                        async with http.post(
+                            f'{DINGTALK_BASE_URL}/app/registration/begin',
+                            json={'nonce': nonce},
+                        ) as resp:
+                            try:
+                                data = await resp.json()
+                            except (aiohttp.ContentTypeError, ValueError):
+                                session['status'] = 'error'
+                                session['error'] = 'Invalid response from DingTalk service'
+                                return
+                            if data.get('errcode', -1) != 0:
+                                session['status'] = 'error'
+                                session['error'] = data.get('errmsg') or 'Failed to begin authorization'
+                                return
+
+                            device_code = data['device_code']
+                            verification_uri_complete = data.get('verification_uri_complete', '')
+                            expires_in = data.get('expires_in', 7200)
+                            interval = data.get('interval', 5)
+
+                            session['device_code'] = device_code
+                            session['interval'] = interval
+                            session['qr_url'] = verification_uri_complete
+                            session['expire_at'] = time.time() + 600  # QR code valid for ~10 min
+                            session['status'] = 'waiting'
+
+                        # Step 3: Poll for authorization result
+                        deadline = time.time() + expires_in
+                        while time.time() < deadline:
+                            await asyncio.sleep(interval)
+
+                            async with http.post(
+                                f'{DINGTALK_BASE_URL}/app/registration/poll',
+                                json={'device_code': device_code},
+                            ) as poll_resp:
+                                try:
+                                    poll_data = await poll_resp.json()
+                                except (aiohttp.ContentTypeError, ValueError):
+                                    continue
+
+                                if poll_data.get('errcode', -1) != 0:
+                                    session['status'] = 'error'
+                                    session['error'] = poll_data.get('errmsg') or 'Poll failed'
+                                    return
+
+                                status = poll_data.get('status', '')
+
+                                if status == 'SUCCESS':
+                                    session['status'] = 'success'
+                                    session['client_id'] = poll_data.get('client_id', '')
+                                    session['client_secret'] = poll_data.get('client_secret', '')
+                                    return
+                                elif status == 'FAIL':
+                                    session['status'] = 'error'
+                                    session['error'] = poll_data.get('fail_reason') or 'Authorization failed'
+                                    return
+                                elif status == 'EXPIRED':
+                                    session['status'] = 'error'
+                                    session['error'] = 'QR code expired'
+                                    return
+                                # status == 'WAITING': continue polling
+
+                        # Timeout
+                        session['status'] = 'error'
+                        session['error'] = 'QR code expired'
+
+                except asyncio.CancelledError:
+                    return
+                except Exception as e:
+                    session['status'] = 'error'
+                    session['error'] = str(e) or type(e).__name__  # never leave the message empty
+
+            task = asyncio.create_task(run_device_flow())
+            session['task'] = task
+
+            # Wait for QR code to be ready (max 10 seconds)
+            for _ in range(20):
+                if session['qr_url'] or session['error']:
+                    break
+                await asyncio.sleep(0.5)
+
+            if session['error'] or not session['qr_url']:
+                # The client never learns this session id, so drop the session
+                # right away instead of keeping it until the next TTL sweep.
+                task.cancel()
+                _dingtalk_sessions.pop(session_id, None)
+                if session['error']:
+                    return self.http_status(502, -1, session['error'])
+                # Neither a QR code nor an error showed up within the wait window.
+                return self.http_status(504, -1, 'Timeout waiting for QR code')
+
+            return self.success(
+                data={
+                    'session_id': session_id,
+                    'qr_url': session['qr_url'],
+                    'expire_at': session['expire_at'],
+                }
+            )
+
+        @self.route('/dingtalk/create-app/status/<session_id>', methods=['GET'])
+        async def _(session_id: str) -> str:
+            """Poll DingTalk Device Flow status; a finished session is reported once, then forgotten."""
+            _cleanup_expired_dingtalk_sessions()
+            entry = _dingtalk_sessions.get(session_id)
+            if not entry:
+                return self.http_status(404, -1, 'Session not found')
+
+            state = entry['status']
+            payload = {'status': state}
+
+            # Session fields copied into the response for each terminal state.
+            reported = {'success': ('client_id', 'client_secret'), 'error': ('error',)}.get(state)
+            if reported is not None:
+                payload.update((field, entry[field]) for field in reported)
+                # Terminal results are delivered a single time.
+                _dingtalk_sessions.pop(session_id, None)
+
+            return self.success(data=payload)
+
+        @self.route('/dingtalk/create-app/<session_id>', methods=['DELETE'])
+        async def _(session_id: str) -> str:
+            """Forget a DingTalk Device Flow session and cancel its task if one is still running."""
+            worker = (_dingtalk_sessions.pop(session_id, None) or {}).get('task')
+            if worker and not worker.done():
+                worker.cancel()
+            return self.success(data={})
+
+        # -----------------------------------------------------------------------
+        # WeComBot QR Code One-Click Create
+        # -----------------------------------------------------------------------
+
+        _wecombot_sessions: dict = {}
+        _WECOMBOT_SESSION_TTL = 300  # 5 minutes (WeCom QR validity window)
+
+        def _cleanup_expired_wecombot_sessions():
+            """Evict WeComBot sessions older than _WECOMBOT_SESSION_TTL and cancel their unfinished tasks."""
+            import time
+
+            now = time.time()
+            for sid, entry in list(_wecombot_sessions.items()):
+                if now - entry.get('created_at', 0) <= _WECOMBOT_SESSION_TTL:
+                    continue  # still within its TTL
+                worker = (_wecombot_sessions.pop(sid, None) or {}).get('task')
+                if worker and not worker.done():
+                    worker.cancel()
+
+        @self.route('/wecombot/create-bot', methods=['POST'])
+        async def _() -> str:
+            """Start WeComBot one-click creation via QR code. Returns session_id + QR code URL."""
+            import uuid
+            import time
+            import aiohttp
+
+            WECOM_QC_GENERATE_URL = 'https://work.weixin.qq.com/ai/qc/generate'
+            WECOM_QC_QUERY_URL = 'https://work.weixin.qq.com/ai/qc/query_result'
+
+            _cleanup_expired_wecombot_sessions()
+
+            session_id = str(uuid.uuid4())
+
+            session = {
+                'status': 'pending',
+                'qr_url': None,
+                'expire_at': None,
+                'botid': None,
+                'secret': None,
+                'error': None,
+                'created_at': time.time(),
+                'scode': None,
+                'task': None,
+            }
+            _wecombot_sessions[session_id] = session
+
+            async def run_qr_flow():
+                try:
+                    timeout = aiohttp.ClientTimeout(total=10)
+                    async with aiohttp.ClientSession(timeout=timeout) as http:
+                        # Step 1: Generate QR code
+                        async with http.get(
+                            f'{WECOM_QC_GENERATE_URL}?source=langbot&plat=0',
+                        ) as resp:
+                            try:
+                                data = await resp.json()
+                            except (aiohttp.ContentTypeError, ValueError):
+                                session['status'] = 'error'
+                                session['error'] = 'Invalid response from WeCom service'
+                                return
+                            if not data.get('data', {}).get('scode') or not data.get('data', {}).get('auth_url'):
+                                session['status'] = 'error'
+                                session['error'] = data.get('errmsg') or 'Failed to generate QR code'
+                                return
+
+                            scode = data['data']['scode']
+                            auth_url = data['data']['auth_url']
+
+                            session['scode'] = scode
+                            session['qr_url'] = auth_url
+                            session['expire_at'] = time.time() + _WECOMBOT_SESSION_TTL
+                            session['status'] = 'waiting'
+
+                        # Step 2: Poll for scan result
+                        deadline = time.time() + _WECOMBOT_SESSION_TTL
+                        while time.time() < deadline:
+                            await asyncio.sleep(3)
+
+                            async with http.get(
+                                f'{WECOM_QC_QUERY_URL}?scode={scode}',
+                            ) as poll_resp:
+                                try:
+                                    poll_data = await poll_resp.json()
+                                except (aiohttp.ContentTypeError, ValueError):
+                                    continue
+
+                                status = poll_data.get('data', {}).get('status', '')
+                                if status == 'success':
+                                    bot_info = poll_data.get('data', {}).get('bot_info', {})
+                                    if bot_info.get('botid') and bot_info.get('secret'):
+                                        session['status'] = 'success'
+                                        session['botid'] = bot_info['botid']
+                                        session['secret'] = bot_info['secret']
+                                        return
+                                    else:
+                                        session['status'] = 'error'
+                                        session['error'] = 'Scan succeeded but bot info is incomplete'
+                                        return
+
+                        # Timeout
+                        session['status'] = 'error'
+                        session['error'] = 'QR code expired'
+
+                except asyncio.CancelledError:
+                    return
+                except Exception as e:
+                    session['status'] = 'error'
+                    session['error'] = str(e) or type(e).__name__  # never leave the message empty
+
+            task = asyncio.create_task(run_qr_flow())
+            session['task'] = task
+
+            # Wait for QR code to be ready (max 10 seconds)
+            for _ in range(20):
+                if session['qr_url'] or session['error']:
+                    break
+                await asyncio.sleep(0.5)
+
+            if session['error'] or not session['qr_url']:
+                # The client never learns this session id, so drop the session
+                # right away instead of keeping it until the next TTL sweep.
+                task.cancel()
+                _wecombot_sessions.pop(session_id, None)
+                if session['error']:
+                    return self.http_status(502, -1, session['error'])
+                # Neither a QR code nor an error showed up within the wait window.
+                return self.http_status(504, -1, 'Timeout waiting for QR code')
+
+            return self.success(
+                data={
+                    'session_id': session_id,
+                    'qr_url': session['qr_url'],
+                    'expire_at': session['expire_at'],
+                }
+            )
+
+        @self.route('/wecombot/create-bot/status/<session_id>', methods=['GET'])
+        async def _(session_id: str) -> str:
+            """Poll WeComBot creation status; a finished session is reported once, then forgotten."""
+            _cleanup_expired_wecombot_sessions()
+            entry = _wecombot_sessions.get(session_id)
+            if not entry:
+                return self.http_status(404, -1, 'Session not found')
+
+            state = entry['status']
+            payload = {'status': state}
+
+            # Session fields copied into the response for each terminal state.
+            reported = {'success': ('botid', 'secret'), 'error': ('error',)}.get(state)
+            if reported is not None:
+                payload.update((field, entry[field]) for field in reported)
+                # Terminal results are delivered a single time.
+                _wecombot_sessions.pop(session_id, None)
+
+            return self.success(data=payload)
+
+        @self.route('/wecombot/create-bot/<session_id>', methods=['DELETE'])
+        async def _(session_id: str) -> str:
+            """Forget a WeComBot creation session and cancel its task if one is still running."""
+            worker = (_wecombot_sessions.pop(session_id, None) or {}).get('task')
+            if worker and not worker.done():
+                worker.cancel()
+            return self.success(data={})
--- a/src/langbot/pkg/api/http/controller/groups/plugins.py
+++ b/src/langbot/pkg/api/http/controller/groups/plugins.py
@@ -1,14 +1,20 @@
 from __future__ import annotations

 import base64
+import io
 import quart
 import re
 import httpx
 import uuid
 import os
+import zipfile
+import yaml
+from urllib.parse import urlparse
 import posixpath
+import sqlalchemy

 from .....core import taskmgr
+from .....entity.persistence import plugin as persistence_plugin
 from .. import group
 from langbot_plugin.runtime.plugin.mgr import PluginInstallSource

@@ -39,8 +45,109 @@ def _normalize_plugin_asset_path(filepath: str) -> str | None:
    return f'assets/{normalized}'


+def _get_request_origin() -> str:
+    """Build the ``scheme://host`` origin of this request, preferring X-Forwarded-* headers."""
+    headers = quart.request.headers
+    # Each header may be a comma-separated list; only its first entry is used.
+    proto = headers.get('X-Forwarded-Proto', '').split(',')[0].strip()
+    host = headers.get('X-Forwarded-Host', '').split(',')[0].strip()
+
+    return f'{proto or quart.request.scheme}://{host or quart.request.host}'
+
+
@group.group_class('plugins', '/api/v1/plugins')
 class PluginsRouterGroup(group.RouterGroup):
+    @staticmethod
+    def _normalize_archive_path(path: str) -> str:
+        """Convert backslashes, trim edge slashes and normpath the rest ('' if nothing is left)."""
+        return posixpath.normpath(p) if (p := str(path or '').replace('\\', '/').strip('/')) else ''
+
+    @classmethod
+    def _component_source_path(cls, entry) -> str:
+        """Normalized source path of an entry given as a ``{'path': ...}`` mapping or a bare value."""
+        raw = (entry.get('path') or '') if isinstance(entry, dict) else str(entry or '')
+        return cls._normalize_archive_path(raw)
+
+    @classmethod
+    def _count_component_configs(cls, component_config, archive_names: list[str]) -> int:
+        """Count the components declared by one ``spec.components`` entry.
+
+        A list counts its items and any other non-mapping counts 1 when truthy. A mapping
+        counts the distinct archive files matched by ``fromFiles`` plus the YAML files under
+        ``fromDirs``; with no match it counts 1 if it has a path/name/kind key, else 0.
+        """
+        known = [cls._normalize_archive_path(name) for name in archive_names]
+        if isinstance(component_config, list):
+            return len(component_config)
+        if not isinstance(component_config, dict):
+            return int(bool(component_config))
+
+        listed = (cls._component_source_path(item) for item in component_config.get('fromFiles') or [])
+        matched = {path for path in listed if path and path in known}
+
+        for item in component_config.get('fromDirs') or []:
+            folder = cls._component_source_path(item).rstrip('/')
+            if folder:
+                # Only YAML documents located below the declared directory count.
+                under = (name for name in known if name.startswith(f'{folder}/'))
+                matched.update(name for name in under if name.lower().endswith(('.yaml', '.yml')))
+
+        if matched:
+            return len(matched)
+
+        # No file matched: treat the mapping itself as one inline component when it looks like one.
+        inline_keys = ('path', 'name', 'kind')
+        return 1 if any(key in component_config for key in inline_keys) else 0
+
+    @classmethod
+    def _count_plugin_components(cls, components, archive_names: list[str]) -> dict[str, int]:
+        """Map each component kind to its positive component count.
+
+        Returns an empty dict when ``components`` is not a mapping; kinds counting zero are omitted.
+        """
+        if not isinstance(components, dict):
+            return {}
+
+        tallies = ((str(kind), cls._count_component_configs(cfg, archive_names)) for kind, cfg in components.items())
+        return {kind: total for kind, total in tallies if total > 0}
+
+    @staticmethod
+    def _parse_github_repo_url(repo_url: str) -> dict | None:
+        """Split a GitHub repository URL into owner, repo, ref and sub-directory.
+
+        A scheme-less input is treated as https. Returns ``None`` unless the host is
+        github.com (optionally www.) and the path holds at least ``owner/repo``. ``ref`` and
+        ``subdir`` are filled only for ``/tree/<ref>/...`` or ``/blob/<ref>/...`` URLs.
+        """
+        candidate = str(repo_url or '').strip()
+        if not candidate:
+            return None
+
+        if re.match(r'^[a-zA-Z][a-zA-Z0-9+.-]*://', candidate) is None:
+            candidate = f'https://{candidate}'
+
+        location = urlparse(candidate)
+        if location.netloc.lower() not in ('github.com', 'www.github.com'):
+            return None
+
+        segments = [seg for seg in location.path.strip('/').split('/') if seg]
+        if len(segments) < 2:
+            return None
+
+        owner, repo = segments[:2]
+        # Clone URLs carry a ".git" suffix that is not part of the repository name.
+        repo = repo[:-4] if repo.endswith('.git') else repo
+        if not (owner and repo):
+            return None
+
+        # Only /tree/<ref>/... and /blob/<ref>/... URLs pin a ref and an optional sub-path.
+        ref, subdir = '', ''
+        if len(segments) >= 4 and segments[2] in ('tree', 'blob'):
+            ref = segments[3]
+            subdir = '/'.join(segments[4:]).strip('/')
+
+        return {'owner': owner, 'repo': repo, 'ref': ref, 'subdir': subdir}
+
    async def _check_extensions_limit(self) -> str | None:
        """Check if extensions limit is reached. Returns error response if limit exceeded, None otherwise."""
        limitation = self.ap.instance_config.data.get('system', {}).get('limitation', {})
@@ -138,7 +245,15 @@ class PluginsRouterGroup(group.RouterGroup):
                return self.http_status(404, -1, 'plugin not found')

            if quart.request.method == 'GET':
-                return self.success(data={'config': plugin['plugin_config']})
+                result = await self.ap.persistence_mgr.execute_async(
+                    sqlalchemy.select(persistence_plugin.PluginSetting.config)
+                    .where(persistence_plugin.PluginSetting.plugin_author == author)
+                    .where(persistence_plugin.PluginSetting.plugin_name == plugin_name)
+                )
+                persisted_config = result.scalar_one_or_none()
+
+                config = persisted_config if persisted_config is not None else plugin['plugin_config']
+                return self.success(data={'config': config})
            elif quart.request.method == 'PUT':
                data = await quart.request.json

@@ -189,7 +304,7 @@ class PluginsRouterGroup(group.RouterGroup):
            # CSP for HTML pages served to sandboxed iframes (opaque origin).
            # 'self' doesn't work in sandboxed iframes — use actual server origin.
            if mime_type and mime_type.startswith('text/html'):
-                origin = f'{quart.request.scheme}://{quart.request.host}'
+                origin = _get_request_origin()
                resp.headers['Content-Security-Policy'] = (
                    f'default-src {origin}; '
                    f"script-src {origin} 'unsafe-inline'; "
@@ -234,17 +349,37 @@ class PluginsRouterGroup(group.RouterGroup):
            data = await quart.request.json
            repo_url = data.get('repo_url', '')

-            # Parse GitHub repository URL to extract owner and repo
-            # Supports: https://github.com/owner/repo or github.com/owner/repo
-            pattern = r'github\.com/([^/]+)/([^/]+?)(?:\.git)?(?:/.*)?$'
-            match = re.search(pattern, repo_url)
-
-            if not match:
+            parsed_repo = self._parse_github_repo_url(repo_url)
+            if not parsed_repo:
                return self.http_status(400, -1, 'Invalid GitHub repository URL')

-            owner, repo = match.groups()
+            owner = parsed_repo['owner']
+            repo = parsed_repo['repo']
+            requested_ref = parsed_repo['ref']
+            requested_subdir = parsed_repo['subdir']

            try:
+                if requested_ref:
+                    return self.success(
+                        data={
+                            'releases': [
+                                {
+                                    'id': 0,
+                                    'tag_name': requested_ref,
+                                    'name': requested_ref,
+                                    'published_at': '',
+                                    'prerelease': False,
+                                    'draft': False,
+                                    'source_type': 'branch',
+                                    'archive_url': f'https://api.github.com/repos/{owner}/{repo}/zipball/{requested_ref}',
+                                }
+                            ],
+                            'owner': owner,
+                            'repo': repo,
+                            'source_subdir': requested_subdir,
+                        }
+                    )
+
                # Fetch releases from GitHub API
                url = f'https://api.github.com/repos/{owner}/{repo}/releases'
                async with httpx.AsyncClient(
@@ -270,7 +405,14 @@ class PluginsRouterGroup(group.RouterGroup):
                        }
                    )

-                return self.success(data={'releases': formatted_releases, 'owner': owner, 'repo': repo})
+                return self.success(
+                    data={
+                        'releases': formatted_releases,
+                        'owner': owner,
+                        'repo': repo,
+                        'source_subdir': requested_subdir,
+                    }
+                )
            except httpx.RequestError as e:
                return self.http_status(500, -1, f'Failed to fetch releases: {str(e)}')

@@ -425,6 +567,62 @@ class PluginsRouterGroup(group.RouterGroup):

            return self.success(data={'task_id': wrapper.id})

+        @self.route('/install/local/preview', methods=['POST'], auth_type=group.AuthType.USER_TOKEN_OR_API_KEY)
+        async def _() -> str:
+            """Inspect an uploaded plugin archive and return its manifest summary without installing it."""
+            file = (await quart.request.files).get('file')
+            if file is None:
+                return self.http_status(400, -1, 'file is required')
+
+            file_bytes = file.read()
+            try:
+                with zipfile.ZipFile(io.BytesIO(file_bytes)) as zf:
+                    names = [name for name in zf.namelist() if not name.endswith('/')]
+                    # Pair every entry with its slash-normalized, lower-cased form for the lookups below.
+                    keyed = [(name.replace('\\', '/').strip('/').lower(), name) for name in names]
+                    manifest_keys = ('manifest.yaml', 'manifest.yml')
+                    manifest_name = next((name for key, name in keyed if key in manifest_keys), None)
+                    if manifest_name is None:
+                        return self.http_status(400, -1, 'manifest.yaml is required')
+
+                    manifest = yaml.safe_load(zf.read(manifest_name).decode('utf-8')) or {}
+                    if not isinstance(manifest, dict):
+                        # A scalar or list document cannot describe a plugin; report it as bad input.
+                        return self.http_status(400, -1, 'invalid manifest.yaml')
+
+                    requirements: list[str] = []
+                    requirements_name = next((name for key, name in keyed if key == 'requirements.txt'), None)
+                    if requirements_name is not None:
+                        requirements = [
+                            line.strip()
+                            for line in zf.read(requirements_name).decode('utf-8', errors='ignore').splitlines()
+                            if line.strip() and not line.strip().startswith('#')
+                        ]
+
+                    spec = manifest.get('spec') or {}
+                    components = spec.get('components') if isinstance(spec, dict) else None
+                    component_counts = self._count_plugin_components(components, names)
+                    component_types = list(component_counts.keys())
+
+                    return self.success(
+                        data={
+                            'filename': file.filename or 'local plugin',
+                            'size': len(file_bytes),
+                            'manifest': manifest,
+                            'metadata': manifest.get('metadata') or {},
+                            'component_types': component_types,
+                            'component_counts': component_counts,
+                            'requirements': requirements,
+                            'file_count': len(names),
+                        }
+                    )
+            except zipfile.BadZipFile:
+                return self.http_status(400, -1, 'invalid .lbpkg file')
+            except (yaml.YAMLError, UnicodeDecodeError):
+                return self.http_status(400, -1, 'invalid manifest.yaml')
+            except Exception as exc:
+                return self.http_status(500, -1, f'Failed to preview plugin package: {exc}')
+
        @self.route('/config-files', methods=['POST'], auth_type=group.AuthType.USER_TOKEN)
        async def _() -> str:
            """Upload a file for plugin configuration"""
--- a/src/langbot/pkg/api/http/controller/groups/resources/mcp.py
+++ b/src/langbot/pkg/api/http/controller/groups/resources/mcp.py
@@ -31,6 +31,9 @@ class MCPRouterGroup(group.RouterGroup):
        @self.route('/servers/<server_name>', methods=['GET', 'PUT', 'DELETE'], auth_type=group.AuthType.USER_TOKEN)
        async def _(server_name: str) -> str:
            """获取、更新或删除MCP服务器配置"""
+            from urllib.parse import unquote
+
+            server_name = unquote(server_name)

            server_data = await self.ap.mcp_service.get_mcp_server_by_name(server_name)
            if server_data is None:
@@ -57,6 +60,9 @@ class MCPRouterGroup(group.RouterGroup):
        @self.route('/servers/<server_name>/test', methods=['POST'], auth_type=group.AuthType.USER_TOKEN)
        async def _(server_name: str) -> str:
            """测试MCP服务器连接"""
+            from urllib.parse import unquote
+
+            server_name = unquote(server_name)
            server_data = await quart.request.json
            task_id = await self.ap.mcp_service.test_mcp_server(server_name=server_name, server_data=server_data)
            return self.success(data={'task_id': task_id})
--- a/src/langbot/pkg/api/http/controller/groups/skills.py
+++ b/src/langbot/pkg/api/http/controller/groups/skills.py
@@ -0,0 +1,190 @@
+from __future__ import annotations
+
+import quart
+
+from langbot_plugin.box.errors import BoxError
+
+from .. import group
+
+
+@group.group_class('skills', '/api/v1/skills')
+class SkillsRouterGroup(group.RouterGroup):
+    """Skills management API endpoints."""
+
+    async def initialize(self) -> None:
+        @self.route('', methods=['GET', 'POST'], auth_type=group.AuthType.USER_TOKEN_OR_API_KEY)
+        async def list_or_create_skills() -> quart.Response:
+            """GET lists all skills; POST creates one from a JSON object body with a non-empty ``name``."""
+            if quart.request.method == 'GET':
+                try:
+                    skills = await self.ap.skill_service.list_skills()
+                except (ValueError, BoxError) as exc:
+                    return self.http_status(400, -1, str(exc))
+                return self.success(data={'skills': skills})
+
+            data = await quart.request.json
+            if not isinstance(data, dict) or not data.get('name'):
+                return self.http_status(400, -1, 'Missing required field: name')
+            try:
+                skill = await self.ap.skill_service.create_skill(data)
+                return self.success(data={'skill': skill})
+            except (ValueError, BoxError) as exc:
+                return self.http_status(400, -1, str(exc))
+
+        @self.route('/<skill_name>', methods=['GET', 'PUT', 'DELETE'], auth_type=group.AuthType.USER_TOKEN_OR_API_KEY)
+        async def get_update_delete_skill(skill_name: str) -> quart.Response:
+            """Fetch (GET), modify (PUT) or remove (any other routed method) the skill named in the URL."""
+            if quart.request.method == 'GET':
+                try:
+                    found = await self.ap.skill_service.get_skill(skill_name)
+                except (ValueError, BoxError) as exc:
+                    return self.http_status(400, -1, str(exc))
+                if found:
+                    return self.success(data={'skill': found})
+                return self.http_status(404, -1, 'Skill not found')
+
+            if quart.request.method == 'PUT':
+                payload = await quart.request.json
+                try:
+                    updated = await self.ap.skill_service.update_skill(skill_name, payload)
+                    return self.success(data={'skill': updated})
+                except (ValueError, BoxError) as exc:
+                    return self.http_status(400, -1, str(exc))
+            try:
+                await self.ap.skill_service.delete_skill(skill_name)
+                return self.success()
+            except (ValueError, BoxError) as exc:
+                return self.http_status(400, -1, str(exc))
+
+        @self.route('/<skill_name>/files', methods=['GET'], auth_type=group.AuthType.USER_TOKEN_OR_API_KEY)
+        async def list_skill_files(skill_name: str) -> quart.Response:
+            """List the entries under ``?path=`` (default ``.``) of a skill package.
+
+            ``include_hidden`` is passed as True only when the query value is ``true`` (case-insensitive).
+            """
+            query = quart.request.args
+            options = {
+                'path': query.get('path', '.').strip(),
+                'include_hidden': query.get('include_hidden', 'false').lower() == 'true',
+            }
+            try:
+                return self.success(data=await self.ap.skill_service.list_skill_files(skill_name, **options))
+            except (ValueError, BoxError) as exc:
+                return self.http_status(400, -1, str(exc))
+
+        @self.route(
+            '/<skill_name>/files/<path:path>', methods=['GET', 'PUT'], auth_type=group.AuthType.USER_TOKEN_OR_API_KEY
+        )
+        async def read_or_write_skill_file(skill_name: str, path: str) -> quart.Response:
+            """Read (GET) or write (PUT, JSON body with ``content``) one file of a skill package."""
+            if quart.request.method == 'GET':
+                try:
+                    result = await self.ap.skill_service.read_skill_file(skill_name, path)
+                    return self.success(data=result)
+                except (ValueError, BoxError) as exc:
+                    return self.http_status(400, -1, str(exc))
+
+            # PUT: a body that is not a JSON object, or an explicit null content, is rejected below.
+            data = await quart.request.json
+            content = data.get('content', '') if isinstance(data, dict) else None
+            if content is None:
+                return self.http_status(400, -1, 'Missing required field: content')
+
+            try:
+                result = await self.ap.skill_service.write_skill_file(skill_name, path, content)
+                return self.success(data=result)
+            except (ValueError, BoxError) as exc:
+                return self.http_status(400, -1, str(exc))
+
+        @self.route('/<skill_name>/preview', methods=['GET'], auth_type=group.AuthType.USER_TOKEN_OR_API_KEY)
+        async def preview_skill(skill_name: str) -> quart.Response:
+            """Return the ``instructions`` of the skill the skill manager finds by name, or 404 if none."""
+            if not (entry := self.ap.skill_mgr.get_skill_by_name(skill_name)):
+                return self.http_status(404, -1, 'Skill not found')
+            return self.success(data={'instructions': entry.get('instructions', '')})
+
+        @self.route('/install/github', methods=['POST'], auth_type=group.AuthType.USER_TOKEN_OR_API_KEY)
+        async def install_skill_from_github() -> quart.Response:
+            """Install skills from a GitHub asset; assets other than ``skill.md`` also require ``release_tag``."""
+            data = await quart.request.json
+            for field in ('asset_url', 'owner', 'repo'):
+                if not isinstance(data, dict) or not data.get(field):
+                    return self.http_status(400, -1, f'Missing required field: {field}')
+            asset_url = str(data['asset_url']).strip().lower().split('?', 1)[0].split('#', 1)[0]
+            if not asset_url.endswith('skill.md') and not data.get('release_tag'):
+                return self.http_status(400, -1, 'Missing required field: release_tag')
+
+            try:
+                skill = await self.ap.skill_service.install_from_github(data)
+                return self.success(data={'skills': skill})
+            except (ValueError, BoxError) as exc:
+                return self.http_status(400, -1, str(exc))
+            except Exception as exc:
+                return self.http_status(500, -1, f'Failed to install skill: {exc}')
+
+        @self.route('/install/github/preview', methods=['POST'], auth_type=group.AuthType.USER_TOKEN_OR_API_KEY)
+        async def preview_skill_from_github() -> quart.Response:
+            """Preview a GitHub skill install; assets other than ``skill.md`` also require ``release_tag``."""
+            data = await quart.request.json
+            for field in ('asset_url', 'owner', 'repo'):
+                if not isinstance(data, dict) or not data.get(field):
+                    return self.http_status(400, -1, f'Missing required field: {field}')
+            asset_url = str(data['asset_url']).strip().lower().split('?', 1)[0].split('#', 1)[0]
+            if not asset_url.endswith('skill.md') and not data.get('release_tag'):
+                return self.http_status(400, -1, 'Missing required field: release_tag')
+
+            try:
+                preview = await self.ap.skill_service.preview_install_from_github(data)
+                return self.success(data={'skills': preview})
+            except (ValueError, BoxError) as exc:
+                return self.http_status(400, -1, str(exc))
+            except Exception as exc:
+                return self.http_status(500, -1, f'Failed to preview skill: {exc}')
+
+        @self.route('/install/upload', methods=['POST'], auth_type=group.AuthType.USER_TOKEN_OR_API_KEY)
+        async def install_skill_from_upload() -> quart.Response:
+            """Install skills from the uploaded ``file`` part, passing along the ``source_paths`` form values."""
+            upload = (await quart.request.files).get('file')
+            if upload is None:
+                return self.http_status(400, -1, 'file is required')
+            fields = await quart.request.form
+            try:
+                arguments = {
+                    'file_bytes': upload.read(),
+                    'filename': upload.filename or '',
+                    'source_paths': fields.getlist('source_paths'),
+                }
+                return self.success(data={'skills': await self.ap.skill_service.install_from_zip_upload(**arguments)})
+            except (ValueError, BoxError) as exc:
+                return self.http_status(400, -1, str(exc))
+            except Exception as exc:
+                return self.http_status(500, -1, f'Failed to install skill: {exc}')
+
+        @self.route('/install/upload/preview', methods=['POST'], auth_type=group.AuthType.USER_TOKEN_OR_API_KEY)
+        async def preview_skill_from_upload() -> quart.Response:
+            """Preview what installing the uploaded ``file`` part would yield, as reported by the skill service."""
+            upload = (await quart.request.files).get('file')
+            if upload is None:
+                return self.http_status(400, -1, 'file is required')
+
+            try:
+                blob = upload.read()
+                label = upload.filename or ''
+                preview = await self.ap.skill_service.preview_install_from_zip_upload(file_bytes=blob, filename=label)
+                return self.success(data={'skills': preview})
+            except (ValueError, BoxError) as exc:
+                return self.http_status(400, -1, str(exc))
+            except Exception as exc:
+                return self.http_status(500, -1, f'Failed to preview skill: {exc}')
+
+        @self.route('/scan', methods=['GET'], auth_type=group.AuthType.USER_TOKEN_OR_API_KEY)
+        async def scan_skill_directory() -> quart.Response:
+            """Scan the directory named by the required ``?path=`` query parameter via the skill service."""
+            target = quart.request.args.get('path', '').strip()
+            if target == '':
+                return self.http_status(400, -1, 'Missing required parameter: path')
+
+            try:
+                return self.success(data=await self.ap.skill_service.scan_directory_async(target))
+            except (ValueError, BoxError) as exc:
+                return self.http_status(400, -1, str(exc))
--- a/src/langbot/pkg/api/http/controller/groups/system.py
+++ b/src/langbot/pkg/api/http/controller/groups/system.py
@@ -140,17 +140,6 @@ class SystemRouterGroup(group.RouterGroup):
        async def _() -> str:
            return self.success(data=await self.ap.maintenance_service.get_storage_analysis())

-        @self.route('/debug/exec', methods=['POST'], auth_type=group.AuthType.USER_TOKEN)
-        async def _() -> str:
-            if not constants.debug_mode:
-                return self.http_status(403, 403, 'Forbidden')
-
-            py_code = await quart.request.data
-
-            ap = self.ap
-
-            return self.success(data=exec(py_code, {'ap': ap}))
-
        @self.route(
            '/debug/plugin/action',
            methods=['POST'],
--- a/src/langbot/pkg/api/http/controller/groups/user.py
+++ b/src/langbot/pkg/api/http/controller/groups/user.py
@@ -146,6 +146,7 @@ class UserRouterGroup(group.RouterGroup):
                return self.fail(3, str(e))
            except ValueError as e:
                traceback.print_exc()
+                self.ap.logger.warning(f'Space OAuth callback failed: {e}')
                return self.fail(1, str(e))
            except Exception as e:
                traceback.print_exc()
--- a/src/langbot/pkg/api/http/service/apikey.py
+++ b/src/langbot/pkg/api/http/service/apikey.py
@@ -52,6 +52,9 @@ class ApiKeyService:

    async def verify_api_key(self, key: str) -> bool:
        """Verify if an API key is valid"""
+        if not isinstance(key, str) or not key.startswith('lbk_'):
+            return False
+
        result = await self.ap.persistence_mgr.execute_async(
            sqlalchemy.select(apikey.ApiKey).where(apikey.ApiKey.key == key)
        )
--- a/src/langbot/pkg/api/http/service/bot.py
+++ b/src/langbot/pkg/api/http/service/bot.py
@@ -99,11 +99,11 @@ class BotService:
        # TODO: 检查配置信息格式
        bot_data['uuid'] = str(uuid.uuid4())

-        # checkout the default pipeline
+        # bind the most recently updated pipeline if any exist
        result = await self.ap.persistence_mgr.execute_async(
-            sqlalchemy.select(persistence_pipeline.LegacyPipeline).where(
-                persistence_pipeline.LegacyPipeline.is_default == True
-            )
+            sqlalchemy.select(persistence_pipeline.LegacyPipeline)
+            .order_by(persistence_pipeline.LegacyPipeline.updated_at.desc())
+            .limit(1)
        )
        pipeline = result.first()
        if pipeline is not None:
@@ -120,24 +120,26 @@ class BotService:

    async def update_bot(self, bot_uuid: str, bot_data: dict) -> None:
        """Update bot"""
-        if 'uuid' in bot_data:
-            del bot_data['uuid']
+        update_data = bot_data.copy()
+
+        if 'uuid' in update_data:
+            del update_data['uuid']

        # set use_pipeline_name
-        if 'use_pipeline_uuid' in bot_data:
+        if 'use_pipeline_uuid' in update_data:
            result = await self.ap.persistence_mgr.execute_async(
                sqlalchemy.select(persistence_pipeline.LegacyPipeline).where(
-                    persistence_pipeline.LegacyPipeline.uuid == bot_data['use_pipeline_uuid']
+                    persistence_pipeline.LegacyPipeline.uuid == update_data['use_pipeline_uuid']
                )
            )
            pipeline = result.first()
            if pipeline is not None:
-                bot_data['use_pipeline_name'] = pipeline.name
+                update_data['use_pipeline_name'] = pipeline.name
            else:
                raise Exception('Pipeline not found')

        await self.ap.persistence_mgr.execute_async(
-            sqlalchemy.update(persistence_bot.Bot).values(bot_data).where(persistence_bot.Bot.uuid == bot_uuid)
+            sqlalchemy.update(persistence_bot.Bot).values(update_data).where(persistence_bot.Bot.uuid == bot_uuid)
        )
        await self.ap.platform_mgr.remove_bot(bot_uuid)

--- a/src/langbot/pkg/api/http/service/knowledge.py
+++ b/src/langbot/pkg/api/http/service/knowledge.py
@@ -31,15 +31,126 @@ class KnowledgeService:
        if not knowledge_engine_plugin_id:
            raise ValueError('knowledge_engine_plugin_id is required')

+        creation_settings = kb_data.get('creation_settings', {})
+        retrieval_settings = kb_data.get('retrieval_settings', {})
+
+        # Validate required fields based on plugin's creation_schema and retrieval_schema
+        await self._validate_schema_required_fields(
+            knowledge_engine_plugin_id,
+            creation_settings,
+            retrieval_settings,
+        )
+
        kb = await self.ap.rag_mgr.create_knowledge_base(
            name=kb_data.get('name', 'Untitled'),
            knowledge_engine_plugin_id=knowledge_engine_plugin_id,
-            creation_settings=kb_data.get('creation_settings', {}),
-            retrieval_settings=kb_data.get('retrieval_settings', {}),
+            creation_settings=creation_settings,
+            retrieval_settings=retrieval_settings,
            description=kb_data.get('description', ''),
        )
        return kb.uuid

+    async def _validate_schema_required_fields(
+        self,
+        plugin_id: str,
+        creation_settings: dict,
+        retrieval_settings: dict,
+    ) -> None:
+        """Enforce the required fields declared by a knowledge-engine plugin's schemas.
+
+        Creation settings are checked first, then retrieval settings. A ValueError from either step
+        propagates; any other failure is logged as a warning and that schema is not enforced.
+
+        Args:
+            plugin_id: Knowledge Engine plugin ID.
+            creation_settings: Values checked against the plugin's creation schema.
+            retrieval_settings: Values checked against the plugin's retrieval schema.
+
+        Raises:
+            ValueError: If any required field is missing or empty.
+        """
+        # Creation-time settings
+        try:
+            schema = await self.ap.plugin_connector.get_rag_creation_schema(plugin_id)
+            self._check_required_fields(schema, creation_settings, 'creation_settings')
+        except Exception as e:
+            if isinstance(e, ValueError):
+                raise
+            self.ap.logger.warning(f'Failed to get creation_schema for validation: {e}')
+
+        # Retrieval-time settings
+        try:
+            schema = await self.ap.plugin_connector.get_rag_retrieval_schema(plugin_id)
+            self._check_required_fields(schema, retrieval_settings, 'retrieval_settings')
+        except Exception as e:
+            if isinstance(e, ValueError):
+                raise
+            self.ap.logger.warning(f'Failed to get retrieval_schema for validation: {e}')
+
+    def _check_required_fields(
+        self,
+        schema: dict | list,
+        settings: dict,
+        context: str,
+    ) -> None:
+        """Raise if a field the schema marks as required is missing or blank in ``settings``.
+
+        Args:
+            schema: Plugin-defined schema (a list of field dicts, or a dict with a 'schema' key).
+            settings: User-provided settings values.
+            context: Settings group name used in the error message (e.g., 'creation_settings').
+
+        Raises:
+            ValueError: For the first required, currently shown field whose value is None or blank.
+        """
+        if not schema:
+            return
+
+        # schema can be a list directly, or a dict with 'schema' key
+        items = schema if isinstance(schema, list) else schema.get('schema', [])
+        if not items:
+            return
+
+        for item in items:
+            field_name = item.get('name')
+            if not field_name:
+                continue
+
+            is_required = item.get('required', False)
+            if not is_required:
+                continue
+
+            # A field with a show_if condition is only validated while that condition holds.
+            show_if = item.get('show_if')
+            if show_if:
+                depend_field = show_if.get('field')
+                operator = show_if.get('operator')
+                expected_value = show_if.get('value')
+
+                if depend_field and operator:
+                    depend_value = settings.get(depend_field)
+                    # Condition not met: the field is hidden, so skip it. Other operators fall through.
+                    if operator == 'eq' and depend_value != expected_value:
+                        continue
+                    if operator == 'neq' and depend_value == expected_value:
+                        continue
+                    if operator == 'in' and isinstance(expected_value, list) and depend_value not in expected_value:
+                        continue
+
+            value = settings.get(field_name)
+
+            # None and whitespace-only strings count as "not provided"; other values pass.
+            if value is None or (isinstance(value, str) and value.strip() == ''):
+                # Pick the first available localized label; a non-mapping label is ignored.
+                label = item.get('label') if isinstance(item.get('label'), dict) else {}
+                field_label = (
+                    label.get('en_US')
+                    or label.get('zh_Hans')
+                    or label.get('zh_Hant')
+                    or field_name
+                )
+                raise ValueError(f'{field_label} is required ({context}.{field_name})')
+
    async def update_knowledge_base(self, kb_uuid: str, kb_data: dict) -> None:
        """更新知识库"""
        # Filter to only mutable fields
--- a/src/langbot/pkg/api/http/service/pipeline.py
+++ b/src/langbot/pkg/api/http/service/pipeline.py
@@ -113,14 +113,9 @@ class PipelineService:
        return pipeline_data['uuid']

    async def update_pipeline(self, pipeline_uuid: str, pipeline_data: dict) -> None:
-        if 'uuid' in pipeline_data:
-            del pipeline_data['uuid']
-        if 'for_version' in pipeline_data:
-            del pipeline_data['for_version']
-        if 'stages' in pipeline_data:
-            del pipeline_data['stages']
-        if 'is_default' in pipeline_data:
-            del pipeline_data['is_default']
+        pipeline_data = pipeline_data.copy()
+        for protected_field in ('uuid', 'for_version', 'stages', 'is_default'):
+            pipeline_data.pop(protected_field, None)

        await self.ap.persistence_mgr.execute_async(
            sqlalchemy.update(persistence_pipeline.LegacyPipeline)
@@ -220,6 +215,8 @@ class PipelineService:
        bound_mcp_servers: list[str] = None,
        enable_all_plugins: bool = True,
        enable_all_mcp_servers: bool = True,
+        bound_skills: list[str] = None,
+        enable_all_skills: bool = True,
    ) -> None:
        """Update the bound plugins and MCP servers for a pipeline"""
        # Get current pipeline
@@ -237,9 +234,12 @@ class PipelineService:
        extensions_preferences = pipeline.extensions_preferences or {}
        extensions_preferences['enable_all_plugins'] = enable_all_plugins
        extensions_preferences['enable_all_mcp_servers'] = enable_all_mcp_servers
+        extensions_preferences['enable_all_skills'] = enable_all_skills
        extensions_preferences['plugins'] = bound_plugins
        if bound_mcp_servers is not None:
            extensions_preferences['mcp_servers'] = bound_mcp_servers
+        if bound_skills is not None:
+            extensions_preferences['skills'] = bound_skills

        await self.ap.persistence_mgr.execute_async(
            sqlalchemy.update(persistence_pipeline.LegacyPipeline)
--- a/src/langbot/pkg/api/http/service/provider.py
+++ b/src/langbot/pkg/api/http/service/provider.py
@@ -17,6 +17,24 @@ class ModelProviderService:
    def __init__(self, ap: app.Application) -> None:
        self.ap = ap

+    @staticmethod
+    def _normalize_api_keys(api_keys: str | list[str] | tuple[str, ...] | None) -> list[str]:
+        """Return the given API key(s) as a clean, order-preserving list.
+
+        A single string is treated as a one-element collection. Each string
+        entry is stripped of surrounding whitespace; non-string entries, blank
+        entries and repeats of an earlier key are dropped. ``None`` yields an
+        empty list.
+        """
+        if api_keys is None:
+            return []
+
+        candidates = [api_keys] if isinstance(api_keys, str) else list(api_keys)
+
+        # A dict keeps insertion order, so it de-duplicates without reordering.
+        unique: dict[str, None] = {}
+        for candidate in candidates:
+            if not isinstance(candidate, str):
+                continue
+            cleaned = candidate.strip()
+            if cleaned:
+                unique.setdefault(cleaned, None)
+
+        return list(unique)
+
    async def get_providers(self) -> list[dict]:
        """Get all providers"""
        result = await self.ap.persistence_mgr.execute_async(sqlalchemy.select(persistence_model.ModelProvider))
@@ -59,6 +77,7 @@ class ModelProviderService:
    async def create_provider(self, provider_data: dict) -> str:
        """Create a new provider"""
        provider_data['uuid'] = str(uuid.uuid4())
+        provider_data['api_keys'] = self._normalize_api_keys(provider_data.get('api_keys'))
        await self.ap.persistence_mgr.execute_async(
            sqlalchemy.insert(persistence_model.ModelProvider).values(**provider_data)
        )
@@ -72,6 +91,8 @@ class ModelProviderService:
        """Update an existing provider"""
        if 'uuid' in provider_data:
            del provider_data['uuid']
+        if 'api_keys' in provider_data:
+            provider_data['api_keys'] = self._normalize_api_keys(provider_data.get('api_keys'))
        await self.ap.persistence_mgr.execute_async(
            sqlalchemy.update(persistence_model.ModelProvider)
            .where(persistence_model.ModelProvider.uuid == provider_uuid)
@@ -141,6 +162,8 @@ class ModelProviderService:

    async def find_or_create_provider(self, requester: str, base_url: str, api_keys: list) -> str:
        """Find existing provider or create new one"""
+        api_keys = self._normalize_api_keys(api_keys)
+
        # Try to find existing provider with same config
        result = await self.ap.persistence_mgr.execute_async(
            sqlalchemy.select(persistence_model.ModelProvider).where(
@@ -168,7 +191,7 @@ class ModelProviderService:
                'name': provider_name,
                'requester': requester,
                'base_url': base_url,
-                'api_keys': api_keys or [],
+                'api_keys': api_keys,
            }
        )

@@ -177,7 +200,7 @@ class ModelProviderService:
        await self.ap.persistence_mgr.execute_async(
            sqlalchemy.update(persistence_model.ModelProvider)
            .where(persistence_model.ModelProvider.uuid == '00000000-0000-0000-0000-000000000000')
-            .values(api_keys=[api_key])
+            .values(api_keys=self._normalize_api_keys(api_key))
        )
        await self.ap.model_mgr.reload_provider('00000000-0000-0000-0000-000000000000')

--- a/src/langbot/pkg/api/http/service/skill.py
+++ b/src/langbot/pkg/api/http/service/skill.py
@@ -0,0 +1,428 @@
+from __future__ import annotations
+
+import io
+import inspect
+import os
+import posixpath
+import zipfile
+from typing import Optional
+from urllib.parse import quote, unquote, urlparse
+
+import httpx
+
+from ....core import app
+from ....skill.utils import parse_frontmatter
+
+
+# Keys of a skill record that ``SkillService._serialize_skill`` copies into
+# its result; every other key on the record is left out.
+_PUBLIC_SKILL_FIELDS = (
+    'name',
+    'display_name',
+    'description',
+    'instructions',
+    'package_root',
+    'created_at',
+    'updated_at',
+)
+
+# Hosts accepted by ``SkillService._validate_github_asset_url``. The check is
+# an exact match against the lower-cased netloc of the asset URL.
+_GITHUB_ASSET_HOSTS = {
+    'github.com',
+    'api.github.com',
+    'objects.githubusercontent.com',
+    'githubusercontent.com',
+    'raw.githubusercontent.com',
+    'codeload.github.com',
+}
+
+
+class SkillService:
+    """Filesystem-backed skill management service."""
+
+    ap: app.Application
+
+    def __init__(self, ap: app.Application) -> None:
+        """Keep a reference to the application; ``box_service`` and ``skill_mgr`` are looked up on it per call."""
+        self.ap = ap
+
+    def _box_service(self):
+        """Return ``self.ap.box_service`` if it exists and reports ``available``, else ``None``."""
+        candidate = getattr(self.ap, 'box_service', None)
+        if candidate is None:
+            return None
+        return candidate if getattr(candidate, 'available', False) else None
+
+    def _require_box(self, action: str):
+        """Return the usable Box service, or raise ``ValueError`` saying why there is none.
+
+        Box is the only source of truth for skills: every read and write goes
+        through it and there is no local-filesystem fallback.
+
+        Args:
+            action: Human-readable description of the attempted operation; it
+                becomes the start of the error message.
+
+        Raises:
+            ValueError: Box is not initialised, is disabled, or is unavailable.
+        """
+        usable = self._box_service()
+        if usable is not None:
+            return usable
+
+        # Work out which of the three failure modes applies so the message is actionable.
+        configured = getattr(self.ap, 'box_service', None)
+        if configured is None:
+            reason = 'not initialised'
+        elif getattr(configured, 'enabled', True):
+            detail = getattr(configured, '_connector_error', '') or 'currently unavailable'
+            reason = f'unavailable: {detail}'
+        else:
+            reason = 'disabled in config (box.enabled = false)'
+
+        message = (
+            f'{action} requires the Box runtime, which is {reason}. '
+            f'Enable Box in config.yaml (box.enabled = true) and ensure the '
+            f'runtime is reachable before retrying.'
+        )
+        raise ValueError(message)
+
+    def _require_box_for_write(self, action: str) -> None:
+        """Backwards-compatible alias preserved for clarity at call sites.
+
+        Runs the same check as ``_require_box`` (raising ``ValueError`` when
+        Box cannot be used) but discards the returned service.
+        """
+        self._require_box(action)
+
+    @staticmethod
+    def _serialize_skill(skill: dict) -> dict:
+        """Project a skill record onto the keys in ``_PUBLIC_SKILL_FIELDS``.
+
+        Keys missing from ``skill`` are omitted rather than filled with
+        ``None``; the result follows the order of ``_PUBLIC_SKILL_FIELDS``.
+        """
+        public: dict = {}
+        for field in _PUBLIC_SKILL_FIELDS:
+            if field in skill:
+                public[field] = skill.get(field)
+        return public
+
+    async def list_skills(self) -> list[dict]:
+        """Return every skill known to Box, reduced to its public fields.
+
+        When Box is unavailable this returns an empty list instead of raising:
+        the skills page should render cleanly, and the UI separately renders
+        a "Box disabled / unavailable" banner via useBoxStatus.
+        """
+        box_service = self._box_service()
+        if box_service is None:
+            return []
+        records = await box_service.list_skills()
+        return [self._serialize_skill(record) for record in records]
+
+    async def get_skill(self, skill_name: str) -> Optional[dict]:
+        """Look up one skill by name and return its public fields.
+
+        Returns ``None`` when Box is unavailable or when Box returns a falsy
+        record for ``skill_name``.
+        """
+        box_service = self._box_service()
+        if box_service is None:
+            return None
+        record = await box_service.get_skill(skill_name)
+        if not record:
+            return None
+        return self._serialize_skill(record)
+
+    async def get_skill_by_name(self, name: str) -> Optional[dict]:
+        """Alias of ``get_skill``; skills are identified by name."""
+        return await self.get_skill(name)
+
+    async def create_skill(self, data: dict) -> dict:
+        """Create a skill through Box and return its public fields.
+
+        ``data`` is passed to ``box_service.create_skill`` unchanged. The
+        skill manager is reloaded after the create and before returning.
+
+        Raises:
+            ValueError: Box is not usable (see ``_require_box``).
+        """
+        box_service = self._require_box('Creating a skill')
+        created = await box_service.create_skill(data)
+        await self._reload_skills()
+        return self._serialize_skill(created)
+
+    async def update_skill(self, skill_name: str, data: dict) -> dict:
+        """Update the named skill through Box and return its public fields.
+
+        ``data`` is passed to ``box_service.update_skill`` unchanged. The
+        skill manager is reloaded after the update and before returning.
+
+        Raises:
+            ValueError: Box is not usable (see ``_require_box``).
+        """
+        box_service = self._require_box('Editing a skill')
+        updated = await box_service.update_skill(skill_name, data)
+        await self._reload_skills()
+        return self._serialize_skill(updated)
+
+    async def delete_skill(self, skill_name: str) -> bool:
+        """Delete the named skill through Box, then reload the skill manager.
+
+        Always returns ``True``; failures surface as exceptions.
+
+        Raises:
+            ValueError: Box is not usable (see ``_require_box``).
+        """
+        box_service = self._require_box('Deleting a skill')
+        await box_service.delete_skill(skill_name)
+        await self._reload_skills()
+        return True
+
+    async def list_skill_files(
+        self,
+        skill_name: str,
+        path: str = '.',
+        include_hidden: bool = False,
+        max_entries: int = 200,
+    ) -> dict:
+        """List files inside a skill package via Box.
+
+        All arguments are forwarded positionally to
+        ``box_service.list_skill_files`` and its result is returned as-is.
+        ``path`` is presumably relative to the skill's package root, and
+        ``include_hidden`` / ``max_entries`` presumably filter and cap the
+        listing; confirm against the Box implementation.
+
+        Raises:
+            ValueError: Box is not usable (see ``_require_box``).
+        """
+        box_service = self._require_box('Browsing skill files')
+        return await box_service.list_skill_files(skill_name, path, include_hidden, max_entries)
+
+    async def read_skill_file(self, skill_name: str, path: str) -> dict:
+        """Read one file of a skill via Box and return Box's result unchanged.
+
+        Raises:
+            ValueError: Box is not usable (see ``_require_box``).
+        """
+        box_service = self._require_box('Reading a skill file')
+        return await box_service.read_skill_file(skill_name, path)
+
+    async def write_skill_file(self, skill_name: str, path: str, content: str) -> dict:
+        """Write one file of a skill via Box and return Box's result unchanged.
+
+        The skill manager is reloaded after the write so the edited content
+        is picked up.
+
+        Raises:
+            ValueError: Box is not usable (see ``_require_box``).
+        """
+        box_service = self._require_box('Editing skill files')
+        result = await box_service.write_skill_file(skill_name, path, content)
+        await self._reload_skills()
+        return result
+
+    async def install_from_github(self, data: dict) -> list[dict]:
+        """Install one or more skills from a GitHub asset URL.
+
+        Keys read from ``data``:
+            owner, repo, asset_url: required (a missing key raises ``KeyError``).
+            release_tag, source_subdir, source_paths, source_path: optional.
+
+        If the URL path ends with ``/skill.md`` (case-insensitive), the whole
+        directory containing that file is installed via
+        ``_install_github_skill_md``. Otherwise the URL is validated as a
+        GitHub-hosted archive for ``owner``/``repo``, downloaded, and handed
+        to ``box_service.install_skill_zip``.
+
+        Returns:
+            The installed skills, each reduced to its public fields.
+
+        Raises:
+            ValueError: Box is not usable, or the URL fails validation.
+        """
+        box_service = self._require_box('Installing a skill from GitHub')
+        owner = str(data['owner']).strip()
+        repo = str(data['repo']).strip()
+        release_tag = str(data.get('release_tag', '')).strip()
+        raw_asset_url = str(data['asset_url']).strip()
+        if self._is_github_skill_md_url(raw_asset_url):
+            return await self._install_github_skill_md(raw_asset_url, owner=owner, repo=repo, data=data)
+
+        asset_url = self._validate_github_asset_url(raw_asset_url, owner=owner, repo=repo, release_tag=release_tag)
+        source_subdir = str(data.get('source_subdir', '') or '').strip()
+
+        zip_bytes = await self._download_github_asset(asset_url)
+        # Synthetic archive name: "<repo>-<tag without leading v, '/' -> '-'>.zip",
+        # or "<repo>-source.zip" when the tag part is empty.
+        filename = f'{repo}-{release_tag.lstrip("v").replace("/", "-") or "source"}.zip'
+        installed = await box_service.install_skill_zip(
+            zip_bytes,
+            filename,
+            source_paths=data.get('source_paths') or [],
+            source_path=str(data.get('source_path', '') or ''),
+            source_subdir=source_subdir,
+        )
+        await self._reload_skills()
+        return [self._serialize_skill(skill) for skill in installed]
+
+    async def preview_install_from_github(self, data: dict) -> list[dict]:
+        """Preview what ``install_from_github`` would install, without installing.
+
+        Reads ``owner``, ``repo`` and ``asset_url`` (required) plus
+        ``release_tag`` and ``source_subdir`` (optional) from ``data``. A URL
+        ending in ``/skill.md`` is routed to ``_preview_github_skill_md``; any
+        other URL is validated, downloaded and passed to
+        ``box_service.preview_skill_zip``, whose result is returned as-is.
+        The skill manager is not reloaded.
+
+        Raises:
+            ValueError: Box is not usable, or the URL fails validation.
+        """
+        box_service = self._require_box('Previewing a skill from GitHub')
+        owner = str(data['owner']).strip()
+        repo = str(data['repo']).strip()
+        release_tag = str(data.get('release_tag', '')).strip()
+        raw_asset_url = str(data['asset_url']).strip()
+        if self._is_github_skill_md_url(raw_asset_url):
+            return await self._preview_github_skill_md(raw_asset_url, owner=owner, repo=repo)
+
+        asset_url = self._validate_github_asset_url(raw_asset_url, owner=owner, repo=repo, release_tag=release_tag)
+        source_subdir = str(data.get('source_subdir', '') or '').strip()
+
+        zip_bytes = await self._download_github_asset(asset_url)
+        # Same synthetic archive name as install_from_github builds.
+        return await box_service.preview_skill_zip(
+            zip_bytes,
+            f'{repo}-{release_tag.lstrip("v").replace("/", "-") or "source"}.zip',
+            source_subdir=source_subdir,
+        )
+
+    async def install_from_zip_upload(
+        self,
+        *,
+        file_bytes: bytes,
+        filename: str,
+        source_paths: list[str] | None = None,
+        source_path: str = '',
+    ) -> list[dict]:
+        """Install skills from an uploaded zip archive.
+
+        The bytes and filename are handed to ``box_service.install_skill_zip``
+        together with ``source_paths`` (``None`` becomes ``[]``) and
+        ``source_path``. The skill manager is reloaded afterwards.
+
+        Returns:
+            The installed skills, each reduced to its public fields.
+
+        Raises:
+            ValueError: Box is not usable (see ``_require_box``).
+        """
+        box_service = self._require_box('Installing a skill from upload')
+        installed = await box_service.install_skill_zip(
+            file_bytes,
+            filename,
+            source_paths=source_paths or [],
+            source_path=source_path,
+        )
+        await self._reload_skills()
+        return [self._serialize_skill(skill) for skill in installed]
+
+    async def preview_install_from_zip_upload(self, *, file_bytes: bytes, filename: str) -> list[dict]:
+        """Return Box's preview of an uploaded zip archive without installing it.
+
+        Raises:
+            ValueError: Box is not usable (see ``_require_box``).
+        """
+        box_service = self._require_box('Previewing a skill upload')
+        return await box_service.preview_skill_zip(file_bytes, filename)
+
+    async def _install_github_skill_md(self, asset_url: str, *, owner: str, repo: str, data: dict) -> list[dict]:
+        """Install the skill whose ``SKILL.md`` lives at ``asset_url``.
+
+        The directory containing that file is repackaged into a fresh zip by
+        ``_download_github_skill_directory_as_zip`` and installed through
+        ``box_service.install_skill_zip`` with an empty ``target_suffix``.
+        ``source_paths`` and ``source_path`` are taken from ``data``. The
+        skill manager is reloaded afterwards.
+
+        Returns:
+            The installed skills, each reduced to its public fields.
+        """
+        box_service = self._require_box('Installing a skill from GitHub')
+        zip_bytes, filename, _package_name = await self._download_github_skill_directory_as_zip(
+            asset_url,
+            owner=owner,
+            repo=repo,
+        )
+
+        installed = await box_service.install_skill_zip(
+            zip_bytes,
+            filename,
+            source_paths=data.get('source_paths') or [],
+            source_path=str(data.get('source_path', '') or ''),
+            target_suffix='',
+        )
+        await self._reload_skills()
+        return [self._serialize_skill(skill) for skill in installed]
+
+    async def _preview_github_skill_md(self, asset_url: str, *, owner: str, repo: str) -> list[dict]:
+        """Preview the skill whose ``SKILL.md`` lives at ``asset_url``.
+
+        Builds the same repackaged zip as ``_install_github_skill_md`` and
+        returns ``box_service.preview_skill_zip``'s result for it, using
+        ``<package_name>.zip`` as the archive name and an empty
+        ``target_suffix``. Nothing is installed or reloaded.
+        """
+        box_service = self._require_box('Previewing a skill from GitHub')
+        zip_bytes, _filename, package_name = await self._download_github_skill_directory_as_zip(
+            asset_url,
+            owner=owner,
+            repo=repo,
+        )
+        return await box_service.preview_skill_zip(zip_bytes, f'{package_name}.zip', target_suffix='')
+
+    async def reload_skills(self) -> list[dict]:
+        """Reload the skill manager, then return the current skill list (see ``list_skills``)."""
+        await self._reload_skills()
+        return await self.list_skills()
+
+    async def scan_directory_async(self, path: str) -> dict:
+        """Return the result of ``box_service.scan_skill_directory(path)`` unchanged.
+
+        Raises:
+            ValueError: Box is not usable (see ``_require_box``).
+        """
+        box_service = self._require_box('Scanning a skill directory')
+        return await box_service.scan_skill_directory(path)
+
+    async def _reload_skills(self) -> None:
+        """Trigger ``self.ap.skill_mgr.reload_skills()`` when that hook exists.
+
+        A missing manager or a non-callable ``reload_skills`` makes this a
+        no-op. The hook's result is awaited only if it is awaitable, so both
+        synchronous and asynchronous managers work.
+        """
+        manager = getattr(self.ap, 'skill_mgr', None)
+        reloader = getattr(manager, 'reload_skills', None)
+        if callable(reloader):
+            outcome = reloader()
+            if inspect.isawaitable(outcome):
+                await outcome
+
+    async def _download_github_asset(self, asset_url: str) -> bytes:
+        """GET ``asset_url`` and return the complete response body.
+
+        Redirects are followed and the client timeout is 120 seconds. An
+        error status is turned into an exception by ``raise_for_status()``.
+        The body is read fully into memory (``resp.content``); there is no
+        size cap here.
+        """
+        async with httpx.AsyncClient(follow_redirects=True, timeout=120) as client:
+            resp = await client.get(asset_url)
+            resp.raise_for_status()
+            return resp.content
+
+    async def _download_github_skill_directory_as_zip(
+        self, asset_url: str, *, owner: str, repo: str
+    ) -> tuple[bytes, str, str]:
+        """Download a repository archive and repackage one skill directory from it.
+
+        ``asset_url`` must point at a ``SKILL.md`` inside ``owner``/``repo``
+        (see ``_parse_github_skill_md_url``). The repository archive for the
+        URL's ref is fetched from codeload.github.com, and the directory that
+        contains the requested ``SKILL.md`` is copied into a new in-memory
+        zip under a single top-level folder named after the package.
+
+        Returns:
+            ``(zip_bytes, '<package_name>.zip', package_name)``.
+
+        Raises:
+            ValueError: the URL is not an acceptable SKILL.md URL, the
+                download is not a valid zip, the archive lacks the requested
+                file, the file is not UTF-8, or the skill directory is empty.
+        """
+        info = self._parse_github_skill_md_url(asset_url, owner=owner, repo=repo)
+        # owner/repo were matched against percent-decoded URL segments, so they
+        # may contain characters ('/', '?', '#', ...) that would change the
+        # structure of the archive URL. Re-encode each as one path segment.
+        archive_url = (
+            f'https://codeload.github.com/{quote(owner, safe="")}/{quote(repo, safe="")}'
+            f'/zip/{quote(info["ref"], safe="/")}'
+        )
+        archive_bytes = await self._download_github_asset(archive_url)
+
+        try:
+            source_archive = zipfile.ZipFile(io.BytesIO(archive_bytes), 'r')
+        except zipfile.BadZipFile as exc:
+            raise ValueError('GitHub repository archive must be a valid .zip archive') from exc
+
+        with source_archive as source_zip:
+            skill_entry = self._find_github_skill_archive_entry(source_zip, info['file_path'])
+            try:
+                skill_md_content = source_zip.read(skill_entry).decode('utf-8')
+            except UnicodeDecodeError as exc:
+                raise ValueError('GitHub SKILL.md must be valid UTF-8 text') from exc
+
+            # Prefer the name declared in the SKILL.md front matter; fall back
+            # to the name derived from the URL when that one is not valid.
+            package_name = self._resolve_github_skill_md_package_name(skill_md_content, info['package_name'])
+            source_skill_dir = posixpath.dirname(posixpath.normpath(skill_entry.filename))
+
+            buffer = io.BytesIO()
+            with zipfile.ZipFile(buffer, 'w', zipfile.ZIP_DEFLATED) as target_zip:
+                self._copy_github_skill_directory_to_zip(source_zip, target_zip, source_skill_dir, package_name)
+        return buffer.getvalue(), f'{package_name}.zip', package_name
+
+    def _find_github_skill_archive_entry(self, archive: zipfile.ZipFile, file_path: str) -> zipfile.ZipInfo:
+        """Locate ``file_path`` inside a GitHub repository archive.
+
+        Every member is compared with its first path component dropped; that
+        component is the archive's own top-level folder. The comparison is
+        case-insensitive and uses normalised POSIX paths. Directory entries
+        and members with no ``/`` in their path are ignored.
+
+        Raises:
+            ValueError: no member matches ``file_path``.
+        """
+        wanted = posixpath.normpath(file_path).lower()
+        for entry in archive.infolist():
+            if entry.is_dir():
+                continue
+            _root, separator, remainder = posixpath.normpath(entry.filename).partition('/')
+            if not separator:
+                # No '/' at all: nothing is left once the top-level folder is dropped.
+                continue
+            if remainder.lower() == wanted:
+                return entry
+        raise ValueError(f'GitHub archive does not contain requested SKILL.md: {file_path}')
+
+    def _copy_github_skill_directory_to_zip(
+        self,
+        source_zip: zipfile.ZipFile,
+        target_zip: zipfile.ZipFile,
+        source_skill_dir: str,
+        package_name: str,
+    ) -> None:
+        """Copy everything under ``source_skill_dir`` into ``target_zip``.
+
+        Each copied member is re-rooted at ``<package_name>/`` and keeps its
+        timestamp and ``external_attr``. Members outside the skill directory
+        are skipped.
+
+        Raises:
+            ValueError: a member resolves outside the skill directory, or no
+                regular file was copied.
+        """
+        normalized_source_dir = posixpath.normpath(source_skill_dir)
+        source_prefix = f'{normalized_source_dir}/'
+        copied_files = 0
+
+        for member in source_zip.infolist():
+            normalized_member = posixpath.normpath(member.filename)
+            # Keep only the directory itself and entries below it.
+            if normalized_member != normalized_source_dir and not normalized_member.startswith(source_prefix):
+                continue
+
+            relative_path = posixpath.relpath(normalized_member, normalized_source_dir)
+            # The directory's own entry maps to '.', which needs no counterpart.
+            if relative_path in ('', '.'):
+                continue
+            # Defensive check: never write an entry that escapes the package folder.
+            if relative_path.startswith('../') or relative_path == '..' or posixpath.isabs(relative_path):
+                raise ValueError(f'GitHub archive contains an unsafe skill path: {member.filename}')
+
+            target_name = f'{package_name}/{relative_path}'
+            # normpath dropped the trailing slash that marks a directory entry; restore it.
+            if member.is_dir() and not target_name.endswith('/'):
+                target_name = f'{target_name}/'
+            target_info = zipfile.ZipInfo(target_name, date_time=member.date_time)
+            target_info.external_attr = member.external_attr
+            target_info.compress_type = zipfile.ZIP_DEFLATED
+
+            if member.is_dir():
+                target_zip.writestr(target_info, b'')
+                continue
+
+            target_zip.writestr(target_info, source_zip.read(member))
+            copied_files += 1
+
+        # Directory entries do not count: a skill needs at least one real file.
+        if copied_files == 0:
+            raise ValueError('GitHub skill directory is empty')
+
+    def _uploaded_skill_target_stem(self, filename: str) -> str:
+        """Derive a safe name from a file or directory name.
+
+        The directory part and the last extension are removed. Every
+        character that is not alphanumeric, ``-`` or ``_`` becomes ``-``, and
+        leading/trailing ``-``/``_`` are trimmed. If nothing is left the
+        result is ``'uploaded-skill'``.
+        """
+        base_name = os.path.basename(str(filename or '').strip())
+        stem, _extension = os.path.splitext(base_name)
+
+        sanitized = []
+        for ch in stem:
+            keep = ch.isalnum() or ch == '-' or ch == '_'
+            sanitized.append(ch if keep else '-')
+
+        return ''.join(sanitized).strip('-_') or 'uploaded-skill'
+
+    @staticmethod
+    def _is_github_skill_md_url(asset_url: str) -> bool:
+        """Tell whether the URL's path ends in ``/skill.md`` (case-insensitive).
+
+        Only the path is inspected, after POSIX normalisation; the host is
+        not checked here.
+        """
+        url_path = urlparse(str(asset_url or '').strip()).path
+        return posixpath.normpath(url_path or '/').lower().endswith('/skill.md')
+
+    def _parse_github_skill_md_url(self, asset_url: str, *, owner: str, repo: str) -> dict:
+        """Break a GitHub ``SKILL.md`` URL into ref, file path and package name.
+
+        Accepted shapes (HTTPS only):
+            https://github.com/<owner>/<repo>/(blob|raw)/<ref>/<path>/SKILL.md
+            https://raw.githubusercontent.com/<owner>/<repo>/<ref>/<path>/SKILL.md
+
+        The ref is taken from a single path segment, so a ref containing
+        ``/`` is not recognised as one ref.
+
+        Returns:
+            A dict with ``ref``, ``file_path`` (normalised path inside the
+            repository) and ``package_name`` (sanitised name of the directory
+            holding the file, or of ``repo`` when the file is at the root).
+
+        Raises:
+            ValueError: the URL is not HTTPS, is on another host, does not
+                match ``owner``/``repo``, or does not end in ``SKILL.md``.
+        """
+        parsed = urlparse(str(asset_url or '').strip())
+        if parsed.scheme != 'https' or not parsed.netloc:
+            raise ValueError('asset_url must be a valid HTTPS GitHub SKILL.md URL')
+
+        host = parsed.netloc.lower()
+        # Segments are percent-decoded before being compared with owner/repo.
+        path_parts = [unquote(part) for part in (parsed.path or '').split('/') if part]
+        if host == 'github.com':
+            if (
+                len(path_parts) < 5
+                or path_parts[0] != owner
+                or path_parts[1] != repo
+                or path_parts[2]
+                not in (
+                    'blob',
+                    'raw',
+                )
+            ):
+                raise ValueError('GitHub SKILL.md URL must point to the requested owner/repo blob path')
+            ref = path_parts[3]
+            file_path = '/'.join(path_parts[4:])
+        elif host == 'raw.githubusercontent.com':
+            if len(path_parts) < 4 or path_parts[0] != owner or path_parts[1] != repo:
+                raise ValueError('GitHub SKILL.md URL must point to the requested owner/repo raw path')
+            ref = path_parts[2]
+            file_path = '/'.join(path_parts[3:])
+        else:
+            raise ValueError('asset_url must point to a GitHub SKILL.md file')
+
+        normalized_file_path = posixpath.normpath(file_path)
+        normalized_file_path_lower = normalized_file_path.lower()
+        if normalized_file_path_lower != 'skill.md' and not normalized_file_path_lower.endswith('/skill.md'):
+            raise ValueError('GitHub skill import requires a URL ending with SKILL.md')
+
+        # A SKILL.md at the repository root has no parent directory; use the repo name.
+        parent_dir = posixpath.basename(posixpath.dirname(normalized_file_path)) or repo
+        return {
+            'ref': ref,
+            'file_path': normalized_file_path,
+            'package_name': self._uploaded_skill_target_stem(parent_dir),
+        }
+
+    def _resolve_github_skill_md_package_name(self, content: str, fallback: str) -> str:
+        """Choose the package name for a skill imported from a ``SKILL.md``.
+
+        The ``name`` from the file's front matter is used when it passes
+        ``_validate_skill_name``. Otherwise ``fallback`` is validated and
+        returned.
+
+        Raises:
+            ValueError: neither the front-matter name nor ``fallback`` is a
+                valid skill name.
+        """
+        metadata, _instructions = parse_frontmatter(content)
+        candidate = str(metadata.get('name') or fallback or '').strip()
+        try:
+            return self._validate_skill_name(candidate)
+        except ValueError:
+            # The declared name is unusable; the fallback must validate on its own.
+            return self._validate_skill_name(fallback)
+
+    @staticmethod
+    def _validate_github_asset_url(asset_url: str, *, owner: str, repo: str, release_tag: str) -> str:
+        """Check that ``asset_url`` is a GitHub-hosted asset of ``owner``/``repo``.
+
+        The URL must be HTTPS, its host must be in ``_GITHUB_ASSET_HOSTS``,
+        and its path must start with ``/<owner>/<repo>/`` or
+        ``/repos/<owner>/<repo>/``. The path is percent-decoded before it is
+        normalised, so encoded dot segments cannot slip past that check. When
+        ``release_tag`` is non-empty it must appear in the path (raw or
+        decoded) or in the query string.
+
+        Returns:
+            The parsed URL re-serialised with ``geturl()``.
+
+        Raises:
+            ValueError: any of the checks above fails.
+        """
+        parsed = urlparse(str(asset_url).strip())
+        if parsed.scheme != 'https' or not parsed.netloc:
+            raise ValueError('asset_url must be a valid HTTPS GitHub asset URL')
+
+        host = parsed.netloc.lower()
+        if host not in _GITHUB_ASSET_HOSTS:
+            raise ValueError('asset_url must point to a GitHub-hosted release asset or archive')
+
+        # Decode first: normpath only collapses literal '..' segments, so
+        # '/owner/repo/%2e%2e/%2e%2e/other/repo/...' would otherwise pass the
+        # prefix check while possibly resolving to a different repository.
+        decoded_path = unquote(parsed.path or '/')
+        normalized_path = posixpath.normpath(decoded_path)
+        allowed_prefixes = (
+            f'/repos/{owner}/{repo}/',
+            f'/{owner}/{repo}/',
+        )
+        if not normalized_path.startswith(allowed_prefixes):
+            raise ValueError('asset_url does not match the requested owner/repo')
+
+        # Tags with reserved characters may be percent-encoded in the URL, so
+        # the decoded path is accepted as well as the raw one.
+        tag_locations = (parsed.path, decoded_path, parsed.query)
+        if release_tag and not any(release_tag in location for location in tag_locations):
+            raise ValueError('asset_url does not match the requested release_tag')
+
+        return parsed.geturl()
+
+    @staticmethod
+    def _validate_skill_name(name: str) -> str:
+        """Return ``name`` stripped of surrounding whitespace if it is a valid skill name.
+
+        A valid name is non-empty, at most 64 characters long, and consists
+        of alphanumeric characters, ``-`` and ``_`` with at least one
+        alphanumeric character.
+
+        Raises:
+            ValueError: the name is empty, has other characters, or is too long.
+        """
+        cleaned = str(name or '').strip()
+        if not cleaned:
+            raise ValueError('Skill name is required')
+
+        # Drop the two permitted separators; what remains must be alphanumeric.
+        without_separators = cleaned.translate({ord('-'): None, ord('_'): None})
+        if not without_separators.isalnum():
+            raise ValueError('Skill name can only contain letters, numbers, hyphens and underscores')
+
+        if len(cleaned) > 64:
+            raise ValueError('Skill name cannot exceed 64 characters')
+        return cleaned
--- a/src/langbot/pkg/box/__init__.py
+++ b/src/langbot/pkg/box/__init__.py
@@ -0,0 +1,5 @@
+"""LangBot Box runtime package."""
+
+from .workspace import BoxWorkspaceSession
+
+__all__ = ['BoxWorkspaceSession']
--- a/src/langbot/pkg/box/connector.py
+++ b/src/langbot/pkg/box/connector.py
@@ -0,0 +1,354 @@
+from __future__ import annotations
+
+import asyncio
+import json
+import os
+import sys
+import typing
+from typing import TYPE_CHECKING
+from urllib.parse import urlparse
+
+from langbot_plugin.entities.io.actions.enums import CommonAction
+from langbot_plugin.runtime.io.handler import Handler
+from langbot_plugin.runtime.io.connection import Connection
+
+from langbot_plugin.box.client import ActionRPCBoxClient
+from langbot_plugin.box.errors import BoxRuntimeUnavailableError
+from langbot_plugin.box.actions import LangBotToBoxAction
+
+from ..utils import platform
+from ..utils.managed_runtime import ManagedRuntimeConnector
+
+if TYPE_CHECKING:
+    from ..core import app as core_app
+
+
+# Default Docker Compose service name for the standalone Box container.
+_DOCKER_BOX_HOST = 'langbot_box'
+# Port used whenever the configured endpoint does not name one; also passed
+# to a locally launched Box process as ``--ws-control-port``.
+_DEFAULT_PORT = 5410
+
+# Seconds the heartbeat loop sleeps between pings of the Box runtime.
+_HEARTBEAT_INTERVAL_SEC = 20
+
+# Top-level keys under ``box`` that are LangBot-internal and should not be
+# forwarded to the Box runtime.
+_INTERNAL_BOX_CONFIG_KEYS = frozenset({'runtime'})
+
+
+def _get_box_config(ap) -> dict:
+    """Return a shallow copy of the ``box`` section of the instance config.
+
+    A missing ``instance_config``, a missing ``data`` attribute, or a
+    missing/empty ``box`` section all yield an empty dict.
+
+    Environment-variable overrides are handled uniformly by
+    ``LoadConfigStage._apply_env_overrides_to_config`` using the
+    ``SECTION__SUBSECTION__KEY`` convention (e.g. ``BOX__LOCAL__HOST_ROOT``,
+    ``BOX__LOCAL__ALLOWED_MOUNT_ROOTS="/a,/b"``) before this is read, so no
+    box-specific env parsing is needed here.
+    """
+    instance_config = getattr(ap, 'instance_config', None)
+    if instance_config is None:
+        return {}
+    config_data = getattr(instance_config, 'data', {})
+    box_section = config_data.get('box', {}) or {}
+    return dict(box_section)
+
+
+def _get_runtime_endpoint(box_cfg: dict) -> str:
+    """Return the configured ``box.runtime.endpoint``, stripped, or ``''`` if unset.
+
+    A null or otherwise falsy value (for example a bare ``endpoint:`` in
+    YAML) counts as unset. Without the ``or ''`` guard, ``str(None)`` would
+    produce the truthy string ``'None'`` and be mistaken for an explicit
+    endpoint.
+    """
+    runtime_cfg = box_cfg.get('runtime') or {}
+    return str(runtime_cfg.get('endpoint') or '').strip()
+
+
+def _filter_config_for_runtime(box_cfg: dict) -> dict:
+    """Return a copy of ``box_cfg`` without the keys in ``_INTERNAL_BOX_CONFIG_KEYS``."""
+    forwarded = dict(box_cfg)
+    for internal_key in _INTERNAL_BOX_CONFIG_KEYS:
+        forwarded.pop(internal_key, None)
+    return forwarded
+
+
+def resolve_box_ws_relay_url(ap: core_app.Application) -> str:
+    """Derive the WS relay base URL used for managed-process attach.
+
+    The WS relay serves the ``/v1/sessions/{id}/managed-process/ws`` endpoint
+    on the *relay* port (default 5410).
+
+    Resolution order:
+      1. An explicit ``box.runtime.endpoint``. It is a base URL; ``ws`` and
+         ``wss`` schemes map to ``http`` and ``https``, and a missing host or
+         port falls back to ``127.0.0.1`` / the default port.
+         Endpoint-specific paths are appended by the SDK client.
+      2. On the ``docker`` platform, the Box runtime container.
+      3. Otherwise, localhost.
+    """
+    endpoint = _get_runtime_endpoint(_get_box_config(ap))
+
+    if not endpoint:
+        # In Docker, relay lives on the box runtime container.
+        relay_host = _DOCKER_BOX_HOST if platform.get_platform() == 'docker' else '127.0.0.1'
+        return f'http://{relay_host}:{_DEFAULT_PORT}'
+
+    parsed = urlparse(endpoint)
+    raw_scheme = parsed.scheme or 'ws'
+    # Any other scheme (http, https, ...) is passed through unchanged.
+    http_scheme = {'ws': 'http', 'wss': 'https'}.get(raw_scheme, raw_scheme)
+    relay_host = parsed.hostname or '127.0.0.1'
+    relay_port = parsed.port or _DEFAULT_PORT
+    return f'{http_scheme}://{relay_host}:{relay_port}'
+
+
+class BoxRuntimeConnector(ManagedRuntimeConnector):
+    """Connect to the Box runtime via action RPC.
+
+    Transport decision (mirrors Plugin runtime logic):
+      1. Docker / --standalone-box / explicit runtime.endpoint -> WebSocket to external Box process
+      2. Windows (non-Docker)                              -> subprocess + WebSocket (Windows lacks async stdio pipe)
+      3. Unix / macOS                                      -> subprocess + stdio pipe
+    """
+
+    def __init__(
+        self,
+        ap: core_app.Application,
+        runtime_disconnect_callback: typing.Callable[
+            ['BoxRuntimeConnector'], typing.Coroutine[typing.Any, typing.Any, None]
+        ]
+        | None = None,
+    ):
+        """Prepare connector state; nothing is connected or spawned here.
+
+        Args:
+            ap: The application; supplies the config and the logger.
+            runtime_disconnect_callback: Optional coroutine function taking
+                this connector. It is only stored here; presumably it is
+                invoked when the runtime connection drops (confirm against
+                the rest of the class).
+        """
+        super().__init__(ap)
+        self.runtime_disconnect_callback = runtime_disconnect_callback
+        self.configured_runtime_endpoint = self._load_configured_runtime_endpoint()
+        self.ws_relay_base_url = resolve_box_ws_relay_url(ap)
+        self.client = ActionRPCBoxClient(logger=ap.logger)
+
+        # Connection state, populated by initialize().
+        self._handler: Handler | None = None
+        self._handler_task: asyncio.Task | None = None
+        self._ctrl_task: asyncio.Task | None = None
+        self._heartbeat_task: asyncio.Task | None = None
+
+        # Parse the relay URL once for reuse.
+        parsed = urlparse(self.ws_relay_base_url)
+        self._relay_host = parsed.hostname or '127.0.0.1'
+        self._relay_port = parsed.port or _DEFAULT_PORT
+        # The ``box`` config minus LangBot-internal keys; handed to locally
+        # launched Box processes through the LANGBOT_BOX_CONFIG env variable.
+        self._filtered_box_config = _filter_config_for_runtime(_get_box_config(ap))
+
+    def _uses_websocket(self) -> bool:
+        """Decide whether the Box runtime is reached over WebSocket.
+
+        WebSocket is used when any of these holds, checked in this order:
+          - An explicit ``runtime.endpoint`` was configured
+          - Running inside Docker (Box runtime is a separate container)
+          - The ``--standalone-box`` CLI flag was passed
+        """
+        if self.configured_runtime_endpoint:
+            return True
+        if platform.get_platform() == 'docker':
+            return True
+        return bool(platform.use_websocket_to_connect_box_runtime())
+
+    async def initialize(self) -> None:
+        """Connect to the Box runtime over the appropriate transport, then start the heartbeat.
+
+        Transport selection:
+          - WebSocket mode on ``win32`` with no explicit endpoint: launch a
+            local Box process and connect to it over WebSocket.
+          - WebSocket mode otherwise: connect to an already running runtime.
+          - Not WebSocket mode: launch a local Box process over stdio.
+
+        NOTE(review): the Windows branch is only reachable when
+        ``_uses_websocket()`` is true. Presumably
+        ``platform.use_websocket_to_connect_box_runtime()`` returns true on
+        Windows; confirm.
+        """
+        if self._uses_websocket():
+            if platform.get_platform() == 'win32' and not self.configured_runtime_endpoint:
+                await self._start_subprocess_then_ws()
+            else:
+                await self._connect_remote_ws()
+        else:
+            await self._start_local_stdio()
+
+        # Start heartbeat after successful connection
+        # (only once, even if initialize() runs again).
+        if self._heartbeat_task is None:
+            self._heartbeat_task = asyncio.create_task(self._heartbeat_loop())
+
+    # -- heartbeat -----------------------------------------------------------
+
+    async def _heartbeat_loop(self) -> None:
+        """Periodically ping the Box runtime to detect silent disconnections.
+
+        Runs until the task is cancelled. It sleeps ``_HEARTBEAT_INTERVAL_SEC``
+        seconds before each ping. A failed ping is logged at debug level only;
+        this loop itself does not reconnect or invoke any callback.
+        """
+        while True:
+            await asyncio.sleep(_HEARTBEAT_INTERVAL_SEC)
+            try:
+                await self.ping()
+                self.ap.logger.debug('Heartbeat to Box runtime success.')
+            except Exception as e:
+                # Best effort: a failed ping must not end the loop.
+                self.ap.logger.debug(f'Failed to heartbeat to Box runtime: {e}')
+
+    async def ping(self) -> None:
+        """Send a ``PING`` action to the Box runtime over the current handler.
+
+        Raises:
+            BoxRuntimeUnavailableError: no handler is connected.
+        """
+        if self._handler is None:
+            raise BoxRuntimeUnavailableError('Box runtime is not connected')
+        await self._handler.call_action(CommonAction.PING, {})
+
+    # -- transport paths -----------------------------------------------------
+
+    async def _start_local_stdio(self) -> None:
+        """Launch box server as subprocess and connect via stdio (Unix/macOS).
+
+        The child runs under the current interpreter and receives the
+        filtered ``box`` config as JSON in ``LANGBOT_BOX_CONFIG`` (only when
+        that config is non-empty). The method waits up to 30 seconds for the
+        connection callback to signal that the link is up.
+
+        Raises:
+            BoxRuntimeUnavailableError: the subprocess did not connect in
+                time, or the connection callback recorded an error.
+
+        NOTE(review): on timeout or connection error ``self._ctrl_task`` is
+        left running. Confirm it is cancelled elsewhere (e.g. on dispose).
+        """
+        from langbot_plugin.runtime.io.controllers.stdio.client import StdioClientController
+
+        self.ap.logger.info('Use stdio to connect to box runtime')
+        python_path = sys.executable
+        env = os.environ.copy()
+        if self._filtered_box_config:
+            env['LANGBOT_BOX_CONFIG'] = json.dumps(self._filtered_box_config)
+
+        # Filled in by the connection callback: the event marks completion,
+        # the list carries any error.
+        connected = asyncio.Event()
+        connect_error: list[Exception] = []
+
+        ctrl = StdioClientController(
+            command=python_path,
+            # Launched through the same CLI entry point as the plugin runtime
+            # (cli.__init__ <subcommand>); `-s` selects the stdio transport,
+            # mirroring `rt -s`.
+            args=['-m', 'langbot_plugin.cli.__init__', 'box', '-s', '--ws-control-port', str(self._relay_port)],
+            env=env,
+        )
+        self._ctrl_task = asyncio.create_task(
+            ctrl.run(self._make_connection_callback('stdio', connected, connect_error))
+        )
+
+        try:
+            await asyncio.wait_for(connected.wait(), timeout=30.0)
+        except asyncio.TimeoutError:
+            raise BoxRuntimeUnavailableError('box runtime subprocess did not connect in time')
+
+        if connect_error:
+            raise BoxRuntimeUnavailableError(f'box runtime connection failed: {connect_error[0]}')
+
+        # NOTE(review): the Windows path stores its child in
+        # ``runtime_subprocess`` instead. Confirm which attribute the base
+        # class expects.
+        self._subprocess = ctrl.process
+
+    async def _start_subprocess_then_ws(self) -> None:
+        """Windows transport: spawn the box server, then attach over WebSocket.
+
+        The child is launched through the same CLI entry point as the plugin
+        runtime (``cli.__init__ <subcommand>``) without ``-s``, which selects
+        the WebSocket transport. The process handle and a task awaiting its
+        exit are kept on the instance.
+        """
+        self.ap.logger.info('(windows) Use cmd to launch box runtime and communicate via ws')
+
+        child_env = os.environ.copy()
+        box_config = self._filtered_box_config
+        if box_config:
+            child_env['LANGBOT_BOX_CONFIG'] = json.dumps(box_config)
+
+        launch_args = ['-m', 'langbot_plugin.cli.__init__', 'box', '--ws-control-port', str(self._relay_port)]
+        self.runtime_subprocess = await asyncio.create_subprocess_exec(sys.executable, *launch_args, env=child_env)
+        self.runtime_subprocess_task = asyncio.create_task(self.runtime_subprocess.wait())
+
+        await self._connect_ws(f'ws://localhost:{self._relay_port}/rpc/ws', '(windows) WebSocket')
+
+    async def _connect_remote_ws(self) -> None:
+        """Attach to an already-running (remote or Docker) box server over WebSocket."""
+        target_url = self._resolve_rpc_ws_url()
+        self.ap.logger.info(f'Use WebSocket to connect to box runtime ({target_url})')
+        await self._connect_ws(target_url, 'WebSocket')
+
+    # -- helpers -------------------------------------------------------------
+
+    def _resolve_rpc_ws_url(self) -> str:
+        """Determine the action-RPC WebSocket URL.
+
+        All endpoints share a single port; action RPC is at ``/rpc/ws``.
+
+        Resolution order:
+        1. The configured runtime endpoint, if any. ``http``/``https`` are
+           mapped to ``ws``/``wss``; a missing host falls back to
+           ``127.0.0.1`` and a missing port to ``_DEFAULT_PORT``. Any path on
+           the configured endpoint is replaced by ``/rpc/ws``.
+        2. The Docker box host when running on the ``docker`` platform.
+        3. ``localhost`` on the relay port otherwise.
+        """
+        if self.configured_runtime_endpoint:
+            base = self.configured_runtime_endpoint.rstrip('/')
+            # urlparse() only recognises a network location after '//'.
+            # Without a scheme, 'box:5410' would be read as scheme='box', so
+            # supply one before parsing.
+            if '://' not in base:
+                base = f'ws://{base.lstrip("/")}'
+            parsed = urlparse(base)
+            scheme = parsed.scheme or 'ws'
+            if scheme in ('http', 'https'):
+                scheme = 'wss' if scheme == 'https' else 'ws'
+            host = parsed.hostname or '127.0.0.1'
+            # .hostname strips the brackets of an IPv6 literal; restore them so
+            # the host/port separator stays unambiguous.
+            if ':' in host:
+                host = f'[{host}]'
+            port = parsed.port or _DEFAULT_PORT
+            return f'{scheme}://{host}:{port}/rpc/ws'
+
+        if platform.get_platform() == 'docker':
+            return f'ws://{_DOCKER_BOX_HOST}:{_DEFAULT_PORT}/rpc/ws'
+
+        return f'ws://localhost:{self._relay_port}/rpc/ws'
+
+    async def _connect_ws(self, ws_url: str, transport_name: str) -> None:
+        """Open the action-RPC WebSocket and wait until the handshake settles.
+
+        Args:
+            ws_url: WebSocket URL of the runtime's RPC endpoint.
+            transport_name: Label passed to the connection callback for logging.
+
+        Raises:
+            BoxRuntimeUnavailableError: if no outcome is signalled within 30
+                seconds, or if a connection error was recorded.
+        """
+        from langbot_plugin.runtime.io.controllers.ws.client import WebSocketClientController
+
+        handshake_done = asyncio.Event()
+        failures: list[Exception] = []
+
+        async def _report_failure(ctrl, exc):
+            # Invoked by the controller when the connection cannot be made.
+            if exc is None:
+                self.ap.logger.error(f'Failed to connect to Box runtime ({ws_url}), trying to reconnect...')
+            else:
+                self.ap.logger.error(f'Failed to connect to Box runtime ({ws_url}): {exc}')
+            failures.append(exc or BoxRuntimeUnavailableError('ws connection failed'))
+            handshake_done.set()
+            notify = self.runtime_disconnect_callback
+            if notify is not None:
+                await notify(self)
+
+        controller = WebSocketClientController(ws_url=ws_url, make_connection_failed_callback=_report_failure)
+        on_connection = self._make_connection_callback(transport_name, handshake_done, failures)
+        self._ctrl_task = asyncio.create_task(controller.run(on_connection))
+
+        try:
+            await asyncio.wait_for(handshake_done.wait(), timeout=30.0)
+        except asyncio.TimeoutError:
+            raise BoxRuntimeUnavailableError(f'box runtime ws connection timed out ({ws_url})')
+
+        if failures:
+            raise BoxRuntimeUnavailableError(f'box runtime connection failed: {failures[0]}')
+
+    def _make_connection_callback(
+        self,
+        transport_name: str,
+        connected: asyncio.Event,
+        connect_error: list[Exception],
+    ):
+        """Build the coroutine a transport controller runs for each new connection.
+
+        Args:
+            transport_name: Label used in the "Connected ..." log line.
+            connected: Event set once the handshake has either succeeded or
+                failed, releasing the caller that is waiting on it.
+            connect_error: Out-parameter; a handshake failure is appended here
+                before ``connected`` is set.
+
+        Returns:
+            An async callback taking the new ``Connection``.
+        """
+
+        async def new_connection_callback(connection: Connection) -> None:
+            handler = Handler(connection)
+            self._handler = handler
+            self.client.set_handler(handler)
+            # Start the handler loop first so the PING/INIT calls below can be answered.
+            self._handler_task = asyncio.create_task(handler.run())
+            try:
+                await handler.call_action(CommonAction.PING, {})
+                if self._filtered_box_config:
+                    await handler.call_action(LangBotToBoxAction.INIT, self._filtered_box_config)
+                    self.ap.logger.debug('Sent box configuration to Box runtime via INIT.')
+                self.ap.logger.info(f'Connected to Box runtime via {transport_name}.')
+                connected.set()
+                # Block here for the lifetime of the connection.
+                await self._handler_task
+            except Exception as exc:
+                # Handshake failure: report it to the waiter and stop. A failure
+                # after the handshake falls through to the disconnect handling
+                # below; note that the exception itself is not logged there.
+                if not connected.is_set():
+                    connect_error.append(exc)
+                    connected.set()
+                    return
+
+            # If we reach here, handler.run() returned normally (connection
+            # closed) or raised after the initial handshake succeeded.
+            # Either way, treat it as a disconnect.
+            if connected.is_set():
+                if self._uses_websocket():
+                    self.ap.logger.error('Disconnected from Box runtime, trying to reconnect...')
+                    if self.runtime_disconnect_callback is not None:
+                        await self.runtime_disconnect_callback(self)
+                else:
+                    self.ap.logger.error(
+                        'Disconnected from Box runtime via stdio. '
+                        'Cannot automatically reconnect — please restart LangBot.'
+                    )
+
+        return new_connection_callback
+
+    # -- lifecycle -----------------------------------------------------------
+
+    def dispose(self) -> None:
+        """Cancel background tasks and stop any box runtime process this connector started."""
+        # Cancel in the same order as the tasks are layered: heartbeat,
+        # handler loop, then the transport controller.
+        for task_attr in ('_heartbeat_task', '_handler_task', '_ctrl_task'):
+            task = getattr(self, task_attr)
+            if task is not None:
+                task.cancel()
+                setattr(self, task_attr, None)
+
+        # stdio-managed subprocess (stored as self._subprocess by _start_local_stdio)
+        stdio_child = getattr(self, '_subprocess', None)
+        if stdio_child is not None and stdio_child.returncode is None:
+            self.ap.logger.info('Terminating managed box runtime process...')
+            stdio_child.terminate()
+
+        # Subprocess launched by ManagedRuntimeConnector._start_runtime_subprocess (Windows path)
+        self._dispose_subprocess()
+
+    # -- config helpers ------------------------------------------------------
+
+    def _load_configured_runtime_endpoint(self) -> str:
+        """Read the runtime endpoint from the application's box configuration."""
+        box_config = _get_box_config(self.ap)
+        return _get_runtime_endpoint(box_config)
--- a/src/langbot/pkg/box/policy.py
+++ b/src/langbot/pkg/box/policy.py
@@ -0,0 +1,98 @@
+"""Three-layer security policy for LangBot Box.
+
+The design separates concerns into three independent layers, aligned with
+OpenCode / OpenClaw patterns:
+
+1. **SandboxPolicy** – *where* tools run (host vs sandbox).
+2. **ToolPolicy** – *which* tools are allowed (allow/deny lists).
+3. **ElevatedPolicy** – *whether* a single exec call may temporarily
+   escape the default sandbox boundary.
+
+These three layers are orthogonal:
+- ToolPolicy is a hard boundary; ``elevated`` cannot bypass a denied tool.
+- SandboxPolicy decides the default execution location.
+- ElevatedPolicy only affects ``exec`` and only when the framework allows it.
+"""
+
+from __future__ import annotations
+
+import enum
+from typing import Sequence
+
+
+# ── Layer 1: Sandbox Policy ──────────────────────────────────────────
+
+
+class SandboxMode(str, enum.Enum):
+    """Determines when agent execution is routed through the sandbox."""
+
+    # Sandbox disabled; all exec runs on the host.
+    OFF = 'off'
+
+    # Only non-default sessions are sandboxed (e.g. sub-agents, MCP).
+    NON_DEFAULT = 'non_default'
+
+    # Every agent exec call is routed through the sandbox.
+    ALL = 'all'
+
+
+class SandboxPolicy:
+    """Layer 1: answers "does this execution context run inside the sandbox?"."""
+
+    def __init__(self, mode: SandboxMode = SandboxMode.ALL):
+        self.mode = mode
+
+    def should_sandbox(self, *, is_default_session: bool = True) -> bool:
+        """Return True when the call must be routed through the sandbox.
+
+        ``ALL`` always sandboxes, ``OFF`` never does, and ``NON_DEFAULT``
+        sandboxes every session except the default one.
+        """
+        mode = self.mode
+        if mode == SandboxMode.ALL:
+            return True
+        if mode == SandboxMode.OFF:
+            return False
+        return not is_default_session
+
+
+# ── Layer 2: Tool Policy ─────────────────────────────────────────────
+
+
+class ToolPolicy:
+    """Layer 2: allow/deny filter over tool names for the current agent/session.
+
+    Rules:
+    - A name in ``deny`` is rejected even if it is also in ``allow``.
+    - An empty ``allow`` list disables the allowlist (every non-denied tool passes).
+    - ``elevated`` cannot bypass a denied tool.
+    """
+
+    def __init__(
+        self,
+        allow: Sequence[str] = (),
+        deny: Sequence[str] = (),
+    ):
+        # Frozen copies: later mutation of the caller's lists has no effect.
+        self._deny: frozenset[str] = frozenset(deny)
+        self._allow: frozenset[str] = frozenset(allow)
+
+    def is_tool_allowed(self, tool_name: str) -> bool:
+        """Return True if ``tool_name`` passes both the deny and allow lists."""
+        denied = tool_name in self._deny
+        passes_allowlist = not self._allow or tool_name in self._allow
+        return not denied and passes_allowlist
+
+
+# ── Layer 3: Elevated Policy ─────────────────────────────────────────
+
+
+class ElevatedPolicy:
+    """Layer 3: gate for ``exec`` requests that ask for temporary escalation.
+
+    ``elevated`` only applies to the ``exec`` tool and means "run this
+    command outside the default sandbox boundary" (e.g. with network, or
+    on the host).  The framework decides whether to honor the request.
+
+    ``require_approval`` is stored for callers to consult; this class itself
+    does not evaluate it.
+    """
+
+    def __init__(self, *, allow_elevated: bool = False, require_approval: bool = True):
+        self.require_approval = require_approval
+        self.allow_elevated = allow_elevated
+
+    def is_elevation_permitted(self) -> bool:
+        """Return the configured ``allow_elevated`` flag."""
+        return self.allow_elevated
--- a/src/langbot/pkg/box/service.py
+++ b/src/langbot/pkg/box/service.py
@@ -0,0 +1,794 @@
+from __future__ import annotations
+
+import asyncio
+import collections
+import datetime as _dt
+import enum
+import json
+import os
+from typing import TYPE_CHECKING
+
+import pydantic
+
+from langbot_plugin.box.client import BoxRuntimeClient
+from .connector import BoxRuntimeConnector, _get_box_config
+from langbot_plugin.box.errors import BoxError, BoxValidationError
+from langbot_plugin.box.models import (
+    BUILTIN_PROFILES,
+    BoxExecutionResult,
+    BoxManagedProcessInfo,
+    BoxManagedProcessSpec,
+    BoxProfile,
+    BoxSpec,
+)
+
+_INT_ADAPTER = pydantic.TypeAdapter(int)
+_UTC = _dt.timezone.utc
+_MAX_RECENT_ERRORS = 50
+_MIB = 1024 * 1024
+
+
+def _is_path_under(path: str, root: str) -> bool:
+    """Check whether *path* equals *root* or is a child of *root*.
+
+    This is a purely textual comparison; callers are expected to pass
+    already-normalised paths. The child check matches on ``root`` followed by
+    a path separator so that ``/a`` does not claim ``/ab``.
+    """
+    if path == root:
+        return True
+    # A root that already ends with the separator (notably the filesystem
+    # root itself) must not get a second one appended, or no child would
+    # ever match.
+    prefix = root if root.endswith(os.sep) else f'{root}{os.sep}'
+    return path.startswith(prefix)
+
+
+if TYPE_CHECKING:
+    from ..core import app as core_app
+    import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
+
+
+class BoxService:
+    def __init__(
+        self,
+        ap: core_app.Application,
+        client: BoxRuntimeClient | None = None,
+        output_limit_chars: int = 4000,
+    ):
+        """Set up service state from instance config; performs no connection.
+
+        Args:
+            ap: Application object; used here for config and logging.
+            client: Optional pre-built runtime client. When omitted, a
+                ``BoxRuntimeConnector`` is created and its client is used.
+            output_limit_chars: Maximum length of stdout/stderr returned by
+                ``_serialize_result`` before truncation.
+        """
+        self.ap = ap
+        self._enabled = self._load_enabled()
+        self._runtime_connector: BoxRuntimeConnector | None = None
+        if client is None:
+            # Always construct a connector — its __init__ is side-effect free
+            # (no I/O, no subprocess). When ``box.enabled = false`` we simply
+            # skip ``connector.initialize()`` so no connection is attempted.
+            self._runtime_connector = BoxRuntimeConnector(ap, runtime_disconnect_callback=self._on_runtime_disconnect)
+            client = self._runtime_connector.client
+        self.client = client
+        self.output_limit_chars = output_limit_chars
+        # Order matters: host_root must be set before the two loaders that
+        # read self.host_root as a fallback/base directory.
+        self.host_root = self._load_host_root()
+        self.allowed_mount_roots = self._load_allowed_mount_roots()
+        self.default_workspace = self._load_default_workspace()
+        self.profile = self._load_profile()
+        self.custom_image = self._load_custom_image()
+        self.workspace_quota_mb = self._load_workspace_quota_mb()
+        # Bounded history: the deque drops the oldest entry past _MAX_RECENT_ERRORS.
+        self._recent_errors: collections.deque[dict] = collections.deque(maxlen=_MAX_RECENT_ERRORS)
+        self._shutdown_task = None
+        # Flipped to True only by a successful initialize()/reconnect.
+        self._available = False
+        self._connector_error: str = ''
+        # Guards against starting more than one reconnect loop at a time.
+        self._reconnecting = False
+
+    @property
+    def enabled(self) -> bool:
+        """True unless the operator set ``box.enabled = false`` in config.
+
+        Consumers see both "disabled" and "enabled but unavailable" as
+        ``available = False``; the two cases are distinguished in get_status.
+        """
+        return self._enabled
+
+    async def initialize(self):
+        """Ensure the default workspace exists and, if enabled, connect to the runtime.
+
+        Never raises on connection failure: the error is logged, stored in
+        ``_connector_error`` and the service is left with ``available = False``.
+        """
+        self._ensure_default_workspace()
+
+        if not self._enabled:
+            # Disabled by config: do NOT connect to a remote runtime, do NOT
+            # fork a stdio subprocess. Every consumer of box_service should
+            # gate on ``available`` and degrade gracefully.
+            self._connector_error = 'Box runtime is disabled in config (box.enabled = false)'
+            self._available = False
+            self.ap.logger.info(
+                'Box runtime disabled by config; sandbox features (exec/read/write/edit, '
+                'skill add/edit, stdio MCP) will be unavailable.'
+            )
+            return
+
+        try:
+            # Prefer the connector (it owns transport setup); fall back to an
+            # injected client.
+            connector = self._runtime_connector
+            backend = connector if connector is not None else self.client
+            await backend.initialize()
+            self._available = True
+            self._connector_error = ''
+            self.ap.logger.info(
+                f'LangBot Box runtime initialized: profile={self.profile.name} '
+                f'default_workspace={self.default_workspace or "(none)"}'
+            )
+        except Exception as exc:
+            self.ap.logger.warning(f'LangBot Box runtime unavailable, sandbox features disabled: {exc}')
+            self._available = False
+            self._connector_error = str(exc)
+
+    async def _on_runtime_disconnect(self, connector: BoxRuntimeConnector) -> None:
+        """Called by the connector when the Box runtime connection drops.
+
+        Spawns a background reconnection loop so the caller is not blocked.
+        Skipped entirely when Box is disabled by config — that path should
+        never have connected in the first place.
+
+        Args:
+            connector: The connector whose connection was lost; it is handed
+                to the reconnect loop.
+        """
+        if not self._enabled:
+            return
+        if self._reconnecting:
+            return  # Another reconnect loop is already running
+        self._reconnecting = True
+        self._available = False
+        self._connector_error = 'Disconnected from Box runtime'
+        self.ap.logger.warning('Box runtime disconnected, sandbox features temporarily disabled.')
+        # Keep a strong reference: the event loop only holds weak references
+        # to tasks, so a fire-and-forget task may be garbage-collected before
+        # it finishes.
+        self._reconnect_task = asyncio.create_task(self._reconnect_loop(connector))
+
+    async def _reconnect_loop(self, connector: BoxRuntimeConnector) -> None:
+        """Keep retrying the connection, doubling the wait from 3s up to a 60s cap.
+
+        Each attempt disposes the connector before re-initialising it. The
+        loop ends on the first success; ``_reconnecting`` is cleared on every
+        exit path, including cancellation.
+        """
+        wait_sec = 3
+        wait_cap = 60
+        try:
+            while True:
+                self.ap.logger.info(f'Attempting to reconnect to Box runtime in {wait_sec}s...')
+                await asyncio.sleep(wait_sec)
+                try:
+                    connector.dispose()
+                    await connector.initialize()
+                    self._connector_error = ''
+                    self._available = True
+                    self.ap.logger.info('Box runtime reconnected, sandbox features restored.')
+                    return
+                except Exception as exc:
+                    self._connector_error = str(exc)
+                    self.ap.logger.warning(f'Box runtime reconnection failed: {exc}')
+                    wait_sec = min(2 * wait_sec, wait_cap)
+        finally:
+            self._reconnecting = False
+
+    @property
+    def available(self) -> bool:
+        """True while the runtime is connected and usable (see initialize / reconnect)."""
+        return self._available
+
+    async def execute_spec_payload(
+        self,
+        spec_payload: dict,
+        query: pipeline_query.Query,
+        *,
+        skip_host_mount_validation: bool = False,
+    ) -> dict:
+        """Validate a raw spec payload, run it in the Box runtime and serialise the result.
+
+        Args:
+            spec_payload: Raw fields for ``BoxSpec``; defaults are filled in by
+                ``build_spec``.
+            query: Originating query; its ``query_id`` is logged and it is
+                passed to ``_record_error`` on failure.
+            skip_host_mount_validation: Forwarded to ``build_spec``.
+
+        Returns:
+            The dict produced by ``_serialize_result`` (truncated stdout/stderr,
+            status, exit code, ...).
+
+        Raises:
+            BoxError: if the runtime is unavailable, the payload is invalid,
+                the workspace quota check fails, or execution fails. Every
+                ``BoxError`` raised after the availability check is recorded
+                via ``_record_error`` before being re-raised.
+        """
+        # NOTE(review): this message always blames Docker, even when Box is
+        # disabled by config or merely disconnected — _connector_error holds
+        # the actual reason.
+        if not self._available:
+            raise BoxError('Box runtime is not available. Install and start Docker to use sandbox features.')
+        try:
+            spec = self.build_spec(spec_payload, skip_host_mount_validation=skip_host_mount_validation)
+        except BoxError as exc:
+            self._record_error(exc, query)
+            raise
+        self.ap.logger.info(
+            'LangBot Box request: '
+            f'query_id={query.query_id} '
+            f'spec={json.dumps(self._summarize_spec(spec), ensure_ascii=False)}'
+        )
+        # Quota is checked on both sides of the execution.
+        try:
+            self._enforce_workspace_quota(spec, phase='before execution')
+        except BoxError as exc:
+            self._record_error(exc, query)
+            raise
+        try:
+            result = await self.client.execute(spec)
+        except BoxError as exc:
+            self._record_error(exc, query)
+            raise
+        try:
+            self._enforce_workspace_quota(spec, phase='after execution')
+        except BoxError as exc:
+            # The command itself ran; on a post-run quota failure the session
+            # is cleaned up before the error is surfaced.
+            await self._cleanup_exceeded_session(spec)
+            self._record_error(exc, query)
+            raise
+        self.ap.logger.info(
+            'LangBot Box result: '
+            f'query_id={query.query_id} '
+            f'summary={json.dumps(self._summarize_result(result), ensure_ascii=False)}'
+        )
+        return self._serialize_result(result)
+
+    def resolve_box_session_id(self, query: pipeline_query.Query) -> str:
+        """Render the Box session id for ``query`` from the pipeline's template.
+
+        The template comes from ``ai.local-agent.box-session-id-template`` in
+        the pipeline config (default ``'{launcher_type}_{launcher_id}'``).
+        Values already present in ``query.variables`` win over the fallbacks
+        derived from the query's attributes; any placeholder that is still
+        unknown renders as ``'unknown'``.
+        """
+        pipeline_config = query.pipeline_config or {}
+        agent_config = pipeline_config.get('ai', {}).get('local-agent', {})
+        template = agent_config.get('box-session-id-template', '{launcher_type}_{launcher_id}')
+
+        values = dict(query.variables or {})
+
+        raw_launcher_type = getattr(query, 'launcher_type', None)
+        # Enum-like launcher types are reduced to their underlying value.
+        launcher_type = raw_launcher_type.value if hasattr(raw_launcher_type, 'value') else raw_launcher_type
+        launcher_id = getattr(query, 'launcher_id', None)
+        sender_id = getattr(query, 'sender_id', None)
+        query_id = getattr(query, 'query_id', None)
+
+        fallbacks = {
+            'query_id': str(query_id or 'unknown'),
+            'launcher_type': str(launcher_type or 'query'),
+            'launcher_id': str(launcher_id or query_id or 'unknown'),
+            'sender_id': str(sender_id or launcher_id or query_id or 'unknown'),
+            'global': 'global',
+        }
+        for key, fallback in fallbacks.items():
+            values.setdefault(key, fallback)
+
+        return template.format_map(collections.defaultdict(lambda: 'unknown', values))
+
+    def build_skill_extra_mounts(self, query: pipeline_query.Query) -> list[dict]:
+        """Build extra_mounts entries for all pipeline-bound skills.
+
+        Mounting every visible skill up front means a freshly created
+        container already contains all skill packages, whichever skill is
+        active at the moment.
+
+        A skill whose ``package_root`` is empty is ignored. One whose
+        ``package_root`` is not a directory on the LangBot-visible filesystem
+        is skipped with a warning rather than forwarded, because the backends
+        handle a stale mount inconsistently: nsjail refuses to start the
+        sandbox (failing every exec in the session), Docker silently
+        auto-creates a root-owned empty directory on the host, and E2B
+        silently skips the upload — none of which surfaces an actionable
+        error to the agent or operator.
+
+        Returns:
+            A list of ``{'host_path', 'mount_path', 'mode'}`` dicts; empty when
+            the application has no skill manager.
+        """
+        if getattr(self.ap, 'skill_mgr', None) is None:
+            return []
+
+        from ..provider.tools.loaders import skill as skill_loader
+
+        mounts: list[dict] = []
+        for skill_name, skill_data in skill_loader.get_visible_skills(self.ap, query).items():
+            package_root = str(skill_data.get('package_root', '') or '').strip()
+            if not package_root:
+                continue
+            if os.path.isdir(package_root):
+                mounts.append(
+                    {
+                        'host_path': package_root,
+                        'mount_path': f'/workspace/.skills/{skill_name}',
+                        'mode': 'rw',
+                    }
+                )
+            else:
+                self.ap.logger.warning(
+                    f'Skill "{skill_name}" package_root missing on filesystem '
+                    f'({package_root}); skipping mount to prevent sandbox failures. '
+                    f'The skill cache may be stale — consider reloading skills.'
+                )
+        return mounts
+
+    async def execute_tool(self, parameters: dict, query: pipeline_query.Query) -> dict:
+        """Run an agent-facing ``exec`` tool call.
+
+        Maps the agent's ``command`` onto ``BoxSpec.cmd``, forwards the
+        optional ``workdir`` / ``timeout_sec`` / ``env`` fields, and adds the
+        session id and skill mounts derived from ``query``. Any other key in
+        ``parameters`` is ignored.
+
+        Raises:
+            KeyError: if ``parameters`` has no ``'command'`` entry.
+        """
+        forwarded_keys = ('workdir', 'timeout_sec', 'env')
+        spec_payload: dict = {'cmd': parameters['command']}
+        spec_payload.update({key: parameters[key] for key in forwarded_keys if key in parameters})
+
+        # Inject context the agent must not control
+        spec_payload.setdefault('session_id', self.resolve_box_session_id(query))
+
+        # Mount all pipeline-bound skills so they are available in the container
+        if 'extra_mounts' not in spec_payload:
+            spec_payload['extra_mounts'] = self.build_skill_extra_mounts(query)
+
+        return await self.execute_spec_payload(spec_payload, query)
+
+    async def shutdown(self):
+        """Ask the runtime client to shut down."""
+        client = self.client
+        await client.shutdown()
+
+    def dispose(self):
+        """Dispose the connector and schedule an asynchronous client shutdown.
+
+        The shutdown task is only scheduled when the application exposes an
+        open ``event_loop`` and no earlier shutdown task is still running.
+        """
+        connector = self._runtime_connector
+        if connector is not None:
+            connector.dispose()
+
+        loop = getattr(self.ap, 'event_loop', None)
+        if loop is None or loop.is_closed():
+            return
+
+        previous = self._shutdown_task
+        if previous is None or previous.done():
+            self._shutdown_task = loop.create_task(self.shutdown())
+
+    async def get_sessions(self) -> list[dict]:
+        """Return the runtime's session list, or ``[]`` when it cannot be obtained.
+
+        This is a best-effort query: an unavailable runtime or any client
+        error yields an empty list instead of raising. Client errors are
+        logged at debug level so the failure is not completely invisible.
+        """
+        if not self._available:
+            return []
+        try:
+            return await self.client.get_sessions()
+        except Exception as exc:
+            self.ap.logger.debug(f'Failed to list Box sessions: {exc}')
+            return []
+
+    def build_spec(self, spec_payload: dict, skip_host_mount_validation: bool = False) -> BoxSpec:
+        """Turn a raw payload into a validated ``BoxSpec`` with service defaults applied.
+
+        The caller's dict is not mutated. Defaults are filled in only where
+        the payload leaves a field unset: ``env``, ``host_path`` (default
+        workspace), ``workspace_quota_mb`` and ``image`` (global custom
+        image), followed by the profile via ``_apply_profile``.
+
+        Raises:
+            BoxValidationError: if the payload fails ``BoxSpec`` validation;
+                the message is that of the first validation error.
+        """
+        payload = dict(spec_payload)
+        payload.setdefault('env', {})
+
+        unset_values = (None, '')
+        if payload.get('host_path') in unset_values and self.default_workspace is not None:
+            payload['host_path'] = self.default_workspace
+        if payload.get('workspace_quota_mb') in unset_values and self.workspace_quota_mb is not None:
+            payload['workspace_quota_mb'] = self.workspace_quota_mb
+
+        # Global custom image overrides profile default (but not caller-specified image)
+        if self.custom_image and 'image' not in payload:
+            payload['image'] = self.custom_image
+
+        self._apply_profile(payload)
+
+        try:
+            spec = BoxSpec.model_validate(payload)
+        except pydantic.ValidationError as exc:
+            message = exc.errors()[0].get('msg', 'invalid box arguments')
+            raise BoxValidationError(message) from exc
+
+        if skip_host_mount_validation:
+            return spec
+        self._validate_host_mount(spec)
+        return spec
+
+    async def create_session(self, spec_payload: dict, *, skip_host_mount_validation: bool = False) -> dict:
+        """Build a spec from ``spec_payload`` and ask the runtime client to create a session for it."""
+        session_spec = self.build_spec(spec_payload, skip_host_mount_validation=skip_host_mount_validation)
+        return await self.client.create_session(session_spec)
+
+    async def start_managed_process(self, session_id: str, process_payload: dict) -> BoxManagedProcessInfo:
+        """Validate ``process_payload`` as a ``BoxManagedProcessSpec`` and start it in the session."""
+        validated_spec = BoxManagedProcessSpec.model_validate(process_payload)
+        return await self.client.start_managed_process(session_id, validated_spec)
+
+    async def get_managed_process(self, session_id: str, process_id: str = 'default') -> BoxManagedProcessInfo:
+        """Fetch the managed process ``process_id`` of ``session_id`` from the runtime client."""
+        process_info = await self.client.get_managed_process(session_id, process_id)
+        return process_info
+
+    async def stop_managed_process(self, session_id: str, process_id: str = 'default') -> None:
+        """Stop the managed process ``process_id`` of ``session_id`` via the runtime client."""
+        outcome = await self.client.stop_managed_process(session_id, process_id)
+        return outcome
+
+    def get_managed_process_websocket_url(self, session_id: str, process_id: str = 'default') -> str:
+        """Return the WebSocket URL for attaching to a managed process.
+
+        The relay base URL comes from the connector when one exists and
+        otherwise falls back to ``http://127.0.0.1:5410``.
+
+        Raises:
+            BoxValidationError: if the client has no
+                ``get_managed_process_websocket_url`` method.
+        """
+        url_builder = getattr(self.client, 'get_managed_process_websocket_url', None)
+        if url_builder is None:
+            raise BoxValidationError('box runtime client does not support managed process websocket attach')
+
+        connector = self._runtime_connector
+        if connector is None:
+            relay_base = 'http://127.0.0.1:5410'
+        else:
+            relay_base = connector.ws_relay_base_url
+        return url_builder(session_id, relay_base, process_id)
+
+    # -- skill management: thin pass-throughs to the runtime client ----------
+
+    async def list_skills(self) -> list[dict]:
+        """Return the client's skill listing."""
+        skills = await self.client.list_skills()
+        return skills
+
+    async def get_skill(self, name: str) -> dict | None:
+        """Look up one skill by name through the client."""
+        skill = await self.client.get_skill(name)
+        return skill
+
+    async def create_skill(self, skill: dict) -> dict:
+        """Forward a skill creation request to the client."""
+        created = await self.client.create_skill(skill)
+        return created
+
+    async def update_skill(self, name: str, skill: dict) -> dict:
+        """Forward a skill update for ``name`` to the client."""
+        updated = await self.client.update_skill(name, skill)
+        return updated
+
+    async def delete_skill(self, name: str) -> None:
+        """Forward a skill deletion to the client; the client's result is discarded."""
+        await self.client.delete_skill(name)
+
+    async def scan_skill_directory(self, path: str) -> dict:
+        """Forward a skill-directory scan of ``path`` to the client."""
+        scan_result = await self.client.scan_skill_directory(path)
+        return scan_result
+
+    async def list_skill_files(
+        self,
+        name: str,
+        path: str = '.',
+        include_hidden: bool = False,
+        max_entries: int = 200,
+    ) -> dict:
+        """Forward a file listing request for skill ``name`` to the client."""
+        listing = await self.client.list_skill_files(name, path, include_hidden, max_entries)
+        return listing
+
+    async def read_skill_file(self, name: str, path: str) -> dict:
+        """Forward a file read for skill ``name`` to the client."""
+        file_payload = await self.client.read_skill_file(name, path)
+        return file_payload
+
+    async def write_skill_file(self, name: str, path: str, content: str) -> dict:
+        """Forward a file write for skill ``name`` to the client."""
+        write_result = await self.client.write_skill_file(name, path, content)
+        return write_result
+
+    async def preview_skill_zip(
+        self,
+        file_bytes: bytes,
+        filename: str,
+        source_subdir: str = '',
+        target_suffix: str = 'upload',
+    ) -> list[dict]:
+        """Forward a skill zip preview request to the client."""
+        preview = await self.client.preview_skill_zip(file_bytes, filename, source_subdir, target_suffix)
+        return preview
+
+    async def install_skill_zip(
+        self,
+        file_bytes: bytes,
+        filename: str,
+        source_paths: list[str] | None = None,
+        source_path: str = '',
+        source_subdir: str = '',
+        target_suffix: str = 'upload',
+    ) -> list[dict]:
+        """Forward a skill zip installation request to the client."""
+        install_args = (file_bytes, filename, source_paths, source_path, source_subdir, target_suffix)
+        return await self.client.install_skill_zip(*install_args)
+
+    def _serialize_result(self, result: BoxExecutionResult) -> dict:
+        """Convert an execution result into the plain dict returned to callers.
+
+        stdout and stderr are each passed through ``_truncate``; the matching
+        ``*_truncated`` flags report whether anything was cut.
+        """
+        out_text, out_cut = self._truncate(result.stdout)
+        err_text, err_cut = self._truncate(result.stderr)
+
+        payload = {
+            'session_id': result.session_id,
+            'backend': result.backend_name,
+            'status': result.status.value,
+            'ok': result.ok,
+            'exit_code': result.exit_code,
+        }
+        payload['stdout'] = out_text
+        payload['stderr'] = err_text
+        payload['stdout_truncated'] = out_cut
+        payload['stderr_truncated'] = err_cut
+        payload['duration_ms'] = result.duration_ms
+        return payload
+
+    def _truncate(self, text: str) -> tuple[str, bool]:
+        """Shorten ``text`` to at most ``output_limit_chars``, keeping head and tail.
+
+        Returns:
+            ``(text, False)`` when it already fits; otherwise
+            ``(shortened, True)`` where the middle is replaced by a
+            "... [N characters truncated] ..." notice. Roughly 60% of the
+            remaining budget goes to the head and the rest to the tail.
+        """
+        limit = self.output_limit_chars
+        if len(text) <= limit:
+            return text, False
+        if limit <= 0:
+            return '', True
+
+        kept = (0, 0)  # (head chars, tail chars)
+        notice = ''
+        # The notice embeds the omitted count, whose digit count affects the
+        # budget left for head/tail; iterate a few times until the split is stable.
+        for _ in range(4):
+            omitted = max(len(text) - kept[0] - kept[1], 0)
+            notice = f'\n\n... [{omitted} characters truncated] ...\n\n'
+            budget = limit - len(notice)
+            if budget <= 0:
+                # Not even the notice fits: return as much of it as allowed.
+                return notice[:limit], True
+
+            head_part = int(budget * 0.6)
+            candidate = (head_part, budget - head_part)
+            if candidate == kept:
+                break
+            kept = candidate
+
+        head_len, tail_len = kept
+        # text[-0:] would be the whole string, hence the explicit guard.
+        tail = text[-tail_len:] if tail_len else ''
+        combined = f'{text[:head_len]}{notice}{tail}'
+        return combined[:limit], True
+
+    def _summarize_spec(self, spec: BoxSpec) -> dict:
+        """Build a log-friendly summary of ``spec``.
+
+        The command is stripped and capped at 400 characters; only the names
+        of environment variables are included, not their values.
+        """
+        command = spec.cmd.strip()
+        shown_cmd = command if len(command) <= 400 else f'{command[:397]}...'
+
+        summary = {
+            'session_id': spec.session_id,
+            'workdir': spec.workdir,
+            'mount_path': spec.mount_path,
+            'timeout_sec': spec.timeout_sec,
+            'network': spec.network.value,
+            'image': spec.image,
+            'host_path': spec.host_path,
+            'host_path_mode': spec.host_path_mode.value,
+        }
+        # Resource limits are copied through under their own attribute names.
+        for limit_field in ('cpus', 'memory_mb', 'pids_limit', 'read_only_rootfs', 'workspace_quota_mb'):
+            summary[limit_field] = getattr(spec, limit_field)
+        summary['env_keys'] = sorted(spec.env.keys())
+        summary['cmd'] = shown_cmd
+        return summary
+
+    def _summarize_result(self, result: BoxExecutionResult) -> dict:
+        """Build a log-friendly summary of ``result`` with 200-character output previews."""
+
+        def _preview(stream_text: str) -> str:
+            # Mark a shortened preview with a trailing ellipsis.
+            snippet = stream_text[:200]
+            return f'{snippet}...' if len(stream_text) > 200 else snippet
+
+        return {
+            'session_id': result.session_id,
+            'backend': result.backend_name,
+            'status': result.status.value,
+            'exit_code': result.exit_code,
+            'duration_ms': result.duration_ms,
+            'stdout_preview': _preview(result.stdout),
+            'stderr_preview': _preview(result.stderr),
+        }
+
+    def _local_config(self) -> dict:
+        """Return a copy of the ``box.local`` section of the instance config.
+
+        Environment overrides (e.g. ``BOX__LOCAL__HOST_ROOT``) are applied
+        uniformly by ``LoadConfigStage._apply_env_overrides_to_config`` before
+        this is read, so no box-specific env parsing happens here. A missing
+        or empty section yields ``{}``.
+        """
+        local_section = _get_box_config(self.ap).get('local')
+        return dict(local_section or {})
+
+    def _load_allowed_mount_roots(self) -> list[str]:
+        """Return the normalised host directories that may be mounted.
+
+        Reads ``box.local.allowed_mount_roots``. Each entry is stripped, made
+        absolute and resolved with ``realpath``; blank entries are dropped.
+        When nothing usable is configured, ``host_root`` (if set) is the only
+        allowed root.
+        """
+        # ``or []`` covers a key that is present but null (e.g. a bare
+        # ``allowed_mount_roots:`` in YAML), which would otherwise fail when
+        # iterated below.
+        configured_roots = self._local_config().get('allowed_mount_roots') or []
+        # The unified env-override mechanism stores a brand-new key as a raw
+        # string when the key is absent from config.yaml. Accept a
+        # comma-separated string as well as a list so that
+        # ``BOX__LOCAL__ALLOWED_MOUNT_ROOTS="/a,/b"`` keeps working even when
+        # the config file has no ``box.local.allowed_mount_roots`` entry.
+        if isinstance(configured_roots, str):
+            configured_roots = configured_roots.split(',')
+
+        normalized_roots: list[str] = [
+            os.path.realpath(os.path.abspath(root_value))
+            for root_value in (str(root).strip() for root in configured_roots)
+            if root_value
+        ]
+
+        if not normalized_roots and self.host_root is not None:
+            normalized_roots.append(self.host_root)
+
+        return normalized_roots
+
+    def _load_host_root(self) -> str | None:
+        host_root = str(self._local_config().get('host_root', '')).strip()
+        if not host_root:
+            return None
+        return os.path.realpath(os.path.abspath(host_root))
+
+    def _load_default_workspace(self) -> str | None:
+        default_workspace = str(self._local_config().get('default_workspace', '')).strip()
+        if not default_workspace:
+            if self.host_root is None:
+                return None
+            default_workspace = os.path.join(self.host_root, 'default')
+        elif not os.path.isabs(default_workspace) and self.host_root is not None:
+            default_workspace = os.path.join(self.host_root, default_workspace)
+        return os.path.realpath(os.path.abspath(default_workspace))
+
+    def get_skills_root(self) -> str | None:
+        skills_root = str(self._local_config().get('skills_root', '') or 'skills').strip()
+        if not skills_root:
+            skills_root = 'skills'
+        if not os.path.isabs(skills_root) and self.host_root is not None:
+            skills_root = os.path.join(self.host_root, skills_root)
+        return os.path.realpath(os.path.abspath(skills_root))
+
+    def _load_enabled(self) -> bool:
+        """Read ``box.enabled`` (top-level, not ``box.local.*``). Default True
+        — disabling is opt-in. Accepts bool, ``'true'``/``'false'`` strings,
+        and the standard env-overridden truthy values that
+        ``LoadConfigStage._apply_env_overrides_to_config`` produces."""
+        raw = _get_box_config(self.ap).get('enabled', True)
+        if isinstance(raw, bool):
+            return raw
+        return str(raw).strip().lower() not in ('false', '0', 'no', 'off', '')
+
+    def _load_custom_image(self) -> str | None:
+        raw = str(self._local_config().get('image', '') or '').strip()
+        return raw or None
+
+    def _load_workspace_quota_mb(self) -> int | None:
+        raw_value = self._local_config().get('workspace_quota_mb')
+        if raw_value in (None, ''):
+            return None
+        try:
+            value = _INT_ADAPTER.validate_python(raw_value)
+        except pydantic.ValidationError as exc:
+            raise BoxValidationError('workspace_quota_mb must be an integer greater than or equal to 0') from exc
+        if value < 0:
+            raise BoxValidationError('workspace_quota_mb must be greater than or equal to 0')
+        return value
+
+    def _ensure_default_workspace(self):
+        if self.default_workspace is None:
+            return
+
+        if os.path.isdir(self.default_workspace):
+            return
+
+        if os.path.exists(self.default_workspace):
+            raise BoxValidationError('box.local.default_workspace must point to a directory on the host')
+
+        if not self.allowed_mount_roots:
+            raise BoxValidationError(
+                'box.local.default_workspace cannot be created because no allowed_mount_roots are configured'
+            )
+
+        for allowed_root in self.allowed_mount_roots:
+            if _is_path_under(self.default_workspace, allowed_root):
+                os.makedirs(self.default_workspace, exist_ok=True)
+                return
+
+        allowed_roots = ', '.join(self.allowed_mount_roots)
+        raise BoxValidationError(f'box.local.default_workspace is outside allowed_mount_roots: {allowed_roots}')
+
+    def _validate_host_mount(self, spec: BoxSpec):
+        if spec.host_path is None:
+            return
+
+        host_path = os.path.realpath(spec.host_path)
+        if not os.path.isdir(host_path):
+            raise BoxValidationError('host_path must point to an existing directory on the host')
+
+        if not self.allowed_mount_roots:
+            raise BoxValidationError('host_path mounting is disabled because no allowed_mount_roots are configured')
+
+        for allowed_root in self.allowed_mount_roots:
+            if _is_path_under(host_path, allowed_root):
+                return
+
+        allowed_roots = ', '.join(self.allowed_mount_roots)
+        raise BoxValidationError(f'host_path is outside allowed_mount_roots: {allowed_roots}')
+
+    def _load_profile(self) -> BoxProfile:
+        profile_name = str(self._local_config().get('profile', 'default')).strip() or 'default'
+
+        profile = BUILTIN_PROFILES.get(profile_name)
+        if profile is None:
+            available = ', '.join(sorted(BUILTIN_PROFILES))
+            raise BoxValidationError(f"unknown box profile '{profile_name}', available profiles: {available}")
+        return profile
+
+    def _apply_profile(self, params: dict):
+        """Merge profile defaults into *params* in-place, enforce locked fields and clamp timeout."""
+        profile = self.profile
+        _PROFILE_FIELDS = (
+            'image',
+            'network',
+            'timeout_sec',
+            'host_path_mode',
+            'cpus',
+            'memory_mb',
+            'pids_limit',
+            'read_only_rootfs',
+            'workspace_quota_mb',
+        )
+
+        for field in _PROFILE_FIELDS:
+            profile_value = getattr(profile, field)
+            raw_value = profile_value.value if isinstance(profile_value, enum.Enum) else profile_value
+
+            if field in profile.locked:
+                params[field] = raw_value
+            elif field not in params:
+                params[field] = raw_value
+
+        timeout = params.get('timeout_sec')
+        try:
+            normalized_timeout = _INT_ADAPTER.validate_python(timeout)
+        except pydantic.ValidationError:
+            return
+
+        if normalized_timeout > profile.max_timeout_sec:
+            params['timeout_sec'] = profile.max_timeout_sec
+
+    def _get_workspace_size_bytes(self, root: str) -> int:
+        total = 0
+
+        def _walk(path: str):
+            nonlocal total
+            try:
+                with os.scandir(path) as entries:
+                    for entry in entries:
+                        try:
+                            if entry.is_symlink():
+                                total += entry.stat(follow_symlinks=False).st_size
+                                continue
+                            if entry.is_dir(follow_symlinks=False):
+                                _walk(entry.path)
+                                continue
+                            total += entry.stat(follow_symlinks=False).st_size
+                        except FileNotFoundError:
+                            continue
+            except FileNotFoundError:
+                return
+
+        _walk(root)
+        return total
+
+    def _enforce_workspace_quota(self, spec: BoxSpec, *, phase: str) -> None:
+        if spec.host_path is None or spec.workspace_quota_mb <= 0:
+            return
+
+        host_path = os.path.realpath(spec.host_path)
+        if not os.path.isdir(host_path):
+            return
+
+        used_bytes = self._get_workspace_size_bytes(host_path)
+        limit_bytes = spec.workspace_quota_mb * _MIB
+        if used_bytes <= limit_bytes:
+            return
+
+        raise BoxValidationError(
+            f'workspace quota exceeded {phase}: '
+            f'used={used_bytes} bytes limit={limit_bytes} bytes '
+            f'host_path={host_path} session_id={spec.session_id}'
+        )
+
+    async def _cleanup_exceeded_session(self, spec: BoxSpec) -> None:
+        try:
+            await self.client.delete_session(spec.session_id)
+        except Exception as exc:
+            self.ap.logger.warning(
+                'Failed to clean up Box session after workspace quota was exceeded: '
+                f'session_id={spec.session_id} error={exc}'
+            )
+
+    # ── Observability ─────────────────────────────────────────────────
+
+    def _record_error(self, exc: Exception, query: pipeline_query.Query):
+        self._recent_errors.append(
+            {
+                'timestamp': _dt.datetime.now(_UTC).isoformat(),
+                'type': type(exc).__name__,
+                'message': str(exc),
+                'query_id': str(query.query_id),
+            }
+        )
+
+    def get_recent_errors(self) -> list[dict]:
+        return list(self._recent_errors)
+
+    def get_system_guidance(self) -> str:
+        """Return LLM system-prompt guidance for the exec tool.
+
+        All execution-specific prompt text is kept here so that callers
+        (e.g. LocalAgentRunner) stay free of box domain knowledge.
+        """
+        guidance = (
+            'When the exec tool is available, use it for exact calculations, statistics, structured data parsing, '
+            'and code execution instead of estimating mentally. If the user provides numbers, tables, CSV-like text, '
+            'JSON, or other data and asks for a computed answer, prefer running a short Python script via exec '
+            'and then answer from the tool result. Unless the user explicitly asks for the script, code, or implementation '
+            'details, do not include the generated script in the final answer; return the result and a brief explanation only.'
+        )
+        if self.default_workspace:
+            guidance += (
+                ' A default workspace is mounted at /workspace for file tasks. When the user asks to read, create, or '
+                'modify local files in the working directory, use exec with /workspace paths directly; do not ask the '
+                'user for directory parameters unless they explicitly need a different directory.'
+            )
+        return guidance
+
+    async def get_status(self) -> dict:
+        if not self._available:
+            return {
+                'available': False,
+                'enabled': self._enabled,
+                'profile': self.profile.name,
+                'recent_error_count': len(self._recent_errors),
+                'connector_error': self._connector_error,
+            }
+        try:
+            runtime_status = await self.client.get_status()
+        except Exception as exc:
+            # RPC failed — the runtime likely just disconnected and the
+            # heartbeat hasn't flipped _available yet.
+            return {
+                'available': False,
+                'enabled': self._enabled,
+                'profile': self.profile.name,
+                'recent_error_count': len(self._recent_errors),
+                'connector_error': str(exc),
+            }
+        # Backend state can be unavailable even when the connector is healthy
+        # (operator selected nsjail but the binary is missing, Docker daemon
+        # went down after the runtime started, E2B credentials wrong, ...).
+        # Report the combined state in the top-level ``available`` so the
+        # frontend banner / ``useBoxStatus`` hook / native-tool gate all
+        # agree on "actually usable" rather than "connector alive". The
+        # detailed ``backend`` object stays in the payload so the dialog
+        # can still show which backend was tried.
+        backend_info = runtime_status.get('backend') if isinstance(runtime_status, dict) else None
+        backend_ok = bool(backend_info and backend_info.get('available', False))
+        payload = {
+            **runtime_status,
+            'available': backend_ok,
+            'enabled': self._enabled,
+            'profile': self.profile.name,
+            'recent_error_count': len(self._recent_errors),
+        }
+        if not backend_ok and 'connector_error' not in payload:
+            backend_name = backend_info.get('name') if backend_info else None
+            if backend_name:
+                payload['connector_error'] = f'Configured sandbox backend "{backend_name}" is unavailable'
+            else:
+                payload['connector_error'] = 'No supported sandbox backend (Docker / nsjail / E2B) is available'
+        return payload
--- a/src/langbot/pkg/box/workspace.py
+++ b/src/langbot/pkg/box/workspace.py
@@ -0,0 +1,413 @@
+"""Reusable workspace/session helpers built on top of Box.
+
+This module is the middle layer between the raw Box runtime primitives and
+application-specific flows such as skills or MCP stdio.
+
+It intentionally stays generic:
+- path and virtualenv rewriting are workspace concerns
+- Python project detection/bootstrap are workspace concerns
+- session exec / managed-process helpers are workspace concerns
+
+Higher layers add their own semantics on top, for example:
+- skills choose a stable per-skill session id and use repeated exec
+- MCP stdio chooses how to prepare dependencies and attaches to a managed process
+"""
+
+from __future__ import annotations
+
+import os
+import textwrap
+from typing import Any
+
+# File names whose presence in a workspace root marks it as a Python project.
+PYTHON_MANIFEST_FILES = (
+    'requirements.txt',
+    'pyproject.toml',
+    'setup.py',
+    'setup.cfg',
+)
+# Directory names recognized as a virtualenv root when they directly contain
+# one of the ``_VENV_BIN_DIRS`` entries.
+_VENV_DIRS = frozenset({'.venv', 'venv', 'env', '.env'})
+# Names of the executable directory inside a virtualenv.
+# NOTE(review): presumably ``bin`` covers POSIX and ``Scripts`` covers Windows venvs — confirm.
+_VENV_BIN_DIRS = frozenset({'bin', 'Scripts'})
+
+
+def normalize_host_path(path: str | None) -> str:
+    if path is None:
+        return ''
+    stripped = str(path).strip()
+    if not stripped:
+        return ''
+    return os.path.realpath(os.path.abspath(stripped))
+
+
+def rewrite_mounted_path(path: str, host_path: str | None, *, mount_path: str = '/workspace') -> str:
+    """Translate a host path into the path visible inside the sandbox mount."""
+    if not host_path or not path:
+        return path
+    normalized_host = os.path.realpath(host_path)
+    normalized_path = os.path.realpath(path)
+    if normalized_path.startswith(normalized_host + '/'):
+        return mount_path + normalized_path[len(normalized_host) :]
+    if normalized_path == normalized_host:
+        return mount_path
+    return path
+
+
+def unwrap_venv_path(directory: str) -> str:
+    """Collapse ``.../.venv/bin`` style paths back to the project root."""
+    parts = directory.replace('\\', '/').split('/')
+    for i in range(len(parts) - 1, 0, -1):
+        if parts[i] in _VENV_BIN_DIRS and i >= 1:
+            venv_dir = parts[i - 1]
+            if venv_dir in _VENV_DIRS:
+                project_root = '/'.join(parts[: i - 1])
+                return project_root if project_root else '/'
+    return directory
+
+
+def infer_workspace_host_path(command: str, args: list[str] | None = None) -> str | None:
+    """Infer the project/workspace root from absolute command/arg paths."""
+    candidates: list[str] = []
+    for part in [command, *(args or [])]:
+        if not os.path.isabs(part):
+            continue
+        if os.path.exists(part):
+            directory = os.path.dirname(part)
+            candidates.append(os.path.realpath(unwrap_venv_path(directory)))
+    if not candidates:
+        return None
+    common = os.path.commonpath(candidates)
+    return common if common != '/' else None
+
+
+def rewrite_venv_command(command: str, host_path: str | None, *, mount_path: str = '/workspace') -> str:
+    """Rewrite host venv interpreters to plain ``python`` inside the sandbox.
+
+    Once a project is mounted into the sandbox, host virtualenv paths are no
+    longer valid. For those paths we intentionally drop down to ``python`` and
+    let the sandbox-side environment/bootstrap decide what interpreter to use.
+    """
+    if not host_path or not command:
+        return command
+    normalized_host = os.path.realpath(host_path)
+    normalized_command = os.path.realpath(command)
+    if not normalized_command.startswith(normalized_host + '/'):
+        return command
+    rel = normalized_command[len(normalized_host) + 1 :]
+    parts = rel.replace('\\', '/').split('/')
+    if len(parts) >= 3 and parts[0] in _VENV_DIRS and parts[1] in _VENV_BIN_DIRS and parts[2].startswith('python'):
+        return 'python'
+    return rewrite_mounted_path(normalized_command, host_path, mount_path=mount_path)
+
+
+def list_python_manifest_files(host_path: str | None) -> list[str]:
+    normalized_root = normalize_host_path(host_path)
+    if not normalized_root:
+        return []
+    return [filename for filename in PYTHON_MANIFEST_FILES if os.path.isfile(os.path.join(normalized_root, filename))]
+
+
+def classify_python_workspace(host_path: str | None) -> str | None:
+    """Return the generic Python workspace shape, without app-specific policy."""
+    manifest_files = set(list_python_manifest_files(host_path))
+    if not manifest_files:
+        return None
+    if {'pyproject.toml', 'setup.py', 'setup.cfg'} & manifest_files:
+        return 'package'
+    if 'requirements.txt' in manifest_files:
+        return 'requirements'
+    return None
+
+
+def should_prepare_python_env(host_path: str | None) -> bool:
+    normalized_root = normalize_host_path(host_path)
+    if not normalized_root:
+        return False
+    if os.path.isdir(os.path.join(normalized_root, '.venv')):
+        return True
+    return bool(list_python_manifest_files(normalized_root))
+
+
+def wrap_python_command_with_env(command: str, *, mount_path: str = '/workspace') -> str:
+    """Wrap a command with a reusable sandbox-local Python env bootstrap.
+
+    This is the generic "workspace is a Python project" path used by mutable
+    workspaces such as skills. Read-only installation strategies stay in the
+    higher-level caller because they are application policy, not workspace
+    semantics.
+    """
+    bootstrap = textwrap.dedent(
+        f"""
+        set -e
+
+        _LB_VENV_DIR="{mount_path}/.venv"
+        _LB_META_DIR="{mount_path}/.langbot"
+        _LB_META_FILE="$_LB_META_DIR/python-env.json"
+        _LB_LOCK_DIR="$_LB_META_DIR/python-env.lock"
+        _LB_TMP_DIR="{mount_path}/.tmp"
+        _LB_PIP_CACHE_DIR="{mount_path}/.cache/pip"
+
+        mkdir -p "$_LB_META_DIR" "$_LB_TMP_DIR" "$_LB_PIP_CACHE_DIR"
+        export TMPDIR="$_LB_TMP_DIR"
+        export TEMP="$_LB_TMP_DIR"
+        export TMP="$_LB_TMP_DIR"
+        export PIP_CACHE_DIR="$_LB_PIP_CACHE_DIR"
+
+        _lb_python_meta() {{
+          python - <<'PY'
+        import hashlib
+        import json
+        import os
+        import sys
+
+        root = "{mount_path}"
+        digest = hashlib.sha256()
+        manifest_files = []
+        for rel in ("requirements.txt", "pyproject.toml", "setup.py", "setup.cfg"):
+            path = os.path.join(root, rel)
+            if not os.path.isfile(path):
+                continue
+            manifest_files.append(rel)
+            with open(path, "rb") as handle:
+                digest.update(rel.encode("utf-8"))
+                digest.update(b"\\0")
+                digest.update(handle.read())
+                digest.update(b"\\0")
+
+        print(
+            json.dumps(
+                {{
+                    "python_executable": sys.executable,
+                    "python_version": list(sys.version_info[:3]),
+                    "manifest_files": manifest_files,
+                    "manifest_sha256": digest.hexdigest(),
+                }},
+                sort_keys=True,
+            )
+        )
+        PY
+        }}
+
+        _LB_CURRENT_META="$(_lb_python_meta)"
+        _LB_NEEDS_BOOTSTRAP=0
+
+        if [ ! -x "$_LB_VENV_DIR/bin/python" ]; then
+          _LB_NEEDS_BOOTSTRAP=1
+        elif [ ! -f "$_LB_META_FILE" ]; then
+          _LB_NEEDS_BOOTSTRAP=1
+        elif [ "$(cat "$_LB_META_FILE")" != "$_LB_CURRENT_META" ]; then
+          _LB_NEEDS_BOOTSTRAP=1
+        fi
+
+        if [ "$_LB_NEEDS_BOOTSTRAP" -eq 1 ]; then
+          _LB_LOCK_WAIT=0
+          while ! mkdir "$_LB_LOCK_DIR" 2>/dev/null; do
+            if [ "$_LB_LOCK_WAIT" -ge 120 ]; then
+              echo "Timed out waiting for Python environment lock: $_LB_LOCK_DIR" >&2
+              exit 1
+            fi
+            sleep 1
+            _LB_LOCK_WAIT=$((_LB_LOCK_WAIT + 1))
+          done
+
+          _lb_cleanup_lock() {{
+            rmdir "$_LB_LOCK_DIR" >/dev/null 2>&1 || true
+          }}
+          trap _lb_cleanup_lock EXIT INT TERM
+
+          _LB_CURRENT_META="$(_lb_python_meta)"
+          _LB_NEEDS_BOOTSTRAP=0
+          if [ ! -x "$_LB_VENV_DIR/bin/python" ]; then
+            _LB_NEEDS_BOOTSTRAP=1
+          elif [ ! -f "$_LB_META_FILE" ]; then
+            _LB_NEEDS_BOOTSTRAP=1
+          elif [ "$(cat "$_LB_META_FILE")" != "$_LB_CURRENT_META" ]; then
+            _LB_NEEDS_BOOTSTRAP=1
+          fi
+
+          if [ "$_LB_NEEDS_BOOTSTRAP" -eq 1 ]; then
+            rm -rf "$_LB_VENV_DIR"
+            python -m venv "$_LB_VENV_DIR"
+            . "$_LB_VENV_DIR/bin/activate"
+            python -m pip install --upgrade pip setuptools wheel
+            if [ -f "{mount_path}/requirements.txt" ]; then
+              python -m pip install -r "{mount_path}/requirements.txt"
+            elif [ -f "{mount_path}/pyproject.toml" ] || [ -f "{mount_path}/setup.py" ] || [ -f "{mount_path}/setup.cfg" ]; then
+              python -m pip install "{mount_path}"
+            fi
+            printf '%s' "$_LB_CURRENT_META" > "$_LB_META_FILE"
+          fi
+        fi
+
+        export VIRTUAL_ENV="$_LB_VENV_DIR"
+        export PATH="$_LB_VENV_DIR/bin:$PATH"
+        {command}
+        """
+    ).strip()
+    return bootstrap + '\n'
+
+
+class BoxWorkspaceSession:
+    """High-level handle for one reusable workspace-backed Box session.
+
+    The Box runtime already understands sessions and managed processes. This
+    wrapper adds LangBot's workspace-centric view on top: a mounted host path,
+    a stable ``session_id``, optional environment defaults, and convenience
+    helpers for exec or long-running processes inside that workspace.
+    """
+
+    def __init__(
+        self,
+        box_service,
+        session_id: str,
+        *,
+        host_path: str | None = None,
+        host_path_mode: str = 'rw',
+        workdir: str = '/workspace',
+        env: dict[str, str] | None = None,
+        mount_path: str = '/workspace',
+        network: str | None = None,
+        read_only_rootfs: bool | None = None,
+        image: str | None = None,
+        cpus: float | None = None,
+        memory_mb: int | None = None,
+        pids_limit: int | None = None,
+        persistent: bool = False,
+    ):
+        self.box_service = box_service
+        self.session_id = session_id
+        self.host_path = host_path
+        self.host_path_mode = host_path_mode
+        self.workdir = workdir
+        self.env = dict(env or {})
+        self.mount_path = mount_path
+        self.network = network
+        self.read_only_rootfs = read_only_rootfs
+        self.image = image
+        self.cpus = cpus
+        self.memory_mb = memory_mb
+        self.pids_limit = pids_limit
+        self.persistent = persistent
+
+    def rewrite_path(self, path: str) -> str:
+        return rewrite_mounted_path(path, self.host_path, mount_path=self.mount_path)
+
+    def rewrite_venv_command(self, command: str) -> str:
+        return rewrite_venv_command(command, self.host_path, mount_path=self.mount_path)
+
+    def build_session_payload(self) -> dict[str, Any]:
+        # Keep this payload generic so callers can reuse the same workspace
+        # handle for plain exec, file-producing tasks, or managed processes.
+        payload: dict[str, Any] = {
+            'session_id': self.session_id,
+            'workdir': self.workdir,
+            'env': self.env,
+            'persistent': self.persistent,
+        }
+        if self.network is not None:
+            payload['network'] = self.network
+        if self.read_only_rootfs is not None:
+            payload['read_only_rootfs'] = self.read_only_rootfs
+        if self.host_path:
+            payload['host_path'] = self.host_path
+            payload['host_path_mode'] = self.host_path_mode
+        for key in ('image', 'cpus', 'memory_mb', 'pids_limit'):
+            value = getattr(self, key)
+            if value is not None:
+                payload[key] = value
+        return payload
+
+    def build_exec_payload(
+        self,
+        cmd: str,
+        *,
+        workdir: str | None = None,
+        env: dict[str, str] | None = None,
+        timeout_sec: int | None = None,
+    ) -> dict[str, Any]:
+        # Exec payloads inherit the session-level workspace config, then layer
+        # per-call command/workdir/env overrides on top.
+        payload = self.build_session_payload()
+        payload['cmd'] = cmd
+        payload['workdir'] = workdir or self.workdir
+        if timeout_sec is not None:
+            payload['timeout_sec'] = timeout_sec
+        resolved_env = self.env if env is None else env
+        if resolved_env:
+            payload['env'] = resolved_env
+        elif 'env' in payload and not payload['env']:
+            payload.pop('env')
+        return payload
+
+    async def execute_raw(
+        self,
+        cmd: str,
+        *,
+        workdir: str | None = None,
+        env: dict[str, str] | None = None,
+        timeout_sec: int | None = None,
+    ):
+        payload = self.build_exec_payload(cmd, workdir=workdir, env=env, timeout_sec=timeout_sec)
+        return await self.box_service.client.execute(self.box_service.build_spec(payload))
+
+    async def execute_for_query(
+        self,
+        query,
+        cmd: str,
+        *,
+        workdir: str | None = None,
+        env: dict[str, str] | None = None,
+        timeout_sec: int | None = None,
+    ) -> dict:
+        payload = self.build_exec_payload(cmd, workdir=workdir, env=env, timeout_sec=timeout_sec)
+        return await self.box_service.execute_spec_payload(payload, query)
+
+    async def create_session(self):
+        return await self.box_service.create_session(self.build_session_payload())
+
+    def build_process_payload(
+        self,
+        command: str,
+        args: list[str] | None = None,
+        *,
+        env: dict[str, str] | None = None,
+        cwd: str = '/workspace',
+    ) -> dict[str, Any]:
+        # Managed processes run inside the same workspace model as one-shot
+        # execs, so path/venv rewriting is shared here.
+        normalized_command = command
+        normalized_args = list(args or [])
+        normalized_cwd = cwd
+        if self.host_path:
+            normalized_command = self.rewrite_venv_command(command)
+            normalized_args = [self.rewrite_path(arg) for arg in normalized_args]
+            normalized_cwd = self.rewrite_path(cwd)
+        return {
+            'command': normalized_command,
+            'args': normalized_args,
+            'env': dict(env or {}),
+            'cwd': normalized_cwd,
+        }
+
+    async def start_managed_process(
+        self,
+        command: str,
+        args: list[str] | None = None,
+        *,
+        process_id: str = 'default',
+        env: dict[str, str] | None = None,
+        cwd: str = '/workspace',
+    ):
+        payload = self.build_process_payload(command, args, env=env, cwd=cwd)
+        payload['process_id'] = process_id
+        return await self.box_service.start_managed_process(self.session_id, payload)
+
+    async def get_managed_process(self, process_id: str = 'default'):
+        return await self.box_service.get_managed_process(self.session_id, process_id)
+
+    async def stop_managed_process(self, process_id: str = 'default') -> None:
+        await self.box_service.stop_managed_process(self.session_id, process_id)
+
+    def get_managed_process_websocket_url(self, process_id: str = 'default') -> str:
+        return self.box_service.get_managed_process_websocket_url(self.session_id, process_id)
+
+    async def cleanup(self) -> None:
+        await self.box_service.client.delete_session(self.session_id)
--- a/src/langbot/pkg/core/app.py
+++ b/src/langbot/pkg/core/app.py
@@ -9,6 +9,7 @@ from ..platform import botmgr as im_mgr
 from ..platform.webhook_pusher import WebhookPusher
 from ..provider.session import sessionmgr as llm_session_mgr
 from ..provider.modelmgr import modelmgr as llm_model_mgr
+from ..box import service as box_service_module

 from langbot.pkg.provider.tools import toolmgr as llm_tool_mgr
 from ..config import manager as config_mgr
@@ -31,8 +32,8 @@ from ..api.http.service import mcp as mcp_service
 from ..api.http.service import apikey as apikey_service
 from ..api.http.service import webhook as webhook_service
 from ..api.http.service import monitoring as monitoring_service
+from ..api.http.service import skill as skill_service
 from ..api.http.service import maintenance as maintenance_service
-
 from ..discover import engine as discover_engine
 from ..storage import mgr as storagemgr
 from ..utils import logcache
@@ -43,6 +44,7 @@ from ..rag.service import RAGRuntimeService
 from ..vector import mgr as vectordb_mgr
 from ..telemetry import telemetry as telemetry_module
 from ..survey import manager as survey_module
+from ..skill import manager as skill_mgr


 class Application:
@@ -70,6 +72,7 @@ class Application:

    # TODO move to pipeline
    tool_mgr: llm_tool_mgr.ToolManager = None
+    box_service: box_service_module.BoxService = None

    # ======= Config manager =======

@@ -156,6 +159,10 @@ class Application:

    monitoring_service: monitoring_service.MonitoringService = None

+    skill_service: skill_service.SkillService = None
+
+    skill_mgr: skill_mgr.SkillManager = None
+
    maintenance_service: maintenance_service.MaintenanceService = None

    def __init__(self):
@@ -301,7 +308,10 @@ class Application:
        return parsed

    def dispose(self):
-        self.plugin_connector.dispose()
+        if self.plugin_connector is not None:
+            self.plugin_connector.dispose()
+        if self.box_service is not None:
+            self.box_service.dispose()

    async def print_web_access_info(self):
        """Print access webui tips"""
--- a/src/langbot/pkg/core/boot.py
+++ b/src/langbot/pkg/core/boot.py
@@ -46,12 +46,14 @@ async def make_app(loop: asyncio.AbstractEventLoop) -> app.Application:


 async def main(loop: asyncio.AbstractEventLoop):
+    app_inst: app.Application | None = None
    try:
        # Hang system signal processing
        import signal

        def signal_handler(sig, frame):
-            app_inst.dispose()
+            if app_inst is not None:
+                app_inst.dispose()
            print('[Signal] Program exit.')
            os._exit(0)

@@ -60,4 +62,6 @@ async def main(loop: asyncio.AbstractEventLoop):
        app_inst = await make_app(loop)
        await app_inst.run()
    except Exception:
+        if app_inst is not None:
+            app_inst.dispose()
        traceback.print_exc()
--- a/src/langbot/pkg/core/stages/build_app.py
+++ b/src/langbot/pkg/core/stages/build_app.py
@@ -6,6 +6,7 @@ from .. import stage, app
 from ...utils import version, proxy
 from ...pipeline import pool, controller, pipelinemgr
 from ...pipeline import aggregator as message_aggregator
+from ...box import service as box_service
 from ...plugin import connector as plugin_connector
 from ...command import cmdmgr
 from ...provider.session import sessionmgr as llm_session_mgr
@@ -28,6 +29,8 @@ from ...api.http.service import mcp as mcp_service
 from ...api.http.service import apikey as apikey_service
 from ...api.http.service import webhook as webhook_service
 from ...api.http.service import monitoring as monitoring_service
+from ...api.http.service import skill as skill_service
+from ...skill import manager as skill_mgr
 from ...api.http.service import maintenance as maintenance_service
 from ...discover import engine as discover_engine
 from ...storage import mgr as storagemgr
@@ -86,6 +89,9 @@ class BuildAppStage(stage.BootingStage):
        webhook_service_inst = webhook_service.WebhookService(ap)
        ap.webhook_service = webhook_service_inst

+        skill_service_inst = skill_service.SkillService(ap)
+        ap.skill_service = skill_service_inst
+
        proxy_mgr = proxy.ProxyManager(ap)
        await proxy_mgr.initialize()
        ap.proxy_mgr = proxy_mgr
@@ -129,6 +135,10 @@ class BuildAppStage(stage.BootingStage):
        await llm_session_mgr_inst.initialize()
        ap.sess_mgr = llm_session_mgr_inst

+        box_service_inst = box_service.BoxService(ap)
+        await box_service_inst.initialize()
+        ap.box_service = box_service_inst
+
        llm_tool_mgr_inst = llm_tool_mgr.ToolManager(ap)
        await llm_tool_mgr_inst.initialize()
        ap.tool_mgr = llm_tool_mgr_inst
@@ -149,6 +159,11 @@ class BuildAppStage(stage.BootingStage):
        msg_aggregator_inst = message_aggregator.MessageAggregator(ap)
        ap.msg_aggregator = msg_aggregator_inst

+        # Initialize skill manager
+        skill_mgr_inst = skill_mgr.SkillManager(ap)
+        await skill_mgr_inst.initialize()
+        ap.skill_mgr = skill_mgr_inst
+
        rag_mgr_inst = rag_mgr.RAGManager(ap)
        await rag_mgr_inst.initialize()
        ap.rag_mgr = rag_mgr_inst
--- a/src/langbot/pkg/pipeline/aggregator.py
+++ b/src/langbot/pkg/pipeline/aggregator.py
@@ -275,6 +275,7 @@ class MessageAggregator:
            message_chain=merged_chain,
            adapter=base_msg.adapter,
            pipeline_uuid=base_msg.pipeline_uuid,
+            routed_by_rule=any(msg.routed_by_rule for msg in messages),
        )

    async def flush_all(self) -> None:
--- a/src/langbot/pkg/pipeline/longtext/longtext.py
+++ b/src/langbot/pkg/pipeline/longtext/longtext.py
@@ -76,6 +76,10 @@ class LongTextProcessStage(stage.PipelineStage):
            self.ap.logger.debug('Long message processing strategy is not set, skip long message processing.')
            return entities.StageProcessResult(result_type=entities.ResultType.CONTINUE, new_query=query)

+        if not query.resp_message_chain:
+            self.ap.logger.debug('Response message chain is empty, skip long message processing.')
+            return entities.StageProcessResult(result_type=entities.ResultType.CONTINUE, new_query=query)
+
        # 检查是否包含非 Plain 组件
        contains_non_plain = False

--- a/src/langbot/pkg/pipeline/pool.py
+++ b/src/langbot/pkg/pipeline/pool.py
@@ -63,6 +63,7 @@ class QueryPool:
            self.cached_queries[query_id] = query
            self.query_id_counter += 1
            self.condition.notify_all()
+            return query

    async def __aenter__(self):
        await self.pool_lock.acquire()
--- a/src/langbot/pkg/pipeline/preproc/preproc.py
+++ b/src/langbot/pkg/pipeline/preproc/preproc.py
@@ -32,6 +32,9 @@ class PreProcessor(stage.PipelineStage):
    ) -> entities.StageProcessResult:
        """Process"""
        selected_runner = query.pipeline_config['ai']['runner']['runner']
+        include_skill_authoring = (
+            selected_runner == 'local-agent' and getattr(self.ap, 'skill_service', None) is not None
+        )

        session = await self.ap.sess_mgr.get_session(query)

@@ -110,7 +113,11 @@ class PreProcessor(stage.PipelineStage):
                    # Get bound plugins and MCP servers for filtering tools
                    bound_plugins = query.variables.get('_pipeline_bound_plugins', None)
                    bound_mcp_servers = query.variables.get('_pipeline_bound_mcp_servers', None)
-                    query.use_funcs = await self.ap.tool_mgr.get_all_tools(bound_plugins, bound_mcp_servers)
+                    query.use_funcs = await self.ap.tool_mgr.get_all_tools(
+                        bound_plugins,
+                        bound_mcp_servers,
+                        include_skill_authoring=include_skill_authoring,
+                    )

                    self.ap.logger.debug(f'Bound plugins: {bound_plugins}')
                    self.ap.logger.debug(f'Bound MCP servers: {bound_mcp_servers}')
@@ -121,7 +128,11 @@ class PreProcessor(stage.PipelineStage):
            if not query.use_funcs and query.variables.get('_fallback_model_uuids'):
                bound_plugins = query.variables.get('_pipeline_bound_plugins', None)
                bound_mcp_servers = query.variables.get('_pipeline_bound_mcp_servers', None)
-                query.use_funcs = await self.ap.tool_mgr.get_all_tools(bound_plugins, bound_mcp_servers)
+                query.use_funcs = await self.ap.tool_mgr.get_all_tools(
+                    bound_plugins,
+                    bound_mcp_servers,
+                    include_skill_authoring=include_skill_authoring,
+                )

        sender_name = ''

@@ -237,4 +248,67 @@ class PreProcessor(stage.PipelineStage):
        query.prompt.messages = event_ctx.event.default_prompt
        query.messages = event_ctx.event.prompt

+        # =========== Skill awareness for the local-agent runner ===========
+        # The actual activation goes through the ``activate`` Tool Call so the
+        # LLM doesn't see full SKILL.md instructions until it commits to a
+        # skill (Claude Code's progressive disclosure). But the LLM still has
+        # to KNOW which skills exist to make that choice, so we:
+        #   1. resolve the pipeline's bound skills and stash them in
+        #      ``query.variables['_pipeline_bound_skills']`` for downstream
+        #      visibility checks (skill loader, native exec workdir);
+        #   2. inject a short ``Available Skills`` index (name + description
+        #      only) into the system prompt. The contributor's original PR
+        #      relied on this injection; without it the LLM never discovers
+        #      the skills are there and just calls native tools instead.
+        if selected_runner == 'local-agent' and self.ap.skill_mgr:
+            pipeline_data = await self.ap.pipeline_service.get_pipeline(query.pipeline_uuid)
+            extensions_prefs = (pipeline_data or {}).get('extensions_preferences', {})
+            enable_all_skills = extensions_prefs.get('enable_all_skills', True)
+
+            if enable_all_skills:
+                bound_skills = None  # None = all loaded skills are visible
+            else:
+                bound_skills = extensions_prefs.get('skills', [])
+
+            query.variables['_pipeline_bound_skills'] = bound_skills
+
+            skill_addition = self.ap.skill_mgr.build_skill_aware_prompt_addition(
+                bound_skills=bound_skills,
+            )
+            if skill_addition:
+                # Append to the first system message; create one if the
+                # prompt has none. Handles both plain-string and
+                # content-element (list) message bodies.
+                if query.prompt.messages and query.prompt.messages[0].role == 'system':
+                    head = query.prompt.messages[0]
+                    if isinstance(head.content, str):
+                        head.content = head.content + skill_addition
+                    elif isinstance(head.content, list):
+                        appended = False
+                        for ce in head.content:
+                            if getattr(ce, 'type', None) == 'text':
+                                ce.text = (ce.text or '') + skill_addition
+                                appended = True
+                                break
+                        if not appended:
+                            head.content.append(provider_message.ContentElement(type='text', text=skill_addition))
+                else:
+                    query.prompt.messages.insert(
+                        0,
+                        provider_message.Message(role='system', content=skill_addition.strip()),
+                    )
+                self.ap.logger.debug(
+                    f'Skill index injected into system prompt: '
+                    f'pipeline={query.pipeline_uuid} '
+                    f'bound_skills={bound_skills or "all"} '
+                    f'loaded_skills={len(self.ap.skill_mgr.skills)}'
+                )
+            else:
+                self.ap.logger.debug(
+                    f'No skills available for prompt injection: '
+                    f'pipeline={query.pipeline_uuid} '
+                    f'loaded_skills={len(self.ap.skill_mgr.skills)} '
+                    f'bound_skills={bound_skills}'
+                )
+
        return entities.StageProcessResult(result_type=entities.ResultType.CONTINUE, new_query=query)
--- a/src/langbot/pkg/pipeline/process/handler.py
+++ b/src/langbot/pkg/pipeline/process/handler.py
@@ -5,6 +5,7 @@ import abc
 from ...core import app
 from .. import entities
 import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
+import langbot_plugin.api.entities.builtin.provider.message as provider_message


 class MessageHandler(metaclass=abc.ABCMeta):
@@ -31,3 +32,29 @@ class MessageHandler(metaclass=abc.ABCMeta):
        if len(s0) > 20 or '\n' in s:
            s0 = s0[:20] + '...'
        return s0
+
+    def format_result_log(
+        self,
+        result: provider_message.Message | provider_message.MessageChunk,
+    ) -> str | None:
+        """Summarise a runner result as a single log-friendly line.
+
+        Args:
+            result: The message or stream chunk yielded by the runner.
+
+        Returns:
+            A short summary, or ``None`` when the result carries no tool
+            calls and its content is a blank string or an empty list.
+        """
+        if result.tool_calls:
+            # Tool requests are reported by name; calls without a function
+            # name are left out of the list.
+            requested: list[str] = []
+            for call in result.tool_calls:
+                if call.function and call.function.name:
+                    requested.append(call.function.name)
+            if not requested:
+                return f'{result.role}: requested tool calls'
+            return f'{result.role}: requested tools: {", ".join(requested)}'
+
+        body = result.content
+        if isinstance(body, str):
+            if not body.strip():
+                return None
+            # Tool output starting with 'err:' is labelled as an error and
+            # summarised from the raw content.
+            if result.role == 'tool' and body.startswith('err:'):
+                return f'tool error: {self.cut_str(body)}'
+        elif isinstance(body, list) and len(body) == 0:
+            return None
+
+        return self.cut_str(result.readable_str())
--- a/src/langbot/pkg/pipeline/process/handlers/chat.py
+++ b/src/langbot/pkg/pipeline/process/handlers/chat.py
@@ -113,9 +113,11 @@ class ChatMessageHandler(handler.MessageHandler):
                        # This prevents memory overflow from thousands of log entries per conversation
                        # First chunk uses INFO level to confirm connection establishment
                        if chunk_count == 1:
-                            self.ap.logger.info(
-                                f'Conversation({query.query_id}) Streaming started: {self.cut_str(result.readable_str())}'
-                            )
+                            summary = self.format_result_log(result)
+                            if summary is not None:
+                                self.ap.logger.info(f'Conversation({query.query_id}) Streaming started: {summary}')
+                            else:
+                                self.ap.logger.info(f'Conversation({query.query_id}) Streaming started')
                        elif chunk_count % 10 == 0:
                            self.ap.logger.debug(
                                f'Conversation({query.query_id}) Streaming chunk {chunk_count}: {self.cut_str(result.readable_str())}'
@@ -135,9 +137,9 @@ class ChatMessageHandler(handler.MessageHandler):
                    async for result in runner.run(query):
                        query.resp_messages.append(result)

-                        self.ap.logger.info(
-                            f'Conversation({query.query_id}) Response: {self.cut_str(result.readable_str())}'
-                        )
+                        summary = self.format_result_log(result)
+                        if summary is not None:
+                            self.ap.logger.info(f'Conversation({query.query_id}) Response: {summary}')

                        if result.content is not None:
                            text_length += len(result.content)
--- a/src/langbot/pkg/platform/sources/aiocqhttp.py
+++ b/src/langbot/pkg/platform/sources/aiocqhttp.py
@@ -3,6 +3,7 @@ import typing
 import asyncio
 import traceback
 import datetime
+import json

 import aiocqhttp
 import pydantic
@@ -293,6 +294,29 @@ class AiocqhttpMessageConverter(abstract_platform_adapter.AbstractMessageConvert
            elif msg.type == 'dice':
                face_id = msg.data['result']
                yiri_msg_list.append(platform_message.Face(face_type='dice', face_id=int(face_id), face_name='骰子'))
+            elif msg.type == 'json':
+                try:
+                    raw = msg.data.get('data', {})
+                    if isinstance(raw, str):
+                        raw = json.loads(raw)
+                    if isinstance(raw, dict):
+                        _meta = raw.get('meta', {}) or {}
+                        if isinstance(_meta, dict):
+                            _detail = _meta.get('detail_1') or _meta.get('music') or _meta.get('news') or {}
+                        else:
+                            _detail = {}
+                        if isinstance(_detail, dict):
+                            preview = _detail.get('preview', '')
+                            title = _detail.get('desc', '') or _detail.get('title', '')
+                            url = _detail.get('qqdocurl', '') or _detail.get('jumpUrl', '')
+                        else:
+                            preview = title = url = ''
+                        text = ' '.join([f'[{raw.get("app", "")}]', preview, title, url]).strip()
+                        yiri_msg_list.append(platform_message.Plain(text=text or '[收到一张JSON卡片]'))
+                    else:
+                        yiri_msg_list.append(platform_message.Plain(text=str(raw)))
+                except Exception:
+                    yiri_msg_list.append(platform_message.Plain(text='[收到一张JSON卡片]'))

        chain = platform_message.MessageChain(yiri_msg_list)

--- a/src/langbot/pkg/platform/sources/dingtalk.yaml
+++ b/src/langbot/pkg/platform/sources/dingtalk.yaml
@@ -19,6 +19,18 @@ spec:
    en: https://link.langbot.app/en/platforms/dingtalk
    ja: https://link.langbot.app/ja/platforms/dingtalk
  config:
+    - name: one-click-create
+      label:
+        en_US: One-Click Create App
+        zh_Hans: 一键创建应用
+        zh_Hant: 一鍵建立應用
+      description:
+        en_US: "Scan QR code with DingTalk to automatically create an app and fill in credentials. Note: Robot Code cannot be obtained automatically, you need to copy it from the DingTalk Developer Backend manually."
+        zh_Hans: "使用钉钉扫码自动创建应用并填写凭据。注意：机器人代码无法自动获取，需前往钉钉开发者后台手动复制。"
+        zh_Hant: "使用釘釘掃碼自動建立應用並填寫憑證。注意：機器人代碼無法自動取得，需前往釘釘開發者後台手動複製。"
+      type: qr-code-login
+      login_platform: dingtalk
+      required: false
    - name: client_id
      label:
        en_US: Client ID
@@ -40,6 +52,10 @@ spec:
        en_US: Robot Code
        zh_Hans: 机器人代码
        zh_Hant: 機器人代碼
+      description:
+        en_US: "Required for image recognition, file upload and other features. Get it from DingTalk Developer Backend > Robot Configuration."
+        zh_Hans: "识图、上传文件等功能必填。请前往钉钉开发者后台 > 机器人配置中获取。"
+        zh_Hant: "識圖、上傳檔案等功能必填。請前往釘釘開發者後台 > 機器人設定中取得。"
      type: string
      required: true
      default: ""
--- a/src/langbot/pkg/platform/sources/lark.py
+++ b/src/langbot/pkg/platform/sources/lark.py
@@ -1025,7 +1025,90 @@ class LarkAdapter(abstract_platform_adapter.AbstractMessagePlatformAdapter):
        return api_client

    async def send_message(self, target_type: str, target_id: str, message: platform_message.MessageChain):
-        pass
+        """Send a message chain to a Feishu user or chat.
+
+        Text elements are sent as one message; every media item is sent as
+        its own follow-up message.
+
+        Args:
+            target_type: 'person' (sent with receive_id_type 'open_id'),
+                'group' (sent with 'chat_id'), or any other value, which is
+                passed through unchanged as the receive_id_type.
+            target_id: Receive ID of the target.
+            message: Message chain to convert and send.
+
+        Raises:
+            Exception: If any ``im.v1.message.create`` call reports failure.
+        """
+        text_elements, media_items = await self.message_converter.yiri2target(message, self.api_client)
+
+        # Map standard target_type to Feishu receive_id_type
+        if target_type == 'person':
+            receive_id_type = 'open_id'
+        elif target_type == 'group':
+            receive_id_type = 'chat_id'
+        else:
+            receive_id_type = target_type
+
+        # Send text message if there are text elements
+        if text_elements:
+            # Use the 'post' type when any element is an 'at' tag; otherwise
+            # flatten the paragraphs into a plain 'text' message.
+            needs_post = any(ele['tag'] == 'at' for paragraph in text_elements for ele in paragraph)
+
+            if needs_post:
+                msg_type = 'post'
+                final_content = json.dumps(
+                    {
+                        'zh_Hans': {
+                            'title': '',
+                            'content': text_elements,
+                        },
+                    }
+                )
+            else:
+                msg_type = 'text'
+                parts = []
+                # Join each paragraph's text pieces; empty paragraphs are dropped.
+                for paragraph in text_elements:
+                    para_text = ''.join(ele.get('text', '') for ele in paragraph)
+                    if para_text:
+                        parts.append(para_text)
+                final_content = json.dumps({'text': '\n\n'.join(parts)})
+
+            request: CreateMessageRequest = (
+                CreateMessageRequest.builder()
+                .receive_id_type(receive_id_type)
+                .request_body(
+                    CreateMessageRequestBody.builder()
+                    .receive_id(target_id)
+                    .content(final_content)
+                    .msg_type(msg_type)
+                    .uuid(str(uuid.uuid4()))
+                    .build()
+                )
+                .build()
+            )
+
+            app_access_token = self.get_app_access_token()
+            req_opt: RequestOption = (
+                RequestOption.builder().app_ticket(self.app_ticket).app_access_token(app_access_token).build()
+            )
+            response: CreateMessageResponse = self.api_client.im.v1.message.create(request, req_opt)
+
+            if not response.success():
+                raise Exception(
+                    f'client.im.v1.message.create failed, code: {response.code}, msg: {response.msg}, log_id: {response.get_log_id()}, resp: \n{json.dumps(json.loads(response.raw.content), indent=4, ensure_ascii=False)}'
+                )
+
+        # Send media messages separately (image, audio, file, etc.)
+        # Each media item is a dict providing 'content' and 'msg_type'.
+        for media in media_items:
+            request: CreateMessageRequest = (
+                CreateMessageRequest.builder()
+                .receive_id_type(receive_id_type)
+                .request_body(
+                    CreateMessageRequestBody.builder()
+                    .receive_id(target_id)
+                    .content(json.dumps(media['content']))
+                    .msg_type(media['msg_type'])
+                    .uuid(str(uuid.uuid4()))
+                    .build()
+                )
+                .build()
+            )
+
+            app_access_token = self.get_app_access_token()
+            req_opt: RequestOption = (
+                RequestOption.builder().app_ticket(self.app_ticket).app_access_token(app_access_token).build()
+            )
+            response: CreateMessageResponse = self.api_client.im.v1.message.create(request, req_opt)
+
+            if not response.success():
+                raise Exception(
+                    f'client.im.v1.message.create ({media["msg_type"]}) failed, code: {response.code}, msg: {response.msg}, log_id: {response.get_log_id()}, resp: \n{json.dumps(json.loads(response.raw.content), indent=4, ensure_ascii=False)}'
+                )

    async def is_stream_output_supported(self) -> bool:
        is_stream = False
--- a/src/langbot/pkg/platform/sources/lark.yaml
+++ b/src/langbot/pkg/platform/sources/lark.yaml
@@ -23,6 +23,20 @@ spec:
    en: https://link.langbot.app/en/platforms/lark
    ja: https://link.langbot.app/ja/platforms/lark
  config:
+    - name: one-click-create
+      label:
+        en_US: One-Click Create App
+        zh_Hans: 一键创建应用
+        zh_Hant: 一鍵建立應用
+        ja_JP: ワンクリックでアプリ作成
+      description:
+        en_US: Scan QR code to automatically create a Feishu app and fill in credentials
+        zh_Hans: 扫码自动创建飞书应用并填写凭据
+        zh_Hant: 掃碼自動建立飛書應用並填寫憑證
+        ja_JP: QRコードをスキャンしてFeishuアプリを自動作成し、認証情報を入力
+      type: qr-code-login
+      login_platform: feishu
+      required: false
    - name: app_id
      label:
        en_US: App ID
--- a/src/langbot/pkg/platform/sources/openclaw_weixin.yaml
+++ b/src/langbot/pkg/platform/sources/openclaw_weixin.yaml
@@ -32,6 +32,20 @@ spec:
      type: string
      required: true
      default: "https://ilinkai.weixin.qq.com"
+    - name: qr-login
+      label:
+        en_US: Scan QR Login
+        zh_Hans: 扫码登录
+        zh_Hant: 掃碼登入
+        ja_JP: QRコードでログイン
+      description:
+        en_US: Scan QR code with WeChat to authorize and automatically fill in the token
+        zh_Hans: 使用微信扫码授权，自动填写令牌
+        zh_Hant: 使用微信掃碼授權，自動填寫令牌
+        ja_JP: WeChatでQRコードをスキャンし、トークンを自動入力
+      type: qr-code-login
+      login_platform: weixin
+      required: false
    - name: token
      label:
        en_US: Token
--- a/src/langbot/pkg/platform/sources/web_page_bot_adapter.py
+++ b/src/langbot/pkg/platform/sources/web_page_bot_adapter.py
@@ -27,10 +27,7 @@ class WebPageBotAdapter(abstract_platform_adapter.AbstractMessagePlatformAdapter
    listeners: dict = pydantic.Field(default_factory=dict, exclude=True)
    _ws_adapter: typing.Any = None

-    class Config:
-        arbitrary_types_allowed = True
-        # Allow private attributes
-        underscore_attrs_are_private = True
+    model_config = pydantic.ConfigDict(arbitrary_types_allowed=True)

    def __init__(self, config: dict, logger: abstract_platform_logger.AbstractEventLogger, **kwargs):
        super().__init__(config=config, logger=logger, **kwargs)
--- a/src/langbot/pkg/platform/sources/wecombot.yaml
+++ b/src/langbot/pkg/platform/sources/wecombot.yaml
@@ -19,6 +19,18 @@ spec:
    en: https://link.langbot.app/en/platforms/wecombot
    ja: https://link.langbot.app/ja/platforms/wecombot
  config:
+    - name: one-click-create
+      label:
+        en_US: One-Click Create Bot
+        zh_Hans: 一键创建机器人
+        zh_Hant: 一鍵建立機器人
+      description:
+        en_US: "Scan QR code with WeCom to automatically create a bot and fill in BotId and Secret. Note: Robot Name needs to be filled in manually."
+        zh_Hans: "使用企业微信扫码自动创建机器人并填写 BotId 和 Secret。注意：机器人名称需手动填写。"
+        zh_Hant: "使用企業微信掃碼自動建立機器人並填寫 BotId 和 Secret。注意：機器人名稱需手動填寫。"
+      type: qr-code-login
+      login_platform: wecombot
+      required: false
    - name: BotId
      label:
        en_US: BotId
--- a/src/langbot/pkg/plugin/connector.py
+++ b/src/langbot/pkg/plugin/connector.py
@@ -11,12 +11,14 @@ import os
 import sys
 import httpx
 import sqlalchemy
+import yaml
 from async_lru import alru_cache
 from langbot_plugin.api.entities.builtin.pipeline.query import provider_session

 from ..core import app
 from . import handler
 from ..utils import platform
+from ..utils.managed_runtime import ManagedRuntimeConnector
 from langbot_plugin.runtime.io.controllers.stdio import (
    client as stdio_client_controller,
 )
@@ -34,10 +36,12 @@ from ..core import taskmgr
 from ..entity.persistence import plugin as persistence_plugin


-class PluginRuntimeConnector:
-    """Plugin runtime connector"""
+class PluginRuntimeNotConnectedError(RuntimeError):
+    """Raised when plugin runtime operations are requested before connection."""

-    ap: app.Application
+
+class PluginRuntimeConnector(ManagedRuntimeConnector):
+    """Plugin runtime connector"""

    handler: handler.RuntimeConnectionHandler

@@ -49,10 +53,6 @@ class PluginRuntimeConnector:

    ctrl: stdio_client_controller.StdioClientController | ws_client_controller.WebSocketClientController

-    runtime_subprocess_on_windows: asyncio.subprocess.Process | None = None
-
-    runtime_subprocess_on_windows_task: asyncio.Task | None = None
-
    runtime_disconnect_callback: typing.Callable[
        [PluginRuntimeConnector], typing.Coroutine[typing.Any, typing.Any, None]
    ]
@@ -67,7 +67,7 @@ class PluginRuntimeConnector:
            [PluginRuntimeConnector], typing.Coroutine[typing.Any, typing.Any, None]
        ],
    ):
-        self.ap = ap
+        super().__init__(ap)
        self.runtime_disconnect_callback = runtime_disconnect_callback
        self.is_enable_plugin = self.ap.instance_config.data.get('plugin', {}).get('enable', True)

@@ -135,19 +135,7 @@ class PluginRuntimeConnector:
            # We have to launch runtime via cmd but communicate via ws.
            self.ap.logger.info('(windows) use cmd to launch plugin runtime and communicate via ws')

-            if self.runtime_subprocess_on_windows is None:  # only launch once
-                python_path = sys.executable
-                env = os.environ.copy()
-                self.runtime_subprocess_on_windows = await asyncio.create_subprocess_exec(
-                    python_path,
-                    '-m',
-                    'langbot_plugin.cli.__init__',
-                    'rt',
-                    env=env,
-                )
-
-                # hold the process
-                self.runtime_subprocess_on_windows_task = asyncio.create_task(self.runtime_subprocess_on_windows.wait())
+            await self._start_runtime_subprocess('-m', 'langbot_plugin.cli.__init__', 'rt')

            ws_url = 'ws://localhost:5400/control/ws'

@@ -191,44 +179,300 @@ class PluginRuntimeConnector:

    async def ping_plugin_runtime(self):
+        """Ping the plugin runtime through the connection handler.
+
+        Returns:
+            Whatever ``self.handler.ping()`` returns.
+
+        Raises:
+            PluginRuntimeNotConnectedError: If no ``handler`` attribute has
+                been set on this connector yet.
+        """
        if not hasattr(self, 'handler'):
-            raise Exception('Plugin runtime is not connected')
+            raise PluginRuntimeNotConnectedError('Plugin runtime is not connected')

        return await self.handler.ping()

-    def _extract_deps_metadata(
+    def _inspect_plugin_package(
        self,
        file_bytes: bytes,
        task_context: taskmgr.TaskContext | None,
-    ):
-        """Extract dependency count from requirements.txt inside plugin zip."""
-        if task_context is None:
-            return
+    ) -> tuple[str | None, str | None]:
+        """Extract plugin identity and dependency metadata from a plugin package.
+
+        Args:
+            file_bytes: Raw bytes of the plugin zip archive.
+            task_context: When given, receives ``deps_total`` and ``deps_list``
+                metadata from the first archive entry whose name ends with
+                ``requirements.txt``.
+
+        Returns:
+            ``(author, name)`` read from ``manifest.yaml``; either may be
+            ``None`` if the archive or manifest cannot be read.
+        """
+        plugin_author = None
+        plugin_name = None
+
        try:
            with zipfile.ZipFile(io.BytesIO(file_bytes)) as zf:
-                for name in zf.namelist():
-                    if name.endswith('requirements.txt'):
-                        content = zf.read(name).decode('utf-8', errors='ignore')
-                        deps = [
-                            line.strip()
-                            for line in content.splitlines()
-                            if line.strip() and not line.strip().startswith('#')
-                        ]
-                        task_context.metadata['deps_total'] = len(deps)
-                        task_context.metadata['deps_list'] = deps
-                        break
+                # Identity is best-effort: any manifest read or parse error
+                # leaves author and name as None.
+                try:
+                    manifest = yaml.safe_load(zf.read('manifest.yaml').decode('utf-8', errors='ignore')) or {}
+                    metadata = manifest.get('metadata', {})
+                    plugin_author = metadata.get('author')
+                    plugin_name = metadata.get('name')
+                except Exception:
+                    pass
+
+                # Dependency metadata is only recorded when a task context is supplied.
+                if task_context is not None:
+                    for name in zf.namelist():
+                        if name.endswith('requirements.txt'):
+                            content = zf.read(name).decode('utf-8', errors='ignore')
+                            # Keep non-blank lines that are not comments.
+                            deps = [
+                                line.strip()
+                                for line in content.splitlines()
+                                if line.strip() and not line.strip().startswith('#')
+                            ]
+                            task_context.metadata['deps_total'] = len(deps)
+                            task_context.metadata['deps_list'] = deps
+                            break
        except Exception:
            pass

+        return plugin_author, plugin_name
+
+    async def _install_mcp_from_marketplace(
+        self,
+        mcp_data: dict[str, Any],
+        task_context: taskmgr.TaskContext | None = None,
+    ):
+        """Install an MCP server from marketplace data.
+
+        Persists a new, enabled MCP server record named ``<author>__<name>``
+        and, when an MCP tool loader is present, starts hosting it. Does
+        nothing when a server with that name already exists.
+
+        Args:
+            mcp_data: Marketplace entry; ``author``, ``name`` and ``config``
+                (``url``, ``timeout``, ``sse_read_timeout``) are read from it.
+            task_context: Currently unused.
+        """
+        from ..entity.persistence import mcp as persistence_mcp
+        import uuid
+        from urllib.parse import urlsplit
+
+        config = mcp_data.get('config', {})
+        url = config.get('url', '')
+        # Use __ instead of / to avoid URL routing issues with slashes
+        name = f'{mcp_data.get("author", "")}__{mcp_data.get("name", "")}'
+
+        # Determine mode from URL. Only the path and query are searched for
+        # the 'sse' marker: matching the whole URL misclassified hosts that
+        # merely contain those letters, e.g. https://assets.example.com/mcp.
+        try:
+            url_parts = urlsplit(url)
+            mode_hint = f'{url_parts.path}?{url_parts.query}'
+        except ValueError:
+            # Unparseable URL: fall back to searching the raw string.
+            mode_hint = url
+
+        if 'sse' in mode_hint.lower():
+            mode = 'sse'
+        elif url.startswith('http'):
+            mode = 'http'
+        else:
+            mode = 'stdio'
+
+        # Build extra_args from config
+        extra_args = {
+            'url': url,
+            'timeout': config.get('timeout', 30),
+            'sse_read_timeout': config.get('sse_read_timeout', 300),
+        }
+
+        # Check if MCP server already exists
+        existing = await self.ap.persistence_mgr.execute_async(
+            sqlalchemy.select(persistence_mcp.MCPServer).where(persistence_mcp.MCPServer.name == name)
+        )
+        if existing.scalar_one_or_none():
+            self.ap.logger.info(f'MCP server {name} already exists, skipping installation')
+            return
+
+        # Create MCP server record
+        server_uuid = str(uuid.uuid4())
+        server_data = {
+            'uuid': server_uuid,
+            'name': name,
+            'enable': True,
+            'mode': mode,
+            'extra_args': extra_args,
+        }
+
+        await self.ap.persistence_mgr.execute_async(sqlalchemy.insert(persistence_mcp.MCPServer).values(server_data))
+
+        # Start the MCP server
+        result = await self.ap.persistence_mgr.execute_async(
+            sqlalchemy.select(persistence_mcp.MCPServer).where(persistence_mcp.MCPServer.uuid == server_uuid)
+        )
+        server_entity = result.first()
+        if server_entity:
+            server_config = self.ap.persistence_mgr.serialize_model(persistence_mcp.MCPServer, server_entity)
+            if self.ap.tool_mgr.mcp_tool_loader:
+                # The hosting task is appended to the loader's task list so a
+                # reference to it is retained.
+                mcp_task = asyncio.create_task(self.ap.tool_mgr.mcp_tool_loader.host_mcp_server(server_config))
+                self.ap.tool_mgr.mcp_tool_loader._hosted_mcp_tasks.append(mcp_task)
+
+        self.ap.logger.info(f'Installed MCP server {name} from marketplace')
+
+    async def _install_skill_from_zip(
+        self,
+        file_bytes: bytes,
+        filename: str,
+        task_context: taskmgr.TaskContext | None = None,
+    ):
+        """Install a skill package downloaded from the marketplace.
+
+        Args:
+            file_bytes: Raw ZIP archive contents.
+            filename: Base name for the upload; ``.zip`` is appended to it.
+            task_context: Currently unused.
+        """
+        from ..api.http.service.skill import SkillService
+
+        installer = SkillService(self.ap)
+
+        archive_size = len(file_bytes)
+        self.ap.logger.info(f'Installing skill from marketplace ZIP ({archive_size} bytes)')
+
+        # Hand the archive to the skill service as a ZIP upload.
+        upload_name = filename + '.zip'
+        outcome = await installer.install_from_zip_upload(file_bytes=file_bytes, filename=upload_name)
+        self.ap.logger.info(f'Skill installed successfully: {outcome}')
+
+    def _build_plugin_startup_failure_message(
+        self,
+        plugin_author: str,
+        plugin_name: str,
+        task_context: taskmgr.TaskContext | None,
+    ) -> str:
+        """Compose the error text for a plugin that did not start after install.
+
+        Args:
+            plugin_author: Author part of the plugin identifier.
+            plugin_name: Name part of the plugin identifier.
+            task_context: When given and its metadata has a truthy
+                ``current_dep``, that value is reported as the last dependency.
+
+        Returns:
+            The failure message.
+        """
+        last_dep = task_context.metadata.get('current_dep') if task_context is not None else None
+        hint = f' Last dependency: {last_dep}.' if last_dep else ''
+
+        headline = f'Plugin {plugin_author}/{plugin_name} failed to start after installation. '
+        cause = f'Dependency installation or plugin initialization may have failed.{hint} '
+        advice = 'Please check the plugin requirements and runtime logs.'
+        return headline + cause + advice
+
+    async def _wait_for_installed_plugin_ready(
+        self,
+        plugin_author: str | None,
+        plugin_name: str | None,
+        task_context: taskmgr.TaskContext | None,
+        timeout: float = 30,
+    ):
+        """Wait until the installed plugin is registered by the runtime.
+
+        The plugin runtime launches plugins asynchronously. If dependency installation
+        fails, the plugin process exits before registration; without this check the
+        install task can incorrectly finish successfully.
+
+        Args:
+            plugin_author: Author of the plugin; the wait is skipped when falsy.
+            plugin_name: Name of the plugin; the wait is skipped when falsy.
+            task_context: Install task context, used to enrich the failure message.
+            timeout: Maximum number of seconds to keep polling.
+
+        Raises:
+            RuntimeError: If the plugin has not reported the ``initialized``
+                status by the time the timeout elapses.
+        """
+        if not plugin_author or not plugin_name:
+            return
+
+        # Use the monotonic clock: a wall-clock adjustment (NTP sync, manual
+        # change) must not stretch or cut short the wait.
+        deadline = time.monotonic() + timeout
+        last_error: Exception | None = None
+        while time.monotonic() < deadline:
+            try:
+                plugin = await self.get_plugin_info(plugin_author, plugin_name)
+                if plugin is not None:
+                    status = plugin.get('status')
+                    if status == 'initialized':
+                        return
+            except Exception as e:
+                # Keep polling; the most recent error is reported on timeout.
+                last_error = e
+
+            await asyncio.sleep(0.5)
+
+        message = self._build_plugin_startup_failure_message(plugin_author, plugin_name, task_context)
+        if last_error is not None:
+            message = f'{message} Last runtime error: {last_error}'
+        raise RuntimeError(message)
+
    async def install_plugin(
        self,
        install_source: PluginInstallSource,
        install_info: dict[str, Any],
        task_context: taskmgr.TaskContext | None = None,
    ):
+        plugin_author = install_info.get('plugin_author')
+        plugin_name = install_info.get('plugin_name')
+
+        if install_source == PluginInstallSource.MARKETPLACE:
+            # Handle marketplace plugin/mcp/skill installation
+            plugin_author = install_info.get('plugin_author', '')
+            plugin_name = install_info.get('plugin_name', '')
+            space_url = (
+                self.ap.instance_config.data.get('space', {}).get('url', 'https://space.langbot.app').rstrip('/')
+            )
+
+            # Try MCP endpoint first
+            async with httpx.AsyncClient(trust_env=True, timeout=15) as client:
+                mcp_resp = await client.get(f'{space_url}/api/v1/marketplace/mcps/{plugin_author}/{plugin_name}')
+                if mcp_resp.status_code == 200:
+                    mcp_data = mcp_resp.json().get('data', {}).get('mcp', {})
+                    if mcp_data.get('config'):
+                        # It's an MCP - create server locally
+                        self.ap.logger.info(f'Installing MCP from marketplace: {plugin_author}/{plugin_name}')
+                        if task_context:
+                            task_context.set_current_action('installing mcp server')
+                        await self._install_mcp_from_marketplace(mcp_data, task_context)
+                        return
+                    else:
+                        raise Exception(f'MCP {plugin_author}/{plugin_name} has no config')
+                elif mcp_resp.status_code == 404:
+                    # Try skill endpoint - download ZIP and install
+                    self.ap.logger.info(f'Trying skill endpoint for: {plugin_author}/{plugin_name}')
+                    if task_context:
+                        task_context.set_current_action('checking skill marketplace')
+
+                    # Get skill detail to find version
+                    skill_resp = await client.get(
+                        f'{space_url}/api/v1/marketplace/skills/{plugin_author}/{plugin_name}'
+                    )
+                    if skill_resp.status_code == 200:
+                        self.ap.logger.info(f'Installing skill from marketplace: {plugin_author}/{plugin_name}')
+                        if task_context:
+                            task_context.set_current_action('installing skill from marketplace')
+
+                        # Download the skill ZIP (no version needed - uses latest)
+                        if task_context:
+                            task_context.set_current_action('downloading skill package')
+
+                        download_resp = await client.get(
+                            f'{space_url}/api/v1/marketplace/skills/download/{plugin_author}/{plugin_name}'
+                        )
+                        if download_resp.status_code != 200:
+                            raise Exception(
+                                f'Failed to download skill {plugin_author}/{plugin_name}: {download_resp.status_code}'
+                            )
+
+                        file_bytes = download_resp.content
+                        file_size = len(file_bytes)
+                        self.ap.logger.info(f'Downloaded skill ZIP ({file_size} bytes)')
+
+                        # Install skill from ZIP using skill service
+                        await self._install_skill_from_zip(file_bytes, f'{plugin_author}-{plugin_name}', task_context)
+                        return
+                    elif skill_resp.status_code == 404:
+                        # Try plugin endpoint - get versions and download
+                        self.ap.logger.info(f'Trying plugin endpoint for: {plugin_author}/{plugin_name}')
+                        if task_context:
+                            task_context.set_current_action('checking plugin marketplace')
+
+                        # Get plugin versions to find latest
+                        versions_resp = await client.get(
+                            f'{space_url}/api/v1/marketplace/plugins/{plugin_author}/{plugin_name}/versions'
+                        )
+                        if versions_resp.status_code == 200:
+                            versions_data = versions_resp.json().get('data', {}).get('versions', [])
+                            if versions_data:
+                                latest_version = versions_data[0].get('version', '')
+                                if latest_version:
+                                    self.ap.logger.info(
+                                        f'Installing plugin from marketplace: {plugin_author}/{plugin_name} v{latest_version}'
+                                    )
+                                    if task_context:
+                                        task_context.set_current_action('downloading plugin package')
+
+                                    download_resp = await client.get(
+                                        f'{space_url}/api/v1/marketplace/plugins/download/{plugin_author}/{plugin_name}/{latest_version}'
+                                    )
+                                    if download_resp.status_code != 200:
+                                        raise Exception(
+                                            f'Failed to download plugin {plugin_author}/{plugin_name}: {download_resp.status_code}'
+                                        )
+
+                                    file_bytes = download_resp.content
+                                    self._extract_deps_metadata(file_bytes, task_context)
+                                    file_key = await self.handler.send_file(file_bytes, 'lbpkg')
+                                    install_info['plugin_file_key'] = file_key
+                                    self.ap.logger.info(f'Transfered file {file_key} to plugin runtime')
+                                    # Continue to install via runtime
+                                else:
+                                    raise Exception(f'No version found for plugin {plugin_author}/{plugin_name}')
+                            else:
+                                raise Exception(f'Plugin {plugin_author}/{plugin_name} has no versions')
+                        else:
+                            raise Exception(f'Plugin {plugin_author}/{plugin_name} not found in marketplace')
+                    else:
+                        skill_resp.raise_for_status()
+                        raise Exception(f'Failed to get skill {plugin_author}/{plugin_name}')
+                else:
+                    mcp_resp.raise_for_status()
+                    raise Exception(f'Failed to get MCP {plugin_author}/{plugin_name}')
+
        if install_source == PluginInstallSource.LOCAL:
            # transfer file before install
            file_bytes = install_info['plugin_file']
-            self._extract_deps_metadata(file_bytes, task_context)
+            plugin_author, plugin_name = self._inspect_plugin_package(file_bytes, task_context)
+            if task_context is not None and plugin_author and plugin_name:
+                task_context.metadata['plugin_name'] = f'{plugin_author}/{plugin_name}'
            file_key = await self.handler.send_file(file_bytes, 'lbpkg')
            install_info['plugin_file_key'] = file_key
            del install_info['plugin_file']
@@ -265,7 +509,9 @@ class PluginRuntimeConnector:
                                task_context.metadata['download_speed'] = downloaded / elapsed if elapsed > 0 else 0

                    file_bytes = b''.join(chunks)
-                    self._extract_deps_metadata(file_bytes, task_context)
+                    plugin_author, plugin_name = self._inspect_plugin_package(file_bytes, task_context)
+                    if task_context is not None and plugin_author and plugin_name:
+                        task_context.metadata['plugin_name'] = f'{plugin_author}/{plugin_name}'
                    file_key = await self.handler.send_file(file_bytes, 'lbpkg')
                    install_info['plugin_file_key'] = file_key
                    self.ap.logger.info(f'Transfered file {file_key} to plugin runtime')
@@ -289,6 +535,8 @@ class PluginRuntimeConnector:
            if metadata is not None and task_context is not None:
                task_context.metadata.update(metadata)

+        await self._wait_for_installed_plugin_ready(plugin_author, plugin_name, task_context)
+
    async def upgrade_plugin(
        self,
        plugin_author: str,
@@ -534,13 +782,18 @@ class PluginRuntimeConnector:
        return await self.handler.retrieve_knowledge(plugin_author, plugin_name, retriever_name, retrieval_context)

    def dispose(self):
-        # No need to consider the shutdown on Windows
-        # for Windows can kill processes and subprocesses chainly
-
-        if self.is_enable_plugin and isinstance(self.ctrl, stdio_client_controller.StdioClientController):
+        # On non-Windows stdio mode, terminate via the controller's process handle.
+        # On Windows, the managed subprocess is cleaned up by the base class.
+        if (
+            self.is_enable_plugin
+            and hasattr(self, 'ctrl')
+            and isinstance(self.ctrl, stdio_client_controller.StdioClientController)
+        ):
            self.ap.logger.info('Terminating plugin runtime process...')
            self.ctrl.process.terminate()

+        self._dispose_subprocess()
+
        if self.heartbeat_task is not None:
            self.heartbeat_task.cancel()
            self.heartbeat_task = None
@@ -558,11 +811,12 @@ class PluginRuntimeConnector:
        Raises:
            ValueError: If plugin_id is not in the expected 'author/name' format.
        """
-        if '/' not in plugin_id:
+        segments = plugin_id.split('/')
+        if len(segments) != 2 or not all(segments):
            raise ValueError(
                f"Invalid plugin_id format: '{plugin_id}'. Expected 'author/name' format (e.g. 'langbot/rag-engine')."
            )
-        return plugin_id.split('/', 1)
+        return segments[0], segments[1]

    async def call_rag_ingest(self, plugin_id: str, context_data: dict[str, Any]) -> dict[str, Any]:
        """Call plugin to ingest document.
--- a/src/langbot/pkg/provider/modelmgr/requester.py
+++ b/src/langbot/pkg/provider/modelmgr/requester.py
@@ -340,6 +340,7 @@ class ProviderAPIRequester(metaclass=abc.ABCMeta):
    """Provider API请求器"""

    name: str = None
+    init_api_key: str = 'langbot-init-placeholder'

    ap: app.Application

--- a/src/langbot/pkg/provider/modelmgr/requesters/chatcmpl.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/chatcmpl.py
@@ -25,7 +25,7 @@ class OpenAIChatCompletions(requester.ProviderAPIRequester):

    async def initialize(self):
        self.client = openai.AsyncClient(
-            api_key='',
+            api_key=self.init_api_key,
            base_url=self.requester_cfg['base_url'].replace(' ', ''),
            timeout=self.requester_cfg['timeout'],
            http_client=httpx.AsyncClient(trust_env=True, timeout=self.requester_cfg['timeout']),
--- a/src/langbot/pkg/provider/modelmgr/requesters/modelscopechatcmpl.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/modelscopechatcmpl.py
@@ -25,7 +25,7 @@ class ModelScopeChatCompletions(requester.ProviderAPIRequester):

    async def initialize(self):
        self.client = openai.AsyncClient(
-            api_key='',
+            api_key=self.init_api_key,
            base_url=self.requester_cfg['base_url'],
            timeout=self.requester_cfg['timeout'],
            http_client=httpx.AsyncClient(trust_env=True, timeout=self.requester_cfg['timeout']),
--- a/src/langbot/pkg/provider/modelmgr/token.py
+++ b/src/langbot/pkg/provider/modelmgr/token.py
@@ -14,7 +14,14 @@ class TokenManager:

    def __init__(self, name: str, tokens: list[str]):
        self.name = name
-        self.tokens = tokens
+        self.tokens = []
+        seen_tokens = set()
+        for token in tokens:
+            normalized_token = token.strip() if isinstance(token, str) else ''
+            if not normalized_token or normalized_token in seen_tokens:
+                continue
+            self.tokens.append(normalized_token)
+            seen_tokens.add(normalized_token)
        self.using_token_index = 0

    def get_token(self) -> str:
@@ -23,4 +30,6 @@ class TokenManager:
        return self.tokens[self.using_token_index]

    def next_token(self):
+        if len(self.tokens) == 0:
+            return
        self.using_token_index = (self.using_token_index + 1) % len(self.tokens)
--- a/src/langbot/pkg/provider/runner.py
+++ b/src/langbot/pkg/provider/runner.py
@@ -2,8 +2,12 @@ from __future__ import annotations

 import abc
 import typing
+from typing import TYPE_CHECKING

-from ..core import app
+if TYPE_CHECKING:
+    from ..core import app
+    import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
+    import langbot_plugin.api.entities.builtin.provider.message as provider_message


 preregistered_runners: list[typing.Type[RequestRunner]] = []
@@ -35,7 +39,7 @@ class RequestRunner(abc.ABC):

    @abc.abstractmethod
    async def run(
-        self, query: core_entities.Query
-    ) -> typing.AsyncGenerator[llm_entities.Message | llm_entities.MessageChunk, None]:
+        self, query: pipeline_query.Query
+    ) -> typing.AsyncGenerator[provider_message.Message | provider_message.MessageChunk, None]:
        """运行请求"""
        pass
--- a/src/langbot/pkg/provider/runners/localagent.py
+++ b/src/langbot/pkg/provider/runners/localagent.py
@@ -5,6 +5,7 @@ import copy
 import typing
 from .. import runner
 from ..modelmgr import requester as modelmgr_requester
+from ..tools.loaders.native import EXEC_TOOL_NAME
 import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
 import langbot_plugin.api.entities.builtin.provider.message as provider_message
 import langbot_plugin.api.entities.builtin.rag.context as rag_context
@@ -24,11 +25,37 @@ Respond in the same language as the user's input.
 </user_message>
 """

+# NOTE(review): neither constant below is referenced in the hunks shown for this
+# file -- _build_request_messages matches tools against EXEC_TOOL_NAME and takes
+# its guidance text from ap.box_service.get_system_guidance(). Confirm whether
+# these are still needed.
+SANDBOX_EXEC_TOOL_NAME = 'sandbox_exec'
+SANDBOX_EXEC_SYSTEM_GUIDANCE = (
+    'When sandbox_exec is available, use it for exact calculations, statistics, structured data parsing, '
+    'and code execution instead of estimating mentally. If the user provides numbers, tables, CSV-like text, '
+    'JSON, or other data and asks for a computed answer, prefer running a short Python script in sandbox_exec '
+    'and then answer from the tool result.'
+)
+

@runner.runner_class('local-agent')
 class LocalAgentRunner(runner.RequestRunner):
    """Local agent request runner"""

+    def _build_request_messages(
+        self,
+        query: pipeline_query.Query,
+        user_message: provider_message.Message,
+    ) -> list[provider_message.Message]:
+        """Assemble the message list sent with the first model request.
+
+        The result is, in order: the prompt messages, the conversation messages
+        from ``query.messages``, an optional system guidance message, and finally
+        ``user_message``. The guidance message is added only when one of
+        ``query.use_funcs`` has a ``name`` equal to ``EXEC_TOOL_NAME``.
+
+        Args:
+            query: Source of the prompt messages, conversation messages and tools.
+            user_message: The message appended last.
+
+        Returns:
+            A new list; the lists held by ``query`` are not modified.
+        """
+        req_messages = query.prompt.messages.copy() + query.messages.copy()
+
+        # ``use_funcs`` may be None, hence the ``or []``; getattr tolerates tool
+        # objects that have no ``name`` attribute.
+        if any(getattr(tool, 'name', None) == EXEC_TOOL_NAME for tool in query.use_funcs or []):
+            req_messages.append(
+                provider_message.Message(
+                    role='system',
+                    content=self.ap.box_service.get_system_guidance(),
+                )
+            )
+
+        req_messages.append(user_message)
+        return req_messages
+
    async def _get_model_candidates(
        self,
        query: pipeline_query.Query,
@@ -131,6 +158,7 @@ class LocalAgentRunner(runner.RequestRunner):
    ) -> typing.AsyncGenerator[provider_message.Message | provider_message.MessageChunk, None]:
        """Run request"""
        pending_tool_calls = []
+        initial_response_emitted = False

        # Get knowledge bases list from query variables (set by PreProcessor,
        # may have been modified by plugins during PromptPreProcessing)
@@ -236,7 +264,7 @@ class LocalAgentRunner(runner.RequestRunner):
                    ce.text = final_user_message_text
                    break

-        req_messages = query.prompt.messages.copy() + query.messages.copy() + [user_message]
+        req_messages = self._build_request_messages(query, user_message)

        try:
            is_stream = await query.adapter.is_stream_output_supported()
@@ -264,7 +292,6 @@ class LocalAgentRunner(runner.RequestRunner):
                query.use_funcs,
                remove_think,
            )
-            yield msg
            final_msg = msg
        else:
            # Streaming: invoke with fallback
@@ -312,6 +339,7 @@ class LocalAgentRunner(runner.RequestRunner):
                        is_final=msg.is_final,
                        msg_sequence=msg_sequence,
                    )
+                    initial_response_emitted = True

            final_msg = provider_message.MessageChunk(
                role=last_role,
@@ -325,6 +353,12 @@ class LocalAgentRunner(runner.RequestRunner):
        if isinstance(final_msg, provider_message.MessageChunk):
            first_end_sequence = final_msg.msg_sequence

+        if not is_stream:
+            yield final_msg
+        elif not initial_response_emitted:
+            yield final_msg
+            initial_response_emitted = True
+
        req_messages.append(final_msg)

        # Once a model succeeds, commit to it for the tool call loop
@@ -369,7 +403,15 @@ class LocalAgentRunner(runner.RequestRunner):

                    req_messages.append(msg)
                except Exception as e:
-                    err_msg = provider_message.Message(role='tool', content=f'err: {e}', tool_call_id=tool_call.id)
+                    if is_stream:
+                        err_msg = provider_message.MessageChunk(
+                            role='tool',
+                            content=f'err: {e}',
+                            tool_call_id=tool_call.id,
+                            is_final=True,
+                        )
+                    else:
+                        err_msg = provider_message.Message(role='tool', content=f'err: {e}', tool_call_id=tool_call.id)

                    yield err_msg

--- a/src/langbot/pkg/provider/tools/loader.py
+++ b/src/langbot/pkg/provider/tools/loader.py
@@ -2,12 +2,14 @@ from __future__ import annotations

 import abc
 import typing
+from typing import TYPE_CHECKING

 from langbot_plugin.api.entities.events import pipeline_query
-
-from ...core import app
 import langbot_plugin.api.entities.builtin.resource.tool as resource_tool

+if TYPE_CHECKING:
+    from ...core import app
+

 preregistered_loaders: list[typing.Type[ToolLoader]] = []

--- a/src/langbot/pkg/provider/tools/loaders/mcp.py
+++ b/src/langbot/pkg/provider/tools/loaders/mcp.py
@@ -20,6 +20,7 @@ from ....core import app
 import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
 import langbot_plugin.api.entities.builtin.provider.message as provider_message
 from ....entity.persistence import mcp as persistence_mcp
+from .mcp_stdio import BoxStdioSessionRuntime, MCPServerBoxConfig, MCPSessionErrorPhase  # noqa: F401


 class MCPSessionStatus(enum.Enum):
@@ -58,6 +59,12 @@ class RuntimeMCPSession:

    error_message: str | None = None

+    error_phase: MCPSessionErrorPhase | None = None
+
+    retry_count: int = 0
+
+    _box_stdio_runtime: BoxStdioSessionRuntime
+
    def __init__(self, server_name: str, server_config: dict, enable: bool, ap: app.Application):
        self.server_name = server_name
        self.server_uuid = server_config.get('uuid', '')
@@ -75,7 +82,33 @@ class RuntimeMCPSession:
        self._shutdown_event = asyncio.Event()
        self._ready_event = asyncio.Event()

+        self._box_stdio_runtime = BoxStdioSessionRuntime(self)
+        self.box_config = self._box_stdio_runtime.config
+
    async def _init_stdio_python_server(self):
+        if self._uses_box_stdio():
+            await self._box_stdio_runtime.initialize()
+            return
+
+        # Box is configured (ap.box_service exists) but currently unavailable
+        # (disabled by config or connection failed). Refuse stdio MCP rather
+        # than silently falling through to host-stdio — the operator asked
+        # for the sandbox and the failure mode should be visible.
+        #
+        # Set ``error_phase = BOX_UNAVAILABLE`` BEFORE raising so the retry
+        # wrapper can short-circuit (retrying is pointless when Box is
+        # deliberately off) and the frontend can render a localized,
+        # actionable message instead of this raw RuntimeError. Keep the
+        # message itself short — the frontend ignores it for this phase.
+        box_service = getattr(self.ap, 'box_service', None)
+        if box_service is not None and not getattr(box_service, 'available', False):
+            self.error_phase = MCPSessionErrorPhase.BOX_UNAVAILABLE
+            if not getattr(box_service, 'enabled', True):
+                raise RuntimeError('box_disabled_in_config')
+            raise RuntimeError('box_unavailable')
+
+        # Legacy: no box_service installed at all (pre-Box dev mode). Fall
+        # through to host-stdio for backward compatibility.
        server_params = StdioServerParameters(
            command=self.server_config['command'],
            args=self.server_config['args'],
@@ -90,6 +123,9 @@ class RuntimeMCPSession:

        await self.session.initialize()

+    async def _init_box_stdio_server(self):
+        """Initialize the session through the Box stdio runtime helper."""
+        await self._box_stdio_runtime.initialize()
+
    async def _init_sse_server(self):
        sse_transport = await self.exit_stack.enter_async_context(
            sse_client(
@@ -124,8 +160,11 @@ class RuntimeMCPSession:

        await self.session.initialize()

+    _MAX_RETRIES = 3
+    _RETRY_DELAYS = [2, 4, 8]
+
    async def _lifecycle_loop(self):
-        """在后台任务中管理整个MCP会话的生命周期"""
+        """Manage the full MCP session lifecycle in a background task."""
        try:
            if self.server_config['mode'] == 'stdio':
                await self._init_stdio_python_server()
@@ -134,49 +173,109 @@ class RuntimeMCPSession:
            elif self.server_config['mode'] == 'http':
                await self._init_streamable_http_server()
            else:
-                raise ValueError(f'无法识别 MCP 服务器类型: {self.server_name}: {self.server_config}')
+                raise ValueError(f'Unknown MCP server mode: {self.server_name}: {self.server_config}')

            await self.refresh()

            self.status = MCPSessionStatus.CONNECTED

-            # 通知start()方法连接已建立
+            # Notify start() that connection is established
            self._ready_event.set()

-            # 等待shutdown信号
-            await self._shutdown_event.wait()
+            # Wait for shutdown signal, with optional health monitoring for Box stdio
+            if self._uses_box_stdio():
+                monitor_task = asyncio.create_task(self._box_stdio_runtime.monitor_process_health())
+                shutdown_task = asyncio.create_task(self._shutdown_event.wait())
+                done, pending = await asyncio.wait(
+                    [shutdown_task, monitor_task],
+                    return_when=asyncio.FIRST_COMPLETED,
+                )
+                for task in pending:
+                    task.cancel()
+                for task in done:
+                    if task is monitor_task and not self._shutdown_event.is_set():
+                        self.error_phase = MCPSessionErrorPhase.RUNTIME
+                        raise Exception('Box managed process exited unexpectedly')
+            else:
+                await self._shutdown_event.wait()

        except Exception as e:
            self.status = MCPSessionStatus.ERROR
            self.error_message = str(e)
            self.ap.logger.error(f'Error in MCP session lifecycle {self.server_name}: {e}\n{traceback.format_exc()}')
-            # 即使出错也要设置ready事件，让start()方法知道初始化已完成
-            self._ready_event.set()
+            # Do NOT set _ready_event here — let _lifecycle_loop_with_retry
+            # handle retries first. It will set the event when all retries
+            # are exhausted or on success.
+            raise  # Re-raise so _lifecycle_loop_with_retry can catch it
        finally:
-            # 在同一个任务中清理所有资源
+            # Clean up all resources in the same task
            try:
                if self.exit_stack:
                    await self.exit_stack.aclose()
+                    self.exit_stack = AsyncExitStack()
                self.functions.clear()
                self.session = None
            except Exception as e:
                self.ap.logger.error(f'Error cleaning up MCP session {self.server_name}: {e}\n{traceback.format_exc()}')
+            finally:
+                await self._cleanup_box_stdio_session()
+
+    async def _lifecycle_loop_with_retry(self):
+        """Wrap _lifecycle_loop with retry and exponential backoff.
+
+        Runs ``_lifecycle_loop`` up to ``_MAX_RETRIES + 1`` times. It stops
+        without retrying when the loop returns normally, when shutdown has been
+        requested, or when the failure phase is ``BOX_UNAVAILABLE``. On the two
+        terminal failure paths (``BOX_UNAVAILABLE`` and retries exhausted) the
+        status and error message are recorded and ``_ready_event`` is set so that
+        ``start()`` stops waiting.
+        """
+        for attempt in range(self._MAX_RETRIES + 1):
+            try:
+                await self._lifecycle_loop()
+                return  # Normal shutdown, don't retry
+            except Exception as e:
+                self.retry_count = attempt + 1
+                if self._shutdown_event.is_set():
+                    return  # Shutdown requested, don't retry
+                # BOX_UNAVAILABLE is a deliberate refusal, not a transient
+                # failure — retrying produces log spam and a misleading
+                # "Failed after N attempts" message. Surface it immediately.
+                if self.error_phase == MCPSessionErrorPhase.BOX_UNAVAILABLE:
+                    self.status = MCPSessionStatus.ERROR
+                    self.error_message = str(e)
+                    self._ready_event.set()
+                    return
+                if attempt >= self._MAX_RETRIES:
+                    self.status = MCPSessionStatus.ERROR
+                    self.error_message = f'Failed after {self._MAX_RETRIES + 1} attempts: {e}'
+                    self._ready_event.set()
+                    return
+                # Clamp the index so that raising _MAX_RETRIES beyond
+                # len(_RETRY_DELAYS) reuses the longest delay instead of raising
+                # IndexError here, which would end this task without ever
+                # setting _ready_event.
+                delay = self._RETRY_DELAYS[min(attempt, len(self._RETRY_DELAYS) - 1)]
+                self.ap.logger.warning(
+                    f'MCP session {self.server_name} failed (attempt {attempt + 1}), retrying in {delay}s: {e}'
+                )
+                await self._cleanup_box_stdio_session()
+                # Reset status for retry
+                self.status = MCPSessionStatus.CONNECTING
+                self.error_message = None
+                self.error_phase = None
+                await asyncio.sleep(delay)
+
+    _MONITOR_POLL_INTERVAL = 5
+    _MONITOR_MAX_CONSECUTIVE_ERRORS = 3
+
+    async def _monitor_box_process_health(self):
+        """Run the Box stdio runtime helper's process health monitor."""
+        await self._box_stdio_runtime.monitor_process_health()

    async def start(self):
        if not self.enable:
            return

-        # 创建后台任务来管理生命周期
-        self._lifecycle_task = asyncio.create_task(self._lifecycle_loop())
+        # Create background task for lifecycle management with retry
+        self._lifecycle_task = asyncio.create_task(self._lifecycle_loop_with_retry())

-        # 等待连接建立或失败（带超时）
+        # Wait for connection or failure (with timeout)
+        startup_timeout = (self.box_config.startup_timeout_sec + 30) if self._uses_box_stdio() else 30.0
        try:
-            await asyncio.wait_for(self._ready_event.wait(), timeout=30.0)
+            await asyncio.wait_for(self._ready_event.wait(), timeout=startup_timeout)
        except asyncio.TimeoutError:
            self.status = MCPSessionStatus.ERROR
-            raise Exception('Connection timeout after 30 seconds')
+            raise Exception(f'Connection timeout after {startup_timeout} seconds')

-        # 检查是否有错误
+        # Check for errors
        if self.status == MCPSessionStatus.ERROR:
            raise Exception('Connection failed, please check URL')

@@ -232,18 +331,25 @@ class RuntimeMCPSession:
        return self.functions

    def get_runtime_info_dict(self) -> dict:
-        return {
+        info = {
            'status': self.status.value,
            'error_message': self.error_message,
+            'error_phase': self.error_phase.value if self.error_phase else None,
+            'retry_count': self.retry_count,
            'tool_count': len(self.get_tools()),
            'tools': [
                {
                    'name': tool.name,
                    'description': tool.description,
+                    'parameters': tool.parameters,
                }
                for tool in self.get_tools()
            ],
        }
+        if self._uses_box_stdio():
+            info['box_session_id'] = self._build_box_session_id()
+            info['box_enabled'] = True
+        return info

    async def shutdown(self):
        """关闭会话并清理资源"""
@@ -267,6 +373,41 @@ class RuntimeMCPSession:
        except Exception as e:
            self.ap.logger.error(f'Error shutting down MCP session {self.server_name}: {e}\n{traceback.format_exc()}')

+    def _uses_box_stdio(self) -> bool:
+        """Return the Box stdio runtime helper's answer on whether Box stdio is used."""
+        return self._box_stdio_runtime.uses_box_stdio()
+
+    def _build_box_session_id(self) -> str:
+        """Return the Box session id; a fixed value, identical for every MCP session."""
+        return 'mcp-shared'
+
+    def _rewrite_path(self, path: str, host_path: str | None) -> str:
+        """Delegate to the Box stdio runtime helper's ``rewrite_path``."""
+        return self._box_stdio_runtime.rewrite_path(path, host_path)
+
+    def _infer_host_path(self) -> str | None:
+        """Delegate to the Box stdio runtime helper's ``infer_host_path``."""
+        return self._box_stdio_runtime.infer_host_path()
+
+    @staticmethod
+    def _unwrap_venv_path(directory: str) -> str:
+        """Delegate to ``BoxStdioSessionRuntime.unwrap_venv_path``."""
+        return BoxStdioSessionRuntime.unwrap_venv_path(directory)
+
+    def _resolve_host_path(self) -> str | None:
+        """Delegate to the Box stdio runtime helper's ``resolve_host_path``."""
+        return self._box_stdio_runtime.resolve_host_path()
+
+    @staticmethod
+    def _detect_install_command(host_path: str) -> str | None:
+        """Delegate to ``BoxStdioSessionRuntime.detect_install_command``."""
+        return BoxStdioSessionRuntime.detect_install_command(host_path)
+
+    def _build_box_session_payload(self, session_id: str, host_path: str | None = None) -> dict:
+        """Delegate to the Box stdio runtime helper's ``build_box_session_payload``."""
+        return self._box_stdio_runtime.build_box_session_payload(session_id, host_path)
+
+    def _build_box_process_payload(self, host_path: str | None = None) -> dict:
+        """Delegate to the Box stdio runtime helper's ``build_box_process_payload``."""
+        return self._box_stdio_runtime.build_box_process_payload(host_path)
+
+    def _rewrite_venv_command(self, command: str, host_path: str) -> str:
+        """Delegate to the Box stdio runtime helper's ``rewrite_venv_command``."""
+        return self._box_stdio_runtime.rewrite_venv_command(command, host_path)
+
+    async def _cleanup_box_stdio_session(self) -> None:
+        """Delegate to the Box stdio runtime helper's ``cleanup_session``."""
+        await self._box_stdio_runtime.cleanup_session()
+

 # @loader.loader_class('mcp')
 class MCPLoader(loader.ToolLoader):
@@ -332,7 +473,7 @@ class MCPLoader(loader.ToolLoader):
        Args:
            server_config: 服务器配置字典，必须包含:
                - name: 服务器名称
-                - mode: 连接模式 (stdio/sse)
+                - mode: 连接模式 (stdio/sse/http)
                - enable: 是否启用
                - extra_args: 额外的配置参数 (可选)
        """
@@ -431,12 +572,13 @@ class MCPLoader(loader.ToolLoader):
        """获取所有服务器的信息"""
        info = {}
        for server_name, session in self.sessions.items():
+            tools = session.get_tools()
            info[server_name] = {
                'name': server_name,
                'mode': session.server_config.get('mode'),
                'enable': session.enable,
-                'tools_count': len(session.get_tools()),
-                'tool_names': [f.name for f in session.get_tools()],
+                'tools_count': len(tools),
+                'tool_names': [f.name for f in tools],
            }
        return info

--- a/src/langbot/pkg/provider/tools/loaders/mcp_stdio.py
+++ b/src/langbot/pkg/provider/tools/loaders/mcp_stdio.py
@@ -0,0 +1,366 @@
+from __future__ import annotations
+
+import enum
+import asyncio
+import os
+import shutil
+import shlex
+from typing import TYPE_CHECKING, Any
+
+import pydantic
+from mcp import ClientSession
+from mcp.client.websocket import websocket_client
+from ....box.workspace import (
+    BoxWorkspaceSession,
+    classify_python_workspace,
+    infer_workspace_host_path,
+    normalize_host_path,
+    rewrite_mounted_path,
+    rewrite_venv_command,
+    unwrap_venv_path,
+)
+
+if TYPE_CHECKING:
+    from .mcp import RuntimeMCPSession
+
+
+class MCPSessionErrorPhase(enum.Enum):
+    """Which phase of the MCP lifecycle failed."""
+
+    # Creating the Box session failed, or the Box runtime never became available.
+    SESSION_CREATE = 'session_create'
+    # The dependency install command raised or finished unsuccessfully.
+    DEP_INSTALL = 'dep_install'
+    # Building the process payload or starting the managed process failed.
+    PROCESS_START = 'process_start'
+    # Opening the websocket relay or creating the client session failed.
+    RELAY_CONNECT = 'relay_connect'
+    # The MCP initialize handshake failed.
+    MCP_INIT = 'mcp_init'
+    RUNTIME = 'runtime'
+    TOOL_CALL = 'tool_call'
+    # Stdio MCP refused because Box is disabled in config or currently
+    # unavailable. Not transient — retries would be pointless. The frontend
+    # uses this phase to render a localized actionable message instead of
+    # the raw RuntimeError text.
+    BOX_UNAVAILABLE = 'box_unavailable'
+
+
+class MCPServerBoxConfig(pydantic.BaseModel):
+    """Structured configuration for running an MCP server inside a Box container.
+
+    Parsed from the ``box`` mapping of an MCP server's config.
+    """
+
+    image: str | None = None
+    network: str = 'on'  # MCP servers need network for dependency installation
+    host_path: str | None = None
+    # Mounts are read-only by default; any other mode must be set explicitly.
+    host_path_mode: str = 'ro'
+    env: dict[str, str] = pydantic.Field(default_factory=dict)
+    startup_timeout_sec: int = 120  # Longer default to allow dependency bootstrap
+    cpus: float | None = None
+    memory_mb: int | None = None
+    pids_limit: int | None = None
+    read_only_rootfs: bool | None = None
+
+    # Keys that are not declared above are dropped instead of failing validation.
+    model_config = pydantic.ConfigDict(extra='ignore')
+
+
+class BoxStdioSessionRuntime:
+    """Encapsulate Box-backed stdio MCP session orchestration."""
+
+    def __init__(self, owner: RuntimeMCPSession):
+        """Bind the runtime to its owning session and parse the server's ``box`` config.
+
+        Args:
+            owner: Session whose ``server_config`` is read; a missing ``box``
+                key is validated as an empty mapping, i.e. all defaults.
+        """
+        self.owner = owner
+        self.config = MCPServerBoxConfig.model_validate(owner.server_config.get('box', {}))
+
+    @property
+    def ap(self):
+        """The owning session's ``ap`` object, read on every access."""
+        return self.owner.ap
+
+    @property
+    def server_name(self) -> str:
+        """The owning session's ``server_name``."""
+        return self.owner.server_name
+
+    @property
+    def server_config(self) -> dict:
+        """The owning session's ``server_config`` mapping (not a copy)."""
+        return self.owner.server_config
+
+    def _build_workspace(
+        self,
+        *,
+        host_path: str | None | object = ...,
+        workdir: str = '/workspace',
+        mount_path: str = '/workspace',
+    ) -> BoxWorkspaceSession:
+        """Create a ``BoxWorkspaceSession`` from this server's Box config.
+
+        Args:
+            host_path: Host directory for the workspace. The ``...`` default is
+                a sentinel meaning "use ``resolve_host_path()``"; an explicit
+                ``None`` is passed through unchanged.
+            workdir: Working directory handed to the workspace session.
+            mount_path: Mount path handed to the workspace session.
+
+        Returns:
+            A persistent workspace session bound to the owner's session id.
+        """
+        box_config = self.config
+        if host_path is ...:
+            effective_host_path = self.resolve_host_path()
+        else:
+            effective_host_path = host_path
+
+        # An unset read_only_rootfs is passed on as False.
+        rootfs_read_only = box_config.read_only_rootfs
+        if rootfs_read_only is None:
+            rootfs_read_only = False
+
+        return BoxWorkspaceSession(
+            self.ap.box_service,
+            self.owner._build_box_session_id(),
+            host_path=effective_host_path,
+            host_path_mode=box_config.host_path_mode,
+            workdir=workdir,
+            env=box_config.env,
+            mount_path=mount_path,
+            network=box_config.network,
+            read_only_rootfs=rootfs_read_only,
+            image=box_config.image,
+            cpus=box_config.cpus,
+            memory_mb=box_config.memory_mb,
+            pids_limit=box_config.pids_limit,
+            persistent=True,
+        )
+
+    @property
+    def process_id(self) -> str:
+        """Each MCP server gets a unique process_id within the shared session.
+
+        The value is the owning session's ``server_uuid``.
+        """
+        return self.owner.server_uuid
+
+    def uses_box_stdio(self) -> bool:
+        """Tell whether this server is a stdio server with an available Box service.
+
+        Returns:
+            ``True`` only when the configured mode is ``stdio``, ``ap`` exposes
+            a ``box_service``, and that service reports a truthy ``available``.
+        """
+        is_stdio_mode = self.server_config.get('mode') == 'stdio'
+        service = getattr(self.ap, 'box_service', None) if is_stdio_mode else None
+        if service is None:
+            return False
+        # When Box is configured but currently unavailable (disabled or
+        # connection failed), do NOT silently fall through to host-stdio —
+        # that would bypass the sandbox the operator asked for. The caller
+        # is expected to refuse the stdio MCP server with a clear error.
+        return bool(getattr(service, 'available', False))
+
+    async def initialize(self) -> None:
+        """Start this server's stdio process in the shared Box session and connect to it.
+
+        Steps, in order: wait for the Box runtime, create the shared session,
+        stage ``host_path`` (if one resolves) and run its detected install
+        command, start the managed process, open the websocket relay, then run
+        the MCP ``initialize`` handshake. Each guarded step stores its
+        ``MCPSessionErrorPhase`` on ``owner.error_phase`` before re-raising.
+        Staging the host path is not wrapped, so its errors propagate without
+        a phase being set here.
+
+        Raises:
+            Exception: Propagated from any step; also raised when the install
+                command completes with ``result.ok`` false.
+        """
+        await self._wait_for_box_runtime()
+
+        # All stdio MCP servers share one Box session. Per-server host paths
+        # are staged into the shared workspace instead of becoming session
+        # mounts, because an existing Docker container cannot add bind mounts.
+        workspace = self._build_workspace(host_path=None)
+        host_path = self.resolve_host_path()
+        process_cwd = '/workspace'
+
+        try:
+            await workspace.create_session()
+        except Exception:
+            self.owner.error_phase = MCPSessionErrorPhase.SESSION_CREATE
+            raise
+
+        if host_path:
+            # From here on the process runs from the staged copy, not the shared root.
+            process_cwd = await self._stage_host_path_to_shared_workspace(host_path)
+            install_cmd = self.detect_install_command(host_path, process_cwd)
+            if install_cmd:
+                self.ap.logger.info(
+                    f'MCP server {self.server_name}: installing dependencies in Box with: {install_cmd}'
+                )
+                try:
+                    result = await workspace.execute_raw(
+                        install_cmd,
+                        workdir=process_cwd,
+                        timeout_sec=self.config.startup_timeout_sec or 120,
+                    )
+                except Exception:
+                    self.owner.error_phase = MCPSessionErrorPhase.DEP_INSTALL
+                    raise
+                if not result.ok:
+                    self.owner.error_phase = MCPSessionErrorPhase.DEP_INSTALL
+                    # Only the first 500 characters of stderr go into the error message.
+                    stderr_preview = (result.stderr or '')[:500]
+                    raise Exception(f'Dependency install failed (exit code {result.exit_code}): {stderr_preview}')
+
+        try:
+            # The payload is built from a workspace rooted at the staged directory,
+            # but the process is started in the shared session (workspace.session_id).
+            process_workspace = (
+                self._build_workspace(host_path=host_path, workdir=process_cwd, mount_path=process_cwd)
+                if host_path
+                else workspace
+            )
+            payload = process_workspace.build_process_payload(
+                self.server_config['command'],
+                self.server_config.get('args', []),
+                env=self.server_config.get('env', {}),
+                cwd=process_cwd,
+            )
+            payload['process_id'] = self.process_id
+            await workspace.box_service.start_managed_process(workspace.session_id, payload)
+        except Exception:
+            self.owner.error_phase = MCPSessionErrorPhase.PROCESS_START
+            raise
+
+        try:
+            websocket_url = workspace.get_managed_process_websocket_url(self.process_id)
+            # Both contexts go on the owner's exit stack, so the owner controls when they close.
+            transport = await self.owner.exit_stack.enter_async_context(websocket_client(websocket_url))
+            read_stream, write_stream = transport
+            self.owner.session = await self.owner.exit_stack.enter_async_context(
+                ClientSession(read_stream, write_stream)
+            )
+        except Exception:
+            self.owner.error_phase = MCPSessionErrorPhase.RELAY_CONNECT
+            raise
+
+        try:
+            await self.owner.session.initialize()
+        except Exception:
+            self.owner.error_phase = MCPSessionErrorPhase.MCP_INIT
+            raise
+
+    async def monitor_process_health(self) -> None:
+        """Poll the managed process until it exits, polling keeps failing, or shutdown is requested.
+
+        Returns when the reported status equals ``EXITED`` (enum member or its
+        value), when consecutive polling errors reach the owner's
+        ``_MONITOR_MAX_CONSECUTIVE_ERRORS``, or once the owner's shutdown event
+        is set. A successful poll resets the error counter.
+        """
+        from langbot_plugin.box.models import BoxManagedProcessStatus
+
+        workspace = self._build_workspace()
+        failure_streak = 0
+        while not self.owner._shutdown_event.is_set():
+            try:
+                info = await workspace.get_managed_process(self.process_id)
+                # The process info may be a plain dict or an object with a status attribute.
+                status = info.get('status', '') if isinstance(info, dict) else getattr(info, 'status', '')
+                if status in (BoxManagedProcessStatus.EXITED.value, BoxManagedProcessStatus.EXITED):
+                    return
+                failure_streak = 0
+            except Exception as exc:
+                failure_streak += 1
+                error_limit = self.owner._MONITOR_MAX_CONSECUTIVE_ERRORS
+                self.ap.logger.warning(
+                    f'MCP monitor for {self.server_name}: get_managed_process failed '
+                    f'({failure_streak}/{error_limit}): '
+                    f'{type(exc).__name__}: {exc}'
+                )
+                if failure_streak >= self.owner._MONITOR_MAX_CONSECUTIVE_ERRORS:
+                    return
+            await asyncio.sleep(self.owner._MONITOR_POLL_INTERVAL)
+
+    async def _stage_host_path_to_shared_workspace(self, host_path: str) -> str:
+        """Copy ``host_path`` into this process's staging area of the shared workspace.
+
+        Args:
+            host_path: Host directory holding the MCP server's files.
+
+        Returns:
+            The in-Box directory of the staged copy, or ``'/workspace'`` when
+            the normalized host path is empty.
+
+        Raises:
+            FileNotFoundError: The normalized path is not an existing directory.
+        """
+        normalized_source = normalize_host_path(host_path)
+        if not normalized_source:
+            return '/workspace'
+        if not os.path.isdir(normalized_source):
+            raise FileNotFoundError(f'MCP host_path does not exist or is not a directory: {host_path}')
+
+        self._validate_host_path(normalized_source)
+
+        shared_root = self._shared_workspace_host_path()
+        staging_root = os.path.join(shared_root, '.mcp', self.process_id)
+        staging_workspace = os.path.join(staging_root, 'workspace')
+        # The copy is blocking filesystem work, so it runs in a worker thread.
+        await asyncio.to_thread(self._copy_workspace_tree, normalized_source, staging_root, staging_workspace)
+        return f'/workspace/.mcp/{self.process_id}/workspace'
+
+    def _validate_host_path(self, host_path: str) -> None:
+        """Have the Box service build a spec for ``host_path`` and discard the result.
+
+        Nothing is returned, so only an exception raised by ``build_spec``
+        matters to the caller (presumably that is how a rejected path is
+        reported; confirm against the Box service).
+        """
+        rootfs_read_only = self.config.read_only_rootfs
+        if rootfs_read_only is None:
+            rootfs_read_only = False
+        probe_spec = {
+            'session_id': f'mcp-validate-{self.process_id}',
+            'host_path': host_path,
+            'host_path_mode': self.config.host_path_mode,
+            'network': self.config.network,
+            'read_only_rootfs': rootfs_read_only,
+        }
+        self.ap.box_service.build_spec(probe_spec)
+
+    def _shared_workspace_host_path(self) -> str:
+        """Return the normalized Box default workspace directory, creating it if needed.
+
+        Raises:
+            RuntimeError: The Box service has no (or an empty) ``default_workspace``.
+        """
+        configured = getattr(self.ap.box_service, 'default_workspace', None)
+        if configured:
+            shared_dir = normalize_host_path(configured)
+            os.makedirs(shared_dir, exist_ok=True)
+            return shared_dir
+        raise RuntimeError('Box default workspace is required for shared MCP host_path staging')
+
+    @staticmethod
+    def _copy_workspace_tree(source_path: str, process_host_root: str, process_host_workspace: str) -> None:
+        """Replace the staging root with a fresh copy of ``source_path``.
+
+        Args:
+            source_path: Directory to copy from.
+            process_host_root: Staging root; removed (errors ignored) and recreated.
+            process_host_workspace: Copy destination inside the staging root.
+        """
+        skip_entries = shutil.ignore_patterns('.git', '__pycache__', '.pytest_cache', '.mypy_cache', '.ruff_cache')
+        # Drop whatever an earlier run left behind before copying.
+        shutil.rmtree(process_host_root, ignore_errors=True)
+        os.makedirs(process_host_root, exist_ok=True)
+        # symlinks=True copies links as links instead of following them.
+        shutil.copytree(source_path, process_host_workspace, symlinks=True, ignore=skip_entries)
+
+    async def _cleanup_staged_workspace(self) -> None:
+        """Delete this process's staging directory, best effort.
+
+        Does nothing when no host path resolves. Any failure is logged as a
+        warning and not re-raised.
+        """
+        if not self.resolve_host_path():
+            return
+        try:
+            staged_root = os.path.join(self._shared_workspace_host_path(), '.mcp', self.process_id)
+            await asyncio.to_thread(shutil.rmtree, staged_root, ignore_errors=True)
+        except Exception as exc:
+            failure = f'{type(exc).__name__}: {exc}'
+            self.ap.logger.warning(
+                f'MCP server {self.server_name}: failed to clean staged workspace '
+                f'process_id={self.process_id}: {failure}'
+            )
+
+    async def _wait_for_box_runtime(self) -> None:
+        """Block until the Box service reports ``available`` or the startup timeout elapses.
+
+        The wait budget is ``startup_timeout_sec`` (120 when falsy), never
+        less than one second; availability is re-checked once per second and
+        a single warning is logged on the first unavailable check.
+
+        Raises:
+            Exception: The deadline passed while Box was still unavailable;
+                ``owner.error_phase`` is set to ``SESSION_CREATE`` first.
+        """
+        wait_budget = max(float(self.config.startup_timeout_sec or 120), 1.0)
+        loop = asyncio.get_running_loop()
+        deadline = loop.time() + wait_budget
+        already_warned = False
+        while True:
+            if getattr(self.ap.box_service, 'available', False):
+                return
+            if not already_warned:
+                self.ap.logger.warning(
+                    f'MCP server {self.server_name}: waiting for Box runtime before starting stdio process'
+                )
+                already_warned = True
+            if loop.time() >= deadline:
+                self.owner.error_phase = MCPSessionErrorPhase.SESSION_CREATE
+                raise Exception(f'Box runtime is not available after {int(wait_budget)} seconds')
+            await asyncio.sleep(1)
+
+    async def cleanup_session(self) -> None:
+        """Stop this server's managed process and remove its staged files.
+
+        Does nothing unless ``uses_box_stdio()`` is true. A failed stop is
+        logged as a warning; the staged workspace is cleaned up either way,
+        and the success message is logged only after a successful stop.
+        """
+        if not self.uses_box_stdio():
+            return
+
+        # In the shared-session model, we do not delete the session itself.
+        # Stop only this MCP server's managed process; deleting the session
+        # would kill other MCP servers sharing the same container.
+        shared_workspace = self._build_workspace(host_path=None)
+        stop_succeeded = True
+        try:
+            await shared_workspace.stop_managed_process(self.process_id)
+        except Exception as exc:
+            stop_succeeded = False
+            self.ap.logger.warning(
+                f'MCP server {self.server_name}: failed to stop managed process '
+                f'process_id={self.process_id}: {type(exc).__name__}: {exc}'
+            )
+
+        await self._cleanup_staged_workspace()
+        if stop_succeeded:
+            self.ap.logger.info(
+                f'MCP server {self.server_name}: stopped process_id={self.process_id} '
+                f'(shared session {self.owner._build_box_session_id()} kept alive)'
+            )
+
+    def rewrite_path(self, path: str, host_path: str | None) -> str:
+        """Return ``rewrite_mounted_path(path, host_path)`` from the Box workspace helpers."""
+        return rewrite_mounted_path(path, host_path)
+
+    def infer_host_path(self) -> str | None:
+        """Infer a host path from the configured ``command`` and ``args``.
+
+        Missing keys default to an empty command string and an empty argument list.
+        """
+        return infer_workspace_host_path(self.server_config.get('command', ''), self.server_config.get('args', []))
+
+    @staticmethod
+    def unwrap_venv_path(directory: str) -> str:
+        """Return ``unwrap_venv_path(directory)`` from the Box workspace helpers."""
+        return unwrap_venv_path(directory)
+
+    def resolve_host_path(self) -> str | None:
+        """Return the configured ``host_path`` if truthy, otherwise the inferred one."""
+        return self.config.host_path or self.infer_host_path()
+
+    @staticmethod
+    def detect_install_command(host_path: str, workspace_path: str = '/workspace') -> str | None:
+        """Build the shell command that installs the workspace's Python dependencies.
+
+        Args:
+            host_path: Host directory classified by ``classify_python_workspace``.
+            workspace_path: In-Box directory holding the same files; it is
+                shell-quoted before being embedded in the command.
+
+        Returns:
+            For a ``'package'`` workspace, a command that copies the tree
+            (minus virtualenv, VCS and cache directories) to a scratch
+            directory and pip-installs it; for a ``'requirements'`` workspace,
+            a ``pip install -r`` command; otherwise ``None``.
+        """
+        workspace_kind = classify_python_workspace(host_path)
+        quoted_workspace_path = shlex.quote(workspace_path)
+        if workspace_kind == 'package':
+            return (
+                # The trailing cleanup is chained with `&&`, so a failed copy or
+                # install leaves the scratch dir behind. Clear it first so a retry
+                # never installs on top of stale files from an earlier attempt.
+                'rm -rf /opt/_lb_src && mkdir -p /opt/_lb_src'
+                f' && tar -C {quoted_workspace_path}'
+                ' --exclude=.venv --exclude=.git --exclude=__pycache__'
+                ' --exclude=node_modules --exclude=.tox --exclude=.nox'
+                ' --exclude="*.egg-info" --exclude=.uv-cache'
+                ' -cf - .'
+                ' | tar -C /opt/_lb_src -xf -'
+                ' && pip install --no-cache-dir /opt/_lb_src'
+                ' && rm -rf /opt/_lb_src'
+            )
+        if workspace_kind == 'requirements':
+            return f'pip install --no-cache-dir -r {quoted_workspace_path}/requirements.txt'
+        return None
+
+    def build_box_session_payload(self, session_id: str, host_path: str | None = None) -> dict[str, Any]:
+        """Build a session payload for ``session_id`` from this server's Box config.
+
+        Args:
+            session_id: Overrides the session id on the freshly built workspace.
+            host_path: When not ``None``, replaces the resolved host path.
+        """
+        workspace = self._build_workspace()
+        workspace.session_id = session_id
+        if host_path is not None:
+            workspace.host_path = host_path
+        return workspace.build_session_payload()
+
+    def build_box_process_payload(self, host_path: str | None = None) -> dict[str, Any]:
+        """Build a process payload from the configured ``command``, ``args`` and ``env``.
+
+        Args:
+            host_path: When not ``None``, replaces the resolved host path.
+
+        Raises:
+            KeyError: The server config has no ``command`` entry.
+        """
+        workspace = self._build_workspace()
+        if host_path is not None:
+            workspace.host_path = host_path
+        return workspace.build_process_payload(
+            self.server_config['command'],
+            self.server_config.get('args', []),
+            env=self.server_config.get('env', {}),
+        )
+
+    def rewrite_venv_command(self, command: str, host_path: str) -> str:
+        """Return ``rewrite_venv_command(command, host_path)`` from the Box workspace helpers."""
+        return rewrite_venv_command(command, host_path)
--- a/src/langbot/pkg/provider/tools/loaders/native.py
+++ b/src/langbot/pkg/provider/tools/loaders/native.py
@@ -0,0 +1,846 @@
+from __future__ import annotations
+
+import json
+import os
+
+import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
+from langbot_plugin.api.entities.events import pipeline_query
+
+from .. import loader
+from . import skill as skill_loader
+
+# Names under which the native sandbox tools are exposed.
+EXEC_TOOL_NAME = 'exec'
+READ_TOOL_NAME = 'read'
+WRITE_TOOL_NAME = 'write'
+EDIT_TOOL_NAME = 'edit'
+GLOB_TOOL_NAME = 'glob'
+GREP_TOOL_NAME = 'grep'
+
+# Every native tool name, used for membership checks.
+_ALL_TOOL_NAMES = {EXEC_TOOL_NAME, READ_TOOL_NAME, WRITE_TOOL_NAME, EDIT_TOOL_NAME, GLOB_TOOL_NAME, GREP_TOOL_NAME}
+
+# Directory names skipped by the in-Box glob and grep scripts to avoid noise.
+_SKIP_DIRS = {'.git', 'node_modules', '__pycache__', '.venv', 'venv', '.tox', 'dist', 'build'}
+
+
+class NativeToolLoader(loader.ToolLoader):
+    def __init__(self, ap):
+        """Initialise the loader with nothing cached yet."""
+        super().__init__(ap)
+        # Tool definitions; None until get_tools builds them.
+        self._tools: list[resource_tool.LLMTool] | None = None
+        # Result of the startup backend probe; None until initialize() has run.
+        self._backend_available: bool | None = None
+
+    async def initialize(self):
+        """Probe the sandbox backend once at startup, store the result and log it."""
+        backend_ready = await self._check_backend_available()
+        self._backend_available = backend_ready
+        if not backend_ready:
+            self.ap.logger.warning(
+                'Native sandbox tools (exec/read/write/edit/glob/grep) are NOT available. '
+                'No sandbox backend (Docker/nsjail/E2B) is ready. '
+                'The LLM will not have access to code execution or file operation tools.'
+            )
+            return
+        self.ap.logger.info('Native sandbox tools (exec/read/write/edit/glob/grep) are available.')
+
+    async def _check_backend_available(self) -> bool:
+        """Report whether the Box backend itself, not just the Box service, is usable.
+
+        Returns:
+            ``False`` when there is no Box service, when it is not marked
+            available, or when fetching its status raises; otherwise the
+            ``available`` entry of the status's ``backend`` section.
+        """
+        service = getattr(self.ap, 'box_service', None)
+        if service is None or not getattr(service, 'available', False):
+            return False
+        # The service flag alone is not enough; ask the backend through get_status.
+        try:
+            status = await service.get_status()
+            return status.get('backend', {}).get('available', False)
+        except Exception:
+            return False
+
+    async def get_tools(self, bound_plugins: list[str] | None = None) -> list[resource_tool.LLMTool]:
+        """Return the native tool definitions, or an empty list when the sandbox is unavailable.
+
+        The definitions are built on first use and cached; callers receive a
+        new list each time, so mutating it does not affect the cache.
+        ``bound_plugins`` is not used here.
+        """
+        if not self._is_sandbox_available():
+            return []
+        if self._tools is None:
+            builders = (
+                self._build_exec_tool,
+                self._build_read_tool,
+                self._build_write_tool,
+                self._build_edit_tool,
+                self._build_glob_tool,
+                self._build_grep_tool,
+            )
+            self._tools = [build() for build in builders]
+        return list(self._tools)
+
+    async def has_tool(self, name: str) -> bool:
+        """Report whether ``name`` is a native tool and the sandbox is currently available."""
+        if name not in _ALL_TOOL_NAMES:
+            return False
+        return self._is_sandbox_available()
+
+    async def invoke_tool(self, name: str, parameters: dict, query: pipeline_query.Query):
+        """Dispatch a native tool call to its handler.
+
+        Args:
+            name: Tool name; one of the native tool name constants.
+            parameters: Tool arguments, passed to the handler unchanged.
+            query: The pipeline query the call belongs to.
+
+        Raises:
+            ValueError: ``name`` is not a native tool.
+        """
+        if name == EXEC_TOOL_NAME:
+            # Only exec calls are logged here, with summarized parameters.
+            self.ap.logger.info(
+                'exec tool invoked: '
+                f'query_id={query.query_id} '
+                f'parameters={json.dumps(self._summarize_parameters(parameters), ensure_ascii=False)}'
+            )
+            return await self._invoke_exec(parameters, query)
+
+        # Handlers are looked up by attribute name only after a match.
+        handler_table = (
+            (READ_TOOL_NAME, '_invoke_read'),
+            (WRITE_TOOL_NAME, '_invoke_write'),
+            (EDIT_TOOL_NAME, '_invoke_edit'),
+            (GLOB_TOOL_NAME, '_invoke_glob'),
+            (GREP_TOOL_NAME, '_invoke_grep'),
+        )
+        for tool_name, handler_name in handler_table:
+            if name == tool_name:
+                return await getattr(self, handler_name)(parameters, query)
+        raise ValueError(f'未找到工具: {name}')
+
+    async def shutdown(self):
+        """No-op: this loader does nothing on shutdown."""
+        pass
+
+    async def _invoke_exec(self, parameters: dict, query: pipeline_query.Query) -> dict:
+        """Run the exec tool, enforcing the one-activated-skill-per-call rules.
+
+        The target skill comes from ``workdir`` first; if that yields none, it
+        comes from skill names referenced in the command. When a skill is
+        selected and needs a Python environment, the command is wrapped in a
+        copy of ``parameters`` (the caller's dict is not mutated).
+
+        Args:
+            parameters: Must contain ``command``; ``workdir`` defaults to
+                ``/workspace`` when missing or falsy.
+            query: The pipeline query the call belongs to.
+
+        Returns:
+            Whatever ``box_service.execute_tool`` returns.
+
+        Raises:
+            KeyError: ``parameters`` has no ``command`` entry.
+            ValueError: More than one skill is referenced, the referenced
+                skill is not activated, or the skill has no ``package_root``.
+        """
+        command = str(parameters['command'])
+        workdir = str(parameters.get('workdir', '/workspace') or '/workspace')
+
+        # Validate that skill references target activated skills.
+        selected_skill, _ = skill_loader.resolve_virtual_skill_path(
+            self.ap,
+            query,
+            workdir,
+            include_visible=False,
+            include_activated=True,
+        )
+        referenced_skill_names = skill_loader.find_referenced_skill_names(command)
+
+        if selected_skill is None and referenced_skill_names:
+            if len(referenced_skill_names) > 1:
+                raise ValueError('exec can target at most one activated skill package per call.')
+            selected_skill = skill_loader.get_activated_skill(query, referenced_skill_names[0])
+            if selected_skill is None:
+                raise ValueError(
+                    f'Skill "{referenced_skill_names[0]}" must be activated before exec can run in its package.'
+                )
+
+        if selected_skill is not None:
+            selected_skill_name = str(selected_skill.get('name', '') or '')
+            # Every skill named in the command must be the selected one.
+            if referenced_skill_names and any(name != selected_skill_name for name in referenced_skill_names):
+                raise ValueError('exec can reference files from only one activated skill package per call.')
+
+            package_root = str(selected_skill.get('package_root', '') or '').strip()
+            if not package_root:
+                raise ValueError(f'Activated skill "{selected_skill_name}" has no package_root.')
+
+            # Wrap command with Python venv bootstrap if the skill has a Python project.
+            # The venv is created inside the skill's mount path.
+            skill_mount = f'/workspace/.skills/{selected_skill_name}'
+            if skill_loader.should_prepare_skill_python_env(package_root):
+                parameters = dict(parameters)
+                parameters['command'] = skill_loader.wrap_skill_command_with_python_env(command, mount_path=skill_mount)
+
+        # All exec calls (with or without skills) go through the same container
+        # via execute_tool. Skills are mounted at /workspace/.skills/{name}/
+        # via extra_mounts built by BoxService.
+        result = await self.ap.box_service.execute_tool(parameters, query)
+
+        # The command may have changed the skill's files, so refresh it from disk.
+        if selected_skill is not None:
+            self._refresh_skill_from_disk(selected_skill)
+        return result
+
+    def _resolve_host_path(
+        self,
+        query: pipeline_query.Query,
+        sandbox_path: str,
+        *,
+        include_visible: bool,
+        include_activated: bool,
+    ) -> tuple[str, dict | None]:
+        """Map a sandbox path to a real host path, confined to its workspace root.
+
+        Args:
+            query: The pipeline query used to resolve virtual skill paths.
+            sandbox_path: Path as seen inside the sandbox.
+            include_visible: Forwarded to the skill path resolver.
+            include_activated: Forwarded to the skill path resolver.
+
+        Returns:
+            ``(host_path, selected_skill)``; the root is the skill's
+            ``package_root`` when a skill matched, else the Box default
+            workspace.
+
+        Raises:
+            ValueError: No host root is configured, the path is not
+                ``/workspace`` or below it, or the resolved path leaves the root.
+        """
+        selected_skill, rewritten_path = skill_loader.resolve_virtual_skill_path(
+            self.ap,
+            query,
+            sandbox_path,
+            include_visible=include_visible,
+            include_activated=include_activated,
+        )
+
+        box_service = self.ap.box_service
+        host_root = selected_skill.get('package_root') if selected_skill is not None else box_service.default_workspace
+        if not host_root:
+            raise ValueError('No host workspace configured for file operations.')
+
+        mount_path = '/workspace'
+        # Match on a path-segment boundary: a bare prefix test would also accept
+        # siblings such as '/workspace2/x' and map them into the workspace root.
+        if rewritten_path != mount_path and not rewritten_path.startswith(mount_path + '/'):
+            raise ValueError(f'Path must be under {mount_path}.')
+
+        relative = rewritten_path[len(mount_path) :].lstrip('/')
+        # realpath resolves '..' and symlinks before the containment check below.
+        host_path = os.path.realpath(os.path.join(host_root, relative))
+        host_root = os.path.realpath(host_root)
+
+        if not (host_path == host_root or host_path.startswith(host_root + os.sep)):
+            raise ValueError('Path escapes the workspace boundary.')
+
+        return host_path, selected_skill
+
+    def _resolve_skill_relative_path(
+        self,
+        query: pipeline_query.Query,
+        sandbox_path: str,
+        *,
+        include_visible: bool,
+        include_activated: bool,
+    ) -> tuple[dict, str] | None:
+        """Resolve a sandbox path to ``(skill, path relative to the skill root)``.
+
+        Args:
+            query: The pipeline query used to resolve virtual skill paths.
+            sandbox_path: Path as seen inside the sandbox.
+            include_visible: Forwarded to the skill path resolver.
+            include_activated: Forwarded to the skill path resolver.
+
+        Returns:
+            ``None`` when the path belongs to no skill; otherwise the skill and
+            the relative path, with ``'.'`` standing for the skill root itself.
+
+        Raises:
+            ValueError: The rewritten path is not ``/workspace`` or below it.
+        """
+        selected_skill, rewritten_path = skill_loader.resolve_virtual_skill_path(
+            self.ap,
+            query,
+            sandbox_path,
+            include_visible=include_visible,
+            include_activated=include_activated,
+        )
+        if selected_skill is None:
+            return None
+
+        mount_path = '/workspace'
+        # Match on a path-segment boundary: a bare prefix test would also accept
+        # siblings such as '/workspace2/x'.
+        if rewritten_path != mount_path and not rewritten_path.startswith(mount_path + '/'):
+            raise ValueError(f'Path must be under {mount_path}.')
+        relative = rewritten_path[len(mount_path) :].lstrip('/') or '.'
+        return selected_skill, relative
+
+    def _should_use_box_workspace_files(self, selected_skill: dict | None) -> bool:
+        """Decide whether a workspace file operation has to run inside the Box.
+
+        Returns:
+            ``True`` only for non-skill paths when the Box service offers
+            ``execute_tool`` and its ``default_workspace`` is set but is not a
+            directory on this host.
+        """
+        if selected_skill is not None:
+            return False
+        service = getattr(self.ap, 'box_service', None)
+        if service is None:
+            return False
+        if not hasattr(service, 'execute_tool'):
+            return False
+        workspace_root = getattr(service, 'default_workspace', None)
+        if not workspace_root:
+            return False
+        return not os.path.isdir(os.path.realpath(workspace_root))
+
+    async def _run_workspace_file_script(self, script: str, query: pipeline_query.Query) -> dict:
+        """Run a Python script inside the Box and decode its JSON result.
+
+        The script is fed to ``python -`` through a quoted heredoc with a
+        30-second timeout, and the last line of stdout is parsed as JSON.
+
+        Returns:
+            The decoded object, or ``{'ok': False, 'error': ...}`` when the
+            execution fails or the output cannot be parsed.
+        """
+        heredoc_command = f"python - <<'PY'\n{script}\nPY"
+        result = await self.ap.box_service.execute_tool({'command': heredoc_command, 'timeout_sec': 30}, query)
+        if not result.get('ok'):
+            failure = result.get('stderr') or result.get('stdout') or 'Box execution failed'
+            return {'ok': False, 'error': failure}
+
+        stdout = str(result.get('stdout') or '').strip()
+        try:
+            # Empty output raises IndexError here and is reported like a parse failure.
+            last_line = stdout.splitlines()[-1]
+            return json.loads(last_line)
+        except Exception:
+            return {'ok': False, 'error': stdout or 'Box file operation returned no result'}
+
+    async def _read_workspace_via_box(self, path: str, query: pipeline_query.Query) -> dict:
+        """Read a file, or list a directory, by running a script inside the Box.
+
+        ``path`` is embedded in the script as a JSON string literal. The script
+        reports an error for paths that do not start with ``/workspace`` or do
+        not exist, returns a sorted newline-joined listing for directories,
+        and otherwise the file text decoded as UTF-8 with replacement.
+        """
+        script = f"""
+import json, os
+path = {json.dumps(path)}
+if not path.startswith('/workspace'):
+    print(json.dumps({{'ok': False, 'error': 'Path must be under /workspace.'}}))
+elif not os.path.exists(path):
+    print(json.dumps({{'ok': False, 'error': f'File not found: {{path}}'}}))
+elif os.path.isdir(path):
+    print(json.dumps({{'ok': True, 'content': '\\n'.join(sorted(os.listdir(path))), 'is_directory': True}}))
+else:
+    with open(path, 'r', encoding='utf-8', errors='replace') as f:
+        print(json.dumps({{'ok': True, 'content': f.read()}}))
+""".strip()
+        return await self._run_workspace_file_script(script, query)
+
+    async def _write_workspace_via_box(self, path: str, content: str, query: pipeline_query.Query) -> dict:
+        """Write ``content`` to ``path`` by running a script inside the Box.
+
+        ``path`` and ``content`` are embedded in the script as JSON string
+        literals. The script rejects paths that do not start with
+        ``/workspace``, creates missing parent directories, and overwrites
+        the file as UTF-8.
+        """
+        script = f"""
+import json, os
+path = {json.dumps(path)}
+content = {json.dumps(content)}
+if not path.startswith('/workspace'):
+    print(json.dumps({{'ok': False, 'error': 'Path must be under /workspace.'}}))
+else:
+    os.makedirs(os.path.dirname(path) or '/workspace', exist_ok=True)
+    with open(path, 'w', encoding='utf-8') as f:
+        f.write(content)
+    print(json.dumps({{'ok': True, 'path': path}}))
+""".strip()
+        return await self._run_workspace_file_script(script, query)
+
+    async def _edit_workspace_via_box(
+        self,
+        path: str,
+        old_string: str,
+        new_string: str,
+        query: pipeline_query.Query,
+    ) -> dict:
+        """Replace one unique occurrence of ``old_string`` in a file, inside the Box.
+
+        All three strings are embedded in the script as JSON string literals.
+        The script reports an error when the path does not start with
+        ``/workspace``, is not a regular file, or when ``old_string`` occurs
+        zero times or more than once; otherwise it rewrites the file with the
+        single replacement applied.
+        """
+        script = f"""
+import json, os
+path = {json.dumps(path)}
+old_string = {json.dumps(old_string)}
+new_string = {json.dumps(new_string)}
+if not path.startswith('/workspace'):
+    print(json.dumps({{'ok': False, 'error': 'Path must be under /workspace.'}}))
+elif not os.path.isfile(path):
+    print(json.dumps({{'ok': False, 'error': f'File not found: {{path}}'}}))
+else:
+    with open(path, 'r', encoding='utf-8', errors='replace') as f:
+        content = f.read()
+    count = content.count(old_string)
+    if count == 0:
+        print(json.dumps({{'ok': False, 'error': 'old_string not found in file.'}}))
+    elif count > 1:
+        print(json.dumps({{'ok': False, 'error': f'old_string matches {{count}} locations; provide a more unique string.'}}))
+    else:
+        with open(path, 'w', encoding='utf-8') as f:
+            f.write(content.replace(old_string, new_string, 1))
+        print(json.dumps({{'ok': True, 'path': path}}))
+""".strip()
+        return await self._run_workspace_file_script(script, query)
+
+    async def _glob_workspace_via_box(self, path: str, pattern: str, query: pipeline_query.Query) -> dict:
+        """Recursively glob ``pattern`` under ``path`` by running a script inside the Box.
+
+        The script skips any hit with a path component in ``_SKIP_DIRS``,
+        sorts hits by modification time (newest first) and returns at most
+        100 of them as ``matches``, together with the full ``total`` and a
+        ``truncated`` flag.
+        """
+        script = f"""
+import json, os
+from pathlib import Path
+path = {json.dumps(path)}
+pattern = {json.dumps(pattern)}
+skip_dirs = {json.dumps(sorted(_SKIP_DIRS))}
+if not path.startswith('/workspace'):
+    print(json.dumps({{'ok': False, 'error': 'Path must be under /workspace.'}}))
+elif not os.path.isdir(path):
+    print(json.dumps({{'ok': False, 'error': f'Path is not a directory: {{path}}'}}))
+else:
+    base = Path(path)
+    hits = [
+        item for item in base.rglob(pattern)
+        if not any(part in skip_dirs for part in item.parts)
+    ]
+    hits.sort(key=lambda item: item.stat().st_mtime if item.exists() else 0, reverse=True)
+    shown = hits[:100]
+    matches = []
+    for item in shown:
+        rel = os.path.relpath(str(item), path)
+        matches.append(os.path.join(path, rel).replace(os.sep, '/'))
+    print(json.dumps({{'ok': True, 'matches': matches, 'total': len(hits), 'truncated': len(hits) > 100}}))
+""".strip()
+        return await self._run_workspace_file_script(script, query)
+
+    async def _grep_workspace_via_box(
+        self,
+        path: str,
+        pattern: str,
+        include: str | None,
+        query: pipeline_query.Query,
+    ) -> dict:
+        """Search files for a regex line by line, by running a script inside the Box.
+
+        ``path`` may be a single file or a directory. For a directory the
+        script walks it recursively with ``include`` as the glob (``*`` when
+        unset), skips anything under a ``_SKIP_DIRS`` component, and stops
+        collecting after 5000 files. At most 200 matches are returned, each
+        with ``file``, ``line`` and ``content``; ``total`` is the number of
+        returned matches, and ``truncated`` is set once that cap is reached.
+        An invalid regex yields an error result instead.
+        """
+        script = f"""
+import json, os, re
+from pathlib import Path
+path = {json.dumps(path)}
+pattern = {json.dumps(pattern)}
+include = {json.dumps(include)}
+skip_dirs = {json.dumps(sorted(_SKIP_DIRS))}
+try:
+    regex = re.compile(pattern)
+except re.error as exc:
+    print(json.dumps({{'ok': False, 'error': f'Invalid regex: {{exc}}'}}))
+else:
+    if not path.startswith('/workspace'):
+        print(json.dumps({{'ok': False, 'error': 'Path must be under /workspace.'}}))
+    elif not os.path.exists(path):
+        print(json.dumps({{'ok': False, 'error': f'Path not found: {{path}}'}}))
+    else:
+        base = Path(path)
+        if base.is_file():
+            files = [base]
+        else:
+            files = []
+            for item in base.rglob(include or '*'):
+                if any(part in skip_dirs for part in item.parts):
+                    continue
+                if item.is_file():
+                    files.append(item)
+                if len(files) >= 5000:
+                    break
+
+        matches = []
+        for fp in files:
+            try:
+                text = fp.read_text(errors='ignore')
+            except OSError:
+                continue
+            for lineno, line in enumerate(text.splitlines(), 1):
+                if regex.search(line):
+                    if base.is_file():
+                        file_path = path
+                    else:
+                        rel = os.path.relpath(str(fp), path)
+                        file_path = os.path.join(path, rel).replace(os.sep, '/')
+                    matches.append({{'file': file_path, 'line': lineno, 'content': line.rstrip()}})
+                    if len(matches) >= 200:
+                        break
+            if len(matches) >= 200:
+                break
+
+        print(json.dumps({{'ok': True, 'matches': matches, 'total': len(matches), 'truncated': len(matches) >= 200}}))
+""".strip()
+        return await self._run_workspace_file_script(script, query)
+
+    async def _invoke_read(self, parameters: dict, query: pipeline_query.Query) -> dict:
+        """Handle the read tool: return a file's text or a directory listing.
+
+        Resolution order:
+
+        1. If the path resolves to a skill-relative path and the box service
+           exposes ``read_skill_file``, read through the box service. When that
+           raises, the path is retried as a skill directory listing.
+        2. If workspace files are served by the box, delegate to
+           ``_read_workspace_via_box``.
+        3. Otherwise read from the resolved host path.
+
+        Returns:
+            ``{'ok': True, 'content': ...}`` (plus ``'is_directory': True`` for
+            listings) on success, or ``{'ok': False, 'error': ...}``.
+        """
+        path = parameters['path']
+        self.ap.logger.info(f'read tool invoked: query_id={query.query_id} path={path}')
+        skill_request = self._resolve_skill_relative_path(
+            query,
+            path,
+            include_visible=True,
+            include_activated=True,
+        )
+        if skill_request is not None and hasattr(self.ap.box_service, 'read_skill_file'):
+            selected_skill, relative = skill_request
+            try:
+                result = await self.ap.box_service.read_skill_file(selected_skill['name'], relative)
+                return {'ok': True, 'content': result.get('content', '')}
+            except Exception:
+                # Best effort: the path may be a directory, so try listing it
+                # before reporting a failure.
+                try:
+                    result = await self.ap.box_service.list_skill_files(selected_skill['name'], relative)
+                    entries = [entry['name'] for entry in result.get('entries', [])]
+                    return {'ok': True, 'content': '\n'.join(sorted(entries)), 'is_directory': True}
+                except Exception as exc:
+                    return {'ok': False, 'error': str(exc)}
+
+        host_path, selected_skill = self._resolve_host_path(
+            query,
+            path,
+            include_visible=True,
+            include_activated=True,
+        )
+        if self._should_use_box_workspace_files(selected_skill):
+            return await self._read_workspace_via_box(path, query)
+        if not os.path.exists(host_path):
+            return {'ok': False, 'error': f'File not found: {path}'}
+        if os.path.isdir(host_path):
+            entries = os.listdir(host_path)
+            return {'ok': True, 'content': '\n'.join(sorted(entries)), 'is_directory': True}
+        # Decode explicitly as UTF-8 so reads do not depend on the host locale
+        # and agree with the write/edit tools, which always write UTF-8.
+        with open(host_path, 'r', encoding='utf-8', errors='replace') as f:
+            content = f.read()
+        return {'ok': True, 'content': content}
+
+    async def _invoke_write(self, parameters: dict, query: pipeline_query.Query) -> dict:
+        """Handle the write tool: create or overwrite a file.
+
+        Only activated skills are considered when resolving the path. A
+        skill-relative path is written through the box service (when it exposes
+        ``write_skill_file``) followed by a skill reload. Otherwise the write
+        goes to the box workspace or to the resolved host path.
+
+        Returns:
+            ``{'ok': True, 'path': path}`` for the skill and host branches; the
+            box-workspace branch returns whatever ``_write_workspace_via_box``
+            returns.
+        """
+        path, content = parameters['path'], parameters['content']
+        self.ap.logger.info(f'write tool invoked: query_id={query.query_id} path={path} length={len(content)}')
+        lookup = {'include_visible': False, 'include_activated': True}
+        success = {'ok': True, 'path': path}
+
+        skill_request = self._resolve_skill_relative_path(query, path, **lookup)
+        if skill_request is not None and hasattr(self.ap.box_service, 'write_skill_file'):
+            skill, relative = skill_request
+            await self.ap.box_service.write_skill_file(skill['name'], relative, content)
+            await self.ap.skill_mgr.reload_skills()
+            return success
+
+        host_path, skill = self._resolve_host_path(query, path, **lookup)
+        if self._should_use_box_workspace_files(skill):
+            return await self._write_workspace_via_box(path, content, query)
+
+        # Create missing parent directories before writing on the host.
+        parent_dir = os.path.dirname(host_path)
+        os.makedirs(parent_dir, exist_ok=True)
+        with open(host_path, 'w', encoding='utf-8') as out:
+            out.write(content)
+        self._refresh_skill_from_disk(skill)
+        return success
+
+    async def _invoke_edit(self, parameters: dict, query: pipeline_query.Query) -> dict:
+        """Handle the edit tool: replace exactly one occurrence of a string.
+
+        Only activated skills are considered when resolving the path. The edit
+        is applied through the box service for skill-relative paths (when it
+        exposes both ``read_skill_file`` and ``write_skill_file``), through the
+        box workspace when that is in use, or directly on the host path.
+
+        Returns:
+            ``{'ok': True, 'path': path}`` on success, or
+            ``{'ok': False, 'error': ...}`` when the file is missing or
+            ``old_string`` is empty, absent, or ambiguous.
+        """
+        path = parameters['path']
+        old_string = parameters['old_string']
+        new_string = parameters['new_string']
+        self.ap.logger.info(
+            f'edit tool invoked: query_id={query.query_id} path={path} '
+            f'old_len={len(old_string)} new_len={len(new_string)}'
+        )
+        # str.count('') is len(content) + 1, so an empty needle used to produce a
+        # misleading "matches N locations" error, or silently prepended to an
+        # empty file. Reject it up front for every backend.
+        if not old_string:
+            return {'ok': False, 'error': 'old_string must not be empty.'}
+
+        def replace_once(content: str) -> tuple[str | None, dict | None]:
+            """Return (new_content, None), or (None, error_result) unless there is exactly one match."""
+            count = content.count(old_string)
+            if count == 0:
+                return None, {'ok': False, 'error': 'old_string not found in file.'}
+            if count > 1:
+                return None, {
+                    'ok': False,
+                    'error': f'old_string matches {count} locations; provide a more unique string.',
+                }
+            return content.replace(old_string, new_string, 1), None
+
+        skill_request = self._resolve_skill_relative_path(
+            query,
+            path,
+            include_visible=False,
+            include_activated=True,
+        )
+        if (
+            skill_request is not None
+            and hasattr(self.ap.box_service, 'read_skill_file')
+            and hasattr(self.ap.box_service, 'write_skill_file')
+        ):
+            selected_skill, relative = skill_request
+            try:
+                result = await self.ap.box_service.read_skill_file(selected_skill['name'], relative)
+            except Exception:
+                return {'ok': False, 'error': f'File not found: {path}'}
+            new_content, failure = replace_once(result.get('content', ''))
+            if failure is not None:
+                return failure
+            await self.ap.box_service.write_skill_file(selected_skill['name'], relative, new_content)
+            await self.ap.skill_mgr.reload_skills()
+            return {'ok': True, 'path': path}
+
+        host_path, selected_skill = self._resolve_host_path(
+            query,
+            path,
+            include_visible=False,
+            include_activated=True,
+        )
+        if self._should_use_box_workspace_files(selected_skill):
+            return await self._edit_workspace_via_box(path, old_string, new_string, query)
+        if not os.path.isfile(host_path):
+            return {'ok': False, 'error': f'File not found: {path}'}
+        with open(host_path, 'r', encoding='utf-8', errors='replace') as f:
+            content = f.read()
+        new_content, failure = replace_once(content)
+        if failure is not None:
+            return failure
+        with open(host_path, 'w', encoding='utf-8') as f:
+            f.write(new_content)
+        self._refresh_skill_from_disk(selected_skill)
+        return {'ok': True, 'path': path}
+
+    def _refresh_skill_from_disk(self, selected_skill: dict | None) -> None:
+        """Ask the skill manager to refresh one skill, when that is possible.
+
+        Does nothing when no skill was selected, when the application has no
+        ``skill_mgr``, or when the manager has no callable
+        ``refresh_skill_from_disk``.
+        """
+        if selected_skill is None:
+            return
+
+        manager = getattr(self.ap, 'skill_mgr', None)
+        refresher = None if manager is None else getattr(manager, 'refresh_skill_from_disk', None)
+        if not callable(refresher):
+            return
+
+        refresher(selected_skill.get('name', ''))
+
+    def _is_sandbox_available(self) -> bool:
+        """Report whether the sandbox backend is usable.
+
+        Returns the truthiness of the cached ``_backend_available`` flag; no
+        live probe of the box service is made here.
+        """
+        return True if self._backend_available else False
+
+    def _build_exec_tool(self) -> resource_tool.LLMTool:
+        """Build the LLM tool definition for command execution.
+
+        The schema requires ``command`` and optionally accepts ``workdir``,
+        ``timeout_sec``, ``env`` and ``description``. It declares
+        ``additionalProperties: False``.
+        """
+        usage = (
+            'Run shell commands in an isolated execution environment. '
+            'Use this tool for bash commands, Python execution, and exact calculations over '
+            'user-provided data. Activated skill packages are addressable under '
+            '/workspace/.skills/<skill-name>; when running inside one, set workdir to that path. '
+            'To create a new skill package, prepare it under /workspace first, then use register_skill.'
+        )
+        properties = {
+            'command': {
+                'type': 'string',
+                'description': 'Shell command to execute.',
+            },
+            'workdir': {
+                'type': 'string',
+                'description': 'Working directory for the command. Defaults to /workspace.',
+                'default': '/workspace',
+            },
+            'timeout_sec': {
+                'type': 'integer',
+                'description': 'Execution timeout in seconds. Defaults to 30.',
+                'default': 30,
+                'minimum': 1,
+            },
+            'env': {
+                'type': 'object',
+                'description': 'Optional environment variables for the execution.',
+                'additionalProperties': {'type': 'string'},
+                'default': {},
+            },
+            'description': {
+                'type': 'string',
+                'description': 'Brief description of what this command does, for logging and audit.',
+            },
+        }
+        schema = {
+            'type': 'object',
+            'properties': properties,
+            'required': ['command'],
+            'additionalProperties': False,
+        }
+        # func simply echoes the parameters it is given.
+        return resource_tool.LLMTool(
+            name=EXEC_TOOL_NAME,
+            human_desc='Execute a command in an isolated environment',
+            description=usage,
+            parameters=schema,
+            func=lambda parameters: parameters,
+        )
+
+    def _build_read_tool(self) -> resource_tool.LLMTool:
+        """Build the LLM tool definition for reading a workspace file.
+
+        The schema takes a single required ``path`` string and declares
+        ``additionalProperties: False``.
+        """
+        usage = (
+            'Read the contents of a file at the given path under /workspace. '
+            'Visible skill packages can be inspected through /workspace/.skills/<skill-name>/... .'
+        )
+        path_property = {
+            'type': 'string',
+            'description': 'Absolute path to the file (must be under /workspace).',
+        }
+        schema = {
+            'type': 'object',
+            'properties': {'path': path_property},
+            'required': ['path'],
+            'additionalProperties': False,
+        }
+        # func simply echoes the parameters it is given.
+        return resource_tool.LLMTool(
+            name=READ_TOOL_NAME,
+            human_desc='Read a file from the workspace',
+            description=usage,
+            parameters=schema,
+            func=lambda parameters: parameters,
+        )
+
+    def _build_write_tool(self) -> resource_tool.LLMTool:
+        """Build the LLM tool definition for writing a workspace file.
+
+        The schema requires ``path`` and ``content`` strings and declares
+        ``additionalProperties: False``.
+        """
+        usage = (
+            'Create or overwrite a file at the given path under /workspace with the provided content. '
+            'Activated skill packages can be modified through /workspace/.skills/<skill-name>/... . '
+            'For new skills, write files under /workspace and then call register_skill.'
+        )
+        properties = {
+            'path': {
+                'type': 'string',
+                'description': 'Absolute path to the file (must be under /workspace).',
+            },
+            'content': {
+                'type': 'string',
+                'description': 'Content to write to the file.',
+            },
+        }
+        schema = {
+            'type': 'object',
+            'properties': properties,
+            'required': ['path', 'content'],
+            'additionalProperties': False,
+        }
+        # func simply echoes the parameters it is given.
+        return resource_tool.LLMTool(
+            name=WRITE_TOOL_NAME,
+            human_desc='Write a file to the workspace',
+            description=usage,
+            parameters=schema,
+            func=lambda parameters: parameters,
+        )
+
+    def _build_edit_tool(self) -> resource_tool.LLMTool:
+        """Build the LLM tool definition for exact-string file edits.
+
+        The schema requires ``path``, ``old_string`` and ``new_string`` strings
+        and declares ``additionalProperties: False``.
+        """
+        usage = (
+            'Perform an exact string replacement in a file under /workspace. '
+            'The old_string must appear exactly once in the file. Activated skill packages '
+            'can be edited through /workspace/.skills/<skill-name>/... . '
+            'For new skills, edit files under /workspace and then call register_skill.'
+        )
+        properties = {
+            'path': {
+                'type': 'string',
+                'description': 'Absolute path to the file (must be under /workspace).',
+            },
+            'old_string': {
+                'type': 'string',
+                'description': 'The exact string to find and replace.',
+            },
+            'new_string': {
+                'type': 'string',
+                'description': 'The replacement string.',
+            },
+        }
+        schema = {
+            'type': 'object',
+            'properties': properties,
+            'required': ['path', 'old_string', 'new_string'],
+            'additionalProperties': False,
+        }
+        # func simply echoes the parameters it is given.
+        return resource_tool.LLMTool(
+            name=EDIT_TOOL_NAME,
+            human_desc='Edit a file in the workspace',
+            description=usage,
+            parameters=schema,
+            func=lambda parameters: parameters,
+        )
+
+    def _build_glob_tool(self) -> resource_tool.LLMTool:
+        """Build the LLM tool definition for glob-based file discovery.
+
+        The schema requires ``pattern`` and optionally accepts ``path``
+        (default ``/workspace``). It declares ``additionalProperties: False``.
+        """
+        usage = (
+            'Find files matching a glob pattern under /workspace. '
+            'Supports ** for recursive matching (e.g. **/*.py). '
+            'Results are sorted by modification time (newest first). '
+            'Visible and activated skill packages can be searched through /workspace/.skills/<skill-name>/...'
+        )
+        properties = {
+            'pattern': {
+                'type': 'string',
+                'description': 'Glob pattern, e.g. **/*.py or src/**/*.ts',
+            },
+            'path': {
+                'type': 'string',
+                'description': 'Directory to search in (must be under /workspace, default: /workspace)',
+                'default': '/workspace',
+            },
+        }
+        schema = {
+            'type': 'object',
+            'properties': properties,
+            'required': ['pattern'],
+            'additionalProperties': False,
+        }
+        # func simply echoes the parameters it is given.
+        return resource_tool.LLMTool(
+            name=GLOB_TOOL_NAME,
+            human_desc='Find files matching a glob pattern',
+            description=usage,
+            parameters=schema,
+            func=lambda parameters: parameters,
+        )
+
+    def _build_grep_tool(self) -> resource_tool.LLMTool:
+        """Build the LLM tool definition for regex content search.
+
+        The schema requires ``pattern`` and optionally accepts ``path``
+        (default ``/workspace``) and ``include``. It declares
+        ``additionalProperties: False``.
+        """
+        usage = (
+            'Search file contents with regex pattern under /workspace. '
+            'Returns matching lines with file path and line number. '
+            'Visible and activated skill packages can be searched through /workspace/.skills/<skill-name>/...'
+        )
+        properties = {
+            'pattern': {
+                'type': 'string',
+                'description': 'Regex pattern to search for',
+            },
+            'path': {
+                'type': 'string',
+                'description': 'File or directory to search (must be under /workspace, default: /workspace)',
+                'default': '/workspace',
+            },
+            'include': {
+                'type': 'string',
+                'description': 'Only search files matching this glob (e.g. *.py)',
+            },
+        }
+        schema = {
+            'type': 'object',
+            'properties': properties,
+            'required': ['pattern'],
+            'additionalProperties': False,
+        }
+        # func simply echoes the parameters it is given.
+        return resource_tool.LLMTool(
+            name=GREP_TOOL_NAME,
+            human_desc='Search file contents with regex',
+            description=usage,
+            parameters=schema,
+            func=lambda parameters: parameters,
+        )
+
+    async def _invoke_glob(self, parameters: dict, query: pipeline_query.Query) -> dict:
+        """Handle the glob tool: list paths under a directory matching a pattern.
+
+        The search is delegated to the box when workspace files are served from
+        there; otherwise it runs ``Path.rglob`` on the resolved host directory.
+        Hits inside ``_SKIP_DIRS`` are dropped, the rest are ordered newest
+        first by modification time, and at most 100 are returned as sandbox
+        paths.
+
+        Returns:
+            ``{'ok': True, 'matches': [...], 'total': N, 'truncated': bool}``
+            where ``total`` counts all hits before truncation, or
+            ``{'ok': False, 'error': ...}``.
+        """
+        pattern = parameters['pattern']
+        path = str(parameters.get('path', '/workspace') or '/workspace')
+        self.ap.logger.info(f'glob tool invoked: query_id={query.query_id} pattern={pattern} path={path}')
+
+        host_path, selected_skill = self._resolve_host_path(
+            query,
+            path,
+            include_visible=True,
+            include_activated=True,
+        )
+        if self._should_use_box_workspace_files(selected_skill):
+            return await self._glob_workspace_via_box(path, pattern, query)
+
+        if not os.path.isdir(host_path):
+            return {'ok': False, 'error': f'Path is not a directory: {path}'}
+
+        from pathlib import Path
+
+        base = Path(host_path)
+        try:
+            hits = list(base.rglob(pattern))
+        except (ValueError, NotImplementedError) as exc:
+            # pathlib rejects empty patterns (ValueError) and absolute patterns
+            # (NotImplementedError); report that instead of crashing the tool.
+            return {'ok': False, 'error': f'Invalid glob pattern: {exc}'}
+
+        # Test only the components *below* the search root. Checking absolute
+        # parts would discard every hit whenever the host directory happens to
+        # live under a folder named like a skipped directory.
+        hits = [h for h in hits if not any(part in _SKIP_DIRS for part in h.relative_to(base).parts)]
+
+        def _mtime(hit) -> float:
+            """Modification time, or 0 when the entry cannot be stat'ed."""
+            try:
+                return hit.stat().st_mtime
+            except OSError:
+                return 0
+
+        # Newest first.
+        hits.sort(key=_mtime, reverse=True)
+
+        total = len(hits)
+
+        # Map host paths back to sandbox paths, always using '/' separators.
+        sandbox_paths = [
+            os.path.join(path, os.path.relpath(str(h), host_path)).replace(os.sep, '/') for h in hits[:100]
+        ]
+
+        return {'ok': True, 'matches': sandbox_paths, 'total': total, 'truncated': total > 100}
+
+    async def _invoke_grep(self, parameters: dict, query: pipeline_query.Query) -> dict:
+        """Handle the grep tool: search file contents line by line with a regex.
+
+        The regex is compiled first so an invalid pattern is reported before any
+        path resolution. The search is delegated to the box when workspace files
+        are served from there; otherwise it runs on the resolved host path,
+        which may be a single file or a directory (walked via ``_grep_walk``).
+
+        Returns:
+            ``{'ok': True, 'matches': [{'file', 'line', 'content'}, ...],
+            'total': N, 'truncated': bool}`` with at most 200 matches, or
+            ``{'ok': False, 'error': ...}``.
+        """
+        pattern = parameters['pattern']
+        path = str(parameters.get('path', '/workspace') or '/workspace')
+        include = parameters.get('include')
+        self.ap.logger.info(f'grep tool invoked: query_id={query.query_id} pattern={pattern} path={path}')
+
+        import re
+        from pathlib import Path
+
+        try:
+            regex = re.compile(pattern)
+        except re.error as e:
+            return {'ok': False, 'error': f'Invalid regex: {e}'}
+
+        host_path, selected_skill = self._resolve_host_path(
+            query,
+            path,
+            include_visible=True,
+            include_activated=True,
+        )
+        if self._should_use_box_workspace_files(selected_skill):
+            return await self._grep_workspace_via_box(path, pattern, include, query)
+
+        if not os.path.exists(host_path):
+            return {'ok': False, 'error': f'Path not found: {path}'}
+
+        base = Path(host_path)
+        single_file = base.is_file()
+        files = [base] if single_file else self._grep_walk(base, include)
+
+        matches = []
+        for fp in files:
+            try:
+                text = fp.read_text(errors='ignore')
+            except OSError:
+                continue
+            if single_file:
+                # relpath(file, file) is '.', which used to yield '<path>/.';
+                # report the requested path itself, as the box variant does.
+                sandbox_path = path
+            else:
+                rel = os.path.relpath(str(fp), host_path)
+                sandbox_path = os.path.join(path, rel).replace(os.sep, '/')
+            for lineno, line in enumerate(text.splitlines(), 1):
+                if regex.search(line):
+                    matches.append(
+                        {
+                            'file': sandbox_path,
+                            'line': lineno,
+                            'content': line.rstrip(),
+                        }
+                    )
+                    if len(matches) >= 200:
+                        break
+            if len(matches) >= 200:
+                break
+
+        return {
+            'ok': True,
+            'matches': matches,
+            'total': len(matches),
+            'truncated': len(matches) >= 200,
+        }
+
+    @staticmethod
+    def _grep_walk(root, include: str | None) -> list:
+        """Collect files under ``root`` for grep, skipping ``_SKIP_DIRS``.
+
+        Args:
+            root: Directory to walk (a ``pathlib.Path``-like object with ``rglob``).
+            include: Optional glob passed to ``rglob``; every entry is
+                considered when it is empty or None.
+
+        Returns:
+            A list of file paths, capped at 5000 entries.
+        """
+        results = []
+        for item in root.rglob(include or '*'):
+            # Only components below ``root`` count. Checking the absolute parts
+            # would skip everything when ``root`` itself sits under a directory
+            # named like a skipped one.
+            if any(part in _SKIP_DIRS for part in item.relative_to(root).parts):
+                continue
+            if item.is_file():
+                results.append(item)
+            if len(results) >= 5000:
+                break
+        return results
+
+    def _summarize_parameters(self, parameters: dict) -> dict:
+        """Return a shallow copy of ``parameters`` reduced for summary output.
+
+        ``command`` is stringified, stripped and cut to 400 characters (397
+        plus ``...``). A dict-valued ``env`` is replaced by ``env_keys``, the
+        sorted list of its keys, so the values are left out.
+        """
+        digest = dict(parameters)
+
+        command_text = str(digest.get('command', '')).strip()
+        digest['command'] = command_text if len(command_text) <= 400 else f'{command_text[:397]}...'
+
+        env_vars = digest.get('env')
+        if isinstance(env_vars, dict):
+            digest['env_keys'] = sorted(str(name) for name in env_vars)
+            del digest['env']
+
+        return digest
--- a/src/langbot/pkg/provider/tools/loaders/skill.py
+++ b/src/langbot/pkg/provider/tools/loaders/skill.py
@@ -0,0 +1,157 @@
+from __future__ import annotations
+
+import re
+import typing
+
+from ....box import workspace as box_workspace
+
+if typing.TYPE_CHECKING:
+    from ....core import app
+    from langbot_plugin.api.entities.events import pipeline_query
+
+# Key in ``query.variables`` holding the dict of activated skills (name -> skill data).
+ACTIVATED_SKILLS_KEY = '_activated_skills'
+# Key in ``query.variables`` holding the list of skill names bound to the pipeline.
+PIPELINE_BOUND_SKILLS_KEY = '_pipeline_bound_skills'
+# Virtual sandbox directory under which skills are addressed as <prefix>/<skill-name>.
+SKILL_MOUNT_PREFIX = '/workspace/.skills'
+# Captures the skill name from any /workspace/.skills/<skill-name> reference in free text.
+_SKILL_MOUNT_PATTERN = re.compile(r'/workspace/\.skills/([A-Za-z0-9_-]+)')
+
+
+def get_virtual_skill_mount_path(skill_name: str) -> str:
+    """Return the virtual sandbox path of a skill: ``<SKILL_MOUNT_PREFIX>/<skill_name>``."""
+    mount_path = SKILL_MOUNT_PREFIX + f'/{skill_name}'
+    return mount_path
+
+
+def get_bound_skill_names(query: pipeline_query.Query) -> list[str] | None:
+    """Return the skill names stored under ``PIPELINE_BOUND_SKILLS_KEY``.
+
+    Returns None when the query has no variables, the key is absent, or the
+    stored value is not a list. List items are converted with ``str``.
+    """
+    variables = query.variables
+    if variables is None:
+        return None
+
+    stored = variables.get(PIPELINE_BOUND_SKILLS_KEY)
+    if not isinstance(stored, list):
+        return None
+    return [str(entry) for entry in stored]
+
+
+def get_visible_skills(ap: app.Application, query: pipeline_query.Query) -> dict[str, dict]:
+    """Return the skills this query may see, keyed by skill name.
+
+    Without a ``skill_mgr`` on the application the result is empty. When the
+    query carries no bound-skill list, the manager's ``skills`` mapping is
+    returned as-is; otherwise only the bound names are kept.
+    """
+    manager = getattr(ap, 'skill_mgr', None)
+    if manager is None:
+        return {}
+
+    all_skills = getattr(manager, 'skills', {})
+    allowed = get_bound_skill_names(query)
+    if allowed is None:
+        return all_skills
+
+    filtered = {}
+    for name, data in all_skills.items():
+        if name in allowed:
+            filtered[name] = data
+    return filtered
+
+
+def get_visible_skill(ap: app.Application, query: pipeline_query.Query, skill_name: str) -> dict | None:
+    """Look up one visible skill by name; None when it is not visible to this query."""
+    visible = get_visible_skills(ap, query)
+    return visible.get(skill_name)
+
+
+def get_activated_skills(query: pipeline_query.Query) -> dict[str, dict]:
+    """Return the mapping stored under ``ACTIVATED_SKILLS_KEY``.
+
+    An empty dict is returned when the query has no variables, the key is
+    absent, or the stored value is not a dict.
+    """
+    variables = query.variables
+    if variables is None:
+        return {}
+
+    stored = variables.get(ACTIVATED_SKILLS_KEY, {})
+    return stored if isinstance(stored, dict) else {}
+
+
+def get_activated_skill(query: pipeline_query.Query, skill_name: str) -> dict | None:
+    """Look up one activated skill by name; None when it has not been activated."""
+    activated = get_activated_skills(query)
+    return activated.get(skill_name)
+
+
+def register_activated_skill(query: pipeline_query.Query, skill_data: dict) -> None:
+    """Record ``skill_data`` as activated on the query, keyed by its name.
+
+    ``query.variables`` and the activated-skills mapping are created on demand.
+    Nothing is stored when the skill has no non-blank name or when a skill with
+    that name is already registered (the first registration wins).
+    """
+    if query.variables is None:
+        query.variables = {}
+
+    registry = query.variables.setdefault(ACTIVATED_SKILLS_KEY, {})
+    name = str(skill_data.get('name', '') or '').strip()
+    if not name or name in registry:
+        return
+    registry[name] = skill_data
+
+
+def parse_skill_mount_path(sandbox_path: str) -> tuple[str | None, str]:
+    """Split a sandbox path into a skill name and its path inside that skill.
+
+    Paths outside ``SKILL_MOUNT_PREFIX/`` yield ``(None, normalized_path)``.
+    For ``SKILL_MOUNT_PREFIX/<name>[/<tail>]`` the result is
+    ``(<name>, '/workspace')`` or ``(<name>, '/workspace/<tail>')``. A blank
+    path is treated as ``/workspace``.
+
+    Raises:
+        ValueError: if the path is the mount prefix itself or names no skill.
+    """
+    normalized = str(sandbox_path or '/workspace').strip() or '/workspace'
+    missing_name_message = f'Path must include a skill name under {SKILL_MOUNT_PREFIX}/<skill-name>.'
+    if normalized == SKILL_MOUNT_PREFIX:
+        raise ValueError(missing_name_message)
+
+    mount_root = f'{SKILL_MOUNT_PREFIX}/'
+    if not normalized.startswith(mount_root):
+        return None, normalized
+
+    skill_name, slash, tail = normalized[len(mount_root) :].partition('/')
+    if not skill_name:
+        raise ValueError(missing_name_message)
+
+    # With no slash after the name, the path points at the skill root itself.
+    return skill_name, (f'/workspace/{tail}' if slash else '/workspace')
+
+
+def resolve_virtual_skill_path(
+    ap: app.Application,
+    query: pipeline_query.Query,
+    sandbox_path: str,
+    *,
+    include_visible: bool,
+    include_activated: bool,
+) -> tuple[dict | None, str]:
+    """Resolve a sandbox path to the skill it addresses, if any.
+
+    Returns ``(None, path)`` for paths outside the skill mount. Otherwise the
+    skill is looked up among activated skills first (when ``include_activated``)
+    and then among visible skills (when ``include_visible``), and
+    ``(skill_data, rewritten_path)`` is returned.
+
+    Raises:
+        ValueError: if the path names a skill that none of the enabled lookups
+            finds, or if ``parse_skill_mount_path`` rejects the path.
+    """
+    skill_name, rewritten_path = parse_skill_mount_path(sandbox_path)
+    if skill_name is None:
+        return None, rewritten_path
+
+    # Activated skills take precedence over merely visible ones.
+    skill = get_activated_skill(query, skill_name) if include_activated else None
+    if skill is None and include_visible:
+        skill = get_visible_skill(ap, query, skill_name)
+    if skill is not None:
+        return skill, rewritten_path
+
+    activated_names = ', '.join(sorted(get_activated_skills(query).keys())) or 'none'
+    visible_names = ', '.join(sorted(get_visible_skills(ap, query).keys())) or 'none'
+    raise ValueError(
+        f'Skill "{skill_name}" is not available at this path. '
+        f'Activated skills: {activated_names}. Visible skills: {visible_names}.'
+    )
+
+
+def find_referenced_skill_names(text: str) -> list[str]:
+    """Return the distinct skill names referenced as /workspace/.skills/<name> in ``text``.
+
+    Names keep the order of their first occurrence. Empty or missing text
+    yields an empty list.
+    """
+    if not text:
+        return []
+
+    # dict.fromkeys de-duplicates while preserving first-seen order.
+    return list(dict.fromkeys(_SKILL_MOUNT_PATTERN.findall(text)))
+
+
+def rewrite_command_for_skill_mount(command: str, skill_name: str) -> str:
+    """Rewrite references to a skill's virtual mount path to ``/workspace``.
+
+    ``/workspace/.skills/<skill_name>`` and anything below it is replaced by
+    the equivalent path under ``/workspace``.
+
+    The match must end at a skill-name boundary. A plain ``str.replace`` also
+    rewrote longer names sharing the prefix: with skill ``foo``,
+    ``/workspace/.skills/foobar`` became ``/workspacebar``.
+    """
+    virtual_root = get_virtual_skill_mount_path(skill_name)
+    # Negative lookahead over the skill-name character class used by
+    # _SKILL_MOUNT_PATTERN, so only this exact skill's mount is rewritten.
+    boundary_pattern = re.escape(virtual_root) + r'(?![A-Za-z0-9_-])'
+    return re.sub(boundary_pattern, '/workspace', command)
+
+
+def build_skill_session_id(skill_data: dict, query: pipeline_query.Query) -> str:
+    """Build the session id string for a skill in the context of a query.
+
+    The id is ``skill-<launcher_type>_<launcher_id>-<skill>`` when the query
+    has both launcher attributes, otherwise ``skill-<query_id>-<skill>``.
+    Missing or empty values fall back to ``unknown``.
+    """
+    skill_label = str(skill_data.get('name', 'unknown') or 'unknown')
+    launcher_type = getattr(query, 'launcher_type', None)
+    launcher_id = getattr(query, 'launcher_id', None)
+
+    if launcher_type is None or launcher_id is None:
+        scope = f'{getattr(query, "query_id", "unknown")}'
+    else:
+        scope = f'{launcher_type}_{launcher_id}'
+    return f'skill-{scope}-{skill_label}'
+
+
+def should_prepare_skill_python_env(package_root: str | None) -> bool:
+    """Delegate to ``box_workspace.should_prepare_python_env`` for a skill package root.
+
+    NOTE(review): the decision criteria live in the box workspace module and
+    are not visible here.
+    """
+    return box_workspace.should_prepare_python_env(package_root)
+
+
+def wrap_skill_command_with_python_env(command: str, *, mount_path: str = '/workspace') -> str:
+    """Wrap ``command`` via ``box_workspace.wrap_python_command_with_env``.
+
+    Trailing whitespace is stripped from the wrapped command before it is
+    returned.
+    """
+    return box_workspace.wrap_python_command_with_env(command, mount_path=mount_path).rstrip()
--- a/src/langbot/pkg/provider/tools/loaders/skill_authoring.py
+++ b/src/langbot/pkg/provider/tools/loaders/skill_authoring.py
@@ -0,0 +1,304 @@
+from __future__ import annotations
+
+import os
+import typing
+
+import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
+
+from .. import loader
+
+# Align with Claude Code's Skill tool design:
+# - activate: Activate a skill via Tool Call, returns SKILL.md content
+# - register_skill: Register a skill from sandbox directory to data/skills/
+# - This protects KV Cache and follows industry standard
+
+ACTIVATE_SKILL_TOOL_NAME = 'activate'
+REGISTER_SKILL_TOOL_NAME = 'register_skill'
+
+SKILL_TOOL_NAMES = {
+    ACTIVATE_SKILL_TOOL_NAME,
+    REGISTER_SKILL_TOOL_NAME,
+}
+
+
+class SkillToolLoader(loader.ToolLoader):
+    """Tool loader exposing the ``activate`` and ``register_skill`` tools.
+
+    The tools are only offered when the application has a skill manager and
+    the sandbox (box) backend reported itself available during
+    ``initialize``.
+    """
+
+    def __init__(self, ap):
+        super().__init__(ap)
+        # Tool definitions built by initialize(); stays empty without a sandbox.
+        self._tools: list[resource_tool.LLMTool] = []
+        # Result of the sandbox probe performed once in initialize().
+        self._sandbox_available: bool = False
+
+    async def initialize(self):
+        """Probe the sandbox backend and build the tool definitions if it is ready."""
+        # Check if sandbox backend is available (same check as native tools)
+        self._sandbox_available = await self._check_sandbox_available()
+        if self._sandbox_available:
+            self._tools = [
+                self._build_activate_skill_tool(),
+                self._build_register_skill_tool(),
+            ]
+        else:
+            self.ap.logger.info(
+                'Skill tools (activate/register_skill) are NOT available. '
+                'No sandbox backend (Docker/nsjail/E2B) is ready.'
+            )
+
+    async def _check_sandbox_available(self) -> bool:
+        """Check if the box backend is truly available (not just the runtime).
+
+        Returns False when there is no box service, when the service does not
+        report ``available``, or when querying its status raises.
+        """
+        box_service = getattr(self.ap, 'box_service', None)
+        if box_service is None:
+            return False
+        if not getattr(box_service, 'available', False):
+            return False
+        # Check if backend is truly available via get_status
+        try:
+            status = await box_service.get_status()
+            backend_info = status.get('backend', {})
+            return backend_info.get('available', False)
+        except Exception:
+            # Any failure while probing is treated as "no sandbox".
+            return False
+
+    async def get_tools(self, bound_plugins: list[str] | None = None) -> list[resource_tool.LLMTool]:
+        """Return a copy of the skill tools, or an empty list when unavailable.
+
+        ``bound_plugins`` is accepted but not used by this loader.
+        """
+        if not self._is_available():
+            return []
+        return list(self._tools)
+
+    async def has_tool(self, name: str) -> bool:
+        """Report whether ``name`` is one of the skill tools and they are available."""
+        return self._is_available() and name in SKILL_TOOL_NAMES
+
+    def _is_available(self) -> bool:
+        """Check if skill tools should be available.
+
+        Skill tools require both a skill manager and a sandbox backend.
+        """
+        return self._has_skill_manager() and self._sandbox_available
+
+    async def invoke_tool(self, name: str, parameters: dict, query) -> typing.Any:
+        """Dispatch a tool call to the matching handler.
+
+        Raises:
+            ValueError: if ``name`` is not a skill tool.
+        """
+        if name == ACTIVATE_SKILL_TOOL_NAME:
+            return await self._invoke_activate_skill(parameters, query)
+        if name == REGISTER_SKILL_TOOL_NAME:
+            return await self._invoke_register_skill(parameters)
+        raise ValueError(f'Unknown skill tool: {name}')
+
+    async def shutdown(self):
+        """No-op: this loader holds nothing that needs releasing."""
+        pass
+
+    def _has_skill_manager(self) -> bool:
+        """Return True when the application exposes a non-None ``skill_mgr``."""
+        return getattr(self.ap, 'skill_mgr', None) is not None
+
+    async def _invoke_activate_skill(self, parameters: dict, query) -> typing.Any:
+        """Activate a skill and return SKILL.md content via Tool Result.
+
+        Raises:
+            ValueError: if ``skill_name`` is missing/blank or names a skill
+                the skill manager does not know.
+        """
+        skill_name = str(parameters.get('skill_name', '') or '').strip()
+        if not skill_name:
+            raise ValueError('skill_name is required')
+
+        skill_mgr = self.ap.skill_mgr
+        skill_data = skill_mgr.get_skill_by_name(skill_name)
+        if skill_data is None:
+            visible_skills = getattr(skill_mgr, 'skills', {})
+            available_names = ', '.join(sorted(visible_skills.keys())) or 'none'
+            raise ValueError(f'Skill "{skill_name}" not found. Available skills: {available_names}')
+
+        # Register activated skill for sandbox mount path resolution
+        from . import skill as skill_loader
+
+        skill_loader.register_activated_skill(query, skill_data)
+
+        # Return SKILL.md content as Tool Result (injects into context)
+        instructions = skill_data.get('instructions', '')
+        package_root = skill_data.get('package_root', '')
+        mount_path = skill_loader.get_virtual_skill_mount_path(skill_name)
+
+        # Build Tool Result content
+        result_content = f'<command-message>The "{skill_name}" skill is activated</command-message>\n'
+        result_content += '<skill-activation>\n'
+        result_content += f'<skill-name>{skill_name}</skill-name>\n'
+        result_content += f'<mount-path>{mount_path}</mount-path>\n'
+        result_content += f'<package-root>{package_root}</package-root>\n'
+        result_content += f'\n## Instructions\n{instructions}\n'
+        result_content += '\n## Runtime Context\n'
+        result_content += f'The skill package is mounted at {mount_path}. Use the standard tools to interact with it:\n'
+        result_content += f'- Use `read` to inspect files under {mount_path}\n'
+        result_content += f'- Use `exec` with workdir set to {mount_path} to run commands in that package\n'
+        result_content += '- Use `write` and `edit` on that path when the instructions require updating files\n'
+        result_content += '</skill-activation>\n'
+
+        return {
+            'activated': True,
+            'skill_name': skill_name,
+            'mount_path': mount_path,
+            'content': result_content,
+        }
+
+    async def _invoke_register_skill(self, parameters: dict) -> typing.Any:
+        """Register a skill package found in a sandbox directory.
+
+        The directory is scanned through the skill service; ``name``,
+        ``display_name``, ``description`` and ``instructions`` in
+        ``parameters`` override the scanned values when truthy.
+
+        Raises:
+            ValueError: if ``path`` is missing or invalid, the skill service
+                is unavailable, or no skill name can be determined.
+        """
+        sandbox_path = str(parameters.get('path', '') or '').strip()
+        if not sandbox_path:
+            raise ValueError('path is required')
+
+        # Resolve sandbox path to host path
+        host_path = self._resolve_workspace_directory(sandbox_path)
+
+        # Get or create skill service
+        skill_service = getattr(self.ap, 'skill_service', None)
+        if skill_service is None:
+            raise ValueError('Skill service not available')
+
+        # Scan and register the skill
+        scanned = await skill_service.scan_directory_async(host_path)
+
+        # Override name if provided. A scan result without a usable name falls
+        # through to the explicit check below instead of raising KeyError.
+        skill_name = str(parameters.get('name') or scanned.get('name') or '').strip()
+        if not skill_name:
+            raise ValueError('skill name is required')
+
+        # Create the skill
+        created = await skill_service.create_skill(
+            {
+                'name': skill_name,
+                'display_name': str(parameters.get('display_name') or scanned.get('display_name', '')).strip(),
+                'description': str(parameters.get('description') or scanned.get('description', '')).strip(),
+                'instructions': str(parameters.get('instructions') or scanned.get('instructions', '')),
+                'package_root': host_path,
+            }
+        )
+
+        return {
+            'registered': True,
+            'skill_name': skill_name,
+            'source_path': sandbox_path,
+            'skill': created,
+        }
+
+    def _resolve_workspace_directory(self, sandbox_path: str) -> str:
+        """Resolve sandbox path to host filesystem path.
+
+        ``sandbox_path`` must be ``/workspace`` itself or a path below it; a
+        blank path is treated as ``/workspace``. The result is the real path
+        under the box service's ``default_workspace``.
+
+        Raises:
+            ValueError: if no default workspace is configured, the path is
+                not under ``/workspace``, the resolved path leaves the
+                workspace root, or (only when the box service does not
+                report ``available``) the directory does not exist.
+        """
+        box_service = getattr(self.ap, 'box_service', None)
+        workspace_root = getattr(box_service, 'default_workspace', None)
+        if not workspace_root:
+            raise ValueError('No default workspace configured')
+
+        normalized_path = str(sandbox_path).strip() or '/workspace'
+        # Require a real path-component match: a bare prefix test would also
+        # accept siblings such as "/workspace-other".
+        if normalized_path != '/workspace' and not normalized_path.startswith('/workspace/'):
+            raise ValueError('path must be under /workspace')
+
+        relative = normalized_path[len('/workspace') :].lstrip('/')
+        host_root = os.path.realpath(workspace_root)
+        host_path = os.path.realpath(os.path.join(host_root, relative))
+
+        # Security check: ensure path doesn't escape workspace
+        if not (host_path == host_root or host_path.startswith(host_root + os.sep)):
+            raise ValueError('path escapes the workspace boundary')
+
+        # With an available box service the existence check below is skipped.
+        if getattr(box_service, 'available', False):
+            return host_path
+
+        if not os.path.isdir(host_path):
+            raise ValueError(f'Directory does not exist: {sandbox_path}')
+
+        return host_path
+
+    def _build_activate_skill_tool(self) -> resource_tool.LLMTool:
+        """Build the ``activate`` tool definition (single ``skill_name`` argument)."""
+        return resource_tool.LLMTool(
+            name=ACTIVATE_SKILL_TOOL_NAME,
+            human_desc='Activate a skill',
+            description=self._build_activate_tool_description(),
+            parameters={
+                'type': 'object',
+                'properties': {
+                    'skill_name': {
+                        'type': 'string',
+                        'description': 'The skill name to activate (no arguments). E.g., "pdf" or "data-analysis"',
+                    },
+                },
+                'required': ['skill_name'],
+                'additionalProperties': False,
+            },
+            # Identity placeholder; calls are dispatched through invoke_tool().
+            func=lambda parameters: parameters,
+        )
+
+    def _build_register_skill_tool(self) -> resource_tool.LLMTool:
+        """Build the ``register_skill`` tool definition (``path`` plus optional overrides)."""
+        return resource_tool.LLMTool(
+            name=REGISTER_SKILL_TOOL_NAME,
+            human_desc='Register a skill from sandbox',
+            description=(
+                "Register a skill package from a directory under /workspace into LangBot's skill store. "
+                'Use this after creating or preparing a skill in the sandbox with exec/read/write/edit. '
+                'The directory must contain a SKILL.md file. '
+                'After registration, the skill can be activated with the activate tool.'
+            ),
+            parameters={
+                'type': 'object',
+                'properties': {
+                    'path': {
+                        'type': 'string',
+                        'description': 'Directory path under /workspace containing the skill package (must have SKILL.md)',
+                    },
+                    'name': {
+                        'type': 'string',
+                        'description': 'Optional skill name override. Defaults to the name in SKILL.md or directory name.',
+                    },
+                    'display_name': {
+                        'type': 'string',
+                        'description': 'Optional display name override.',
+                    },
+                    'description': {
+                        'type': 'string',
+                        'description': 'Optional description override.',
+                    },
+                    'instructions': {
+                        'type': 'string',
+                        'description': 'Optional instructions override.',
+                    },
+                },
+                'required': ['path'],
+                'additionalProperties': False,
+            },
+            # Identity placeholder; calls are dispatched through invoke_tool().
+            func=lambda parameters: parameters,
+        )
+
+    def _build_activate_tool_description(self) -> str:
+        """Build tool description with embedded available_skills list.
+
+        The list is taken from ``skill_mgr.skills`` at the moment this method
+        runs, sorted by skill name. A short fallback sentence is returned
+        when there is no skill manager or it holds no skills.
+        """
+        skill_mgr = getattr(self.ap, 'skill_mgr', None)
+        if skill_mgr is None:
+            return 'Activate a skill. No skills are currently available.'
+
+        skills = getattr(skill_mgr, 'skills', {})
+        if not skills:
+            return 'Activate a skill. No skills are currently available.'
+
+        # Build <available_skills> section
+        available_skills_lines = ['<available_skills>']
+        for skill_name, skill_data in sorted(skills.items()):
+            description = skill_data.get('description', '')
+            available_skills_lines.append('<skill>')
+            available_skills_lines.append(f'<name>{skill_name}</name>')
+            available_skills_lines.append(f'<description>{description}</description>')
+            available_skills_lines.append('</skill>')
+        available_skills_lines.append('</available_skills>')
+
+        available_skills_block = '\n'.join(available_skills_lines)
+
+        return f"""Activate a skill within the main conversation.
+
+<skills_instructions>
+When users ask you to perform tasks, check if any of the available skills
+below can help complete the task more effectively. Skills provide specialized
+capabilities and domain knowledge.
+
+How to use skills:
+- Invoke skills using this tool with the skill name only (no arguments)
+- When you invoke a skill, you will see <command-message>
+The skill is activated
+</command-message>
+- The skill's instructions will be provided in the tool result
+- Examples:
+  - skill_name: "pdf" - invoke the pdf skill
+  - skill_name: "data-analysis" - invoke the data-analysis skill
+
+Important:
+- Only use skills listed in <available_skills> below
+- Do not invoke a skill that is already running
+- To create a new skill: prepare it in /workspace, then use register_skill tool
+</skills_instructions>
+
+{available_skills_block}"""
--- a/src/langbot/pkg/provider/tools/toolmgr.py
+++ b/src/langbot/pkg/provider/tools/toolmgr.py
@@ -1,15 +1,19 @@
 from __future__ import annotations

 import typing
+from typing import TYPE_CHECKING

-from ...core import app
-from langbot.pkg.utils import importutil
-from langbot.pkg.provider.tools import loaders
-from langbot.pkg.provider.tools.loaders import mcp as mcp_loader, plugin as plugin_loader
 import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
 from langbot_plugin.api.entities.events import pipeline_query

-importutil.import_modules_in_pkg(loaders)
+if TYPE_CHECKING:
+    from ...core import app
+    from langbot.pkg.provider.tools.loaders import (
+        mcp as mcp_loader,
+        native as native_loader,
+        plugin as plugin_loader,
+        skill_authoring as skill_authoring_loader,
+    )


 class ToolManager:
@@ -17,31 +21,53 @@ class ToolManager:

    ap: app.Application

+    native_tool_loader: native_loader.NativeToolLoader
    plugin_tool_loader: plugin_loader.PluginToolLoader
    mcp_tool_loader: mcp_loader.MCPLoader
+    skill_tool_loader: skill_authoring_loader.SkillToolLoader

    def __init__(self, ap: app.Application):
        self.ap = ap

    async def initialize(self):
+        from langbot.pkg.utils import importutil
+        from langbot.pkg.provider.tools import loaders
+        from langbot.pkg.provider.tools.loaders import (
+            mcp as mcp_loader,
+            native as native_loader,
+            plugin as plugin_loader,
+            skill_authoring as skill_authoring_loader,
+        )
+
+        importutil.import_modules_in_pkg(loaders)
+
+        self.native_tool_loader = native_loader.NativeToolLoader(self.ap)
+        await self.native_tool_loader.initialize()
+
        self.plugin_tool_loader = plugin_loader.PluginToolLoader(self.ap)
        await self.plugin_tool_loader.initialize()
        self.mcp_tool_loader = mcp_loader.MCPLoader(self.ap)
        await self.mcp_tool_loader.initialize()
+        self.skill_tool_loader = skill_authoring_loader.SkillToolLoader(self.ap)
+        await self.skill_tool_loader.initialize()

    async def get_all_tools(
-        self, bound_plugins: list[str] | None = None, bound_mcp_servers: list[str] | None = None
+        self,
+        bound_plugins: list[str] | None = None,
+        bound_mcp_servers: list[str] | None = None,
+        include_skill_authoring: bool = False,
    ) -> list[resource_tool.LLMTool]:
-        """获取所有函数"""
        all_functions: list[resource_tool.LLMTool] = []

+        all_functions.extend(await self.native_tool_loader.get_tools())
+        if include_skill_authoring:
+            all_functions.extend(await self.skill_tool_loader.get_tools())
        all_functions.extend(await self.plugin_tool_loader.get_tools(bound_plugins))
        all_functions.extend(await self.mcp_tool_loader.get_tools(bound_mcp_servers))

        return all_functions

    async def generate_tools_for_openai(self, use_funcs: list[resource_tool.LLMTool]) -> list:
-        """生成函数列表"""
        tools = []

        for function in use_funcs:
@@ -58,28 +84,6 @@ class ToolManager:
        return tools

    async def generate_tools_for_anthropic(self, use_funcs: list[resource_tool.LLMTool]) -> list:
-        """为anthropic生成函数列表
-
-        e.g.
-
-        [
-          {
-            "name": "get_stock_price",
-            "description": "Get the current stock price for a given ticker symbol.",
-            "input_schema": {
-              "type": "object",
-              "properties": {
-                "ticker": {
-                  "type": "string",
-                  "description": "The stock ticker symbol, e.g. AAPL for Apple Inc."
-                }
-              },
-              "required": ["ticker"]
-            }
-          }
-        ]
-        """
-
        tools = []

        for function in use_funcs:
@@ -93,16 +97,18 @@ class ToolManager:
        return tools

    async def execute_func_call(self, name: str, parameters: dict, query: pipeline_query.Query) -> typing.Any:
-        """执行函数调用"""
-
+        if await self.native_tool_loader.has_tool(name):
+            return await self.native_tool_loader.invoke_tool(name, parameters, query)
        if await self.plugin_tool_loader.has_tool(name):
            return await self.plugin_tool_loader.invoke_tool(name, parameters, query)
-        elif await self.mcp_tool_loader.has_tool(name):
+        if await self.mcp_tool_loader.has_tool(name):
            return await self.mcp_tool_loader.invoke_tool(name, parameters, query)
-        else:
-            raise ValueError(f'未找到工具: {name}')
+        if await self.skill_tool_loader.has_tool(name):
+            return await self.skill_tool_loader.invoke_tool(name, parameters, query)
+        raise ValueError(f'未找到工具: {name}')

    async def shutdown(self):
-        """关闭所有工具"""
+        await self.native_tool_loader.shutdown()
        await self.plugin_tool_loader.shutdown()
        await self.mcp_tool_loader.shutdown()
+        await self.skill_tool_loader.shutdown()
--- a/src/langbot/pkg/rag/service/runtime.py
+++ b/src/langbot/pkg/rag/service/runtime.py
@@ -1,8 +1,12 @@
 from __future__ import annotations

 import posixpath
-from typing import Any
-from langbot.pkg.core import app
+import re
+from typing import TYPE_CHECKING, Any
+from urllib.parse import unquote
+
+if TYPE_CHECKING:
+    from langbot.pkg.core import app


 class RAGRuntimeService:
@@ -109,8 +113,17 @@ class RAGRuntimeService:
        regardless of the underlying storage provider.
        """
        # Validate storage_path to prevent path traversal
-        normalized = posixpath.normpath(storage_path)
-        if normalized.startswith('/') or '..' in normalized.split('/'):
+        decoded_path = unquote(storage_path).replace('\\', '/')
+        decoded_segments = decoded_path.split('/')
+        normalized = posixpath.normpath(decoded_path)
+        if (
+            not storage_path
+            or '\x00' in decoded_path
+            or normalized.startswith('/')
+            or '..' in decoded_segments
+            or '..' in normalized.split('/')
+            or re.match(r'^[A-Za-z]:/', normalized)
+        ):
            raise ValueError('Invalid storage path')
        content_bytes = await self.ap.storage_mgr.storage_provider.load(normalized)
        return content_bytes if content_bytes else b''
--- a/src/langbot/pkg/skill/__init__.py
+++ b/src/langbot/pkg/skill/__init__.py
@@ -0,0 +1,3 @@
+from .manager import SkillManager
+
+__all__ = ['SkillManager']
--- a/src/langbot/pkg/skill/activation.py
+++ b/src/langbot/pkg/skill/activation.py
@@ -0,0 +1,35 @@
+from __future__ import annotations
+
+import typing
+
+from ..provider.tools.loaders import skill as skill_loader
+
+if typing.TYPE_CHECKING:
+    from ..core import app
+    import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
+
+
+# Skill activation is now handled through Tool Call mechanism (activate tool).
+# This file is kept for potential future extensions but the text marker
+# detection mechanism has been removed.
+
+
+def register_activated_skill(
+    ap: app.Application,
+    query: pipeline_query.Query,
+    skill_name: str,
+) -> bool:
+    """Record ``skill_name`` as activated for ``query``.
+
+    The skill is looked up on ``ap.skill_mgr`` and its data is handed to
+    ``skill_loader.register_activated_skill``.
+
+    Returns:
+        True when the skill was found and registered; False when ``ap`` has
+        no skill manager or the manager does not know ``skill_name``.
+    """
+    manager = getattr(ap, 'skill_mgr', None)
+    found = manager.get_skill_by_name(skill_name) if manager is not None else None
+    if found is None:
+        return False
+
+    skill_loader.register_activated_skill(query, found)
+    return True
--- a/src/langbot/pkg/skill/manager.py
+++ b/src/langbot/pkg/skill/manager.py
@@ -0,0 +1,135 @@
+from __future__ import annotations
+
+import os
+import typing
+
+from ..core import app
+
+if typing.TYPE_CHECKING:
+    pass
+
+
+class SkillManager:
+    """In-memory cache of the skills reported by the Box runtime.
+
+    The cache is filled by ``reload_skills`` and is empty whenever the Box
+    runtime is unavailable or loading fails; there is no local filesystem
+    fallback.
+
+    Skills are activated through the `activate` tool (Tool Call mechanism),
+    aligned with Claude Code's design. This protects KV Cache and follows
+    industry standard.
+    """
+
+    ap: app.Application
+    # Loaded skills keyed by skill name; values are the dicts Box returned.
+    skills: dict[str, dict]
+
+    def __init__(self, ap: app.Application):
+        self.ap = ap
+        self.skills = {}
+
+    async def initialize(self):
+        """Populate the cache by performing an initial ``reload_skills``."""
+        await self.reload_skills()
+
+    async def reload_skills(self):
+        """Reload all skills from the Box runtime.
+
+        Box is the only source of truth for skills. When Box is unavailable
+        (disabled in config or unreachable) the cache is emptied — there is
+        no local filesystem fallback. Skills whose ``package_root`` is no
+        longer visible on the LangBot-side filesystem are dropped so they
+        don't surface as stale ``extra_mounts``.
+
+        The new cache is assembled separately and swapped in only once it is
+        complete, so lookups made while Box is being queried keep seeing the
+        previous cache, and a failure part-way through never leaves a
+        half-filled cache behind (the cache is emptied instead).
+        """
+        box_service = getattr(self.ap, 'box_service', None)
+        if box_service is None or not getattr(box_service, 'available', False):
+            self.skills = {}
+            self.ap.logger.info('Box runtime unavailable; skill cache is empty.')
+            return
+
+        loaded: dict[str, dict] = {}
+        dropped = 0
+        try:
+            for skill_data in await box_service.list_skills():
+                skill_name = skill_data.get('name')
+                if not skill_name:
+                    # Entries without a name cannot be keyed; ignore them.
+                    continue
+                package_root = str(skill_data.get('package_root', '') or '').strip()
+                if package_root and not os.path.isdir(package_root):
+                    self.ap.logger.warning(
+                        f'Skill "{skill_name}" reported by Box runtime but '
+                        f'package_root missing on LangBot filesystem '
+                        f'({package_root}); dropping from in-memory cache.'
+                    )
+                    dropped += 1
+                    continue
+                loaded[skill_name] = skill_data
+        except Exception as exc:
+            # Best effort: a failed reload leaves an empty cache, not a crash.
+            self.skills = {}
+            self.ap.logger.warning(f'Failed to load skills from Box runtime: {exc}')
+            return
+
+        self.skills = loaded
+        if dropped:
+            self.ap.logger.warning(
+                f'Loaded {len(self.skills)} skills from Box runtime '
+                f'({dropped} dropped due to missing package_root).'
+            )
+        else:
+            self.ap.logger.info(f'Loaded {len(self.skills)} skills from Box runtime')
+
+    def refresh_skill_from_disk(self, skill_name: str) -> bool:
+        """Confirm a single skill is present in the cache.
+
+        With Box as the only source of truth, the actual reload is driven by
+        SkillService callers awaiting ``reload_skills``; this method only
+        reports whether the cache still has the skill. Despite its name it
+        reads nothing from disk, and a falsy ``skill_name`` returns False.
+        """
+        if not skill_name:
+            return False
+        return skill_name in self.skills
+
+    def get_skill_by_name(self, name: str) -> dict | None:
+        """Get skill data by name, or None when the skill is not cached."""
+        return self.skills.get(name)
+
+    def get_skill_index(self, bound_skills: list[str] | None = None) -> str:
+        """Render the pipeline-visible skills as a short
+        ``- name (display_name): description`` index suitable for the system
+        prompt.
+
+        ``bound_skills`` follows the same convention as
+        ``query.variables['_pipeline_bound_skills']``: ``None`` means every
+        loaded skill is exposed; an explicit list filters to that subset.
+        Newlines inside a description are replaced by spaces so every skill
+        stays on one line. Returns an empty string when no skills are visible.
+        """
+        lines: list[str] = []
+        for skill in self.skills.values():
+            name = skill.get('name')
+            if not name:
+                continue
+            if bound_skills is not None and name not in bound_skills:
+                continue
+            display = skill.get('display_name') or name
+            description = (skill.get('description') or '').strip().replace('\n', ' ')
+            lines.append(f'- {name} ({display}): {description}')
+
+        if not lines:
+            return ''
+        return 'Available Skills:\n' + '\n'.join(lines)
+
+    def build_skill_aware_prompt_addition(self, bound_skills: list[str] | None = None) -> str:
+        """Build the system-prompt addendum that makes the LLM aware of the
+        pipeline-visible skills.
+
+        Only metadata (name + description) is injected — the full SKILL.md is
+        loaded later via the ``activate`` Tool Call, protecting KV cache and
+        matching Claude Code's progressive disclosure pattern. Returns an
+        empty string when no skills are visible (no prompt change at all).
+        """
+        skill_index = self.get_skill_index(bound_skills)
+        if not skill_index:
+            return ''
+        return (
+            '\n\n'
+            f'{skill_index}\n\n'
+            "When the user's request clearly matches one or more skills "
+            'based on their descriptions above, call the `activate` tool with '
+            'the skill name to load its full instructions. Only the name and '
+            'description are visible here; the actual instructions arrive as '
+            'the tool result. If no skill is a clear match, respond normally '
+            'without activating any skill.'
+        )
--- a/src/langbot/pkg/skill/utils.py
+++ b/src/langbot/pkg/skill/utils.py
@@ -0,0 +1,37 @@
+"""Shared utilities for skill file parsing."""
+
+import yaml
+
+
+def parse_frontmatter(content: str) -> tuple[dict, str]:
+    """Parse YAML frontmatter from markdown content.
+
+    Expects format:
+        ---
+        name: my-skill
+        description: Does something
+        ---
+        # Actual instructions...
+
+    The opening and closing delimiters must each be a line consisting of
+    ``---`` only (trailing whitespace allowed), so a ``---`` that appears
+    inside a value or in the body is not mistaken for a delimiter.
+
+    Returns:
+        Tuple of (metadata dict, remaining content). When there is no
+        opening delimiter on the first line, or no closing delimiter line,
+        the result is ``({}, content)`` with ``content`` untouched. Invalid
+        YAML, or YAML that is not a mapping, yields empty metadata.
+    """
+    lines = content.splitlines(keepends=True)
+    if not lines or lines[0].rstrip() != '---':
+        return {}, content
+
+    # Locate the first later line that is exactly the delimiter; indented
+    # "---" (e.g. inside a YAML block scalar) deliberately does not count.
+    closing_index = next(
+        (index for index in range(1, len(lines)) if lines[index].rstrip() == '---'),
+        None,
+    )
+    if closing_index is None:
+        return {}, content
+
+    frontmatter_str = ''.join(lines[1:closing_index]).strip()
+    instructions = ''.join(lines[closing_index + 1 :]).strip()
+
+    try:
+        metadata = yaml.safe_load(frontmatter_str) or {}
+    except yaml.YAMLError:
+        metadata = {}
+
+    if not isinstance(metadata, dict):
+        metadata = {}
+
+    return metadata, instructions
--- a/src/langbot/pkg/telemetry/telemetry.py
+++ b/src/langbot/pkg/telemetry/telemetry.py
@@ -13,12 +13,11 @@ class TelemetryManager:
        await telemetry.send({ ... })
    """

-    send_tasks: list[asyncio.Task] = []
-
    def __init__(self, ap: core_app.Application):
        self.ap = ap

        self.telemetry_config = {}
+        self.send_tasks: list[asyncio.Task] = []

    async def initialize(self):
        self.telemetry_config = self.ap.instance_config.data.get('space', {})
--- a/src/langbot/pkg/utils/funcschema.py
+++ b/src/langbot/pkg/utils/funcschema.py
@@ -83,7 +83,7 @@ def get_func_schema(function: typing.Callable) -> dict:

        parameters['properties'][param.name] = {
            'type': param_type,
-            'description': args_doc[param.name],
+            'description': args_doc.get(param.name, ''),
        }

        # add schema for array
--- a/src/langbot/pkg/utils/image.py
+++ b/src/langbot/pkg/utils/image.py
@@ -145,7 +145,8 @@ def get_qq_image_downloadable_url(image_url: str) -> tuple[str, dict]:
    """获取QQ图片的下载链接"""
    parsed = urlparse(image_url)
    query = parse_qs(parsed.query)
-    return f'http://{parsed.netloc}{parsed.path}', query
+    scheme = parsed.scheme or 'http'
+    return f'{scheme}://{parsed.netloc}{parsed.path}', query


 async def get_qq_image_bytes(image_url: str, query: dict = {}) -> tuple[bytes, str]:
--- a/src/langbot/pkg/utils/managed_runtime.py
+++ b/src/langbot/pkg/utils/managed_runtime.py
@@ -0,0 +1,88 @@
+"""Base class for connectors that may manage a local runtime subprocess."""
+
+from __future__ import annotations
+
+import asyncio
+import os
+import sys
+from typing import TYPE_CHECKING, Awaitable, Callable
+
+if TYPE_CHECKING:
+    from ..core import app as core_app
+
+
+class ManagedRuntimeConnector:
+    """Shared lifecycle helpers for connectors that may own a local runtime process.
+
+    Offers three building blocks: launching the runtime as a child of the
+    current Python interpreter, polling a readiness check with retries, and
+    terminating the child again.  Concrete connectors (plugin, box, …)
+    subclass this and layer their protocol-specific logic on top.
+    """
+
+    ap: 'core_app.Application'
+    runtime_subprocess: asyncio.subprocess.Process | None
+    runtime_subprocess_task: asyncio.Task | None
+
+    def __init__(self, ap: 'core_app.Application'):
+        self.ap = ap
+        self.runtime_subprocess = None
+        self.runtime_subprocess_task = None
+
+    async def _start_runtime_subprocess(self, *args: str) -> None:
+        """Spawn the runtime with ``sys.executable`` and the given arguments.
+
+        Does nothing when a previously spawned process has no *returncode*
+        yet.  The child receives a copy of the current environment, and a
+        task awaiting its exit is stored in ``runtime_subprocess_task``.
+        """
+        current = self.runtime_subprocess
+        if current is not None and current.returncode is None:
+            return
+
+        self.runtime_subprocess = await asyncio.create_subprocess_exec(
+            sys.executable,
+            *args,
+            env=os.environ.copy(),
+        )
+        self.runtime_subprocess_task = asyncio.create_task(self.runtime_subprocess.wait())
+
+    async def _wait_until_ready(
+        self,
+        check: Callable[[], Awaitable[None]],
+        retries: int = 40,
+        interval: float = 0.25,
+        runtime_name: str = 'runtime',
+    ) -> None:
+        """Poll *check* until it stops raising, for at most *retries* attempts.
+
+        Every failed attempt is followed by a sleep of *interval* seconds.
+        A ``RuntimeError`` is raised as soon as the managed subprocess is
+        seen to have exited.  Once the attempts are used up, the exception
+        from the last failed check is re-raised, or a ``RuntimeError`` when
+        no check ever ran.
+        """
+        failure: Exception | None = None
+        for _ in range(retries):
+            # Bail out early when the child is already gone.
+            proc = self.runtime_subprocess
+            if proc is not None and proc.returncode is not None:
+                raise RuntimeError(
+                    f'local {runtime_name} exited before becoming ready (code {proc.returncode})'
+                )
+
+            try:
+                await check()
+            except Exception as exc:
+                failure = exc
+                await asyncio.sleep(interval)
+            else:
+                return
+
+        if failure is None:
+            raise RuntimeError(f'local {runtime_name} did not become ready')
+        raise failure
+
+    def _dispose_subprocess(self) -> None:
+        """Terminate a still-running child and cancel the task awaiting its exit."""
+        proc = self.runtime_subprocess
+        if proc is not None and proc.returncode is None:
+            self.ap.logger.info('Terminating managed runtime process...')
+            proc.terminate()
+
+        wait_task = self.runtime_subprocess_task
+        if wait_task is not None:
+            wait_task.cancel()
+            self.runtime_subprocess_task = None
--- a/src/langbot/pkg/utils/paths.py
+++ b/src/langbot/pkg/utils/paths.py
@@ -1,37 +1,70 @@
-"""Utility functions for finding package resources"""
+"""Utility functions for finding package resources and runtime data roots."""

 import os
 from pathlib import Path


 _is_source_install = None
+_source_root = None
+
+
+def _find_source_root() -> Path | None:
+    """Return the LangBot checkout root, or ``None`` when not running from source.
+
+    Walks up from this file looking for a directory holding both ``pyproject.toml``
+    and ``main.py``.  Only a hit is memoised in ``_source_root``; a miss is searched
+    for again on the next call.
+    """
+    global _source_root
+
+    if _source_root is None:
+        for candidate in Path(__file__).resolve().parents:
+            if (candidate / 'pyproject.toml').exists() and (candidate / 'main.py').exists():
+                _source_root = candidate
+                break
+    return _source_root


 def _check_if_source_install() -> bool:
    """
-    Check if we're running from source directory or an installed package.
-    Cached to avoid repeated file I/O.
+    Check if we're running from the LangBot source tree.
+    Cached to avoid repeated filesystem scans.
    """
    global _is_source_install

    if _is_source_install is not None:
        return _is_source_install

-    # Check if main.py exists in current directory with LangBot marker
-    if os.path.exists('main.py'):
-        try:
-            with open('main.py', 'r', encoding='utf-8') as f:
-                # Only read first 500 chars to check for marker
-                content = f.read(500)
-                if 'LangBot/main.py' in content:
-                    _is_source_install = True
-                    return True
-        except (IOError, OSError, UnicodeDecodeError):
-            # If we can't read the file, assume not a source install
-            pass
+    _is_source_install = _find_source_root() is not None
+    return _is_source_install

-    _is_source_install = False
-    return False
+
+def get_data_root() -> str:
+    """
+    Resolve the absolute directory that holds LangBot's runtime data.
+
+    The first rule that applies wins:
+    1. ``LANGBOT_DATA_ROOT`` environment variable (trimmed, ``~`` expanded)
+    2. ``<source root>/data`` when a source checkout is detected
+    3. ``<current working directory>/data`` otherwise
+    """
+    override = os.environ.get('LANGBOT_DATA_ROOT', '').strip()
+    if override:
+        base = Path(override).expanduser()
+    else:
+        # Only probe for a checkout when no explicit override was given.
+        checkout = _find_source_root()
+        base = (Path.cwd() if checkout is None else checkout) / 'data'
+
+    return str(base.resolve())
+
+
+def get_data_path(*parts: str) -> str:
+    """Return the absolute path of *parts* joined beneath the data root."""
+    root = Path(get_data_root())
+    # With no segments the root is returned as-is, without a second resolve().
+    target = root.joinpath(*parts).resolve() if parts else root
+    return str(target)


 def get_frontend_path() -> str:
@@ -76,8 +109,11 @@ def get_resource_path(resource: str) -> str:
        Absolute path to the resource
    """
    # First, check if resource exists in current directory (source install)
-    if _check_if_source_install() and os.path.exists(resource):
-        return resource
+    source_root = _find_source_root()
+    if source_root is not None:
+        source_resource = source_root / resource
+        if source_resource.exists():
+            return str(source_resource)

    # Second, check current directory anyway
    if os.path.exists(resource):
--- a/src/langbot/pkg/utils/pkgmgr.py
+++ b/src/langbot/pkg/utils/pkgmgr.py
@@ -23,7 +23,10 @@ def run_pip(params: list):
    pipmain(params)


-def install_requirements(file, extra_params: list = []):
+def install_requirements(file, extra_params: list | None = None):
+    if extra_params is None:
+        extra_params = []
+
    pipmain(
        [
            'install',
--- a/src/langbot/pkg/utils/platform.py
+++ b/src/langbot/pkg/utils/platform.py
@@ -16,7 +16,14 @@ def get_platform() -> str:

 standalone_runtime = False

+standalone_box = False
+

 def use_websocket_to_connect_plugin_runtime() -> bool:
    """是否使用 websocket 连接插件运行时"""
    return standalone_runtime
+
+
+def use_websocket_to_connect_box_runtime() -> bool:
+    """Whether to connect to an external box runtime over WebSocket (mirrors ``standalone_box``)."""
+    return standalone_box
--- a/src/langbot/pkg/utils/runner.py
+++ b/src/langbot/pkg/utils/runner.py
@@ -1,5 +1,7 @@
 from __future__ import annotations

+import ipaddress
+import re
 from urllib.parse import urlparse


@@ -44,6 +46,40 @@ LOCAL_PATTERNS = [
    '172.31.',
 ]

+HOST_LABEL_PATTERN = re.compile(r'^[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?$')
+
+
+def _is_valid_hostname(host: str) -> bool:
+    """Return True for ``localhost``, an IP literal, or a lowercase DNS name (one trailing dot allowed)."""
+    if host == 'localhost':
+        return True
+
+    try:
+        ipaddress.ip_address(host)
+        return True
+    except ValueError:
+        pass  # Not an IP literal; fall through to DNS-name validation.
+
+    if not host or len(host) > 253 or any(char.isspace() for char in host):
+        return False
+
+    # Drop only the single root-label dot of an FQDN: rstrip('.') would also
+    # accept malformed names such as 'example.com..'.
+    name = host[:-1] if host.endswith('.') else host
+    return bool(name) and all(HOST_LABEL_PATTERN.match(label) for label in name.split('.'))
+
+
+def _is_local_host(host: str) -> bool:
+    """Return True for ``localhost`` or a private, loopback or unspecified IP literal."""
+    if host == 'localhost':
+        return True
+    try:
+        addr = ipaddress.ip_address(host)
+    except ValueError:
+        # Any other name is not treated as local; no DNS lookup is attempted.
+        return False
+    return any((addr.is_private, addr.is_loopback, addr.is_unspecified))
+

 def get_runner_category(runner_name: str, runner_url: str) -> str:
    if not runner_url:
@@ -52,12 +88,15 @@ def get_runner_category(runner_name: str, runner_url: str) -> str:
    try:
        parsed_url = urlparse(runner_url)
        host = parsed_url.hostname.lower() if parsed_url.hostname else ''
+        _ = parsed_url.port
    except Exception:
        return RunnerCategory.UNKNOWN

-    for pattern in LOCAL_PATTERNS:
-        if host.startswith(pattern):
-            return RunnerCategory.LOCAL
+    if not parsed_url.scheme or not host or not _is_valid_hostname(host):
+        return RunnerCategory.UNKNOWN
+
+    if _is_local_host(host):
+        return RunnerCategory.LOCAL

    for domain in CLOUD_DOMAINS:
        if host.endswith(domain):
--- a/src/langbot/pkg/utils/version.py
+++ b/src/langbot/pkg/utils/version.py
@@ -1,6 +1,5 @@
 from __future__ import annotations

-import os
 import typing
 import logging

@@ -11,7 +10,7 @@ from . import constants


 class VersionManager:
-    """版本管理器"""
+    """Version manager"""

    ap: app.Application

@@ -22,190 +21,68 @@ class VersionManager:
        pass

    def get_current_version(self) -> str:
-        current_tag = constants.semantic_version
-
-        return current_tag
+        return constants.semantic_version

    async def get_release_list(self) -> list:
-        """获取发行列表"""
+        """Fetch release list from Space API (cached GitHub releases)."""
        try:
            rls_list_resp = requests.get(
-                url='https://api.github.com/repos/langbot-app/LangBot/releases',
+                url='https://space.langbot.app/api/v1/dist/info/releases',
                proxies=self.ap.proxy_mgr.get_forward_proxies(),
-                timeout=5,
+                timeout=10,
            )
-            rls_list_resp.raise_for_status()  # 检查请求是否成功
-            rls_list = rls_list_resp.json()
-            return rls_list
+            rls_list_resp.raise_for_status()
+            resp_json = rls_list_resp.json()
+            if resp_json.get('code') == 0 and isinstance(resp_json.get('data'), list):
+                return resp_json['data']
+            self.ap.logger.warning(f'Failed to fetch release list: unexpected response: {resp_json.get("msg", "")}')
+            return []
        except Exception as e:
-            self.ap.logger.warning(f'获取发行列表失败: {e}')
-            pass
+            self.ap.logger.warning(f'Failed to fetch release list: {e}')
        return []

-    async def update_all(self):
-        """检查更新并下载源码"""
-
-        current_tag = self.get_current_version()
-
-        rls_list = await self.get_release_list()
-
-        latest_rls = {}
-        rls_notes = []
-        latest_tag_name = ''
-        for rls in rls_list:
-            rls_notes.append(rls['name'])  # 使用发行名称作为note
-            if latest_tag_name == '':
-                latest_tag_name = rls['tag_name']
-
-            if rls['tag_name'] == current_tag:
-                break
-
-            if latest_rls == {}:
-                latest_rls = rls
-        self.ap.logger.info('更新日志: {}'.format(rls_notes))
-
-        if latest_rls == {} and not self.is_newer(latest_tag_name, current_tag):  # 没有新版本
-            return False
-
-        # 下载最新版本的zip到temp目录
-        self.ap.logger.info('开始下载最新版本: {}'.format(latest_rls['zipball_url']))
-
-        zip_url = latest_rls['zipball_url']
-        zip_resp = requests.get(url=zip_url, proxies=self.ap.proxy_mgr.get_forward_proxies())
-        zip_data = zip_resp.content
-
-        # 检查temp/updater目录
-        if not os.path.exists('temp'):
-            os.mkdir('temp')
-        if not os.path.exists('temp/updater'):
-            os.mkdir('temp/updater')
-        with open('temp/updater/{}.zip'.format(latest_rls['tag_name']), 'wb') as f:
-            f.write(zip_data)
-
-        self.ap.logger.info('下载最新版本完成: {}'.format('temp/updater/{}.zip'.format(latest_rls['tag_name'])))
-
-        # 解压zip到temp/updater/<tag_name>/
-        import zipfile
-
-        # 检查目标文件夹
-        if os.path.exists('temp/updater/{}'.format(latest_rls['tag_name'])):
-            import shutil
-
-            shutil.rmtree('temp/updater/{}'.format(latest_rls['tag_name']))
-        os.mkdir('temp/updater/{}'.format(latest_rls['tag_name']))
-        with zipfile.ZipFile('temp/updater/{}.zip'.format(latest_rls['tag_name']), 'r') as zip_ref:
-            zip_ref.extractall('temp/updater/{}'.format(latest_rls['tag_name']))
-
-        # 覆盖源码
-        source_root = ''
-        # 找到temp/updater/<tag_name>/中的第一个子目录路径
-        for root, dirs, files in os.walk('temp/updater/{}'.format(latest_rls['tag_name'])):
-            if root != 'temp/updater/{}'.format(latest_rls['tag_name']):
-                source_root = root
-                break
-
-        # 覆盖源码
-        import shutil
-
-        for root, dirs, files in os.walk(source_root):
-            # 覆盖所有子文件子目录
-            for file in files:
-                src = os.path.join(root, file)
-                dst = src.replace(source_root, '.')
-                if os.path.exists(dst):
-                    os.remove(dst)
-
-                # 检查目标文件夹是否存在
-                if not os.path.exists(os.path.dirname(dst)):
-                    os.makedirs(os.path.dirname(dst))
-                # 检查目标文件是否存在
-                if not os.path.exists(dst):
-                    # 创建目标文件
-                    open(dst, 'w').close()
-
-                shutil.copy(src, dst)
-
-        # 把current_tag写入文件
-        current_tag = latest_rls['tag_name']
-        with open('current_tag', 'w') as f:
-            f.write(current_tag)
-
-        # TODO statistics
-
    async def is_new_version_available(self) -> bool:
-        """检查是否有新版本"""
-        # 从github获取release列表
+        """Check whether a newer version is available."""
        rls_list = await self.get_release_list()
-        if rls_list is None:
+        if not rls_list:
            return False

-        # 获取当前版本
        current_tag = self.get_current_version()

-        # 检查是否有新版本
        latest_tag_name = ''
        for rls in rls_list:
-            if latest_tag_name == '':
-                latest_tag_name = rls['tag_name']
-                break
+            latest_tag_name = rls.get('tag_name', '')
+            break

-        return self.is_newer(latest_tag_name, current_tag)
+        return self._is_newer(latest_tag_name, current_tag)

-    def is_newer(self, new_tag: str, old_tag: str):
-        """判断版本是否更新，忽略第四位版本和第一位版本"""
-        if new_tag == old_tag:
+    def _is_newer(self, new_tag: str, old_tag: str) -> bool:
+        """Check if new_tag is a newer version than old_tag.
+
+        Compares the first three segments (major.minor.patch) only.
+        Returns False if the major version differs (breaking change boundary).
+        """
+        if not new_tag or not old_tag or new_tag == old_tag:
            return False

-        new_tag = new_tag.split('.')
-        old_tag = old_tag.split('.')
+        new_parts = new_tag.split('.')
+        old_parts = old_tag.split('.')

-        # 判断主版本是否相同
-        if new_tag[0] != old_tag[0]:
+        # Different major version — not considered an upgrade
+        if new_parts[0] != old_parts[0]:
            return False

-        if len(new_tag) < 4:
+        if len(new_parts) < 4:
            return True

-        # 合成前三段，判断是否相同
-        new_tag = '.'.join(new_tag[:3])
-        old_tag = '.'.join(old_tag[:3])
-
-        return new_tag != old_tag
-
-    def compare_version_str(v0: str, v1: str) -> int:
-        """比较两个版本号"""
-
-        # 删除版本号前的v
-        if v0.startswith('v'):
-            v0 = v0[1:]
-        if v1.startswith('v'):
-            v1 = v1[1:]
-
-        v0: list = v0.split('.')
-        v1: list = v1.split('.')
-
-        # 如果两个版本号节数不同，把短的后面用0补齐
-        if len(v0) < len(v1):
-            v0.extend(['0'] * (len(v1) - len(v0)))
-        elif len(v0) > len(v1):
-            v1.extend(['0'] * (len(v0) - len(v1)))
-
-        # 从高位向低位比较
-        for i in range(len(v0)):
-            if int(v0[i]) > int(v1[i]):
-                return 1
-            elif int(v0[i]) < int(v1[i]):
-                return -1
-
-        return 0
+        return '.'.join(new_parts[:3]) != '.'.join(old_parts[:3])

    async def show_version_update(self) -> typing.Tuple[str, int]:
        try:
-            if await self.ap.ver_mgr.is_new_version_available():
+            if await self.is_new_version_available():
                return (
-                    'New version available:\n有新版本可用，根据文档更新: \nhttps://link.langbot.app/zh/docs/update',
+                    'New version available. Update guide: https://link.langbot.app/en/docs/update',
                    logging.INFO,
                )
-
        except Exception as e:
            return f'Error checking version update: {e}', logging.WARNING
--- a/src/langbot/templates/config.yaml
+++ b/src/langbot/templates/config.yaml
@@ -104,6 +104,31 @@ monitoring:
        check_interval_hours: 1
        # Number of expired rows to delete per table batch
        delete_batch_size: 1000
+box:
+    # Master switch for the Box sandbox runtime. When false, LangBot does NOT
+    # attempt to connect to a remote Box runtime nor start a local stdio Box
+    # subprocess. Disabling Box also disables every feature that depends on it:
+    # the native sandbox tools (exec/read/write/edit/glob/grep), the activate
+    # skill tool, skill add/edit, and stdio-mode MCP servers. Skills can still
+    # be listed read-only and http/sse MCP servers continue to work.
+    enabled: true
+    backend: 'local'  # 'local' (Docker/nsjail), 'docker', 'nsjail', or 'e2b'. BOX_BACKEND env var takes precedence.
+    runtime:
+        endpoint: ''  # External Box Runtime base URL, e.g. 'ws://127.0.0.1:5410'. Leave empty for local auto-managed runtime.
+    local:
+        profile: 'default'
+        image: ''  # Custom local sandbox image. Leave empty to use the profile default.
+        host_root: './data/box'  # Base host directory for local workspace mounts. Docker deployments should override this with an absolute host path.
+        default_workspace: ''  # Defaults to '<host_root>/default'. Relative paths are resolved under host_root.
+        skills_root: 'skills'  # Box-owned skill package directory. Relative paths are resolved under host_root.
+        allowed_mount_roots:  # Defaults to ['<host_root>'] when left empty.
+            - './data/box'
+            - '/tmp'
+        workspace_quota_mb: null  # Optional disk quota override (>= 0). null = profile default.
+    e2b:
+        api_key: ''  # Can also be set via E2B_API_KEY env var.
+        api_url: ''  # Custom API URL for self-hosted deployments.
+        template: ''  # Default template ID (e.g. 'base', 'python-3.11').
 space:
    # Space service URL for OAuth and API
    url: 'https://space.langbot.app'
--- a/src/langbot/templates/default-pipeline-config.json
+++ b/src/langbot/templates/default-pipeline-config.json
@@ -50,10 +50,11 @@
            "prompt": [
                {
                    "role": "system",
-                    "content": "You are a helpful assistant."
+                    "content": "You are a helpful assistant. When tools are available, use them for exact calculations, data processing, and code execution instead of guessing. Unless the user explicitly asks for code or a script, return the result directly instead of printing the generated code."
                }
            ],
            "knowledge-bases": [],
+            "box-session-id-template": "{launcher_type}_{launcher_id}",
            "rerank-model": "",
            "rerank-top-k": 5
        },
--- a/src/langbot/templates/metadata/pipeline/ai.yaml
+++ b/src/langbot/templates/metadata/pipeline/ai.yaml
@@ -124,6 +124,99 @@ stages:
          field: __system.is_wizard
          operator: neq
          value: true
+      - name: box-session-id-template
+        label:
+          en_US: Sandbox Scope
+          zh_Hans: 沙箱作用域
+          zh_Hant: 沙箱作用域
+          ja_JP: サンドボックススコープ
+          vi_VN: Phạm vi Sandbox
+          th_TH: ขอบเขต Sandbox
+          es_ES: Alcance del Sandbox
+          ru_RU: Область песочницы
+        description:
+          en_US: Determines how sandbox environments are shared across messages.
+          zh_Hans: 决定沙箱环境在不同消息间的共享方式。
+          zh_Hant: 決定沙箱環境在不同訊息間的共享方式。
+          ja_JP: メッセージ間でサンドボックス環境を共有する方法を決定します。
+          vi_VN: Xác định cách chia sẻ môi trường sandbox giữa các tin nhắn.
+          th_TH: กำหนดวิธีแชร์สภาพแวดล้อม Sandbox ระหว่างข้อความ
+          es_ES: Determina cómo se comparten los entornos sandbox entre mensajes.
+          ru_RU: Определяет, как песочницы используются совместно между сообщениями.
+        disable_if:
+          field: __system.box_available
+          operator: eq
+          value: false
+        disabled_tooltip:
+          en_US: >-
+            Box sandbox is disabled or unavailable. Enable it in config.yaml
+            (box.enabled = true) and ensure the runtime is reachable to change
+            this setting.
+          zh_Hans: Box 沙箱已禁用或不可用。请在配置中启用（box.enabled = true）并确认运行时连接正常，才能修改此项。
+          zh_Hant: Box 沙箱已停用或無法使用。請在設定中啟用（box.enabled = true）並確認執行時連線正常，才能修改此項。
+          ja_JP: Box サンドボックスが無効または利用できません。設定で有効化（box.enabled = true）し、ランタイムが接続できることを確認してから変更してください。
+          vi_VN: Sandbox Box đã tắt hoặc không khả dụng. Hãy bật trong cấu hình (box.enabled = true) và đảm bảo runtime hoạt động để chỉnh sửa.
+          th_TH: Sandbox Box ถูกปิดใช้งานหรือไม่พร้อมใช้งาน กรุณาเปิดใช้งานในการตั้งค่า (box.enabled = true) และตรวจสอบว่ารันไทม์เชื่อมต่อปกติก่อนปรับค่า
+          es_ES: El sandbox de Box está desactivado o no disponible. Actívelo en la configuración (box.enabled = true) y asegúrese de que el runtime esté conectado para modificar este ajuste.
+          ru_RU: Песочница Box отключена или недоступна. Включите её в конфигурации (box.enabled = true) и убедитесь, что среда выполнения работает, чтобы изменить эту настройку.
+        type: select
+        required: false
+        default: "{launcher_type}_{launcher_id}"
+        options:
+          - name: "{global}"
+            label:
+              en_US: Global (shared by all)
+              zh_Hans: 全局（所有人共享）
+              zh_Hant: 全域（所有人共用）
+              ja_JP: グローバル（全員共有）
+              vi_VN: Toàn cục (chia sẻ cho tất cả)
+              th_TH: ทั่วไป (แชร์ทั้งหมด)
+              es_ES: Global (compartido por todos)
+              ru_RU: Глобальный (общий для всех)
+          - name: "{launcher_type}_{launcher_id}"
+            label:
+              en_US: Per chat (Recommended)
+              zh_Hans: 每个会话（推荐）
+              zh_Hant: 每個會話（推薦）
+              ja_JP: チャットごと（推奨）
+              vi_VN: Mỗi cuộc trò chuyện (Khuyến nghị)
+              th_TH: ต่อแชท (แนะนำ)
+              es_ES: Por chat (Recomendado)
+              ru_RU: По чату (Рекомендуется)
+          - name: "{launcher_type}_{launcher_id}_{sender_id}"
+            label:
+              en_US: Per user in chat
+              zh_Hans: 会话中每个用户
+              zh_Hant: 會話中每個用戶
+              ja_JP: チャット内のユーザーごと
+              vi_VN: Mỗi người dùng trong cuộc trò chuyện
+              th_TH: ต่อผู้ใช้ในแชท
+              es_ES: Por usuario en chat
+              ru_RU: По пользователю в чате
+          - name: "{launcher_type}_{launcher_id}_{conversation_id}"
+            label:
+              en_US: Per conversation context
+              zh_Hans: 每个对话上下文
+              zh_Hant: 每個對話上下文
+              ja_JP: 会話コンテキストごと
+              vi_VN: Mỗi ngữ cảnh hội thoại
+              th_TH: ต่อบริบทการสนทนา
+              es_ES: Por contexto de conversación
+              ru_RU: По контексту разговора
+          - name: "{query_id}"
+            label:
+              en_US: Per message (isolated)
+              zh_Hans: 每条消息（完全隔离）
+              zh_Hant: 每條訊息（完全隔離）
+              ja_JP: メッセージごと（隔離）
+              vi_VN: Mỗi tin nhắn (cách ly)
+              th_TH: ต่อข้อความ (แยกส่วน)
+              es_ES: Por mensaje (aislado)
+              ru_RU: По сообщению (изолированно)
+        show_if:
+          field: __system.is_wizard
+          operator: neq
+          value: true
      - name: rerank-model
        label:
          en_US: Rerank Model
--- a/test-embed.html
+++ b/test-embed.html
@@ -0,0 +1,21 @@
+<!DOCTYPE html>
+<html lang="zh">
+<head>
+  <meta charset="UTF-8">
+  <title>LangBot Embed Widget Test</title>
+  <style>
+    body { font-family: sans-serif; padding: 40px; background: #f5f5f5; }
+    h1 { margin-bottom: 10px; }
+    p { color: #666; }
+    code { background: #e0e0e0; padding: 2px 6px; border-radius: 3px; }
+  </style>
+</head>
+<body>
+  <h1>LangBot Embed Widget Test Page</h1>
+  <p>If the widget loaded correctly, you should see a blue chat bubble in the bottom-right corner.</p>
+  <p>Replace the bot UUID in the widget script URL below with your actual bot UUID.</p>
+
+  <!-- Replace the UUID in the src URL with your real bot UUID -->
+<script data-title="LangBot" src="http://localhost:5300/api/v1/embed/a0ab80e7-742a-445f-bd0e-7d9758f1cfa7/widget.js"></script>
+</body>
+</html>
--- a/tests/README.md
+++ b/tests/README.md
@@ -2,6 +2,48 @@

 This directory contains the test suite for LangBot, with a focus on comprehensive unit testing of pipeline stages.

+## Quality Gate Layers
+
+LangBot uses a layered quality gate system for developers and CI:
+
+| Layer | Command | What it runs | When to use |
+|-------|---------|--------------|-------------|
+| **Quick** | `make test-quick` or `bash scripts/test-quick.sh` | Ruff lint + Unit tests + Smoke tests | Before every commit |
+| **Fast Integration** | `make test-integration-fast` or `bash scripts/test-integration-fast.sh` | SQLite/API/Pipeline integration (no external services) | Before PR, weekly |
+| **Coverage Gate** | `make test-coverage` or `bash scripts/test-coverage.sh` | All tests with coverage, threshold: 18% | Before merge, CI |
+| **Full Local** | `make test-all-local` | Quick + Integration + Coverage | Before major changes |
+
+**Note**: PostgreSQL migration tests and slow tests are NOT in local default gates. They run in separate CI workflows.
+
+### Developer Workflow
+
+```bash
+# Daily: Quick self-test
+bash scripts/test-quick.sh
+
+# Before PR: Full local gate
+make test-all-local
+
+# Or run each layer separately:
+bash scripts/test-quick.sh           # ~2 min
+bash scripts/test-integration-fast.sh # ~3 min
+bash scripts/test-coverage.sh         # ~8 min
+```
+
+### Coverage Baseline
+
+- Current coverage threshold: **18%**
+- Actual coverage: **30%**
+
+This is a conservative baseline to prevent coverage regression. It does NOT represent the final quality target. Key modules have higher coverage:
+- `pipeline.preproc.preproc`: 53%
+- `pipeline.process.process`: 96%
+- `pipeline.respback.respback`: 88%
+- `telemetry.telemetry`: 87%
+- `provider.session.sessionmgr`: 100%
+- `provider.tools.toolmgr`: 83%
+- `storage.providers.s3storage`: 80%
+
 ## Important Note

 Due to circular import dependencies in the pipeline module structure, the test files use **lazy imports** via `importlib.import_module()` instead of direct imports. This ensures tests can run without triggering circular import errors.
@@ -10,19 +52,81 @@ Due to circular import dependencies in the pipeline module structure, the test f

 ```
 tests/
-├── pipeline/                      # Pipeline stage tests
-│   ├── conftest.py               # Shared fixtures and test infrastructure
-│   ├── test_simple.py            # Basic infrastructure tests (always pass)
-│   ├── test_bansess.py           # BanSessionCheckStage tests
-│   ├── test_ratelimit.py         # RateLimit stage tests
-│   ├── test_preproc.py           # PreProcessor stage tests
-│   ├── test_respback.py          # SendResponseBackStage tests
-│   ├── test_resprule.py          # GroupRespondRuleCheckStage tests
-│   ├── test_pipelinemgr.py       # PipelineManager tests
-│   └── test_stages_integration.py # Integration tests
-└── README.md                      # This file
+├── __init__.py
+├── factories/                    # Shared test factories
+│   ├── __init__.py              # Factory exports
+│   ├── app.py                   # FakeApp factory
+│   ├── message.py               # Message/query factories
+│   ├── provider.py              # FakeProvider factory
+│   └── platform.py              # FakePlatform factory
+├── integration/                  # Integration tests (real resources)
+│   ├── __init__.py
+│   ├── api/                     # HTTP API tests
+│   │   ├── __init__.py
+│   │   └── test_smoke.py        # API smoke tests
+│   ├── pipeline/                # Pipeline stage-chain tests
+│   │   ├── __init__.py
+│   │   └── test_full_flow.py    # Full flow integration
+│   └── persistence/             # Database/persistence tests
+│       ├── __init__.py
+│       └── test_migrations.py   # Alembic migration tests
+├── smoke/                        # Smoke tests (quick validation)
+│   └── test_fake_message_flow.py
+├── unit_tests/                   # Unit tests
+│   ├── box/                      # Box module tests
+│   ├── config/                   # Configuration tests
+│   ├── pipeline/                 # Pipeline stage tests
+│   │   └── conftest.py          # Shared fixtures and test infrastructure
+│   ├── platform/                 # Platform adapter tests
+│   ├── plugin/                   # Plugin system tests
+│   │   └── test_handler_actions.py # Action handler tests
+│   ├── provider/                 # Provider tests
+│   │   ├── test_session_manager.py # SessionManager tests
+│   │   └── test_tool_manager.py    # ToolManager tests
+│   ├── rag/                      # RAG tests
+│   │   └── test_file_storage.py   # File/ZIP storage tests
+│   ├── storage/                  # Storage tests
+│   │   └── test_s3storage.py      # S3StorageProvider tests
+│   ├── vector/                   # Vector tests
+│   │   └── test_vdb_filter_conversion.py # VDB filter tests
+│   └── telemetry/                # Telemetry tests (rewritten)
+├── utils/                        # Test utilities
+│   ├── __init__.py
+│   └── import_isolation.py      # sys.modules isolation for circular imports
+└── README.md                     # This file
 ```

+## Test Factories
+
+The `tests/factories/` package provides reusable test factories:
+
+```python
+from tests.factories import (
+    FakeApp,          # Mock application
+    FakeProvider,     # Fake LLM provider
+    FakePlatform,     # Fake platform adapter
+    text_query,       # Create text query
+    group_text_query, # Create group query
+    command_query,    # Create command query
+)
+
+# Create fake app
+app = FakeApp()
+
+# Create query with text
+query = text_query("hello world")
+
+# Create fake provider that returns specific response
+provider = FakeProvider().returns("test response")
+
+# Create fake platform for outbound capture
+platform = FakePlatform()
+await platform.reply_message(query.message_event, reply_chain)
+outbound = platform.get_outbound_messages()
+```
+
+See `tests/factories/__init__.py` for all available factories.
+
 ## Test Architecture

 ### Fixtures (`conftest.py`)
@@ -43,7 +147,28 @@ The test suite uses a centralized fixture system that provides:

 ## Running Tests

-### Using the test runner script (recommended)
+### Quick self-test for developers
+
+For local branch validation without real provider keys:
+
+```bash
+make test-quick
+```
+
+or
+
+```bash
+bash scripts/test-quick.sh
+```
+
+This runs:
+1. Ruff lint check
+2. Unit tests
+3. Smoke tests
+
+Suitable for quick validation before committing.
+
+### Using the test runner script (recommended for full coverage)
 ```bash
 bash run_tests.sh
 ```
@@ -56,38 +181,135 @@ This script automatically:

 ### Manual test execution

-#### Run all tests
+#### Run all unit tests
 ```bash
-pytest tests/pipeline/
+uv run pytest tests/unit_tests/ --cov=langbot --cov-report=xml --cov-report=term
 ```

-#### Run only simple tests (no imports, always pass)
+#### Run specific test module
 ```bash
-pytest tests/pipeline/test_simple.py -v
+uv run pytest tests/unit_tests/pipeline/ -v
 ```

 #### Run specific test file
 ```bash
-pytest tests/pipeline/test_bansess.py -v
+uv run pytest tests/unit_tests/pipeline/test_bansess.py -v
 ```

 #### Run with coverage
 ```bash
-pytest tests/pipeline/ --cov=pkg/pipeline --cov-report=html
+uv run pytest tests/unit_tests/pipeline/ --cov=langbot --cov-report=html
 ```

 #### Run specific test
 ```bash
-pytest tests/pipeline/test_bansess.py::test_bansess_whitelist_allow -v
+uv run pytest tests/unit_tests/pipeline/test_bansess.py::test_bansess_whitelist_allow -v
 ```

+### Using markers
+
+```bash
+# Run only unit tests
+uv run pytest tests/unit_tests/ -m unit
+
+# Run only integration tests
+uv run pytest tests/integration/ -m integration
+
+# Run integration tests excluding slow ones
+uv run pytest tests/integration/ -m "not slow" -q
+
+# Skip slow tests
+uv run pytest tests/unit_tests/ -m "not slow"
+```
+
+### Running integration tests
+
+Integration tests validate real system behavior with actual database/network resources.
+
+```bash
+# Run all integration tests (excluding slow ones)
+uv run pytest tests/integration/ -m "not slow" -q
+
+# Run SQLite migration integration tests
+uv run pytest tests/integration/persistence/test_migrations.py -q --tb=short
+
+# Run API smoke integration tests
+uv run pytest tests/integration/api/test_smoke.py -q
+
+# Run pipeline full-flow integration tests
+uv run pytest tests/integration/pipeline/test_full_flow.py -q
+
+# Run with verbose output
+uv run pytest tests/integration/ -v
+```
+
+Note: Integration tests:
+- Use temporary databases (`tmp_path`) for persistence tests
+- Use a fake app/services for API tests (no real provider/platform)
+- Use a fake runner/provider for pipeline tests (no real LLM API)
+- Do not require external services
+
+### Running migration tests locally
+
+SQLite migration tests can be run locally without any external dependencies:
+
+```bash
+# SQLite migration tests (uses tmp_path, no external DB needed)
+uv run pytest tests/integration/persistence/test_migrations.py -q --tb=short
+```
+
+PostgreSQL migration tests require an external PostgreSQL database:
+
+```bash
+# PostgreSQL migration tests (requires PostgreSQL service)
+# Tests are marked as slow and skipped if TEST_POSTGRES_URL is not set
+TEST_POSTGRES_URL=postgresql+asyncpg://user:pass@localhost:5432/test_db \
+    uv run pytest tests/integration/persistence/test_migrations_postgres.py -q --tb=short
+
+# Or skip by default (no PostgreSQL available)
+uv run pytest tests/integration/persistence/test_migrations_postgres.py -q --tb=short
+# Output: SKIPPED (TEST_POSTGRES_URL not set)
+```
+
+Note: PostgreSQL tests are **not** included in the fast integration gate because they:
+- Require external PostgreSQL service
+- Are marked with `@pytest.mark.slow`
+- Need `TEST_POSTGRES_URL` environment variable
+
+CI workflow `.github/workflows/test-migrations.yml` runs:
+- SQLite tests in `test-migrations-sqlite` job (fast, no external services)
+- PostgreSQL tests in `test-migrations-postgres` job (uses PostgreSQL service container)
+
+### Running pipeline integration tests locally
+
+Pipeline full-flow integration tests validate real stage interactions:
+
+```bash
+# Run pipeline integration tests (uses fake runner, no real LLM API)
+uv run pytest tests/integration/pipeline/test_full_flow.py -q --tb=short
+
+# Run with coverage for pipeline modules
+uv run pytest tests/integration/pipeline \
+    --cov=langbot.pkg.pipeline.preproc.preproc \
+    --cov=langbot.pkg.pipeline.process.process \
+    --cov=langbot.pkg.pipeline.respback.respback \
+    --cov-report=term -q
+```
+
+These tests:
+- Use `FakeRunner` class to simulate LLM responses without real API calls
+- Import real `PreProcessor`, `MessageProcessor`, `SendResponseBackStage` stages
+- Validate stage chain: PreProcessor → Processor → SendResponseBackStage
+- Test prevent_default, exception handling, and full message flow
+- Do not require real LLM provider keys
+
 ### Known Issues

 Some tests may encounter circular import errors. This is a known issue with the current module structure. The test infrastructure is designed to work around this using lazy imports, but if you encounter issues:

 1. Make sure you're running from the project root directory
-2. Ensure the virtual environment is activated
-3. Try running `test_simple.py` first to verify the test infrastructure works
+2. Ensure dependencies are installed: `uv sync --dev`
+3. Try running a simple test first to verify the test infrastructure works

 ## CI/CD Integration

@@ -97,7 +319,7 @@ Tests are automatically run on:
 - Push to PR branch
 - Push to master/develop branches

-The workflow runs tests on Python 3.10, 3.11, and 3.12 to ensure compatibility.
+The workflow runs tests on Python 3.11, 3.12, and 3.13 to ensure compatibility.

 ## Adding New Tests

@@ -111,8 +333,8 @@ Create a new test file `test_<stage_name>.py`:
 """

 import pytest
-from pkg.pipeline.<module>.<stage> import <StageClass>
-from pkg.pipeline import entities as pipeline_entities
+from langbot.pkg.pipeline.<module>.<stage> import <StageClass>
+from langbot.pkg.pipeline import entities as pipeline_entities


@pytest.mark.asyncio
@@ -128,7 +350,7 @@ async def test_stage_basic_flow(mock_app, sample_query):

 ### 2. For additional fixtures

-Add new fixtures to `conftest.py`:
+Add new fixtures to the appropriate `conftest.py`:

 ```python
@pytest.fixture
@@ -142,7 +364,7 @@ def my_custom_fixture():
 Use the helper functions in `conftest.py`:

 ```python
-from tests.pipeline.conftest import create_stage_result, assert_result_continue
+from tests.unit_tests.pipeline.conftest import create_stage_result, assert_result_continue

 result = create_stage_result(
    result_type=pipeline_entities.ResultType.CONTINUE,
@@ -166,7 +388,7 @@ assert_result_continue(result)
 ### Import errors
 Make sure you've installed the package in development mode:
 ```bash
-uv pip install -e .
+uv sync --dev
 ```

 ### Async test failures
@@ -177,7 +399,11 @@ Check that you're mocking at the right level and using `AsyncMock` for async fun

 ## Future Enhancements

- [ ] Add integration tests for full pipeline execution
+- [x] Add integration tests for database migrations (SQLite)
+- [x] Add PostgreSQL migration integration tests (G-003)
+- [x] Add integration tests for full pipeline execution
+- [x] Add API smoke integration tests
+- [ ] Add E2E tests
 - [ ] Add performance benchmarks
 - [ ] Add mutation testing for better coverage quality
- [ ] Add property-based testing with Hypothesis
+- [ ] Add property-based testing with Hypothesis
--- a/tests/e2e/conftest.py
+++ b/tests/e2e/conftest.py
@@ -0,0 +1,102 @@
+"""E2E test fixtures.
+
+Provides fixtures for starting real LangBot process with minimal configuration.
+"""
+
+from __future__ import annotations
+
+import pytest
+import tempfile
+import shutil
+import logging
+from pathlib import Path
+
+from tests.e2e.utils.config_factory import create_minimal_config, create_test_directories
+from tests.e2e.utils.process_manager import LangBotProcess, find_project_root
+
+logger = logging.getLogger(__name__)
+
+pytestmark = pytest.mark.e2e
+
+
+@pytest.fixture(scope='session')
+def e2e_port() -> int:
+    """Port for E2E testing (non-default to avoid conflicts).
+
+    Returns:
+        The fixed port number 15300, shared by every test in the session.
+    """
+    return 15300
+
+
+@pytest.fixture(scope='session')
+def e2e_tmpdir():
+    """Create temporary directory for E2E testing.
+
+    Yields:
+        Path of a freshly created ``langbot_e2e_*`` temporary directory.
+        The directory tree is removed (errors ignored) when the session ends.
+    """
+    tmpdir = Path(tempfile.mkdtemp(prefix='langbot_e2e_'))
+    logger.info(f'E2E tmpdir: {tmpdir}')
+
+    yield tmpdir
+
+    # Cleanup: best effort, a leftover temp dir must not fail the test run.
+    logger.info(f'Cleaning up E2E tmpdir: {tmpdir}')
+    shutil.rmtree(tmpdir, ignore_errors=True)
+
+
+@pytest.fixture(scope='session')
+def e2e_config_path(e2e_tmpdir, e2e_port):
+    """Create minimal config.yaml for E2E testing.
+
+    Writes the config via ``create_minimal_config`` and then creates the
+    working directories via ``create_test_directories``, both under
+    ``e2e_tmpdir``.
+
+    Returns:
+        The path returned by ``create_minimal_config``.
+    """
+    config_path = create_minimal_config(e2e_tmpdir, port=e2e_port)
+    create_test_directories(e2e_tmpdir)
+    logger.info(f'E2E config: {config_path}')
+    return config_path
+
+
+@pytest.fixture(scope='session')
+def langbot_process(e2e_config_path, e2e_port, e2e_tmpdir):
+    """Start real LangBot process for E2E testing.
+
+    This fixture starts LangBot once per session and reuses it for all tests.
+    Coverage data is collected from the subprocess.
+    """
+    project_root = find_project_root()
+    collect_coverage = True
+
+    proc = LangBotProcess(
+        project_root=project_root,
+        work_dir=e2e_tmpdir,  # Run in tmpdir where data/config.yaml exists
+        port=e2e_port,
+        timeout=60,  # Longer timeout for first startup
+        collect_coverage=collect_coverage,
+    )
+
+    success = proc.start()
+    if not success:
+        stdout, stderr = proc.get_logs()
+        pytest.fail(f'LangBot failed to start:\nstdout: {stdout}\nstderr: {stderr}')
+
+    yield proc
+
+    # Cleanup
+    proc.stop()
+
+    # Combine coverage data if collected
+    if collect_coverage and proc.get_coverage_file():
+        coverage_file = proc.get_coverage_file()
+        if coverage_file.exists():
+            # Copy coverage data to project root for combining
+            target = project_root / '.coverage.e2e'
+            shutil.copy(coverage_file, target)
+            logger.info(f'Coverage data saved to: {target}')
+
+
+@pytest.fixture
+def e2e_client(e2e_port, langbot_process):
+    """HTTP client for E2E testing.
+
+    Requesting ``langbot_process`` ensures the server has been started before
+    a test receives the client.
+
+    Yields:
+        An ``httpx.Client`` whose base URL is ``http://127.0.0.1:<e2e_port>``
+        with a 10 second timeout. The client is closed after the test.
+    """
+    # Imported lazily so merely collecting this conftest does not need httpx.
+    import httpx
+
+    base_url = f'http://127.0.0.1:{e2e_port}'
+
+    with httpx.Client(base_url=base_url, timeout=10.0) as client:
+        yield client
+
+
+@pytest.fixture(scope='session')
+def e2e_db_path(e2e_tmpdir):
+    """Path to SQLite database file.
+
+    Returns ``<e2e_tmpdir>/data/langbot.db``. This fixture only builds the
+    path; it does not create the file or start LangBot.
+    """
+    return e2e_tmpdir / 'data' / 'langbot.db'
--- a/tests/e2e/test_startup.py
+++ b/tests/e2e/test_startup.py
@@ -0,0 +1,142 @@
+"""E2E tests for LangBot startup flow.
+
+Tests the complete startup process including:
+- boot.py startup orchestration
+- stages/ (build_app, load_config, migrate, etc.)
+- database initialization
+- API availability
+
+Run: uv run pytest tests/e2e/test_startup.py -v -m e2e
+"""
+
+from __future__ import annotations
+
+import pytest
+
+pytestmark = pytest.mark.e2e
+
+
+class TestStartupFlow:
+    """Tests for LangBot startup process."""
+
+    def test_process_is_running(self, langbot_process):
+        """Verify LangBot process is running."""
+        assert langbot_process.is_running()
+
+    def test_health_check(self, langbot_process, e2e_port):
+        """Verify LangBot API is responding."""
+        assert langbot_process.health_check()
+
+    def test_system_info_endpoint(self, e2e_client):
+        """Test /api/v1/system/info endpoint."""
+        response = e2e_client.get('/api/v1/system/info')
+        assert response.status_code == 200
+
+        data = response.json()
+        assert data['code'] == 0
+        assert 'data' in data
+        # System info should contain version info
+        assert 'version' in data['data'] or 'edition' in data['data']
+
+    def test_database_initialized(self, e2e_db_path):
+        """Verify SQLite database was created and initialized."""
+        assert e2e_db_path.exists()
+
+        # Database should have some tables after migration
+        import sqlite3
+        conn = sqlite3.connect(str(e2e_db_path))
+        cursor = conn.cursor()
+
+        # Check that core tables exist
+        cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
+        tables = [row[0] for row in cursor.fetchall()]
+
+        # Core tables should be created by Alembic migrations
+        # Note: table names may differ (legacy_pipelines instead of pipelines)
+        expected_tables = ['legacy_pipelines', 'bots', 'model_providers', 'llm_models']
+        for table in expected_tables:
+            assert table in tables, f'Table {table} should exist. Available: {tables}'
+
+        conn.close()
+
+    def test_chroma_directory_created(self, e2e_tmpdir):
+        """Verify Chroma vector database directory was created."""
+        chroma_path = e2e_tmpdir / 'chroma'
+        # Created by the E2E config factory before startup.
+        assert chroma_path.exists()
+
+    def test_pipelines_endpoint(self, e2e_client):
+        """Test /api/v1/pipelines endpoint (requires auth)."""
+        # Without auth, should return 401
+        response = e2e_client.get('/api/v1/pipelines')
+        assert response.status_code == 401
+
+    def test_auth_endpoint(self, e2e_client, e2e_tmpdir):
+        """Test auth endpoint."""
+        # First startup may allow initial setup
+        response = e2e_client.post('/api/v1/user/auth', json={
+            'username': 'admin',
+            'password': 'admin',
+        })
+
+        # Response could be:
+        # - 200 if auth succeeds
+        # - 400 if credentials wrong
+        # - 401 if user not initialized
+        assert response.status_code in [200, 400, 401]
+
+
+class TestStartupStages:
+    """Tests that verify individual startup stages worked correctly."""
+
+    def test_config_loaded(self, e2e_client):
+        """Verify config was loaded correctly by checking API port."""
+        # If API responds on e2e_port, config was loaded
+        assert e2e_client.get('/api/v1/system/info').status_code == 200
+
+    def test_migrations_applied(self, e2e_db_path):
+        """Verify database migrations were applied."""
+        import sqlite3
+        conn = sqlite3.connect(str(e2e_db_path))
+        cursor = conn.cursor()
+
+        # Check alembic_version table exists and has version
+        cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='alembic_version';")
+        result = cursor.fetchone()
+        assert result is not None, 'alembic_version table should exist'
+
+        cursor.execute('SELECT version_num FROM alembic_version;')
+        version = cursor.fetchone()
+        assert version is not None, 'Migration version should be set'
+
+        conn.close()
+
+    def test_http_controller_initialized(self, e2e_client):
+        """Verify HTTP controller was initialized."""
+        # Multiple endpoints should be available
+        endpoints = [
+            '/api/v1/system/info',
+            '/api/v1/pipelines',
+            '/api/v1/provider/providers',
+            '/api/v1/platform/bots',
+        ]
+
+        for endpoint in endpoints:
+            response = e2e_client.get(endpoint)
+            # Should get a real route response, even if auth is required.
+            assert response.status_code in [200, 401, 403], f'{endpoint} should be registered'
+
+
+class TestMinimalStartupNoLLM:
+    """Tests verifying LangBot can start without LLM providers.
+
+    Both tests go through ``e2e_client``, so they exercise the shared E2E
+    LangBot instance rather than starting a separate one.
+    """
+
+    def test_api_available_without_llm(self, e2e_client):
+        """API should be available even without LLM providers configured."""
+        response = e2e_client.get('/api/v1/system/info')
+        assert response.status_code == 200
+
+    def test_pipeline_metadata_available(self, e2e_client):
+        """Pipeline metadata endpoint should work without LLM."""
+        # Requires auth, but endpoint should exist
+        response = e2e_client.get('/api/v1/pipelines/_/metadata')
+        assert response.status_code in [200, 401]  # Not 404 or 500
--- a/tests/e2e/utils/config_factory.py
+++ b/tests/e2e/utils/config_factory.py
@@ -0,0 +1,179 @@
+"""E2E test configuration factory.
+
+Generates minimal config.yaml for testing LangBot startup without external dependencies.
+"""
+
+from __future__ import annotations
+
+import yaml
+from pathlib import Path
+
+
+def create_minimal_config(tmpdir: Path, port: int = 15300) -> Path:
+    """Create minimal config.yaml for E2E testing.
+
+    Uses embedded databases (SQLite, Chroma) to avoid external dependencies.
+    Config is created at tmpdir/data/config.yaml (LangBot expects this location).
+    """
+    # LangBot expects config at data/config.yaml
+    data_dir = tmpdir / 'data'
+    data_dir.mkdir(parents=True, exist_ok=True)
+
+    config = {
+        'admins': [],
+        'api': {
+            'port': port,
+            'webhook_prefix': f'http://127.0.0.1:{port}',
+            'extra_webhook_prefix': '',
+        },
+        'command': {
+            'enable': True,
+            'prefix': ['!', '!'],
+            'privilege': {},
+        },
+        'concurrency': {
+            'pipeline': 20,
+            'session': 1,
+        },
+        'proxy': {
+            'http': '',
+            'https': '',
+        },
+        'system': {
+            'instance_id': '',
+            'edition': 'community',
+            'recovery_key': '',
+            'allow_modify_login_info': True,
+            'disabled_adapters': [],
+            'limitation': {
+                'max_bots': -1,
+                'max_pipelines': -1,
+                'max_extensions': -1,
+            },
+            'task_retention': {
+                'completed_limit': 200,
+            },
+            'jwt': {
+                'expire': 604800,
+                'secret': 'e2e-test-secret-key',
+            },
+        },
+        'database': {
+            'use': 'sqlite',
+            'sqlite': {
+                'path': str(tmpdir / 'data' / 'langbot.db'),
+            },
+            'postgresql': {
+                'host': '127.0.0.1',
+                'port': 5432,
+                'user': 'postgres',
+                'password': 'postgres',
+                'database': 'postgres',
+            },
+        },
+        'vdb': {
+            'use': 'chroma',  # Chroma is embedded, no external dependency
+            'chroma': {
+                'path': str(tmpdir / 'chroma'),
+            },
+            'qdrant': {
+                'url': '',
+                'host': 'localhost',
+                'port': 6333,
+                'api_key': '',
+            },
+            'seekdb': {
+                'mode': 'embedded',
+                'path': str(tmpdir / 'seekdb'),
+                'database': 'langbot',
+                'host': 'localhost',
+                'port': 2881,
+                'user': 'root',
+                'password': '',
+                'tenant': '',
+            },
+            'milvus': {
+                'uri': 'http://127.0.0.1:19530',
+                'token': '',
+                'db_name': '',
+            },
+            'pgvector': {
+                'host': '127.0.0.1',
+                'port': 5433,
+                'database': 'langbot',
+                'user': 'postgres',
+                'password': 'postgres',
+            },
+        },
+        'storage': {
+            'use': 'local',
+            'cleanup': {
+                'enabled': False,  # Disable cleanup for tests
+                'check_interval_hours': 1,
+                'uploaded_file_retention_days': 7,
+                'log_retention_days': 3,
+            },
+            'local': {
+                'path': str(tmpdir / 'storage'),
+            },
+            's3': {
+                'endpoint_url': '',
+                'access_key_id': '',
+                'secret_access_key': '',
+                'region': 'us-east-1',
+                'bucket': 'langbot-storage',
+            },
+        },
+        'plugin': {
+            'enable': False,  # Disable plugin system for minimal startup
+            'runtime_ws_url': '',
+            'enable_marketplace': False,
+            'display_plugin_debug_url': '',
+            'binary_storage': {
+                'max_value_bytes': 10485760,
+            },
+        },
+        'monitoring': {
+            'auto_cleanup': {
+                'enabled': False,  # Disable cleanup for tests
+                'retention_days': 30,
+                'check_interval_hours': 1,
+                'delete_batch_size': 1000,
+            },
+        },
+        'space': {
+            'url': 'https://space.langbot.app',
+            'models_gateway_api_url': 'https://api.langbot.cloud/v1',
+            'oauth_authorize_url': 'https://space.langbot.app/auth/authorize',
+            'disable_models_service': True,  # Disable external services
+            'disable_telemetry': True,  # Disable telemetry for tests
+        },
+        'provider': {},  # Empty providers - minimal startup
+        'llm': [],  # Empty LLM models
+    }
+
+    # Ensure data directory exists (LangBot expects config at data/config.yaml)
+    data_dir = tmpdir / 'data'
+    data_dir.mkdir(parents=True, exist_ok=True)
+
+    # Write config to data/config.yaml (LangBot's expected location)
+    config_path = data_dir / 'config.yaml'
+    with open(config_path, 'w', encoding='utf-8') as f:
+        yaml.dump(config, f, default_flow_style=False)
+
+    return config_path
+
+
+def create_test_directories(tmpdir: Path) -> dict[str, Path]:
+    """Create necessary directories for LangBot testing.
+
+    Args:
+        tmpdir: Root directory under which the sub-directories are created.
+
+    Returns:
+        Mapping of logical name (``data``, ``logs``, ``storage``, ``chroma``)
+        to the corresponding directory path beneath ``tmpdir``.
+    """
+    directories = {
+        'data': tmpdir / 'data',
+        'logs': tmpdir / 'logs',
+        'storage': tmpdir / 'storage',
+        'chroma': tmpdir / 'chroma',
+    }
+
+    # exist_ok makes the call idempotent; 'data' may already exist.
+    for path in directories.values():
+        path.mkdir(parents=True, exist_ok=True)
+
+    return directories
--- a/tests/e2e/utils/process_manager.py
+++ b/tests/e2e/utils/process_manager.py
@@ -0,0 +1,204 @@
+"""E2E test process manager.
+
+Manages LangBot subprocess lifecycle for E2E testing.
+"""
+
+from __future__ import annotations
+
+import subprocess
+import time
+import signal
+import os
+from pathlib import Path
+from typing import Optional
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+class LangBotProcess:
+    """Manages a LangBot subprocess for E2E testing."""
+
+    def __init__(
+        self,
+        project_root: Path,
+        work_dir: Path,
+        port: int = 15300,
+        timeout: int = 30,
+        collect_coverage: bool = True,
+    ):
+        self.project_root = project_root
+        self.work_dir = work_dir  # Directory containing data/config.yaml
+        self.port = port
+        self.timeout = timeout
+        self.collect_coverage = collect_coverage
+        self.process: Optional[subprocess.Popen] = None
+        self._stdout_data: bytes = b''
+        self._stderr_data: bytes = b''
+        self._coverage_file: Optional[Path] = None
+
+    def start(self) -> bool:
+        """Start LangBot process and wait for it to be ready."""
+        import httpx
+
+        # Prepare environment
+        env = os.environ.copy()
+        env['PYTHONPATH'] = str(self.project_root / 'src')
+
+        # Set API port via environment variable
+        env['API__PORT'] = str(self.port)
+        env['API__WEBHOOK_PREFIX'] = f'http://127.0.0.1:{self.port}'
+
+        # Disable telemetry
+        env['SPACE__DISABLE_TELEMETRY'] = 'true'
+        env['SPACE__DISABLE_MODELS_SERVICE'] = 'true'
+
+        # Build command
+        if self.collect_coverage:
+            # Use coverage.py to collect coverage data
+            # Set COVERAGE_PROCESS_START to enable coverage in subprocess
+            self._coverage_file = self.work_dir / '.coverage.e2e'
+            env['COVERAGE_PROCESS_START'] = str(self.project_root / '.coveragerc')
+            env['COVERAGE_FILE'] = str(self._coverage_file)
+
+            # Create .coveragerc for subprocess
+            coveragerc_content = """
+[run]
+source = langbot.pkg
+parallel = True
+data_file = {}
+omit =
+    */tests/*
+    */test_*.py
+
+[report]
+precision = 2
+""".format(str(self._coverage_file))
+            coveragerc_path = self.work_dir / '.coveragerc'
+            with open(coveragerc_path, 'w') as f:
+                f.write(coveragerc_content)
+
+            cmd = [
+                'coverage', 'run',
+                '--rcfile=' + str(coveragerc_path),
+                '-m', 'langbot',
+            ]
+        else:
+            cmd = ['uv', 'run', 'python', '-m', 'langbot']
+
+        logger.info(f'Starting LangBot in: {self.work_dir}')
+        logger.info(f'Command: {cmd}')
+
+        # Start process (run in work_dir so it finds data/config.yaml)
+        self.process = subprocess.Popen(
+            cmd,
+            cwd=self.work_dir,
+            env=env,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            preexec_fn=os.setsid if os.name != 'nt' else None,
+        )
+
+        # Wait for startup
+        start_time = time.time()
+        while time.time() - start_time < self.timeout:
+            # Check if process died
+            if self.process.poll() is not None:
+                self._stdout_data, self._stderr_data = self.process.communicate()
+                logger.error(f'LangBot process died: {self._stderr_data.decode()}')
+                return False
+
+            # Try to connect
+            try:
+                r = httpx.get(
+                    f'http://127.0.0.1:{self.port}/api/v1/system/info',
+                    timeout=2.0,
+                )
+                if r.status_code == 200:
+                    logger.info(f'LangBot started successfully on port {self.port}')
+                    return True
+            except (httpx.ConnectError, httpx.TimeoutException):
+                pass
+
+            time.sleep(1)
+
+        # Timeout
+        logger.error(f'LangBot startup timeout after {self.timeout}s')
+        self.stop()
+        return False
+
+    def stop(self) -> None:
+        """Stop LangBot process gracefully."""
+        if self.process is None:
+            return
+
+        logger.info('Stopping LangBot process...')
+
+        # Try graceful shutdown first
+        if os.name != 'nt':
+            # Send SIGTERM to process group
+            os.killpg(os.getpgid(self.process.pid), signal.SIGTERM)
+        else:
+            self.process.terminate()
+
+        # Wait for graceful shutdown
+        try:
+            self.process.wait(timeout=5)
+            logger.info('LangBot stopped gracefully')
+        except subprocess.TimeoutExpired:
+            # Force kill
+            logger.warning('Force killing LangBot process')
+            if os.name != 'nt':
+                os.killpg(os.getpgid(self.process.pid), signal.SIGKILL)
+            else:
+                self.process.kill()
+            self.process.wait()
+
+        # Collect output for debugging
+        if self.process.stdout or self.process.stderr:
+            self._stdout_data, self._stderr_data = self.process.communicate()
+
+        self.process = None
+
+    def is_running(self) -> bool:
+        """Check if process is still running."""
+        return self.process is not None and self.process.poll() is None
+
+    def get_logs(self) -> tuple[str, str]:
+        """Get stdout and stderr logs."""
+        stdout = self._stdout_data.decode('utf-8', errors='replace')
+        stderr = self._stderr_data.decode('utf-8', errors='replace')
+        return stdout, stderr
+
+    def get_coverage_file(self) -> Optional[Path]:
+        """Get coverage data file path."""
+        return self._coverage_file
+
+    def health_check(self) -> bool:
+        """Check if LangBot API is responding."""
+        import httpx
+
+        if not self.is_running():
+            return False
+
+        try:
+            r = httpx.get(
+                f'http://127.0.0.1:{self.port}/api/v1/system/info',
+                timeout=5.0,
+            )
+            return r.status_code == 200
+        except Exception:
+            return False
+
+
+def find_project_root() -> Path:
+    """Find LangBot project root directory."""
+    current = Path(__file__).resolve()
+
+    # Walk up until we find src/langbot
+    for parent in current.parents:
+        if (parent / 'src' / 'langbot').exists():
+            return parent
+
+    # Fallback to LangBot-test-build directory
+    return Path('/home/glwuy/langbot-app/LangBot-test-build')
--- a/tests/factories/init.py
+++ b/tests/factories/init.py
@@ -0,0 +1,102 @@
+"""
+Shared test factories for LangBot tests.
+
+Provides reusable factories for:
+- Fake application (app.py)
+- Messages and queries (message.py)
+- Fake providers (provider.py)
+- Fake platforms (platform.py)
+
+Usage:
+    from tests.factories import FakeApp, text_query, FakeProvider
+
+    app = FakeApp()
+    query = text_query("hello")
+    provider = FakeProvider.returns("response")
+"""
+
+from tests.factories.app import FakeApp, fake_app
+from tests.factories.message import (
+    text_chain,
+    group_text_chain,
+    mention_chain,
+    image_chain,
+    text_query,
+    group_text_query,
+    private_text_query,
+    command_query,
+    mention_query,
+    empty_query,
+    image_query,
+    file_query,
+    unsupported_query,
+    voice_query,
+    at_all_query,
+    query_with_session,
+    query_with_config,
+    friend_message_event,
+    group_message_event,
+    mock_adapter,
+)
+from tests.factories.provider import (
+    FakeProvider,
+    fake_provider,
+    fake_provider_pong,
+    fake_provider_timeout,
+    fake_provider_auth_error,
+    fake_provider_rate_limit,
+    fake_provider_malformed,
+    fake_model,
+)
+from tests.factories.platform import (
+    FakePlatform,
+    fake_platform,
+    fake_platform_with_streaming,
+    fake_platform_with_failure,
+    mock_platform_adapter,
+)
+
+__all__ = [
+    # App
+    "FakeApp",
+    "fake_app",
+    # Message chains
+    "text_chain",
+    "group_text_chain",
+    "mention_chain",
+    "image_chain",
+    # Message events
+    "friend_message_event",
+    "group_message_event",
+    # Mock adapters
+    "mock_adapter",
+    # Queries
+    "text_query",
+    "group_text_query",
+    "private_text_query",
+    "command_query",
+    "mention_query",
+    "empty_query",
+    "image_query",
+    "file_query",
+    "unsupported_query",
+    "voice_query",
+    "at_all_query",
+    "query_with_session",
+    "query_with_config",
+    # Provider
+    "FakeProvider",
+    "fake_provider",
+    "fake_provider_pong",
+    "fake_provider_timeout",
+    "fake_provider_auth_error",
+    "fake_provider_rate_limit",
+    "fake_provider_malformed",
+    "fake_model",
+    # Platform
+    "FakePlatform",
+    "fake_platform",
+    "fake_platform_with_streaming",
+    "fake_platform_with_failure",
+    "mock_platform_adapter",
+]
--- a/Show More
+++ b/Show More