test: add frontend smoke and backend e2e CI

feat(agent-runner): support host tool lookup (#2244)
fix(provider): preserve litellm usage details (#2246)
2026-06-16 18:56:02 +00:00 · 2026-06-16 10:54:57 +08:00 · 2026-06-14 11:29:57 +08:00 · 2026-06-14 11:12:29 +08:00 · 2026-06-14 10:57:53 +08:00
26 changed files with 1011 additions and 43 deletions
--- a/.github/workflows/frontend-tests.yml
+++ b/.github/workflows/frontend-tests.yml
@@ -0,0 +1,46 @@
+name: Frontend Tests
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened, ready_for_review]
+    paths:
+      - 'web/**'
+      - '.github/workflows/frontend-tests.yml'
+  push:
+    branches:
+      - master
+      - develop
+    paths:
+      - 'web/**'
+      - '.github/workflows/frontend-tests.yml'
+
+jobs:
+  playwright-smoke:
+    name: Playwright Smoke
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Setup Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: '25'  # NOTE(review): odd-numbered Node.js lines never become LTS — confirm this pin is intended
+
+      - name: Install pnpm
+        uses: pnpm/action-setup@v4
+        with:
+          version: 8.9.2
+
+      - name: Install dependencies
+        working-directory: web
+        run: pnpm install --frozen-lockfile
+
+      - name: Install Playwright browsers
+        working-directory: web
+        run: pnpm exec playwright install --with-deps chromium
+
+      - name: Run Playwright smoke tests
+        working-directory: web
+        run: pnpm test:e2e
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -29,7 +29,7 @@ jobs:
        run: uv sync --dev

      - name: Run ruff check
-        run: uv run ruff check src
+        run: uv run ruff check src/langbot/ tests/ --output-format=concise

      - name: Run ruff format
        run: uv run ruff format src --check
--- a/.github/workflows/run-tests.yml
+++ b/.github/workflows/run-tests.yml
@@ -84,6 +84,67 @@ jobs:
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "Test Status: ${{ job.status }}" >> $GITHUB_STEP_SUMMARY

+  e2e:
+    name: E2E Startup Tests
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+
+      - name: Install dependencies
+        run: uv sync --dev
+
+      - name: Run E2E startup tests
+        run: uv run pytest tests/e2e -q --tb=short
+
+      - name: E2E Test Summary
+        if: always()
+        run: |
+          echo "## E2E Startup Test Results" >> $GITHUB_STEP_SUMMARY
+          echo "" >> $GITHUB_STEP_SUMMARY
+          echo "Test Status: ${{ job.status }}" >> $GITHUB_STEP_SUMMARY
+
+  box-integration:
+    name: Box Integration Tests
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+
+      - name: Install dependencies
+        run: uv sync --dev
+
+      - name: Check Docker runtime
+        run: docker info
+
+      - name: Run Box integration tests
+        run: uv run pytest tests/integration_tests -q --tb=short
+
+      - name: Box Integration Test Summary
+        if: always()
+        run: |
+          echo "## Box Integration Test Results" >> $GITHUB_STEP_SUMMARY
+          echo "" >> $GITHUB_STEP_SUMMARY
+          echo "Test Status: ${{ job.status }}" >> $GITHUB_STEP_SUMMARY
+
  coverage:
    name: Coverage Gate
    runs-on: ubuntu-latest
@@ -129,4 +190,4 @@ jobs:
          echo "## Coverage Results" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "Threshold: 18%" >> $GITHUB_STEP_SUMMARY
-          echo "Status: ${{ job.status }}" >> $GITHUB_STEP_SUMMARY
+          echo "Status: ${{ job.status }}" >> $GITHUB_STEP_SUMMARY
--- a/src/langbot/pkg/platform/sources/web_page_bot_adapter.py
+++ b/src/langbot/pkg/platform/sources/web_page_bot_adapter.py
@@ -84,6 +84,18 @@ class WebPageBotAdapter(abstract_platform_adapter.AbstractMessagePlatformAdapter
    ):
        self.listeners.pop(event_type, None)

+    async def is_stream_output_supported(self) -> bool:
+        """Return ws_adapter's stream-output support, or False when no ws_adapter is set."""
+        if self._ws_adapter is not None:
+            return await self._ws_adapter.is_stream_output_supported()
+        return False
+
+    async def create_message_card(self, message_id: str | int, event: platform_events.MessageEvent) -> bool:
+        """Delegate card creation to ws_adapter and return its result; False when no ws_adapter is set."""
+        if self._ws_adapter is not None:
+            return await self._ws_adapter.create_message_card(message_id, event)
+        return False
+
    async def is_muted(self, group_id: int) -> bool:
        return False

--- a/src/langbot/pkg/provider/modelmgr/requester.py
+++ b/src/langbot/pkg/provider/modelmgr/requester.py
@@ -12,6 +12,19 @@ import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
 import langbot_plugin.api.entities.builtin.provider.message as provider_message


+LLM_USAGE_QUERY_VARIABLE = '_llm_usage'
+STREAM_USAGE_QUERY_VARIABLE = '_stream_usage'
+
+
+def _store_llm_usage(query: pipeline_query.Query | None, usage_info: dict | None) -> None:
+    """Store the latest provider usage on the query for upstream action handlers."""
+    if query is None or not usage_info:  # nothing to record without a query or with empty/None usage
+        return
+    if query.variables is None:
+        query.variables = {}
+    query.variables[LLM_USAGE_QUERY_VARIABLE] = dict(usage_info)  # shallow copy; nested values are shared
+
+
 class RuntimeProvider:
    """运行时模型提供商"""

@@ -67,6 +80,7 @@ class RuntimeProvider:
            if isinstance(result, tuple):
                msg, usage_info = result
                if usage_info:
+                    _store_llm_usage(query, usage_info)
                    input_tokens = usage_info.get('prompt_tokens', 0)
                    output_tokens = usage_info.get('completion_tokens', 0)
                return msg
@@ -146,11 +160,12 @@ class RuntimeProvider:
            if query:
                if query.variables is None:
                    query.variables = {}
-                if '_stream_usage' in query.variables:
-                    usage_info = query.variables['_stream_usage']
+                if STREAM_USAGE_QUERY_VARIABLE in query.variables:
+                    usage_info = query.variables[STREAM_USAGE_QUERY_VARIABLE]
+                    _store_llm_usage(query, usage_info)
                    input_tokens = usage_info.get('prompt_tokens', 0)
                    output_tokens = usage_info.get('completion_tokens', 0)
-                    del query.variables['_stream_usage']
+                    del query.variables[STREAM_USAGE_QUERY_VARIABLE]
        except Exception as e:
            status = 'error'
            error_message = str(e)
--- a/src/langbot/pkg/provider/modelmgr/requesters/litellmchat.py
+++ b/src/langbot/pkg/provider/modelmgr/requesters/litellmchat.py
@@ -262,32 +262,82 @@ class LiteLLMRequester(requester.ProviderAPIRequester):
        - dict with the same keys
        - missing ``total_tokens`` (derived from prompt + completion)
        - ``None`` / partially-populated usage (defaults to 0)
+        - provider-specific token details, including cache token counters
        """
-        if usage is None:
-            return {'prompt_tokens': 0, 'completion_tokens': 0, 'total_tokens': 0}

-        def _get(key: str) -> typing.Any:
-            if isinstance(usage, dict):
-                return usage.get(key)
-            return getattr(usage, key, None)
+        def _plain_value(value: typing.Any) -> typing.Any:
+            if value is None:
+                return None
+            if isinstance(value, dict):
+                return {k: _plain_value(v) for k, v in value.items() if v is not None}
+            if isinstance(value, (list, tuple)):
+                return [_plain_value(v) for v in value]

-        prompt_tokens = _get('prompt_tokens') or 0
-        completion_tokens = _get('completion_tokens') or 0
-        total_tokens = _get('total_tokens') or 0
+            model_dump = getattr(value, 'model_dump', None)
+            if callable(model_dump):
+                try:
+                    dumped = model_dump()
+                    if isinstance(dumped, dict):
+                        return _plain_value(dumped)
+                except Exception:
+                    pass
+
+            return value
+
+        def _usage_dict(value: typing.Any) -> dict[str, typing.Any]:
+            if value is None:
+                return {}
+            plain = _plain_value(value)
+            if isinstance(plain, dict):
+                return plain
+
+            def _is_mock_attr(attr: typing.Any) -> bool:
+                return type(attr).__module__.startswith('unittest.mock')
+
+            data: dict[str, typing.Any] = {}
+            for key in (
+                'prompt_tokens',
+                'completion_tokens',
+                'total_tokens',
+                'prompt_tokens_details',
+                'completion_tokens_details',
+                'cache_creation_input_tokens',
+                'cache_read_input_tokens',
+                'input_token_details',
+                'output_token_details',
+            ):
+                attr_value = getattr(value, key, None)
+                if attr_value is not None and not _is_mock_attr(attr_value):
+                    data[key] = _plain_value(attr_value)
+            return data
+
+        def _to_int(value: typing.Any) -> int:
+            try:
+                return int(value or 0)
+            except (TypeError, ValueError):
+                return 0
+
+        normalized = _usage_dict(usage)
+
+        prompt_tokens = _to_int(normalized.get('prompt_tokens'))
+        completion_tokens = _to_int(normalized.get('completion_tokens'))
+        total_tokens = _to_int(normalized.get('total_tokens'))

        # Some providers omit total_tokens in streaming usage; derive it.
        if not total_tokens:
            total_tokens = prompt_tokens + completion_tokens

-        return {
-            'prompt_tokens': int(prompt_tokens),
-            'completion_tokens': int(completion_tokens),
-            'total_tokens': int(total_tokens),
-        }
+        normalized['prompt_tokens'] = prompt_tokens
+        normalized['completion_tokens'] = completion_tokens
+        normalized['total_tokens'] = total_tokens
+        return normalized

-    def _extract_usage(self, response) -> dict:
+    def _extract_usage(self, response) -> dict | None:
        """Extract usage info from a non-streaming LiteLLM response."""
-        return self._normalize_usage(getattr(response, 'usage', None))
+        usage = getattr(response, 'usage', None)
+        if usage is None:
+            return None
+        return self._normalize_usage(usage)

    @staticmethod
    def _as_dict(value: typing.Any) -> dict:
@@ -486,7 +536,7 @@ class LiteLLMRequester(requester.ProviderAPIRequester):
                    if query is not None:
                        if query.variables is None:
                            query.variables = {}
-                        query.variables['_stream_usage'] = usage_info
+                        query.variables[requester.STREAM_USAGE_QUERY_VARIABLE] = usage_info

                if not hasattr(chunk, 'choices') or not chunk.choices:
                    continue
--- a/src/langbot/pkg/provider/tools/errors.py
+++ b/src/langbot/pkg/provider/tools/errors.py
@@ -0,0 +1,6 @@
+class ToolNotFoundError(ValueError):
+    """Raised when a requested tool is not found; subclasses ValueError so ``except ValueError`` still matches."""
+
+    def __init__(self, name: str):
+        self.name = name  # exposes the missing tool's name as an attribute, separate from the message text
+        super().__init__(f'Tool not found: {name}')
--- a/src/langbot/pkg/provider/tools/loader.py
+++ b/src/langbot/pkg/provider/tools/loader.py
@@ -4,12 +4,15 @@ import abc
 import typing
 from typing import TYPE_CHECKING

+from langbot_plugin.api.definition.components.manifest import ComponentManifest
 from langbot_plugin.api.entities.events import pipeline_query
 import langbot_plugin.api.entities.builtin.resource.tool as resource_tool

 if TYPE_CHECKING:
    from ...core import app

+ToolLookupResult = resource_tool.LLMTool | ComponentManifest
+

 preregistered_loaders: list[typing.Type[ToolLoader]] = []

@@ -43,6 +46,13 @@ class ToolLoader(abc.ABC):
        """获取所有工具"""
        pass

+    async def get_tool(self, name: str) -> ToolLookupResult | None:
+        """Return the first tool from get_tools() whose ``name`` attribute equals *name*, else None."""
+        for tool in await self.get_tools():
+            if tool.name == name:  # NOTE(review): assumes items expose .name; manifest tools (metadata.name) need an override
+                return tool
+        return None
+
    @abc.abstractmethod
    async def has_tool(self, name: str) -> bool:
        """检查工具是否存在"""
--- a/src/langbot/pkg/provider/tools/loaders/mcp.py
+++ b/src/langbot/pkg/provider/tools/loaders/mcp.py
@@ -567,6 +567,13 @@ class MCPLoader(loader.ToolLoader):
                    return True
        return False

+    async def get_tool(self, name: str) -> resource_tool.LLMTool | None:  # first match across sessions, else None
+        for session in self.sessions.values():
+            for function in session.get_tools():
+                if function.name == name:
+                    return function
+        return None
+
    async def invoke_tool(self, name: str, parameters: dict, query: pipeline_query.Query) -> typing.Any:
        """执行工具调用"""
        for session in self.sessions.values():
--- a/src/langbot/pkg/provider/tools/loaders/native.py
+++ b/src/langbot/pkg/provider/tools/loaders/native.py
@@ -7,6 +7,7 @@ import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
 from langbot_plugin.api.entities.events import pipeline_query

 from .. import loader
+from ..errors import ToolNotFoundError
 from . import skill as skill_loader

 EXEC_TOOL_NAME = 'exec'
@@ -90,7 +91,7 @@ class NativeToolLoader(loader.ToolLoader):
            return await self._invoke_glob(parameters, query)
        if name == GREP_TOOL_NAME:
            return await self._invoke_grep(parameters, query)
-        raise ValueError(f'未找到工具: {name}')
+        raise ToolNotFoundError(name)

    async def shutdown(self):
        pass
--- a/src/langbot/pkg/provider/tools/loaders/plugin.py
+++ b/src/langbot/pkg/provider/tools/loaders/plugin.py
@@ -3,6 +3,7 @@ from __future__ import annotations
 import typing
 import traceback

+from langbot_plugin.api.definition.components.manifest import ComponentManifest
 from langbot_plugin.api.entities.events import pipeline_query

 from .. import loader
@@ -39,7 +40,7 @@ class PluginToolLoader(loader.ToolLoader):
                return True
        return False

-    async def _get_tool(self, name: str) -> resource_tool.LLMTool:
+    async def get_tool(self, name: str) -> ComponentManifest | None:
        for tool in await self.ap.plugin_connector.list_tools():
            if tool.metadata.name == name:
                return tool
--- a/src/langbot/pkg/provider/tools/toolmgr.py
+++ b/src/langbot/pkg/provider/tools/toolmgr.py
@@ -6,6 +6,9 @@ from typing import TYPE_CHECKING
 import langbot_plugin.api.entities.builtin.resource.tool as resource_tool
 from langbot_plugin.api.entities.events import pipeline_query

+from . import loader as tool_loader
+from .errors import ToolNotFoundError
+
 if TYPE_CHECKING:
    from ...core import app
    from langbot.pkg.provider.tools.loaders import (
@@ -67,6 +70,20 @@ class ToolManager:

        return all_functions

+    async def get_tool_by_name(self, name: str) -> tool_loader.ToolLookupResult | None:
+        """Return the first match for ``name`` across loaders (native, plugin, MCP, skill), else None."""
+        for active_loader in (
+            self.native_tool_loader,
+            self.plugin_tool_loader,
+            self.mcp_tool_loader,
+            self.skill_tool_loader,
+        ):
+            tool = await active_loader.get_tool(name)
+            if tool is not None:  # identity check: only None means "not found", a falsy tool object still counts
+                return tool
+
+        return None
+
    async def generate_tools_for_openai(self, use_funcs: list[resource_tool.LLMTool]) -> list:
        tools = []

@@ -98,7 +115,7 @@ class ToolManager:
        if await self.skill_tool_loader.has_tool(name):
            telemetry_features.increment(query, 'tool_calls', 'skill')
            return await self.skill_tool_loader.invoke_tool(name, parameters, query)
-        raise ValueError(f'未找到工具: {name}')
+        raise ToolNotFoundError(name)

    async def shutdown(self):
        await self.native_tool_loader.shutdown()
--- a/tests/README.md
+++ b/tests/README.md
@@ -1,6 +1,7 @@
 # LangBot Test Suite

-This directory contains the test suite for LangBot, with a focus on comprehensive unit testing of pipeline stages.
+This directory contains the LangBot backend test suite, including unit tests,
+integration tests, startup E2E tests, and container-backed Box runtime tests.

 ## Quality Gate Layers

@@ -10,10 +11,15 @@ LangBot uses a layered quality gate system for developers and CI:
 |-------|---------|--------------|-------------|
 | **Quick** | `make test-quick` or `bash scripts/test-quick.sh` | Ruff lint + Unit tests + Smoke tests | Before every commit |
 | **Fast Integration** | `make test-integration-fast` or `bash scripts/test-integration-fast.sh` | SQLite/API/Pipeline integration (no external services) | Before PR, weekly |
+| **Backend E2E** | `uv run --python 3.12 pytest tests/e2e -q --tb=short` | Starts a real LangBot process with minimal config | Before release, CI |
+| **Box Integration** | `uv run --python 3.12 pytest tests/integration_tests -q --tb=short` | Real Box sandbox/runtime integration | Before Box/runtime changes, CI |
+| **Frontend E2E** | `cd web && pnpm test:e2e` | Playwright smoke tests with mocked backend and Space APIs | Before web changes, CI |
 | **Coverage Gate** | `make test-coverage` or `bash scripts/test-coverage.sh` | All tests with coverage, threshold: 18% | Before merge, CI |
 | **Full Local** | `make test-all-local` | Quick + Integration + Coverage | Before major changes |

-**Note**: PostgreSQL migration tests and slow tests are NOT in local default gates. They run in separate CI workflows.
+**Note**: PostgreSQL migration tests and slow tests are NOT in local default
+gates. They run in separate CI workflows. Frontend Playwright tests live under
+`web/tests/e2e` and are documented in `web/README.md`.

 ### Developer Workflow

@@ -28,6 +34,9 @@ make test-all-local
 bash scripts/test-quick.sh           # ~2 min
 bash scripts/test-integration-fast.sh # ~3 min
 bash scripts/test-coverage.sh         # ~8 min
+uv run --python 3.12 pytest tests/e2e -q --tb=short
+uv run --python 3.12 pytest tests/integration_tests -q --tb=short
+cd web && pnpm test:e2e
 ```

 ### Coverage Baseline
@@ -70,6 +79,12 @@ tests/
 │   └── persistence/             # Database/persistence tests
 │       ├── __init__.py
 │       └── test_migrations.py   # Alembic migration tests
+├── e2e/                          # Real LangBot startup E2E tests
+│   ├── conftest.py
+│   ├── test_startup.py
+│   └── utils/
+├── integration_tests/            # Container-backed integration tests
+│   └── box/                      # Box runtime and MCP process tests
 ├── smoke/                        # Smoke tests (quick validation)
 │   └── test_fake_message_flow.py
 ├── unit_tests/                   # Unit tests
@@ -303,6 +318,44 @@ These tests:
 - Test prevent_default, exception handling, and full message flow
 - Do not require real LLM provider keys

+### Running backend E2E startup tests
+
+Backend E2E tests start a real LangBot process with a generated minimal
+`data/config.yaml`, SQLite database, local storage, and embedded Chroma path.
+They do not require provider keys or external services.
+
+```bash
+uv run --python 3.12 pytest tests/e2e -q --tb=short
+```
+
+These tests verify startup orchestration, migrations, API route registration,
+and the minimal no-LLM startup path. The E2E process manager disables ambient
+proxy variables for subprocess startup and uses direct localhost HTTP clients,
+so local proxy settings should not affect the health checks.
+
+### Running Box integration tests
+
+Box integration tests exercise the real sandbox runtime path, including command
+execution, session persistence, managed process WebSocket attachment, and
+cleanup behavior.
+
+```bash
+uv run --python 3.12 pytest tests/integration_tests -q --tb=short
+```
+
+These tests require a working Docker or Podman runtime. In CI, the dedicated
+Box integration job checks Docker availability before running the tests.
+
+### Running frontend E2E tests
+
+Frontend E2E tests live in `web/tests/e2e` and use Playwright. They start Vite
+and mock the LangBot backend and Space APIs, so no backend process is required.
+
+```bash
+cd web
+pnpm test:e2e
+```
+
 ### Known Issues

 Some tests may encounter circular import errors. This is a known issue with the current module structure. The test infrastructure is designed to work around this using lazy imports, but if you encounter issues:
@@ -320,6 +373,9 @@ Tests are automatically run on:
 - Push to master/develop branches

 The workflow runs tests on Python 3.11, 3.12, and 3.13 to ensure compatibility.
+Startup E2E and Box integration tests run as separate Python 3.12 jobs because
+they exercise process/container behavior instead of pure Python compatibility.
+Frontend Playwright smoke tests run in `.github/workflows/frontend-tests.yml`.

 ## Adding New Tests

@@ -406,4 +462,4 @@ Check that you're mocking at the right level and using `AsyncMock` for async fun
 - [ ] Add E2E tests
 - [ ] Add performance benchmarks
 - [ ] Add mutation testing for better coverage quality
- [ ] Add property-based testing with Hypothesis
+- [ ] Add property-based testing with Hypothesis
--- a/tests/e2e/conftest.py
+++ b/tests/e2e/conftest.py
@@ -92,11 +92,11 @@ def e2e_client(e2e_port, langbot_process):

    base_url = f'http://127.0.0.1:{e2e_port}'

-    with httpx.Client(base_url=base_url, timeout=10.0) as client:
+    with httpx.Client(base_url=base_url, timeout=10.0, trust_env=False) as client:
        yield client


@pytest.fixture(scope='session')
 def e2e_db_path(e2e_tmpdir):
    """Path to SQLite database file."""
-    return e2e_tmpdir / 'data' / 'langbot.db'
+    return e2e_tmpdir / 'data' / 'langbot.db'
--- a/tests/e2e/test_startup.py
+++ b/tests/e2e/test_startup.py
@@ -38,7 +38,7 @@ class TestStartupFlow:
        # System info should contain version info
        assert 'version' in data['data'] or 'edition' in data['data']

-    def test_database_initialized(self, e2e_db_path):
+    def test_database_initialized(self, langbot_process, e2e_db_path):
        """Verify SQLite database was created and initialized."""
        assert e2e_db_path.exists()

@@ -75,7 +75,7 @@ class TestStartupFlow:
        """Test auth endpoint."""
        # First startup may allow initial setup
        response = e2e_client.post('/api/v1/user/auth', json={
-            'username': 'admin',
+            'user': 'admin',
            'password': 'admin',
        })

@@ -94,7 +94,7 @@ class TestStartupStages:
        # If API responds on e2e_port, config was loaded
        assert e2e_client.get('/api/v1/system/info').status_code == 200

-    def test_migrations_applied(self, e2e_db_path):
+    def test_migrations_applied(self, langbot_process, e2e_db_path):
        """Verify database migrations were applied."""
        import sqlite3
        conn = sqlite3.connect(str(e2e_db_path))
--- a/tests/e2e/utils/process_manager.py
+++ b/tests/e2e/utils/process_manager.py
@@ -44,6 +44,17 @@ class LangBotProcess:
        # Prepare environment
        env = os.environ.copy()
        env['PYTHONPATH'] = str(self.project_root / 'src')
+        for proxy_key in (
+            'HTTP_PROXY',
+            'HTTPS_PROXY',
+            'ALL_PROXY',
+            'http_proxy',
+            'https_proxy',
+            'all_proxy',
+        ):
+            env.pop(proxy_key, None)
+        env['NO_PROXY'] = '127.0.0.1,localhost'
+        env['no_proxy'] = '127.0.0.1,localhost'

        # Set API port via environment variable
        env['API__PORT'] = str(self.port)
@@ -113,6 +124,8 @@ precision = 2
                r = httpx.get(
                    f'http://127.0.0.1:{self.port}/api/v1/system/info',
                    timeout=2.0,
+                    follow_redirects=False,
+                    trust_env=False,
                )
                if r.status_code == 200:
                    logger.info(f'LangBot started successfully on port {self.port}')
@@ -185,6 +198,8 @@ precision = 2
            r = httpx.get(
                f'http://127.0.0.1:{self.port}/api/v1/system/info',
                timeout=5.0,
+                follow_redirects=False,
+                trust_env=False,
            )
            return r.status_code == 200
        except Exception:
@@ -201,4 +216,4 @@ def find_project_root() -> Path:
            return parent

    # Fallback to LangBot-test-build directory
-    return Path('/home/glwuy/langbot-app/LangBot-test-build')
+    return Path('/home/glwuy/langbot-app/LangBot-test-build')
--- a/tests/unit_tests/provider/test_litellmchat.py
+++ b/tests/unit_tests/provider/test_litellmchat.py
@@ -115,6 +115,15 @@ class TestExtractUsage:
        assert result['prompt_tokens'] == 0
        assert result['completion_tokens'] == 0

+    def test_extract_usage_without_provider_usage(self):
+        """Missing provider usage is not treated as authoritative zero usage."""
+        requester = litellmchat.LiteLLMRequester(ap=Mock(), config={})
+
+        response = Mock()
+        response.usage = None
+
+        assert requester._extract_usage(response) is None
+

 class TestNormalizeUsage:
    """Test _normalize_usage helper covering real-world usage shapes"""
@@ -131,6 +140,22 @@ class TestNormalizeUsage:
        )
        assert result == {'prompt_tokens': 12, 'completion_tokens': 8, 'total_tokens': 20}

+    def test_preserves_token_details(self):
+        """Provider token details such as cache counters are preserved."""
+        result = litellmchat.LiteLLMRequester._normalize_usage(
+            {
+                'prompt_tokens': 12,
+                'completion_tokens': 8,
+                'total_tokens': 20,
+                'prompt_tokens_details': {'cached_tokens': 7},
+                'completion_tokens_details': {'reasoning_tokens': 3},
+            }
+        )
+
+        assert result['prompt_tokens'] == 12
+        assert result['prompt_tokens_details'] == {'cached_tokens': 7}
+        assert result['completion_tokens_details'] == {'reasoning_tokens': 3}
+
    def test_missing_total_is_derived(self):
        """When total_tokens is absent/zero it is derived from prompt + completion"""
        usage = Mock()
@@ -166,9 +191,7 @@ class TestInvokeLLMStreamUsage:
        if has_choice:
            choice = Mock()
            delta = Mock()
-            delta.model_dump = Mock(
-                return_value={'role': 'assistant', 'content': content, 'tool_calls': tool_calls}
-            )
+            delta.model_dump = Mock(return_value={'role': 'assistant', 'content': content, 'tool_calls': tool_calls})
            choice.delta = delta
            choice.finish_reason = finish_reason
            chunk.choices = [choice]
@@ -313,7 +336,8 @@ class TestInvokeLLMStreamUsage:

        with patch.object(litellmchat, 'acompletion', new=AsyncMock(side_effect=lambda **kw: _aiter())):
            collected = [
-                chunk async for chunk in requester.invoke_llm_stream(
+                chunk
+                async for chunk in requester.invoke_llm_stream(
                    query=query,
                    model=model,
                    messages=messages,
@@ -788,7 +812,9 @@ class TestInvokeRerank:
        with patch('httpx.AsyncClient', return_value=mock_client):
            # arerank must NOT be called on the openai-compatible path
            with patch.object(
-                litellmchat, 'arerank', new_callable=AsyncMock,
+                litellmchat,
+                'arerank',
+                new_callable=AsyncMock,
                side_effect=AssertionError('arerank must not be used for openai-compatible provider'),
            ):
                results = await requester.invoke_rerank(
@@ -1068,8 +1094,7 @@ class TestScanModels:

        with patch.object(litellmchat.litellm, 'supports_function_calling') as mock_supports_function_calling:
            mock_supports_function_calling.side_effect = (
-                lambda model, custom_llm_provider=None: model == 'moonshot/kimi-k2.6'
-                and custom_llm_provider is None
+                lambda model, custom_llm_provider=None: model == 'moonshot/kimi-k2.6' and custom_llm_provider is None
            )

            assert requester._supports_function_calling('kimi-k2.6') is True
--- a/tests/unit_tests/provider/test_tool_manager.py
+++ b/tests/unit_tests/provider/test_tool_manager.py
@@ -226,7 +226,7 @@ class TestToolManagerExecuteFuncCall:

    @pytest.mark.asyncio
    async def test_execute_raises_when_tool_not_found(self, mock_app_with_loaders, sample_query):
-        """Test that execute_func_call raises ValueError when tool not found."""
+        """Test that execute_func_call raises ToolNotFoundError when tool not found."""
        toolmgr = get_toolmgr_module()

        mock_app, mock_plugin_loader, mock_mcp_loader = mock_app_with_loaders
@@ -236,7 +236,7 @@ class TestToolManagerExecuteFuncCall:
        manager = toolmgr.ToolManager(mock_app)
        self._wire_loaders(manager, mock_app, mock_plugin_loader, mock_mcp_loader)

-        with pytest.raises(ValueError, match='未找到工具'):
+        with pytest.raises(toolmgr.ToolNotFoundError, match='Tool not found: unknown_tool'):
            await manager.execute_func_call('unknown_tool', {}, sample_query)

    @pytest.mark.asyncio
--- a/web/.gitignore
+++ b/web/.gitignore
@@ -12,6 +12,8 @@

 # testing
 /coverage
+/playwright-report
+/test-results

 # next.js
 /dist/
--- a/web/README.md
+++ b/web/README.md
@@ -1,3 +1,13 @@
 # Debug LangBot Frontend

 Please refer to the [Development Guide](https://link.langbot.app/en/docs/dev-config) for more information.
+
+## Tests
+
+Run the frontend smoke tests without a backend process:
+
+```bash
+pnpm test:e2e
+```
+
+The Playwright suite starts Vite and mocks the LangBot backend and Space APIs.
--- a/web/package.json
+++ b/web/package.json
@@ -6,6 +6,7 @@
    "dev": "vite",
    "build": "tsc && vite build",
    "preview": "vite preview",
+    "test:e2e": "playwright test",
    "lint": "eslint .",
    "format": "prettier --write ."
  },
@@ -86,6 +87,7 @@
    "zod": "^3.24.4"
  },
  "devDependencies": {
+    "@playwright/test": "^1.61.0",
    "@types/debug": "^4.1.12",
    "@types/estree": "^1.0.8",
    "@types/estree-jsx": "^1.0.5",
--- a/web/playwright.config.ts
+++ b/web/playwright.config.ts
@@ -0,0 +1,25 @@
+import { defineConfig, devices } from '@playwright/test';
+
+export default defineConfig({
+  testDir: './tests/e2e',
+  fullyParallel: true,
+  forbidOnly: !!process.env.CI,
+  retries: process.env.CI ? 1 : 0,
+  reporter: process.env.CI ? [['github'], ['list']] : 'list',
+  use: {
+    baseURL: 'http://127.0.0.1:4173',
+    trace: 'on-first-retry',
+  },
+  projects: [
+    {
+      name: 'chromium',
+      use: { ...devices['Desktop Chrome'] },
+    },
+  ],
+  webServer: {
+    command: 'pnpm exec vite --host 127.0.0.1 --port 4173',
+    url: 'http://127.0.0.1:4173',
+    reuseExistingServer: !process.env.CI,
+    timeout: 120_000,
+  },
+});
--- a/web/pnpm-lock.yaml
+++ b/web/pnpm-lock.yaml
@@ -192,6 +192,9 @@ dependencies:
    version: 3.25.76

 devDependencies:
+  '@playwright/test':
+    specifier: ^1.61.0
+    version: 1.61.0
  '@types/debug':
    specifier: ^4.1.12
    version: 4.1.12
@@ -529,6 +532,14 @@ packages:
    engines: {node: ^12.20.0 || ^14.18.0 || >=16.0.0}
    dev: true

+  /@playwright/test@1.61.0:
+    resolution: {integrity: sha512-cKA5B6lpFEMyMGjxF54QihfYpB4FkEGH+qZhtArDEG+wezQAJY8Pq6C7T1SjWz+FFzt3TbyoXBQYk/0292TdJA==}
+    engines: {node: '>=18'}
+    hasBin: true
+    dependencies:
+      playwright: 1.61.0
+    dev: true
+
  /@radix-ui/number@1.1.1:
    resolution: {integrity: sha512-MkKCwxlXTgz6CFoJx3pCwn07GKp36+aZyu/u2Ln2VrA5DcdyCZkASEDBTd8x5whTQQL5CiYf4prXKLcgQdv29g==}
    dev: false
@@ -3204,6 +3215,14 @@ packages:
    engines: {node: '>=0.4.x'}
    dev: false

+  /fsevents@2.3.2:
+    resolution: {integrity: sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==}
+    engines: {node: ^8.16.0 || ^10.6.0 || >=11.0.0}
+    os: [darwin]
+    requiresBuild: true
+    dev: true
+    optional: true
+
  /fsevents@2.3.3:
    resolution: {integrity: sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==}
    engines: {node: ^8.16.0 || ^10.6.0 || >=11.0.0}
@@ -4940,6 +4959,22 @@ packages:
    hasBin: true
    dev: true

+  /playwright-core@1.61.0:
+    resolution: {integrity: sha512-caX7TrY3Ml6egyDX0WUcTHDxodl/b51y5wJOdCEA36QviK/s2g081hvmGs8eaE3DWb6NYZQ6BjO/QkNRPenoPA==}
+    engines: {node: '>=18'}
+    hasBin: true
+    dev: true
+
+  /playwright@1.61.0:
+    resolution: {integrity: sha512-Z+7BeeqQPRRzklHsVFP4KTGIyMxKUmfeRA4WisM6G3/XW6nwGeX6fX9qYaDa+CiUqpOkb2f6X3nar05R3kSuJQ==}
+    engines: {node: '>=18'}
+    hasBin: true
+    dependencies:
+      playwright-core: 1.61.0
+    optionalDependencies:
+      fsevents: 2.3.2
+    dev: true
+
  /pngjs@5.0.0:
    resolution: {integrity: sha512-40QW5YalBNfQo5yRYmiw7Yz6TKKVr3h6970B2YE+3fQpsWcrbj1PzJgxeJ19DRQjhMbKPIuMY8rFaXc8moolVw==}
    engines: {node: '>=10.13.0'}
--- a/web/tests/e2e/fixtures/langbot-api.ts
+++ b/web/tests/e2e/fixtures/langbot-api.ts
@@ -0,0 +1,417 @@
+import { Page, Route } from '@playwright/test';
+
+type JsonRecord = Record<string, unknown>;
+
+interface SkillMock {
+  name: string;
+  display_name: string;
+  description: string;
+  instructions: string;
+  package_root: string;
+  updated_at: string;
+}
+
+interface LangBotApiMockState {
+  skills: SkillMock[];
+}
+
+function ok(data: unknown) {
+  return {
+    code: 0,
+    message: 'ok',
+    data,
+    timestamp: Date.now(),
+  };
+}
+
+async function fulfillJson(route: Route, data: unknown) {
+  await route.fulfill({
+    status: 200,
+    contentType: 'application/json',
+    body: JSON.stringify(ok(data)),
+  });
+}
+
+function routePath(route: Route) {
+  return new URL(route.request().url()).pathname;
+}
+
+function emptyMonitoringData() {
+  return {
+    overview: {
+      total_messages: 0,
+      llm_calls: 0,
+      embedding_calls: 0,
+      model_calls: 0,
+      success_rate: 0,
+      active_sessions: 0,
+    },
+    messages: [],
+    llmCalls: [],
+    embeddingCalls: [],
+    sessions: [],
+    errors: [],
+    totalCount: {
+      messages: 0,
+      llmCalls: 0,
+      embeddingCalls: 0,
+      sessions: 0,
+      errors: 0,
+    },
+  };
+}
+
+function emptyTokenStatistics() {
+  return {
+    summary: {
+      total_calls: 0,
+      success_calls: 0,
+      error_calls: 0,
+      total_input_tokens: 0,
+      total_output_tokens: 0,
+      total_tokens: 0,
+      total_cost: 0,
+      avg_tokens_per_call: 0,
+      avg_duration_ms: 0,
+      avg_tokens_per_second: 0,
+      zero_token_success_calls: 0,
+    },
+    by_model: [],
+    timeseries: [],
+    bucket: 'day',
+  };
+}
+
+function makeSkill(data: JsonRecord): SkillMock {
+  return {
+    name: String(data.name || ''),
+    display_name: String(data.display_name || ''),
+    description: String(data.description || ''),
+    instructions: String(data.instructions || ''),
+    package_root: String(data.package_root || ''),
+    updated_at: new Date().toISOString(),
+  };
+}
+
+async function handleBackendApi(route: Route, state: LangBotApiMockState) {
+  const request = route.request();
+  const url = new URL(request.url());
+  const path = url.pathname;
+  const method = request.method();
+
+  if (path === '/api/v1/system/info') {
+    return fulfillJson(route, {
+      debug: false,
+      version: 'frontend-smoke',
+      edition: 'community',
+      cloud_service_url: 'https://space.langbot.app',
+      enable_marketplace: true,
+      allow_modify_login_info: true,
+      disable_models_service: false,
+      limitation: {
+        max_bots: -1,
+        max_pipelines: -1,
+        max_extensions: -1,
+      },
+      outbound_ips: [],
+      wizard_status: 'completed',
+      wizard_progress: null,
+    });
+  }
+
+  if (path === '/api/v1/user/account-info') {
+    return fulfillJson(route, {
+      initialized: true,
+      account_type: 'local',
+      has_password: true,
+    });
+  }
+
+  if (path === '/api/v1/user/check-token') {
+    return fulfillJson(route, { token: '' });
+  }
+
+  if (path === '/api/v1/user/auth') {
+    return fulfillJson(route, { token: 'playwright-token' });
+  }
+
+  if (path === '/api/v1/user/info') {
+    return fulfillJson(route, {
+      user: 'admin@example.com',
+      account_type: 'local',
+      has_password: true,
+    });
+  }
+
+  if (path === '/api/v1/user/space-credits') {
+    return fulfillJson(route, { credits: null });
+  }
+
+  if (path === '/api/v1/platform/bots') {
+    return fulfillJson(route, { bots: [] });
+  }
+
+  if (path === '/api/v1/pipelines') {
+    return fulfillJson(route, { pipelines: [] });
+  }
+
+  if (path === '/api/v1/knowledge/bases') {
+    return fulfillJson(route, { bases: [] });
+  }
+
+  if (path === '/api/v1/knowledge/migration/status') {
+    return fulfillJson(route, {
+      needed: false,
+      internal_kb_count: 0,
+      external_kb_count: 0,
+    });
+  }
+
+  if (path === '/api/v1/plugins') {
+    return fulfillJson(route, { plugins: [] });
+  }
+
+  if (path === '/api/v1/extensions') {
+    return fulfillJson(route, { extensions: [] });
+  }
+
+  if (path === '/api/v1/mcp/servers') {
+    return fulfillJson(route, { servers: [] });
+  }
+
+  if (path === '/api/v1/skills') {
+    if (method === 'POST') {
+      const skill = makeSkill(
+        JSON.parse(request.postData() || '{}') as JsonRecord,
+      );
+      state.skills = [
+        ...state.skills.filter((item) => item.name !== skill.name),
+        skill,
+      ];
+      return fulfillJson(route, { skill });
+    }
+
+    return fulfillJson(route, { skills: state.skills });
+  }
+
+  const skillFileMatch = path.match(
+    /^\/api\/v1\/skills\/([^/]+)\/files\/(.+)$/,
+  );
+  if (skillFileMatch) {
+    const skillName = decodeURIComponent(skillFileMatch[1]);
+    const filePath = decodeURIComponent(skillFileMatch[2]);
+    const skill = state.skills.find((item) => item.name === skillName);
+    return fulfillJson(route, {
+      skill: { name: skillName },
+      path: filePath,
+      content: skill?.instructions || '',
+    });
+  }
+
+  const skillFilesMatch = path.match(/^\/api\/v1\/skills\/([^/]+)\/files$/);
+  if (skillFilesMatch) {
+    const skillName = decodeURIComponent(skillFilesMatch[1]);
+    return fulfillJson(route, {
+      skill: { name: skillName },
+      base_path: '.',
+      entries: [
+        {
+          path: 'SKILL.md',
+          name: 'SKILL.md',
+          is_dir: false,
+          size: null,
+        },
+      ],
+      truncated: false,
+    });
+  }
+
+  const skillMatch = path.match(/^\/api\/v1\/skills\/([^/]+)$/);
+  if (skillMatch) {
+    const skillName = decodeURIComponent(skillMatch[1]);
+    const skill = state.skills.find((item) => item.name === skillName) || {
+      name: skillName,
+      display_name: '',
+      description: '',
+      instructions: '',
+      package_root: '',
+      updated_at: new Date().toISOString(),
+    };
+    return fulfillJson(route, { skill });
+  }
+
+  if (path === '/api/v1/system/status/plugin-system') {
+    return fulfillJson(route, {
+      is_enable: true,
+      is_connected: true,
+      plugin_connector_error: '',
+    });
+  }
+
+  if (path === '/api/v1/plugins/debug-info') {
+    return fulfillJson(route, {
+      debug_url: 'ws://127.0.0.1:5300/plugin/debug',
+      plugin_debug_key: 'test-debug-key',
+    });
+  }
+
+  if (path === '/api/v1/box/status') {
+    return fulfillJson(route, {
+      available: true,
+      enabled: true,
+      profile: 'playwright',
+      recent_error_count: 0,
+      active_sessions: 0,
+      managed_processes: 0,
+      session_ttl_sec: 3600,
+      backend: {
+        name: 'playwright',
+        available: true,
+      },
+    });
+  }
+
+  if (path === '/api/v1/box/sessions') {
+    return fulfillJson(route, []);
+  }
+
+  if (path === '/api/v1/monitoring/data') {
+    return fulfillJson(route, emptyMonitoringData());
+  }
+
+  if (path === '/api/v1/monitoring/overview') {
+    return fulfillJson(route, emptyMonitoringData().overview);
+  }
+
+  if (path === '/api/v1/monitoring/token-statistics') {
+    return fulfillJson(route, emptyTokenStatistics());
+  }
+
+  if (path === '/api/v1/monitoring/feedback/stats') {
+    return fulfillJson(route, {
+      total_feedback: 0,
+      total_likes: 0,
+      total_dislikes: 0,
+      satisfaction_rate: 0,
+    });
+  }
+
+  if (path === '/api/v1/monitoring/feedback') {
+    return fulfillJson(route, { feedback: [], total: 0 });
+  }
+
+  if (path === '/api/v1/survey/pending') {
+    return fulfillJson(route, { survey: null });
+  }
+
+  if (path === '/api/v1/system/tasks') {
+    return fulfillJson(route, { tasks: [] });
+  }
+
+  if (
+    path === '/api/v1/marketplace/plugins' ||
+    path === '/api/v1/marketplace/plugins/search' ||
+    path === '/api/v1/marketplace/extensions/search' ||
+    path === '/api/v1/marketplace/mcps/search' ||
+    path === '/api/v1/marketplace/skills/search'
+  ) {
+    return fulfillJson(route, { plugins: [], total: 0 });
+  }
+
+  if (path === '/api/v1/marketplace/tags') {
+    return fulfillJson(route, { tags: [] });
+  }
+
+  if (path === '/api/v1/marketplace/recommendation-lists') {
+    return fulfillJson(route, { lists: [] });
+  }
+
+  if (path === '/api/v1/dist/info/releases') {
+    return fulfillJson(route, []);
+  }
+
+  if (path === '/api/v1/dist/info/repo') {
+    return fulfillJson(route, {
+      repo: {
+        stargazers_count: 0,
+        forks_count: 0,
+        open_issues_count: 0,
+      },
+      contributors: [],
+    });
+  }
+
+  await fulfillJson(route, {});
+}
+
+async function handleCloudApi(route: Route) {
+  const path = routePath(route);
+
+  if (
+    path === '/api/v1/marketplace/plugins' ||
+    path === '/api/v1/marketplace/plugins/search' ||
+    path === '/api/v1/marketplace/extensions/search' ||
+    path === '/api/v1/marketplace/mcps/search' ||
+    path === '/api/v1/marketplace/skills/search'
+  ) {
+    return fulfillJson(route, { plugins: [], total: 0 });
+  }
+
+  if (path === '/api/v1/marketplace/tags') {
+    return fulfillJson(route, { tags: [] });
+  }
+
+  if (path === '/api/v1/marketplace/recommendation-lists') {
+    return fulfillJson(route, { lists: [] });
+  }
+
+  if (path === '/api/v1/dist/info/releases') {
+    return fulfillJson(route, []);
+  }
+
+  if (path === '/api/v1/dist/info/repo') {
+    return fulfillJson(route, {
+      repo: {
+        stargazers_count: 0,
+        forks_count: 0,
+        open_issues_count: 0,
+      },
+      contributors: [],
+    });
+  }
+
+  await fulfillJson(route, {});
+}
+
+export async function installLangBotApiMocks(
+  page: Page,
+  options: { authenticated?: boolean; storage?: JsonRecord } = {},
+) {
+  const { authenticated = false, storage = {} } = options;
+  const state: LangBotApiMockState = {
+    skills: [],
+  };
+
+  await page.addInitScript(
+    ({ authenticated, storage }) => {
+      localStorage.setItem('langbot_language', 'en-US');
+      localStorage.setItem('extensions_group_by_type', 'false');
+
+      if (authenticated) {
+        localStorage.setItem('token', 'playwright-token');
+        localStorage.setItem('userEmail', 'admin@example.com');
+      } else {
+        localStorage.removeItem('token');
+        localStorage.removeItem('userEmail');
+      }
+
+      for (const [key, value] of Object.entries(storage)) {
+        localStorage.setItem(key, String(value));
+      }
+    },
+    { authenticated, storage },
+  );
+
+  await page.route('**/api/v1/**', (route) => handleBackendApi(route, state));
+  await page.route('https://space.langbot.app/**', handleCloudApi);
+}
--- a/web/tests/e2e/home-smoke.spec.ts
+++ b/web/tests/e2e/home-smoke.spec.ts
@@ -0,0 +1,133 @@
+import { expect, test } from '@playwright/test';
+
+import { installLangBotApiMocks } from './fixtures/langbot-api';
+
+const appRoutes = [
+  {
+    path: '/home/bots',
+    heading: 'Bots',
+    bodyText: 'Select a bot from the sidebar',
+  },
+  {
+    path: '/home/pipelines',
+    heading: 'Pipelines',
+    bodyText: 'Select a pipeline from the sidebar',
+  },
+  {
+    path: '/home/extensions',
+    heading: 'Extensions',
+    bodyText: 'No extensions installed',
+  },
+  {
+    path: '/home/mcp',
+    heading: 'MCP',
+    bodyText: 'Select an MCP server from the sidebar',
+  },
+  {
+    path: '/home/knowledge',
+    heading: 'Knowledge',
+    bodyText: 'Select a knowledge base from the sidebar',
+  },
+];
+
+test.describe('authenticated app shell', () => {
+  for (const route of appRoutes) {
+    test(`${route.path} renders without a backend process`, async ({
+      page,
+    }) => {
+      await installLangBotApiMocks(page, { authenticated: true });
+
+      await page.goto(route.path);
+
+      await expect(page).toHaveURL(new RegExp(`${route.path}$`));
+      await expect(page.getByText('Home').first()).toBeVisible();
+      await expect(
+        page.getByRole('button', { name: 'Dashboard' }),
+      ).toBeVisible();
+      await expect(page.getByText('Extensions').first()).toBeVisible();
+      await expect(page.getByText(route.heading).first()).toBeVisible();
+      await expect(page.getByText(route.bodyText)).toBeVisible();
+      await expect(page.getByText('Backend unavailable')).toHaveCount(0);
+    });
+  }
+
+  test('/home/monitoring loads dashboard data from mocked APIs', async ({
+    page,
+  }) => {
+    await installLangBotApiMocks(page, { authenticated: true });
+
+    await page.goto('/home/monitoring');
+
+    await expect(page).toHaveURL(/\/home\/monitoring$/);
+    await expect(page.getByText('Total Messages').first()).toBeVisible();
+    await expect(
+      page.getByRole('tab', { name: 'Message Records' }),
+    ).toBeVisible();
+    await expect(
+      page.getByRole('tab', { name: 'Token Monitoring' }),
+    ).toBeVisible();
+
+    await page.getByRole('tab', { name: 'Token Monitoring' }).click();
+    await expect(
+      page.getByText('No token usage in the selected time range'),
+    ).toBeVisible();
+    await expect(page.getByText('Unable to connect to server')).toHaveCount(0);
+  });
+
+  test('/home/extensions shows plugin debug information from the backend', async ({
+    page,
+  }) => {
+    await installLangBotApiMocks(page, { authenticated: true });
+
+    await page.goto('/home/extensions');
+
+    await page.getByRole('button', { name: 'Debug Info' }).click();
+
+    await expect(page.getByText('Plugin Debug Information')).toBeVisible();
+    await expect(page.getByRole('textbox').nth(0)).toHaveValue(
+      'ws://127.0.0.1:5300/plugin/debug',
+    );
+    await expect(page.getByRole('textbox').nth(1)).toHaveValue(
+      'test-debug-key',
+    );
+  });
+
+  test('/home/skills?action=create creates a manual skill', async ({
+    page,
+  }) => {
+    await installLangBotApiMocks(page, { authenticated: true });
+
+    await page.goto('/home/skills?action=create');
+
+    await expect(page).toHaveURL(/\/home\/skills\?action=create$/);
+    await expect(page.getByText('Create Skill').first()).toBeVisible();
+    await expect(page.getByText('Import Local Skill Directory')).toBeVisible();
+
+    const saveButton = page.getByRole('button', { name: 'Save' });
+    await expect(saveButton).toBeEnabled();
+    await saveButton.click();
+    await expect(page.getByText('Skill name cannot be empty')).toBeVisible();
+
+    await page.locator('#display_name').fill('Daily Summary');
+    await page.locator('#name').fill('daily_summary');
+    await page
+      .locator('#description')
+      .fill('Summarizes the current conversation for handoff.');
+    await page
+      .locator('#instructions')
+      .fill('Summarize the conversation in five concise bullet points.');
+    await saveButton.click();
+
+    await expect(page).toHaveURL(/\/home\/skills\?id=daily_summary$/);
+    await expect(
+      page.getByRole('heading', { name: 'Daily Summary' }),
+    ).toBeVisible();
+    await expect(page.locator('#name')).toHaveValue('daily_summary');
+    await expect(page.locator('#description')).toHaveValue(
+      'Summarizes the current conversation for handoff.',
+    );
+    await expect(page.locator('#instructions')).toHaveValue(
+      'Summarize the conversation in five concise bullet points.',
+    );
+  });
+});
--- a/web/tests/e2e/login.spec.ts
+++ b/web/tests/e2e/login.spec.ts
@@ -0,0 +1,22 @@
+import { expect, test } from '@playwright/test';
+
+import { installLangBotApiMocks } from './fixtures/langbot-api';
+
+test('local account login reaches the authenticated home shell', async ({
+  page,
+}) => {
+  await installLangBotApiMocks(page);
+
+  await page.goto('/login');
+
+  await expect(page.getByText('Welcome')).toBeVisible();
+  await page.getByPlaceholder('Enter email address').fill('admin@example.com');
+  await page.getByPlaceholder('Enter password').fill('password');
+  await page.getByRole('button', { name: 'Login with password' }).click();
+
+  await expect(page).toHaveURL(/\/home$/);
+  await expect(page.getByText('Home').first()).toBeVisible();
+  await expect(page.getByRole('button', { name: 'Dashboard' })).toBeVisible();
+  await expect(page.getByText('Total Messages').first()).toBeVisible();
+  await expect(page.getByText('Unable to connect to server')).toHaveCount(0);
+});
Author	SHA1	Message	Date
huanghuoguoguo	e27e26b071	test: add frontend smoke and backend e2e CI	2026-06-16 10:54:57 +08:00
huanghuoguoguo	e9fe2f2d43	feat(agent-runner): support host tool lookup (#2244)	2026-06-14 11:29:57 +08:00
huanghuoguoguo	27be09ab15	fix(provider): preserve litellm usage details (#2246)	2026-06-14 11:12:29 +08:00
huanghuoguoguo	1ef4507d9a	[codex] Delegate web page bot stream helpers (#2245) * fix(platform): delegate web page bot stream helpers * style(platform): format web page bot adapter	2026-06-14 10:57:53 +08:00