Compare commits

..

1 Commits

Author SHA1 Message Date
huanghuoguoguo
14330741cc fix(rag): reject unsafe runtime file paths 2026-05-16 10:53:57 +08:00
4 changed files with 87 additions and 32 deletions

View File

@@ -633,12 +633,11 @@ class PluginRuntimeConnector:
Raises: Raises:
ValueError: If plugin_id is not in the expected 'author/name' format. ValueError: If plugin_id is not in the expected 'author/name' format.
""" """
segments = plugin_id.split('/') if '/' not in plugin_id:
if len(segments) != 2 or not all(segments):
raise ValueError( raise ValueError(
f"Invalid plugin_id format: '{plugin_id}'. Expected 'author/name' format (e.g. 'langbot/rag-engine')." f"Invalid plugin_id format: '{plugin_id}'. Expected 'author/name' format (e.g. 'langbot/rag-engine')."
) )
return segments[0], segments[1] return plugin_id.split('/', 1)
async def call_rag_ingest(self, plugin_id: str, context_data: dict[str, Any]) -> dict[str, Any]: async def call_rag_ingest(self, plugin_id: str, context_data: dict[str, Any]) -> dict[str, Any]:
"""Call plugin to ingest document. """Call plugin to ingest document.

View File

@@ -1,7 +1,11 @@
from __future__ import annotations from __future__ import annotations
import posixpath import posixpath
from typing import Any import re
from typing import TYPE_CHECKING, Any
from urllib.parse import unquote
if TYPE_CHECKING:
from langbot.pkg.core import app from langbot.pkg.core import app
@@ -109,8 +113,17 @@ class RAGRuntimeService:
regardless of the underlying storage provider. regardless of the underlying storage provider.
""" """
# Validate storage_path to prevent path traversal # Validate storage_path to prevent path traversal
normalized = posixpath.normpath(storage_path) decoded_path = unquote(storage_path).replace('\\', '/')
if normalized.startswith('/') or '..' in normalized.split('/'): decoded_segments = decoded_path.split('/')
normalized = posixpath.normpath(decoded_path)
if (
not storage_path
or '\x00' in decoded_path
or normalized.startswith('/')
or '..' in decoded_segments
or '..' in normalized.split('/')
or re.match(r'^[A-Za-z]:/', normalized)
):
raise ValueError('Invalid storage path') raise ValueError('Invalid storage path')
content_bytes = await self.ap.storage_mgr.storage_provider.load(normalized) content_bytes = await self.ap.storage_mgr.storage_provider.load(normalized)
return content_bytes if content_bytes else b'' return content_bytes if content_bytes else b''

View File

@@ -1,25 +0,0 @@
"""Test plugin ID parsing validation."""
import pytest
from src.langbot.pkg.plugin.connector import PluginRuntimeConnector
def test_parse_plugin_id_accepts_author_name():
    """A well-formed 'author/name' ID is split into its two components."""
    expected = ('langbot', 'rag-engine')
    parsed = PluginRuntimeConnector._parse_plugin_id('langbot/rag-engine')
    assert parsed == expected
@pytest.mark.parametrize(
    'plugin_id',
    ['', 'author', 'author/', '/name', 'author/name/extra', '/'],
)
def test_parse_plugin_id_rejects_malformed_ids(plugin_id):
    """Every malformed ID must raise ValueError with a message matching 'Expected'."""
    expect_format_error = pytest.raises(ValueError, match='Expected')
    with expect_format_error:
        PluginRuntimeConnector._parse_plugin_id(plugin_id)

View File

@@ -0,0 +1,68 @@
from __future__ import annotations
from types import SimpleNamespace
import pytest
from langbot.pkg.rag.service.runtime import RAGRuntimeService
class DummyStorageProvider:
    """Storage-provider stand-in that returns fixed content and records requested paths."""

    def __init__(self, content: bytes | None = b'data'):
        # Every path handed to ``load``, in call order.
        self.loaded_paths: list[str] = []
        # Value returned by each ``load`` call (may be None).
        self.content = content

    async def load(self, path: str):
        """Record ``path`` and return the configured content unchanged."""
        self.loaded_paths += [path]
        return self.content
def make_service(storage_provider: DummyStorageProvider) -> RAGRuntimeService:
    """Build a RAGRuntimeService around a stub app exposing only ``storage_mgr.storage_provider``."""
    storage_mgr = SimpleNamespace(storage_provider=storage_provider)
    stub_app = SimpleNamespace(storage_mgr=storage_mgr)
    return RAGRuntimeService(stub_app)
@pytest.mark.asyncio
async def test_get_file_stream_normalizes_safe_path():
    """A safe path containing a './' segment is loaded under its normalized form."""
    provider = DummyStorageProvider()
    payload = await make_service(provider).get_file_stream('safe/./nested/file.pdf')

    assert payload == b'data'
    # The provider must see the path with the redundant './' removed.
    assert provider.loaded_paths == ['safe/nested/file.pdf']
@pytest.mark.asyncio
@pytest.mark.parametrize(
    'storage_path',
    [
        '',  # empty path
        '../secret.txt',  # parent-directory traversal
        '/absolute/path.txt',  # absolute POSIX path
        '..\\secret.txt',  # traversal using a backslash separator
        'nested\\..\\secret.txt',  # backslash traversal below a directory
        '%2e%2e/secret.txt',  # percent-encoded '..'
        'nested/%2e%2e/secret.txt',  # percent-encoded '..' below a directory
        'C:\\secret.txt',  # drive-letter path
        'safe/\x00file.txt',  # embedded NUL character
    ],
)
async def test_get_file_stream_rejects_unsafe_paths(storage_path: str):
    """Unsafe paths raise ValueError and never reach the storage provider."""
    provider = DummyStorageProvider()
    service = make_service(provider)

    with pytest.raises(ValueError, match='Invalid storage path'):
        await service.get_file_stream(storage_path)

    # Rejection must happen before any load is attempted.
    assert provider.loaded_paths == []
@pytest.mark.asyncio
async def test_get_file_stream_returns_empty_bytes_for_missing_content():
    """When the provider returns None, the service returns empty bytes."""
    provider = DummyStorageProvider(content=None)
    payload = await make_service(provider).get_file_stream('safe/file.pdf')

    assert payload == b''
    # The load is still attempted once, with the path unchanged.
    assert provider.loaded_paths == ['safe/file.pdf']