Compare commits

..

1 Commit

Author SHA1 Message Date
huanghuoguoguo
14330741cc fix(rag): reject unsafe runtime file paths 2026-05-16 10:53:57 +08:00
4 changed files with 91 additions and 74 deletions

View File

@@ -120,26 +120,24 @@ class BotService:
async def update_bot(self, bot_uuid: str, bot_data: dict) -> None: async def update_bot(self, bot_uuid: str, bot_data: dict) -> None:
"""Update bot""" """Update bot"""
update_data = bot_data.copy() if 'uuid' in bot_data:
del bot_data['uuid']
if 'uuid' in update_data:
del update_data['uuid']
# set use_pipeline_name # set use_pipeline_name
if 'use_pipeline_uuid' in update_data: if 'use_pipeline_uuid' in bot_data:
result = await self.ap.persistence_mgr.execute_async( result = await self.ap.persistence_mgr.execute_async(
sqlalchemy.select(persistence_pipeline.LegacyPipeline).where( sqlalchemy.select(persistence_pipeline.LegacyPipeline).where(
persistence_pipeline.LegacyPipeline.uuid == update_data['use_pipeline_uuid'] persistence_pipeline.LegacyPipeline.uuid == bot_data['use_pipeline_uuid']
) )
) )
pipeline = result.first() pipeline = result.first()
if pipeline is not None: if pipeline is not None:
update_data['use_pipeline_name'] = pipeline.name bot_data['use_pipeline_name'] = pipeline.name
else: else:
raise Exception('Pipeline not found') raise Exception('Pipeline not found')
await self.ap.persistence_mgr.execute_async( await self.ap.persistence_mgr.execute_async(
sqlalchemy.update(persistence_bot.Bot).values(update_data).where(persistence_bot.Bot.uuid == bot_uuid) sqlalchemy.update(persistence_bot.Bot).values(bot_data).where(persistence_bot.Bot.uuid == bot_uuid)
) )
await self.ap.platform_mgr.remove_bot(bot_uuid) await self.ap.platform_mgr.remove_bot(bot_uuid)

View File

@@ -1,8 +1,12 @@
from __future__ import annotations from __future__ import annotations
import posixpath import posixpath
from typing import Any import re
from langbot.pkg.core import app from typing import TYPE_CHECKING, Any
from urllib.parse import unquote
if TYPE_CHECKING:
from langbot.pkg.core import app
class RAGRuntimeService: class RAGRuntimeService:
@@ -109,8 +113,17 @@ class RAGRuntimeService:
regardless of the underlying storage provider. regardless of the underlying storage provider.
""" """
# Validate storage_path to prevent path traversal # Validate storage_path to prevent path traversal
normalized = posixpath.normpath(storage_path) decoded_path = unquote(storage_path).replace('\\', '/')
if normalized.startswith('/') or '..' in normalized.split('/'): decoded_segments = decoded_path.split('/')
normalized = posixpath.normpath(decoded_path)
if (
not storage_path
or '\x00' in decoded_path
or normalized.startswith('/')
or '..' in decoded_segments
or '..' in normalized.split('/')
or re.match(r'^[A-Za-z]:/', normalized)
):
raise ValueError('Invalid storage path') raise ValueError('Invalid storage path')
content_bytes = await self.ap.storage_mgr.storage_provider.load(normalized) content_bytes = await self.ap.storage_mgr.storage_provider.load(normalized)
return content_bytes if content_bytes else b'' return content_bytes if content_bytes else b''

View File

@@ -1,62 +0,0 @@
from types import SimpleNamespace
from unittest.mock import AsyncMock
from sqlalchemy.sql.dml import Update
from langbot.pkg.api.http.service.bot import BotService
class _FakeResult:
    """Minimal stand-in for a query result that exposes only ``first()``."""

    def __init__(self, value):
        # Keep the canned row so ``first()`` can hand it back unchanged.
        self.value = value

    def first(self):
        """Return the value this fake result was constructed with."""
        return self.value
class _PersistenceManager:
    """Fake persistence manager that records the values of UPDATE statements."""

    def __init__(self):
        # Filled in by ``execute_async`` once an ``Update`` statement is seen.
        self.update_values = None

    async def execute_async(self, statement):
        """Capture UPDATE parameters, or return a canned pipeline row.

        For an ``Update`` statement, the compiled bind parameters whose names
        do not start with ``uuid_`` are stored on ``update_values`` and
        ``None`` is returned. Any other statement yields a ``_FakeResult``
        wrapping an object whose ``name`` is ``'Updated Pipeline'``.
        """
        if not isinstance(statement, Update):
            return _FakeResult(SimpleNamespace(name='Updated Pipeline'))

        captured = {}
        for key, value in statement.compile().params.items():
            # Skip bind parameters named ``uuid_*``; only the remaining
            # parameters are compared by the test.
            if key.startswith('uuid_'):
                continue
            captured[key] = value
        self.update_values = captured
        return None
async def test_update_bot_copies_input_before_filtering_and_setting_pipeline_name():
    """``update_bot`` must leave the caller's payload untouched.

    The values sent to persistence drop ``uuid`` and gain
    ``use_pipeline_name`` from the looked-up pipeline, while the dict passed
    in by the caller stays exactly as it was.
    """
    # NOTE(review): no explicit asyncio marker on this coroutine test;
    # presumably the suite runs pytest-asyncio in auto mode. Confirm.
    fake_persistence = _PersistenceManager()
    loaded_bot = SimpleNamespace(enable=False)
    fake_platform = SimpleNamespace(
        remove_bot=AsyncMock(),
        load_bot=AsyncMock(return_value=loaded_bot),
    )
    ap = SimpleNamespace(
        persistence_mgr=fake_persistence,
        platform_mgr=fake_platform,
        sess_mgr=SimpleNamespace(session_list=[]),
    )

    service = BotService(ap)
    service.get_bot = AsyncMock(return_value={'uuid': 'bot-1'})

    expected_payload = {
        'uuid': 'caller-owned-uuid',
        'name': 'Test Bot',
        'use_pipeline_uuid': 'pipeline-1',
    }
    # Hand the service a separate dict so mutation would show up below.
    payload = dict(expected_payload)

    await service.update_bot('bot-1', payload)

    assert payload == expected_payload
    assert fake_persistence.update_values == {
        'name': 'Test Bot',
        'use_pipeline_uuid': 'pipeline-1',
        'use_pipeline_name': 'Updated Pipeline',
    }

View File

@@ -0,0 +1,68 @@
from __future__ import annotations
from types import SimpleNamespace
import pytest
from langbot.pkg.rag.service.runtime import RAGRuntimeService
class DummyStorageProvider:
    """In-memory storage provider double that records every requested path."""

    def __init__(self, content: bytes | None = b'data'):
        # Paths passed to ``load``, in call order.
        self.loaded_paths: list[str] = []
        # Canned payload returned by every ``load`` call.
        self.content = content

    async def load(self, path: str):
        """Record ``path`` and return the preconfigured content."""
        self.loaded_paths.append(path)
        return self.content
def make_service(storage_provider: DummyStorageProvider) -> RAGRuntimeService:
    """Build a ``RAGRuntimeService`` whose app exposes only the given storage provider."""
    # The fake app carries just ``storage_mgr.storage_provider``.
    storage_mgr = SimpleNamespace(storage_provider=storage_provider)
    fake_app = SimpleNamespace(storage_mgr=storage_mgr)
    return RAGRuntimeService(fake_app)
@pytest.mark.asyncio
async def test_get_file_stream_normalizes_safe_path():
    """A safe relative path is loaded from storage in its normalized form."""
    provider = DummyStorageProvider()
    result = await make_service(provider).get_file_stream('safe/./nested/file.pdf')

    assert result == b'data'
    # The redundant ``./`` segment is gone from the path handed to the provider.
    assert provider.loaded_paths == ['safe/nested/file.pdf']
@pytest.mark.asyncio
@pytest.mark.parametrize(
    'storage_path',
    [
        '',  # empty path
        '../secret.txt',  # leading parent-directory segment
        '/absolute/path.txt',  # absolute POSIX path
        '..\\secret.txt',  # parent-directory segment with a backslash separator
        'nested\\..\\secret.txt',  # backslash-separated traversal in the middle
        '%2e%2e/secret.txt',  # percent-encoded ".."
        'nested/%2e%2e/secret.txt',  # percent-encoded ".." in the middle
        'C:\\secret.txt',  # Windows drive-letter path
        'safe/\x00file.txt',  # embedded NUL byte
    ],
)
async def test_get_file_stream_rejects_unsafe_paths(storage_path: str):
    """Each unsafe path raises ``ValueError`` and never reaches the storage provider."""
    provider = DummyStorageProvider()
    service = make_service(provider)

    with pytest.raises(ValueError, match='Invalid storage path'):
        await service.get_file_stream(storage_path)

    # Rejection must happen before any load is attempted.
    assert provider.loaded_paths == []
@pytest.mark.asyncio
async def test_get_file_stream_returns_empty_bytes_for_missing_content():
    """When the provider returns ``None``, the service returns ``b''``."""
    provider = DummyStorageProvider(content=None)
    result = await make_service(provider).get_file_stream('safe/file.pdf')

    assert result == b''
    # The provider was still asked for the requested path.
    assert provider.loaded_paths == ['safe/file.pdf']