mirror of
https://github.com/langbot-app/LangBot.git
synced 2026-06-03 12:34:37 +00:00
Compare commits
1 Commits
fix/pipeli
...
fix/rag-ru
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
14330741cc |
@@ -275,7 +275,6 @@ class MessageAggregator:
|
||||
message_chain=merged_chain,
|
||||
adapter=base_msg.adapter,
|
||||
pipeline_uuid=base_msg.pipeline_uuid,
|
||||
routed_by_rule=any(msg.routed_by_rule for msg in messages),
|
||||
)
|
||||
|
||||
async def flush_all(self) -> None:
|
||||
|
||||
@@ -1,8 +1,12 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import posixpath
|
||||
from typing import Any
|
||||
from langbot.pkg.core import app
|
||||
import re
|
||||
from typing import TYPE_CHECKING, Any
|
||||
from urllib.parse import unquote
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from langbot.pkg.core import app
|
||||
|
||||
|
||||
class RAGRuntimeService:
|
||||
@@ -109,8 +113,17 @@ class RAGRuntimeService:
|
||||
regardless of the underlying storage provider.
|
||||
"""
|
||||
# Validate storage_path to prevent path traversal
|
||||
normalized = posixpath.normpath(storage_path)
|
||||
if normalized.startswith('/') or '..' in normalized.split('/'):
|
||||
decoded_path = unquote(storage_path).replace('\\', '/')
|
||||
decoded_segments = decoded_path.split('/')
|
||||
normalized = posixpath.normpath(decoded_path)
|
||||
if (
|
||||
not storage_path
|
||||
or '\x00' in decoded_path
|
||||
or normalized.startswith('/')
|
||||
or '..' in decoded_segments
|
||||
or '..' in normalized.split('/')
|
||||
or re.match(r'^[A-Za-z]:/', normalized)
|
||||
):
|
||||
raise ValueError('Invalid storage path')
|
||||
content_bytes = await self.ap.storage_mgr.storage_provider.load(normalized)
|
||||
return content_bytes if content_bytes else b''
|
||||
|
||||
@@ -1,42 +0,0 @@
|
||||
"""
|
||||
MessageAggregator unit tests.
|
||||
"""
|
||||
|
||||
from importlib import import_module
|
||||
|
||||
import langbot_plugin.api.entities.builtin.platform.message as platform_message
|
||||
import langbot_plugin.api.entities.builtin.provider.session as provider_session
|
||||
|
||||
|
||||
def test_merge_messages_preserves_routed_by_rule_if_any_input_matches(sample_message_event, mock_adapter):
|
||||
"""Merged PendingMessage should keep routed_by_rule when any input was rule-routed."""
|
||||
aggregator = import_module('langbot.pkg.pipeline.aggregator')
|
||||
message_aggregator = aggregator.MessageAggregator(ap=None)
|
||||
|
||||
first_message = aggregator.PendingMessage(
|
||||
bot_uuid='test-bot-uuid',
|
||||
launcher_type=provider_session.LauncherTypes.PERSON,
|
||||
launcher_id=12345,
|
||||
sender_id=12345,
|
||||
message_event=sample_message_event,
|
||||
message_chain=platform_message.MessageChain([platform_message.Plain(text='first')]),
|
||||
adapter=mock_adapter,
|
||||
pipeline_uuid='test-pipeline-uuid',
|
||||
routed_by_rule=False,
|
||||
)
|
||||
second_message = aggregator.PendingMessage(
|
||||
bot_uuid='test-bot-uuid',
|
||||
launcher_type=provider_session.LauncherTypes.PERSON,
|
||||
launcher_id=12345,
|
||||
sender_id=12345,
|
||||
message_event=sample_message_event,
|
||||
message_chain=platform_message.MessageChain([platform_message.Plain(text='second')]),
|
||||
adapter=mock_adapter,
|
||||
pipeline_uuid='test-pipeline-uuid',
|
||||
routed_by_rule=True,
|
||||
)
|
||||
|
||||
merged_message = message_aggregator._merge_messages([first_message, second_message])
|
||||
|
||||
assert merged_message.routed_by_rule is True
|
||||
assert str(merged_message.message_chain) == 'first\nsecond'
|
||||
68
tests/unit_tests/rag/test_runtime_service.py
Normal file
68
tests/unit_tests/rag/test_runtime_service.py
Normal file
@@ -0,0 +1,68 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from types import SimpleNamespace
|
||||
|
||||
import pytest
|
||||
|
||||
from langbot.pkg.rag.service.runtime import RAGRuntimeService
|
||||
|
||||
|
||||
class DummyStorageProvider:
|
||||
def __init__(self, content: bytes | None = b'data'):
|
||||
self.content = content
|
||||
self.loaded_paths: list[str] = []
|
||||
|
||||
async def load(self, path: str):
|
||||
self.loaded_paths.append(path)
|
||||
return self.content
|
||||
|
||||
|
||||
def make_service(storage_provider: DummyStorageProvider) -> RAGRuntimeService:
|
||||
return RAGRuntimeService(SimpleNamespace(storage_mgr=SimpleNamespace(storage_provider=storage_provider)))
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_get_file_stream_normalizes_safe_path():
|
||||
storage_provider = DummyStorageProvider()
|
||||
service = make_service(storage_provider)
|
||||
|
||||
content = await service.get_file_stream('safe/./nested/file.pdf')
|
||||
|
||||
assert content == b'data'
|
||||
assert storage_provider.loaded_paths == ['safe/nested/file.pdf']
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize(
|
||||
'storage_path',
|
||||
[
|
||||
'',
|
||||
'../secret.txt',
|
||||
'/absolute/path.txt',
|
||||
'..\\secret.txt',
|
||||
'nested\\..\\secret.txt',
|
||||
'%2e%2e/secret.txt',
|
||||
'nested/%2e%2e/secret.txt',
|
||||
'C:\\secret.txt',
|
||||
'safe/\x00file.txt',
|
||||
],
|
||||
)
|
||||
async def test_get_file_stream_rejects_unsafe_paths(storage_path: str):
|
||||
storage_provider = DummyStorageProvider()
|
||||
service = make_service(storage_provider)
|
||||
|
||||
with pytest.raises(ValueError, match='Invalid storage path'):
|
||||
await service.get_file_stream(storage_path)
|
||||
|
||||
assert storage_provider.loaded_paths == []
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_get_file_stream_returns_empty_bytes_for_missing_content():
|
||||
storage_provider = DummyStorageProvider(content=None)
|
||||
service = make_service(storage_provider)
|
||||
|
||||
content = await service.get_file_stream('safe/file.pdf')
|
||||
|
||||
assert content == b''
|
||||
assert storage_provider.loaded_paths == ['safe/file.pdf']
|
||||
Reference in New Issue
Block a user