fix(utils): classify runner URLs safely

This commit is contained in:
huanghuoguoguo
2026-05-16 10:53:24 +08:00
parent b251fc4b89
commit d43cbf0243
3 changed files with 89 additions and 27 deletions
+42 -27
View File
@@ -1,5 +1,7 @@
from __future__ import annotations from __future__ import annotations
import ipaddress
import re
from urllib.parse import urlparse from urllib.parse import urlparse
@@ -20,29 +22,39 @@ CLOUD_DOMAINS = [
'.langflow.org', '.langflow.org',
] ]
LOCAL_PATTERNS = [ HOST_LABEL_PATTERN = re.compile(r'^[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?$')
'localhost',
'127.0.0.1',
'0.0.0.0', def _is_valid_hostname(host: str) -> bool:
'192.168.', if host == 'localhost':
'10.', return True
'172.16.',
'172.17.', try:
'172.18.', ipaddress.ip_address(host)
'172.19.', return True
'172.20.', except ValueError:
'172.21.', pass
'172.22.',
'172.23.', if not host or len(host) > 253 or any(char.isspace() for char in host):
'172.24.', return False
'172.25.',
'172.26.', host = host.rstrip('.')
'172.27.', if not host:
'172.28.', return False
'172.29.',
'172.30.', return all(HOST_LABEL_PATTERN.match(label) for label in host.split('.'))
'172.31.',
]
def _is_local_host(host: str) -> bool:
if host == 'localhost':
return True
try:
ip_address = ipaddress.ip_address(host)
except ValueError:
return False
return ip_address.is_private or ip_address.is_loopback or ip_address.is_unspecified
def get_runner_category(runner_name: str, runner_url: str) -> str: def get_runner_category(runner_name: str, runner_url: str) -> str:
@@ -52,12 +64,15 @@ def get_runner_category(runner_name: str, runner_url: str) -> str:
try: try:
parsed_url = urlparse(runner_url) parsed_url = urlparse(runner_url)
host = parsed_url.hostname.lower() if parsed_url.hostname else '' host = parsed_url.hostname.lower() if parsed_url.hostname else ''
except Exception: _ = parsed_url.port
except (TypeError, ValueError):
return RunnerCategory.UNKNOWN return RunnerCategory.UNKNOWN
for pattern in LOCAL_PATTERNS: if not parsed_url.scheme or not host or not _is_valid_hostname(host):
if host.startswith(pattern): return RunnerCategory.UNKNOWN
return RunnerCategory.LOCAL
if _is_local_host(host):
return RunnerCategory.LOCAL
for domain in CLOUD_DOMAINS: for domain in CLOUD_DOMAINS:
if host.endswith(domain): if host.endswith(domain):
+1
View File
@@ -0,0 +1 @@
+46
View File
@@ -0,0 +1,46 @@
import pytest
from langbot.pkg.utils.runner import RunnerCategory, get_runner_category
@pytest.mark.parametrize(
    'runner_url',
    [
        # No scheme in front of the host.
        'api.dify.ai/v1',
        'localhost:7860',
        # Scheme present but the host part is empty.
        'https:///v1',
        'https://',
        # Whitespace inside the host.
        'https://exa mple.com',
        # Unterminated IPv6 bracket.
        'http://[::1',
        # Non-numeric port.
        'http://localhost:bad',
    ],
)
def test_get_runner_category_returns_unknown_for_invalid_urls(runner_url):
    """Malformed runner URLs must be classified as UNKNOWN."""
    category = get_runner_category('dify-service-api', runner_url)

    assert category == RunnerCategory.UNKNOWN
@pytest.mark.parametrize(
    'runner_url',
    [
        # The literal "localhost" name.
        'http://localhost:7860',
        # IPv4 loopback.
        'http://127.0.0.1:7860',
        # Addresses inside the 10/8, 172.16/12 and 192.168/16 ranges.
        'http://10.0.0.1:7860',
        'http://172.16.0.1:7860',
        'http://172.31.255.255:7860',
        'http://192.168.1.20:7860',
        # Bracketed IPv6 loopback.
        'http://[::1]:7860',
    ],
)
def test_get_runner_category_detects_local_hosts_with_ipaddress(runner_url):
    """Localhost, loopback and private-range URLs must be classified as LOCAL."""
    category = get_runner_category('langflow-api', runner_url)

    assert category == RunnerCategory.LOCAL
@pytest.mark.parametrize(
    'runner_url',
    [
        # Domain names whose leading labels look like private IP prefixes.
        'http://10.evil.com',
        'http://192.168.example.com',
    ],
)
def test_get_runner_category_does_not_treat_private_ip_prefix_domains_as_local(runner_url):
    """Domain names that only start like a private IP must be classified as CLOUD."""
    category = get_runner_category('langflow-api', runner_url)

    assert category == RunnerCategory.CLOUD