mirror of
https://github.com/MHSanaei/3x-ui.git
synced 2026-06-27 16:14:21 +00:00
feat(xray): add tunnel health monitor (#5480)
* feat(xray): add tunnel health monitor * fix(tunnelmonitor): reuse netproxy client and init logger in tests Replace the duplicated newHTTPClient/dialContextWithProxy with netproxy.NewHTTPClient, which centralises the http/https/socks5 handling and avoids the dial-goroutine connection leak on context cancellation. Cap failures at the threshold during cooldown so the counter stays a true consecutive-failure count. Add TestMain to initialise the logger and fix the nil-pointer panic in the success-after-failure path. * fix(tunnelmonitor): observable recovery, signal headroom, and hardening Address the remaining review findings on the tunnel health monitor: - Recovery is now synchronous and observable: the callback calls server.RestartXray() directly and returns its error instead of just enqueuing SIGUSR1, so a failed restart no longer masks as success and arms the cooldown while the tunnel is still down. - Give the OS signal channel headroom (buffer 8) so producers cannot starve a SIGTERM/SIGINT out of the single slot. - Warn at startup when the monitor is enabled without a proxy, since the probe then measures host connectivity rather than the xray tunnel. - Cap failures at the threshold in the nil-recover branch too, matching the cooldown cap. - Document the XUI_TUNNEL_HEALTH_* vars in .env.example and the README. - Add tests for status-code classification, Normalize bounds, New proxy scheme errors, the recovery-error and nil-recover paths, the cooldown cap, and Run context cancellation (coverage 90%). --------- Co-authored-by: Sanaei <ho3ein.sanaei@gmail.com>
This commit is contained in:
@@ -4,3 +4,16 @@ XUI_LOG_FOLDER=x-ui
|
||||
XUI_BIN_FOLDER=x-ui
|
||||
XUI_INIT_WEB_BASE_PATH=/
|
||||
# XUI_PORT=8080
|
||||
|
||||
# Optional tunnel health monitor (disabled by default). It periodically probes a
|
||||
# URL and restarts xray-core after repeated failures. Point XUI_TUNNEL_HEALTH_PROXY
|
||||
# at a local xray inbound so the probe tests the tunnel; without it the probe only
|
||||
# checks host connectivity and a restart will not fix host network issues. A restart
|
||||
# drops every connected client.
|
||||
# XUI_TUNNEL_HEALTH_MONITOR=true
|
||||
# XUI_TUNNEL_HEALTH_PROXY=socks5://127.0.0.1:1080
|
||||
# XUI_TUNNEL_HEALTH_URL=https://www.cloudflare.com/cdn-cgi/trace
|
||||
# XUI_TUNNEL_HEALTH_INTERVAL=30s
|
||||
# XUI_TUNNEL_HEALTH_TIMEOUT=10s
|
||||
# XUI_TUNNEL_HEALTH_FAILURES=3
|
||||
# XUI_TUNNEL_HEALTH_COOLDOWN=5m
|
||||
|
||||
@@ -146,6 +146,13 @@ docker run -d --cap-add=NET_ADMIN --cap-add=NET_RAW ... ghcr.io/mhsanaei/3x-ui
|
||||
| `XUI_ENABLE_FAIL2BAN` | Enable Fail2ban-based IP-limit enforcement | `true` |
|
||||
| `XUI_LOG_LEVEL` | Log verbosity (`debug`, `info`, `warning`, `error`) | `info` |
|
||||
| `XUI_DEBUG` | Enable debug mode | `false` |
|
||||
| `XUI_TUNNEL_HEALTH_MONITOR` | Enable the tunnel health monitor (probes a URL and restarts xray after repeated failures; a restart drops all clients) | `false` |
|
||||
| `XUI_TUNNEL_HEALTH_PROXY` | Proxy the probe is sent through; point it at a local xray inbound so the probe tests the tunnel (e.g. `socks5://127.0.0.1:1080`). Empty means the probe only checks host connectivity | — |
|
||||
| `XUI_TUNNEL_HEALTH_URL` | URL probed for tunnel health | `https://www.cloudflare.com/cdn-cgi/trace` |
|
||||
| `XUI_TUNNEL_HEALTH_INTERVAL` | Interval between probes | `30s` |
|
||||
| `XUI_TUNNEL_HEALTH_TIMEOUT` | Per-probe timeout | `10s` |
|
||||
| `XUI_TUNNEL_HEALTH_FAILURES` | Consecutive failures before a restart is triggered | `3` |
|
||||
| `XUI_TUNNEL_HEALTH_COOLDOWN` | Minimum delay between consecutive restarts | `5m` |
|
||||
|
||||
## Supported Languages
|
||||
|
||||
|
||||
@@ -0,0 +1,271 @@
|
||||
package tunnelmonitor
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/mhsanaei/3x-ui/v3/internal/logger"
|
||||
"github.com/mhsanaei/3x-ui/v3/internal/util/netproxy"
|
||||
)
|
||||
|
||||
const (
|
||||
defaultHealthURL = "https://www.cloudflare.com/cdn-cgi/trace"
|
||||
defaultInterval = 30 * time.Second
|
||||
defaultTimeout = 10 * time.Second
|
||||
defaultFailureThreshold = 3
|
||||
defaultCooldown = 5 * time.Minute
|
||||
)
|
||||
|
||||
// Config controls the optional tunnel health monitor.
|
||||
type Config struct {
|
||||
Enabled bool
|
||||
URL string
|
||||
ProxyURL string
|
||||
Interval time.Duration
|
||||
Timeout time.Duration
|
||||
FailureThreshold int
|
||||
Cooldown time.Duration
|
||||
}
|
||||
|
||||
// RecoveryFunc performs recovery after the monitor reaches the configured
|
||||
// failure threshold. The panel wires this to an Xray core restart.
|
||||
type RecoveryFunc func(context.Context) error
|
||||
|
||||
// Monitor periodically probes a URL and triggers recovery after repeated
|
||||
// failures. It is intentionally independent from panel settings/UI so it can be
|
||||
// enabled safely through service environment variables first.
|
||||
type Monitor struct {
|
||||
cfg Config
|
||||
client *http.Client
|
||||
recover RecoveryFunc
|
||||
failures int
|
||||
lastRecovery time.Time
|
||||
now func() time.Time
|
||||
}
|
||||
|
||||
// DefaultConfig returns disabled-by-default monitor settings.
|
||||
func DefaultConfig() Config {
|
||||
return Config{
|
||||
Enabled: false,
|
||||
URL: defaultHealthURL,
|
||||
Interval: defaultInterval,
|
||||
Timeout: defaultTimeout,
|
||||
FailureThreshold: defaultFailureThreshold,
|
||||
Cooldown: defaultCooldown,
|
||||
}
|
||||
}
|
||||
|
||||
// ConfigFromEnv builds Config from XUI_TUNNEL_HEALTH_* environment variables.
|
||||
//
|
||||
// Supported variables:
|
||||
// - XUI_TUNNEL_HEALTH_MONITOR=true
|
||||
// - XUI_TUNNEL_HEALTH_URL=https://www.cloudflare.com/cdn-cgi/trace
|
||||
// - XUI_TUNNEL_HEALTH_PROXY=socks5://127.0.0.1:1080
|
||||
// - XUI_TUNNEL_HEALTH_INTERVAL=30s
|
||||
// - XUI_TUNNEL_HEALTH_TIMEOUT=10s
|
||||
// - XUI_TUNNEL_HEALTH_FAILURES=3
|
||||
// - XUI_TUNNEL_HEALTH_COOLDOWN=5m
|
||||
func ConfigFromEnv() Config {
|
||||
cfg := DefaultConfig()
|
||||
|
||||
cfg.Enabled = parseBool(os.Getenv("XUI_TUNNEL_HEALTH_MONITOR"))
|
||||
cfg.URL = firstNonEmpty(os.Getenv("XUI_TUNNEL_HEALTH_URL"), cfg.URL)
|
||||
cfg.ProxyURL = strings.TrimSpace(os.Getenv("XUI_TUNNEL_HEALTH_PROXY"))
|
||||
cfg.Interval = parseDurationEnv("XUI_TUNNEL_HEALTH_INTERVAL", cfg.Interval)
|
||||
cfg.Timeout = parseDurationEnv("XUI_TUNNEL_HEALTH_TIMEOUT", cfg.Timeout)
|
||||
cfg.Cooldown = parseDurationEnv("XUI_TUNNEL_HEALTH_COOLDOWN", cfg.Cooldown)
|
||||
cfg.FailureThreshold = parseIntEnv("XUI_TUNNEL_HEALTH_FAILURES", cfg.FailureThreshold)
|
||||
|
||||
return cfg.Normalize()
|
||||
}
|
||||
|
||||
// Normalize applies safe bounds and defaults.
|
||||
func (c Config) Normalize() Config {
|
||||
if strings.TrimSpace(c.URL) == "" {
|
||||
c.URL = defaultHealthURL
|
||||
}
|
||||
c.URL = strings.TrimSpace(c.URL)
|
||||
c.ProxyURL = strings.TrimSpace(c.ProxyURL)
|
||||
|
||||
if c.Interval < time.Second {
|
||||
c.Interval = defaultInterval
|
||||
}
|
||||
if c.Timeout < time.Second {
|
||||
c.Timeout = defaultTimeout
|
||||
}
|
||||
if c.FailureThreshold < 1 {
|
||||
c.FailureThreshold = defaultFailureThreshold
|
||||
}
|
||||
if c.Cooldown < time.Second {
|
||||
c.Cooldown = defaultCooldown
|
||||
}
|
||||
|
||||
return c
|
||||
}
|
||||
|
||||
// New creates a monitor with an HTTP client based on cfg.
|
||||
func New(cfg Config, recover RecoveryFunc) (*Monitor, error) {
|
||||
cfg = cfg.Normalize()
|
||||
|
||||
client, err := netproxy.NewHTTPClient(cfg.ProxyURL, cfg.Timeout)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return newWithClient(cfg, client, recover), nil
|
||||
}
|
||||
|
||||
func newWithClient(cfg Config, client *http.Client, recover RecoveryFunc) *Monitor {
|
||||
cfg = cfg.Normalize()
|
||||
if client == nil {
|
||||
client = &http.Client{Timeout: cfg.Timeout}
|
||||
}
|
||||
|
||||
return &Monitor{
|
||||
cfg: cfg,
|
||||
client: client,
|
||||
recover: recover,
|
||||
now: time.Now,
|
||||
}
|
||||
}
|
||||
|
||||
// Run starts the monitor loop until ctx is cancelled.
|
||||
func (m *Monitor) Run(ctx context.Context) {
|
||||
if m == nil || !m.cfg.Enabled {
|
||||
return
|
||||
}
|
||||
|
||||
logger.Info("Tunnel health monitor enabled: ", m.cfg.URL)
|
||||
|
||||
ticker := time.NewTicker(m.cfg.Interval)
|
||||
defer ticker.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
logger.Info("Tunnel health monitor stopped")
|
||||
return
|
||||
case <-ticker.C:
|
||||
recovered, err := m.Step(ctx)
|
||||
if err != nil {
|
||||
logger.Warning("Tunnel health monitor check failed: ", err)
|
||||
}
|
||||
if recovered {
|
||||
logger.Warning("Tunnel health monitor triggered Xray restart")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Step performs one probe and maybe triggers recovery.
|
||||
func (m *Monitor) Step(ctx context.Context) (bool, error) {
|
||||
if m == nil {
|
||||
return false, errors.New("nil monitor")
|
||||
}
|
||||
|
||||
if err := m.probe(ctx); err != nil {
|
||||
m.failures++
|
||||
|
||||
if m.failures < m.cfg.FailureThreshold {
|
||||
return false, fmt.Errorf("probe failed %d/%d: %w", m.failures, m.cfg.FailureThreshold, err)
|
||||
}
|
||||
|
||||
now := m.now()
|
||||
if !m.lastRecovery.IsZero() && now.Sub(m.lastRecovery) < m.cfg.Cooldown {
|
||||
m.failures = m.cfg.FailureThreshold
|
||||
return false, fmt.Errorf("probe failed %d/%d; recovery cooldown active: %w", m.failures, m.cfg.FailureThreshold, err)
|
||||
}
|
||||
|
||||
if m.recover == nil {
|
||||
m.failures = m.cfg.FailureThreshold
|
||||
return false, errors.New("recovery function is not configured")
|
||||
}
|
||||
|
||||
if recErr := m.recover(ctx); recErr != nil {
|
||||
return false, fmt.Errorf("recovery failed after probe error %v: %w", err, recErr)
|
||||
}
|
||||
|
||||
m.lastRecovery = now
|
||||
m.failures = 0
|
||||
return true, err
|
||||
}
|
||||
|
||||
if m.failures > 0 {
|
||||
logger.Info("Tunnel health monitor recovered after successful probe")
|
||||
}
|
||||
m.failures = 0
|
||||
return false, nil
|
||||
}
|
||||
|
||||
func (m *Monitor) probe(ctx context.Context) error {
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, m.cfg.URL, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
resp, err := m.client.Do(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
_, _ = io.Copy(io.Discard, io.LimitReader(resp.Body, 4096))
|
||||
|
||||
if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusBadRequest {
|
||||
return fmt.Errorf("unexpected HTTP status %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func parseBool(value string) bool {
|
||||
switch strings.ToLower(strings.TrimSpace(value)) {
|
||||
case "1", "true", "yes", "y", "on", "enable", "enabled":
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
func parseDurationEnv(name string, fallback time.Duration) time.Duration {
|
||||
value := strings.TrimSpace(os.Getenv(name))
|
||||
if value == "" {
|
||||
return fallback
|
||||
}
|
||||
|
||||
d, err := time.ParseDuration(value)
|
||||
if err != nil {
|
||||
return fallback
|
||||
}
|
||||
|
||||
return d
|
||||
}
|
||||
|
||||
func parseIntEnv(name string, fallback int) int {
|
||||
value := strings.TrimSpace(os.Getenv(name))
|
||||
if value == "" {
|
||||
return fallback
|
||||
}
|
||||
|
||||
n, err := strconv.Atoi(value)
|
||||
if err != nil {
|
||||
return fallback
|
||||
}
|
||||
|
||||
return n
|
||||
}
|
||||
|
||||
func firstNonEmpty(value string, fallback string) string {
|
||||
value = strings.TrimSpace(value)
|
||||
if value == "" {
|
||||
return fallback
|
||||
}
|
||||
return value
|
||||
}
|
||||
@@ -0,0 +1,454 @@
|
||||
package tunnelmonitor
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"net/http"
|
||||
"strings"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/mhsanaei/3x-ui/v3/internal/logger"
|
||||
"github.com/op/go-logging"
|
||||
)
|
||||
|
||||
func TestMain(m *testing.M) {
|
||||
logger.InitLogger(logging.ERROR)
|
||||
m.Run()
|
||||
}
|
||||
|
||||
type roundTripFunc func(*http.Request) (*http.Response, error)
|
||||
|
||||
func (f roundTripFunc) RoundTrip(req *http.Request) (*http.Response, error) {
|
||||
return f(req)
|
||||
}
|
||||
|
||||
func TestMonitorRestartsAfterFailureThreshold(t *testing.T) {
|
||||
cfg := Config{
|
||||
Enabled: true,
|
||||
URL: "http://example.test",
|
||||
Interval: time.Minute,
|
||||
Timeout: time.Second,
|
||||
FailureThreshold: 2,
|
||||
Cooldown: time.Minute,
|
||||
}
|
||||
|
||||
client := &http.Client{
|
||||
Transport: roundTripFunc(func(req *http.Request) (*http.Response, error) {
|
||||
return nil, errors.New("tunnel down")
|
||||
}),
|
||||
}
|
||||
|
||||
restarts := 0
|
||||
monitor := newWithClient(cfg, client, func(ctx context.Context) error {
|
||||
restarts++
|
||||
return nil
|
||||
})
|
||||
|
||||
monitor.now = func() time.Time {
|
||||
return time.Unix(100, 0)
|
||||
}
|
||||
|
||||
if recovered, _ := monitor.Step(context.Background()); recovered {
|
||||
t.Fatal("first failure must not trigger recovery")
|
||||
}
|
||||
|
||||
if recovered, _ := monitor.Step(context.Background()); !recovered {
|
||||
t.Fatal("second consecutive failure should trigger recovery")
|
||||
}
|
||||
|
||||
if restarts != 1 {
|
||||
t.Fatalf("expected 1 recovery, got %d", restarts)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMonitorRespectsRecoveryCooldown(t *testing.T) {
|
||||
cfg := Config{
|
||||
Enabled: true,
|
||||
URL: "http://example.test",
|
||||
Interval: time.Minute,
|
||||
Timeout: time.Second,
|
||||
FailureThreshold: 1,
|
||||
Cooldown: time.Minute,
|
||||
}
|
||||
|
||||
client := &http.Client{
|
||||
Transport: roundTripFunc(func(req *http.Request) (*http.Response, error) {
|
||||
return nil, errors.New("tunnel down")
|
||||
}),
|
||||
}
|
||||
|
||||
now := time.Unix(100, 0)
|
||||
restarts := 0
|
||||
|
||||
monitor := newWithClient(cfg, client, func(ctx context.Context) error {
|
||||
restarts++
|
||||
return nil
|
||||
})
|
||||
|
||||
monitor.now = func() time.Time {
|
||||
return now
|
||||
}
|
||||
|
||||
recovered, _ := monitor.Step(context.Background())
|
||||
if !recovered {
|
||||
t.Fatal("first failure should trigger recovery when threshold is 1")
|
||||
}
|
||||
|
||||
recovered, _ = monitor.Step(context.Background())
|
||||
if recovered {
|
||||
t.Fatal("cooldown should suppress immediate second recovery")
|
||||
}
|
||||
|
||||
if restarts != 1 {
|
||||
t.Fatalf("expected 1 recovery during cooldown, got %d", restarts)
|
||||
}
|
||||
|
||||
now = now.Add(time.Minute + time.Second)
|
||||
|
||||
recovered, _ = monitor.Step(context.Background())
|
||||
if !recovered {
|
||||
t.Fatal("recovery should be allowed after cooldown")
|
||||
}
|
||||
|
||||
if restarts != 2 {
|
||||
t.Fatalf("expected 2 recoveries after cooldown, got %d", restarts)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMonitorSuccessResetsFailures(t *testing.T) {
|
||||
cfg := Config{
|
||||
Enabled: true,
|
||||
URL: "http://example.test",
|
||||
Interval: time.Minute,
|
||||
Timeout: time.Second,
|
||||
FailureThreshold: 2,
|
||||
Cooldown: time.Minute,
|
||||
}
|
||||
|
||||
fail := true
|
||||
client := &http.Client{
|
||||
Transport: roundTripFunc(func(req *http.Request) (*http.Response, error) {
|
||||
if fail {
|
||||
return nil, errors.New("tunnel down")
|
||||
}
|
||||
|
||||
return &http.Response{
|
||||
StatusCode: http.StatusOK,
|
||||
Body: http.NoBody,
|
||||
}, nil
|
||||
}),
|
||||
}
|
||||
|
||||
restarts := 0
|
||||
monitor := newWithClient(cfg, client, func(ctx context.Context) error {
|
||||
restarts++
|
||||
return nil
|
||||
})
|
||||
|
||||
_, _ = monitor.Step(context.Background())
|
||||
|
||||
fail = false
|
||||
if recovered, err := monitor.Step(context.Background()); recovered || err != nil {
|
||||
t.Fatalf("successful probe should not recover or fail, recovered=%v err=%v", recovered, err)
|
||||
}
|
||||
|
||||
fail = true
|
||||
if recovered, _ := monitor.Step(context.Background()); recovered {
|
||||
t.Fatal("failure after success should be counted as first failure again")
|
||||
}
|
||||
|
||||
if restarts != 0 {
|
||||
t.Fatalf("expected no recovery, got %d", restarts)
|
||||
}
|
||||
}
|
||||
|
||||
func TestConfigFromEnvParsesValues(t *testing.T) {
|
||||
t.Setenv("XUI_TUNNEL_HEALTH_MONITOR", "true")
|
||||
t.Setenv("XUI_TUNNEL_HEALTH_URL", "https://example.com/health")
|
||||
t.Setenv("XUI_TUNNEL_HEALTH_PROXY", "socks5://127.0.0.1:1080")
|
||||
t.Setenv("XUI_TUNNEL_HEALTH_INTERVAL", "15s")
|
||||
t.Setenv("XUI_TUNNEL_HEALTH_TIMEOUT", "3s")
|
||||
t.Setenv("XUI_TUNNEL_HEALTH_FAILURES", "4")
|
||||
t.Setenv("XUI_TUNNEL_HEALTH_COOLDOWN", "2m")
|
||||
|
||||
cfg := ConfigFromEnv()
|
||||
|
||||
if !cfg.Enabled {
|
||||
t.Fatal("expected monitor to be enabled")
|
||||
}
|
||||
|
||||
if cfg.URL != "https://example.com/health" {
|
||||
t.Fatalf("unexpected URL: %s", cfg.URL)
|
||||
}
|
||||
|
||||
if !strings.HasPrefix(cfg.ProxyURL, "socks5://") {
|
||||
t.Fatalf("unexpected proxy URL: %s", cfg.ProxyURL)
|
||||
}
|
||||
|
||||
if cfg.Interval != 15*time.Second {
|
||||
t.Fatalf("unexpected interval: %s", cfg.Interval)
|
||||
}
|
||||
|
||||
if cfg.Timeout != 3*time.Second {
|
||||
t.Fatalf("unexpected timeout: %s", cfg.Timeout)
|
||||
}
|
||||
|
||||
if cfg.FailureThreshold != 4 {
|
||||
t.Fatalf("unexpected threshold: %d", cfg.FailureThreshold)
|
||||
}
|
||||
|
||||
if cfg.Cooldown != 2*time.Minute {
|
||||
t.Fatalf("unexpected cooldown: %s", cfg.Cooldown)
|
||||
}
|
||||
}
|
||||
|
||||
func failingClient() *http.Client {
|
||||
return &http.Client{
|
||||
Transport: roundTripFunc(func(req *http.Request) (*http.Response, error) {
|
||||
return nil, errors.New("tunnel down")
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
func statusClient(code int) *http.Client {
|
||||
return &http.Client{
|
||||
Transport: roundTripFunc(func(req *http.Request) (*http.Response, error) {
|
||||
return &http.Response{StatusCode: code, Body: http.NoBody}, nil
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
func TestProbeStatusCodeClassification(t *testing.T) {
|
||||
cases := []struct {
|
||||
status int
|
||||
healthy bool
|
||||
}{
|
||||
{199, false},
|
||||
{200, true},
|
||||
{204, true},
|
||||
{301, true},
|
||||
{399, true},
|
||||
{400, false},
|
||||
{404, false},
|
||||
{500, false},
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
cfg := Config{
|
||||
Enabled: true,
|
||||
URL: "http://example.test",
|
||||
Interval: time.Minute,
|
||||
Timeout: time.Second,
|
||||
FailureThreshold: 100,
|
||||
Cooldown: time.Minute,
|
||||
}
|
||||
|
||||
monitor := newWithClient(cfg, statusClient(tc.status), func(ctx context.Context) error {
|
||||
return nil
|
||||
})
|
||||
|
||||
recovered, err := monitor.Step(context.Background())
|
||||
if recovered {
|
||||
t.Fatalf("status %d: unexpected recovery", tc.status)
|
||||
}
|
||||
if tc.healthy && err != nil {
|
||||
t.Fatalf("status %d: expected healthy probe, got error %v", tc.status, err)
|
||||
}
|
||||
if !tc.healthy && err == nil {
|
||||
t.Fatalf("status %d: expected failure, got nil error", tc.status)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestNormalizeClampsBounds(t *testing.T) {
|
||||
got := Config{
|
||||
URL: " ",
|
||||
Interval: 0,
|
||||
Timeout: 500 * time.Millisecond,
|
||||
FailureThreshold: 0,
|
||||
Cooldown: 0,
|
||||
}.Normalize()
|
||||
|
||||
if got.URL != defaultHealthURL {
|
||||
t.Fatalf("URL not defaulted: %q", got.URL)
|
||||
}
|
||||
if got.Interval != defaultInterval {
|
||||
t.Fatalf("Interval not clamped: %s", got.Interval)
|
||||
}
|
||||
if got.Timeout != defaultTimeout {
|
||||
t.Fatalf("Timeout not clamped: %s", got.Timeout)
|
||||
}
|
||||
if got.FailureThreshold != defaultFailureThreshold {
|
||||
t.Fatalf("FailureThreshold not clamped: %d", got.FailureThreshold)
|
||||
}
|
||||
if got.Cooldown != defaultCooldown {
|
||||
t.Fatalf("Cooldown not clamped: %s", got.Cooldown)
|
||||
}
|
||||
|
||||
valid := Config{
|
||||
URL: "https://example.com/health",
|
||||
Interval: 15 * time.Second,
|
||||
Timeout: 3 * time.Second,
|
||||
FailureThreshold: 5,
|
||||
Cooldown: 2 * time.Minute,
|
||||
}.Normalize()
|
||||
|
||||
if valid.URL != "https://example.com/health" ||
|
||||
valid.Interval != 15*time.Second ||
|
||||
valid.Timeout != 3*time.Second ||
|
||||
valid.FailureThreshold != 5 ||
|
||||
valid.Cooldown != 2*time.Minute {
|
||||
t.Fatalf("valid config was mutated: %+v", valid)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNewRejectsUnsupportedProxyScheme(t *testing.T) {
|
||||
m, err := New(Config{ProxyURL: "ftp://127.0.0.1:21"}, func(ctx context.Context) error {
|
||||
return nil
|
||||
})
|
||||
if err == nil || m != nil {
|
||||
t.Fatalf("expected error and nil monitor for bad scheme, got m=%v err=%v", m, err)
|
||||
}
|
||||
|
||||
m, err = New(Config{}, func(ctx context.Context) error {
|
||||
return nil
|
||||
})
|
||||
if err != nil || m == nil {
|
||||
t.Fatalf("expected a valid monitor for empty proxy, got m=%v err=%v", m, err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMonitorRecoveryErrorDoesNotArmCooldown(t *testing.T) {
|
||||
cfg := Config{
|
||||
Enabled: true,
|
||||
URL: "http://example.test",
|
||||
Interval: time.Minute,
|
||||
Timeout: time.Second,
|
||||
FailureThreshold: 1,
|
||||
Cooldown: time.Minute,
|
||||
}
|
||||
|
||||
attempts := 0
|
||||
monitor := newWithClient(cfg, failingClient(), func(ctx context.Context) error {
|
||||
attempts++
|
||||
return errors.New("restart failed")
|
||||
})
|
||||
monitor.now = func() time.Time {
|
||||
return time.Unix(100, 0)
|
||||
}
|
||||
|
||||
recovered, err := monitor.Step(context.Background())
|
||||
if recovered || err == nil {
|
||||
t.Fatalf("failed recovery must report recovered=false with an error, got recovered=%v err=%v", recovered, err)
|
||||
}
|
||||
if !monitor.lastRecovery.IsZero() {
|
||||
t.Fatal("a failed recovery must not arm the cooldown")
|
||||
}
|
||||
|
||||
if _, err := monitor.Step(context.Background()); err == nil {
|
||||
t.Fatal("expected error on the second failing step")
|
||||
}
|
||||
if attempts != 2 {
|
||||
t.Fatalf("recovery should be retried (no cooldown) after a failure, attempts=%d", attempts)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMonitorNilRecoverStaysBounded(t *testing.T) {
|
||||
cfg := Config{
|
||||
Enabled: true,
|
||||
URL: "http://example.test",
|
||||
Interval: time.Minute,
|
||||
Timeout: time.Second,
|
||||
FailureThreshold: 2,
|
||||
Cooldown: time.Minute,
|
||||
}
|
||||
|
||||
monitor := newWithClient(cfg, failingClient(), nil)
|
||||
|
||||
for i := 0; i < 5; i++ {
|
||||
recovered, _ := monitor.Step(context.Background())
|
||||
if recovered {
|
||||
t.Fatal("a nil recovery func must never report recovery")
|
||||
}
|
||||
if monitor.failures > cfg.FailureThreshold {
|
||||
t.Fatalf("failures must stay capped at threshold %d, got %d", cfg.FailureThreshold, monitor.failures)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestMonitorFailuresCappedDuringCooldown(t *testing.T) {
|
||||
cfg := Config{
|
||||
Enabled: true,
|
||||
URL: "http://example.test",
|
||||
Interval: time.Minute,
|
||||
Timeout: time.Second,
|
||||
FailureThreshold: 2,
|
||||
Cooldown: time.Minute,
|
||||
}
|
||||
|
||||
restarts := 0
|
||||
monitor := newWithClient(cfg, failingClient(), func(ctx context.Context) error {
|
||||
restarts++
|
||||
return nil
|
||||
})
|
||||
monitor.now = func() time.Time {
|
||||
return time.Unix(100, 0)
|
||||
}
|
||||
|
||||
monitor.Step(context.Background())
|
||||
if recovered, _ := monitor.Step(context.Background()); !recovered {
|
||||
t.Fatal("expected recovery once the threshold is reached")
|
||||
}
|
||||
|
||||
for i := 0; i < 6; i++ {
|
||||
monitor.Step(context.Background())
|
||||
if monitor.failures > cfg.FailureThreshold {
|
||||
t.Fatalf("failures must never exceed threshold %d during cooldown, got %d", cfg.FailureThreshold, monitor.failures)
|
||||
}
|
||||
}
|
||||
|
||||
if restarts != 1 {
|
||||
t.Fatalf("cooldown should suppress further recoveries, restarts=%d", restarts)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMonitorRunStopsOnContextCancel(t *testing.T) {
|
||||
cfg := Config{
|
||||
Enabled: true,
|
||||
URL: "http://example.test",
|
||||
Timeout: time.Second,
|
||||
FailureThreshold: 1,
|
||||
Cooldown: time.Hour,
|
||||
}
|
||||
|
||||
recovered := make(chan struct{})
|
||||
var once sync.Once
|
||||
monitor := newWithClient(cfg, failingClient(), func(ctx context.Context) error {
|
||||
once.Do(func() { close(recovered) })
|
||||
return nil
|
||||
})
|
||||
monitor.cfg.Interval = 5 * time.Millisecond
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
done := make(chan struct{})
|
||||
go func() {
|
||||
monitor.Run(ctx)
|
||||
close(done)
|
||||
}()
|
||||
|
||||
select {
|
||||
case <-recovered:
|
||||
case <-time.After(2 * time.Second):
|
||||
cancel()
|
||||
t.Fatal("Run did not trigger recovery within the deadline")
|
||||
}
|
||||
|
||||
cancel()
|
||||
select {
|
||||
case <-done:
|
||||
case <-time.After(2 * time.Second):
|
||||
t.Fatal("Run did not return after context cancellation")
|
||||
}
|
||||
}
|
||||
@@ -3,6 +3,7 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"flag"
|
||||
"fmt"
|
||||
"log"
|
||||
@@ -18,6 +19,7 @@ import (
|
||||
"github.com/mhsanaei/3x-ui/v3/internal/database"
|
||||
"github.com/mhsanaei/3x-ui/v3/internal/logger"
|
||||
"github.com/mhsanaei/3x-ui/v3/internal/sub"
|
||||
"github.com/mhsanaei/3x-ui/v3/internal/tunnelmonitor"
|
||||
"github.com/mhsanaei/3x-ui/v3/internal/util/crypto"
|
||||
"github.com/mhsanaei/3x-ui/v3/internal/util/sys"
|
||||
"github.com/mhsanaei/3x-ui/v3/internal/web"
|
||||
@@ -91,7 +93,7 @@ func runWebServer() {
|
||||
return
|
||||
}
|
||||
|
||||
sigCh := make(chan os.Signal, 1)
|
||||
sigCh := make(chan os.Signal, 8)
|
||||
// Trap shutdown signals
|
||||
signal.Notify(sigCh, syscall.SIGHUP, syscall.SIGTERM, sys.SIGUSR1, os.Interrupt)
|
||||
global.SetRestartHook(func() {
|
||||
@@ -100,6 +102,27 @@ func runWebServer() {
|
||||
default:
|
||||
}
|
||||
})
|
||||
|
||||
var stopTunnelHealthMonitor context.CancelFunc
|
||||
monitorCfg := tunnelmonitor.ConfigFromEnv()
|
||||
if monitorCfg.Enabled {
|
||||
if monitorCfg.ProxyURL == "" {
|
||||
logger.Warning("Tunnel health monitor enabled without XUI_TUNNEL_HEALTH_PROXY: the probe measures host connectivity, not the xray tunnel, so failures will restart xray without fixing host network issues")
|
||||
}
|
||||
|
||||
monitorCtx, cancel := context.WithCancel(context.Background())
|
||||
stopTunnelHealthMonitor = cancel
|
||||
|
||||
monitor, err := tunnelmonitor.New(monitorCfg, func(_ context.Context) error {
|
||||
logger.Warning("Tunnel health monitor threshold reached, restarting xray-core")
|
||||
return server.RestartXray()
|
||||
})
|
||||
if err != nil {
|
||||
logger.Warning("Tunnel health monitor disabled: ", err)
|
||||
} else {
|
||||
go monitor.Run(monitorCtx)
|
||||
}
|
||||
}
|
||||
for {
|
||||
sig := <-sigCh
|
||||
|
||||
@@ -142,6 +165,10 @@ func runWebServer() {
|
||||
}
|
||||
|
||||
default:
|
||||
if stopTunnelHealthMonitor != nil {
|
||||
stopTunnelHealthMonitor()
|
||||
}
|
||||
|
||||
// --- FIX FOR TELEGRAM BOT CONFLICT (409) on full shutdown ---
|
||||
tgbot.StopBot()
|
||||
// ------------------------------------------------------------
|
||||
|
||||
Reference in New Issue
Block a user