diff --git a/internal/util/common/err.go b/internal/util/common/err.go index 54d66e8a8..bd4237a37 100644 --- a/internal/util/common/err.go +++ b/internal/util/common/err.go @@ -4,6 +4,7 @@ package common import ( "errors" "fmt" + "runtime/debug" "github.com/mhsanaei/3x-ui/v3/internal/logger" ) @@ -30,3 +31,17 @@ func Recover(msg string) any { } return panicErr } + +// GoRecover starts fn in a new goroutine and returns immediately. A panic raised by fn is recovered and logged +// (with name and a stack trace) instead of taking the whole process down. fn's own deferred calls run before +// the recover, so cleanup deferred inside fn still happens on panic. name identifies the goroutine in the log. +func GoRecover(name string, fn func()) { + go func() { + defer func() { + if r := recover(); r != nil { + logger.Error("panic in goroutine", name, ":", r, "\n"+string(debug.Stack())) + } + }() + fn() + }() +} diff --git a/internal/util/common/gorecover_test.go b/internal/util/common/gorecover_test.go new file mode 100644 index 000000000..d9d0f3b04 --- /dev/null +++ b/internal/util/common/gorecover_test.go @@ -0,0 +1,41 @@ +package common + +import ( + "os" + "testing" + "time" + + "github.com/mhsanaei/3x-ui/v3/internal/logger" + "github.com/op/go-logging" +) + +func TestMain(m *testing.M) { + logger.InitLogger(logging.ERROR) + os.Exit(m.Run()) +} + +func TestGoRecover_RunsFn(t *testing.T) { + done := make(chan struct{}) + GoRecover("test-run", func() { close(done) }) + select { + case <-done: + case <-time.After(2 * time.Second): + t.Fatal("fn did not run") + } +} + +func TestGoRecover_RecoversPanic(t *testing.T) { + done := make(chan struct{}) + // If GoRecover did not recover, this panic would crash the test binary. + GoRecover("test-panic", func() { + defer close(done) + panic("boom") + }) + select { + case <-done: + case <-time.After(2 * time.Second): + t.Fatal("goroutine did not complete") + } + // done is closed by fn's own defer, before GoRecover's recover+log runs; give that a moment to finish before the test ends. 
+ time.Sleep(50 * time.Millisecond) +} diff --git a/internal/web/job/node_heartbeat_job.go b/internal/web/job/node_heartbeat_job.go index 16111bf1e..c8c14b419 100644 --- a/internal/web/job/node_heartbeat_job.go +++ b/internal/web/job/node_heartbeat_job.go @@ -9,6 +9,7 @@ import ( "github.com/mhsanaei/3x-ui/v3/internal/database/model" "github.com/mhsanaei/3x-ui/v3/internal/eventbus" "github.com/mhsanaei/3x-ui/v3/internal/logger" + "github.com/mhsanaei/3x-ui/v3/internal/util/common" "github.com/mhsanaei/3x-ui/v3/internal/web/service" "github.com/mhsanaei/3x-ui/v3/internal/web/websocket" ) @@ -50,11 +51,12 @@ func (j *NodeHeartbeatJob) Run() { } wg.Add(1) sem <- struct{}{} - go func(n *model.Node) { + n := n + common.GoRecover("node-heartbeat:"+n.Name, func() { defer wg.Done() defer func() { <-sem }() j.probeOne(n) - }(n) + }) } wg.Wait() diff --git a/internal/web/job/node_traffic_sync_job.go b/internal/web/job/node_traffic_sync_job.go index d7f3e8f35..eddc0b20b 100644 --- a/internal/web/job/node_traffic_sync_job.go +++ b/internal/web/job/node_traffic_sync_job.go @@ -8,10 +8,10 @@ import ( "github.com/mhsanaei/3x-ui/v3/internal/database/model" "github.com/mhsanaei/3x-ui/v3/internal/logger" + "github.com/mhsanaei/3x-ui/v3/internal/util/common" "github.com/mhsanaei/3x-ui/v3/internal/web/runtime" "github.com/mhsanaei/3x-ui/v3/internal/web/service" "github.com/mhsanaei/3x-ui/v3/internal/web/websocket" - "github.com/mhsanaei/3x-ui/v3/internal/xray" ) const ( @@ -96,11 +96,12 @@ func (j *NodeTrafficSyncJob) Run() { } wg.Add(1) sem <- struct{}{} - go func(n *model.Node) { + n := n + common.GoRecover("node-traffic-sync:"+n.Name, func() { defer wg.Done() defer func() { <-sem }() j.syncOne(mgr, n, doIpSync) - }(n) + }) } wg.Wait() @@ -211,7 +212,8 @@ func (j *NodeTrafficSyncJob) maybePushGlobals(mgr *runtime.Manager, nodes []*mod } wg.Add(1) sem <- struct{}{} - go func(n *model.Node, remote *runtime.Remote, traffics []*xray.ClientTraffic) { + n, remote, traffics := n, remote, 
traffics + common.GoRecover("node-global-push:"+n.Name, func() { defer wg.Done() defer func() { <-sem }() ctx, cancel := context.WithTimeout(context.Background(), nodeTrafficSyncRequestTimeout) @@ -225,7 +227,7 @@ func (j *NodeTrafficSyncJob) maybePushGlobals(mgr *runtime.Manager, nodes []*mod logger.Warning("node traffic sync: push globals to", n.Name, "failed:", err) } } - }(n, remote, traffics) + }) } wg.Wait() }