mirror of
https://github.com/coredns/coredns.git
synced 2025-10-26 15:54:16 -04:00
fix: prevent SIGTERM/reload deadlock (#7562)
This commit is contained in:
@@ -50,6 +50,10 @@ type Server struct {
|
||||
classChaos bool // allow non-INET class queries
|
||||
|
||||
tsigSecret map[string]string
|
||||
|
||||
// Ensure Stop is idempotent when invoked concurrently (e.g., during reload and SIGTERM).
|
||||
stopOnce sync.Once
|
||||
wgDoneOnce sync.Once
|
||||
}
|
||||
|
||||
// MetadataCollector is a plugin that can retrieve metadata functions from all metadata providing plugins
|
||||
@@ -212,33 +216,37 @@ func (s *Server) ListenPacket() (net.PacketConn, error) {
|
||||
// immediately.
|
||||
// This implements Caddy.Stopper interface.
|
||||
func (s *Server) Stop() (err error) {
|
||||
if runtime.GOOS != "windows" {
|
||||
// force connections to close after timeout
|
||||
done := make(chan struct{})
|
||||
go func() {
|
||||
s.dnsWg.Done() // decrement our initial increment used as a barrier
|
||||
s.dnsWg.Wait()
|
||||
close(done)
|
||||
}()
|
||||
var onceErr error
|
||||
s.stopOnce.Do(func() {
|
||||
if runtime.GOOS != "windows" {
|
||||
// force connections to close after timeout
|
||||
done := make(chan struct{})
|
||||
go func() {
|
||||
// decrement our initial increment used as a barrier, but only once
|
||||
s.wgDoneOnce.Do(func() { s.dnsWg.Done() })
|
||||
s.dnsWg.Wait()
|
||||
close(done)
|
||||
}()
|
||||
|
||||
// Wait for remaining connections to finish or
|
||||
// force them all to close after timeout
|
||||
select {
|
||||
case <-time.After(s.graceTimeout):
|
||||
case <-done:
|
||||
// Wait for remaining connections to finish or
|
||||
// force them all to close after timeout
|
||||
select {
|
||||
case <-time.After(s.graceTimeout):
|
||||
case <-done:
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Close the listener now; this stops the server without delay
|
||||
s.m.Lock()
|
||||
for _, s1 := range s.server {
|
||||
// We might not have started and initialized the full set of servers
|
||||
if s1 != nil {
|
||||
err = s1.Shutdown()
|
||||
// Close the listener now; this stops the server without delay
|
||||
s.m.Lock()
|
||||
for _, s1 := range s.server {
|
||||
// We might not have started and initialized the full set of servers
|
||||
if s1 != nil {
|
||||
onceErr = s1.Shutdown()
|
||||
}
|
||||
}
|
||||
}
|
||||
s.m.Unlock()
|
||||
return
|
||||
s.m.Unlock()
|
||||
})
|
||||
return onceErr
|
||||
}
|
||||
|
||||
// Address together with Stop() implement caddy.GracefulServer.
|
||||
|
||||
@@ -2,6 +2,7 @@ package dnsserver
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sync"
|
||||
"testing"
|
||||
|
||||
"github.com/coredns/coredns/plugin"
|
||||
@@ -120,3 +121,22 @@ func BenchmarkCoreServeDNS(b *testing.B) {
|
||||
s.ServeDNS(ctx, w, m)
|
||||
}
|
||||
}
|
||||
|
||||
// Validates Stop is idempotent and safe under concurrent calls.
|
||||
func TestStopIsIdempotent(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
s := &Server{}
|
||||
s.dnsWg.Add(1)
|
||||
|
||||
const n = 10
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(n)
|
||||
for range n {
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
_ = s.Stop()
|
||||
}()
|
||||
}
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
@@ -105,6 +105,10 @@ func hook(event caddy.EventName, info any) error {
|
||||
// now lets consider that plugin will not be reload, unless appear in next config file
|
||||
// change status of usage will be reset in setup if the plugin appears in config file
|
||||
r.setUsage(maybeUsed)
|
||||
// If shutdown is in progress, avoid attempting a restart.
|
||||
if shutdownRequested(r.quit) {
|
||||
return
|
||||
}
|
||||
_, err := instance.Restart(corefile)
|
||||
reloadInfo.WithLabelValues("sha512", hex.EncodeToString(sha512sum[:])).Set(1)
|
||||
if err != nil {
|
||||
@@ -126,3 +130,14 @@ func hook(event caddy.EventName, info any) error {
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// shutdownRequested reports whether a shutdown has been requested via quit channel.
|
||||
// helps with unit testing of the shutdown gate logic.
|
||||
func shutdownRequested(quit <-chan bool) bool {
|
||||
select {
|
||||
case <-quit:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
50
plugin/reload/reload_test.go
Normal file
50
plugin/reload/reload_test.go
Normal file
@@ -0,0 +1,50 @@
|
||||
package reload
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/coredns/caddy"
|
||||
)
|
||||
|
||||
// fakeInput implements caddy.Input for testing parse().
|
||||
type fakeInput struct {
|
||||
p string
|
||||
b []byte
|
||||
}
|
||||
|
||||
func (f fakeInput) ServerType() string { return "dns" }
|
||||
func (f fakeInput) Body() []byte { return f.b }
|
||||
func (f fakeInput) Path() string { return f.p }
|
||||
|
||||
// TestParseInvalidCorefile ensures parse returns an error for invalid Corefile syntax.
|
||||
func TestParseInvalidCorefile(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
broken := fakeInput{p: "Corefile", b: []byte(". { errors\n")}
|
||||
if _, err := parse(broken); err == nil {
|
||||
t.Fatalf("expected parse error for invalid Corefile, got nil")
|
||||
}
|
||||
}
|
||||
|
||||
// TestShutdownGate ensures the shutdown gate helper recognizes when shutdown is requested.
|
||||
func TestShutdownGate(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
q := make(chan bool, 1)
|
||||
if shutdownRequested(q) {
|
||||
t.Fatalf("expected no shutdown before signal")
|
||||
}
|
||||
q <- true
|
||||
if !shutdownRequested(q) {
|
||||
t.Fatalf("expected shutdown after signal")
|
||||
}
|
||||
}
|
||||
|
||||
// TestHookIgnoresNonStartupEvent ensures hook is a no-op for non-startup events.
|
||||
func TestHookIgnoresNonStartupEvent(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
if err := hook(caddy.EventName("not-startup"), nil); err != nil {
|
||||
t.Fatalf("expected no error for non-startup event, got %v", err)
|
||||
}
|
||||
}
|
||||
@@ -20,7 +20,7 @@ func init() { plugin.Register("reload", setup) }
|
||||
// channel for QUIT is never changed in purpose.
|
||||
// WARNING: this data may be unsync after an invalid attempt of reload Corefile.
|
||||
var (
|
||||
r = reload{dur: defaultInterval, u: unused, quit: make(chan bool)}
|
||||
r = reload{dur: defaultInterval, u: unused, quit: make(chan bool, 1)}
|
||||
once, shutOnce sync.Once
|
||||
)
|
||||
|
||||
|
||||
@@ -8,6 +8,7 @@ import (
|
||||
"net/http"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/coredns/caddy"
|
||||
"github.com/coredns/coredns/core/dnsserver"
|
||||
@@ -380,6 +381,59 @@ func TestReloadUnreadyPlugin(t *testing.T) {
|
||||
c1.Stop()
|
||||
}
|
||||
|
||||
// TestReloadConcurrentRestartAndStop ensures there is no deadlock when a restart
|
||||
// races with a shutdown (issue #7314).
|
||||
func TestReloadConcurrentRestartAndStop(t *testing.T) {
|
||||
corefileA := `.:0 {
|
||||
reload 2s 1s
|
||||
whoami
|
||||
}`
|
||||
corefileB := `.:0 {
|
||||
reload 2s 1s
|
||||
whoami
|
||||
# change to trigger different config
|
||||
}`
|
||||
|
||||
c, err := CoreDNSServer(corefileA)
|
||||
if err != nil {
|
||||
if strings.Contains(err.Error(), inUse) {
|
||||
return
|
||||
}
|
||||
t.Fatalf("Could not start CoreDNS instance: %v", err)
|
||||
}
|
||||
|
||||
restartErr := make(chan error, 1)
|
||||
stopDone := make(chan struct{})
|
||||
|
||||
go func() {
|
||||
_, err := c.Restart(NewInput(corefileB))
|
||||
restartErr <- err
|
||||
}()
|
||||
|
||||
// Small delay to increase overlap window
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
|
||||
go func() {
|
||||
c.Stop()
|
||||
close(stopDone)
|
||||
}()
|
||||
|
||||
// Both operations should complete promptly; if not, we may be deadlocked.
|
||||
select {
|
||||
case <-stopDone:
|
||||
// ok
|
||||
case <-time.After(5 * time.Second):
|
||||
t.Fatalf("Stop did not complete in time (possible deadlock)")
|
||||
}
|
||||
select {
|
||||
case <-restartErr:
|
||||
// ok: restart either succeeded or returned an error
|
||||
// we only care about not hanging
|
||||
case <-time.After(5 * time.Second):
|
||||
t.Fatalf("Restart did not complete in time (possible deadlock)")
|
||||
}
|
||||
}
|
||||
|
||||
type unready struct {
|
||||
next plugin.Handler
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user