mirror of
https://github.com/coredns/coredns.git
synced 2025-10-28 08:44:17 -04:00
fix: prevent SIGTERM/reload deadlock (#7562)
This commit is contained in:
@@ -50,6 +50,10 @@ type Server struct {
|
|||||||
classChaos bool // allow non-INET class queries
|
classChaos bool // allow non-INET class queries
|
||||||
|
|
||||||
tsigSecret map[string]string
|
tsigSecret map[string]string
|
||||||
|
|
||||||
|
// Ensure Stop is idempotent when invoked concurrently (e.g., during reload and SIGTERM).
|
||||||
|
stopOnce sync.Once
|
||||||
|
wgDoneOnce sync.Once
|
||||||
}
|
}
|
||||||
|
|
||||||
// MetadataCollector is a plugin that can retrieve metadata functions from all metadata providing plugins
|
// MetadataCollector is a plugin that can retrieve metadata functions from all metadata providing plugins
|
||||||
@@ -212,33 +216,37 @@ func (s *Server) ListenPacket() (net.PacketConn, error) {
|
|||||||
// immediately.
|
// immediately.
|
||||||
// This implements Caddy.Stopper interface.
|
// This implements Caddy.Stopper interface.
|
||||||
func (s *Server) Stop() (err error) {
|
func (s *Server) Stop() (err error) {
|
||||||
if runtime.GOOS != "windows" {
|
var onceErr error
|
||||||
// force connections to close after timeout
|
s.stopOnce.Do(func() {
|
||||||
done := make(chan struct{})
|
if runtime.GOOS != "windows" {
|
||||||
go func() {
|
// force connections to close after timeout
|
||||||
s.dnsWg.Done() // decrement our initial increment used as a barrier
|
done := make(chan struct{})
|
||||||
s.dnsWg.Wait()
|
go func() {
|
||||||
close(done)
|
// decrement our initial increment used as a barrier, but only once
|
||||||
}()
|
s.wgDoneOnce.Do(func() { s.dnsWg.Done() })
|
||||||
|
s.dnsWg.Wait()
|
||||||
|
close(done)
|
||||||
|
}()
|
||||||
|
|
||||||
// Wait for remaining connections to finish or
|
// Wait for remaining connections to finish or
|
||||||
// force them all to close after timeout
|
// force them all to close after timeout
|
||||||
select {
|
select {
|
||||||
case <-time.After(s.graceTimeout):
|
case <-time.After(s.graceTimeout):
|
||||||
case <-done:
|
case <-done:
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// Close the listener now; this stops the server without delay
|
// Close the listener now; this stops the server without delay
|
||||||
s.m.Lock()
|
s.m.Lock()
|
||||||
for _, s1 := range s.server {
|
for _, s1 := range s.server {
|
||||||
// We might not have started and initialized the full set of servers
|
// We might not have started and initialized the full set of servers
|
||||||
if s1 != nil {
|
if s1 != nil {
|
||||||
err = s1.Shutdown()
|
onceErr = s1.Shutdown()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
s.m.Unlock()
|
||||||
s.m.Unlock()
|
})
|
||||||
return
|
return onceErr
|
||||||
}
|
}
|
||||||
|
|
||||||
// Address together with Stop() implement caddy.GracefulServer.
|
// Address together with Stop() implement caddy.GracefulServer.
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ package dnsserver
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"sync"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/coredns/coredns/plugin"
|
"github.com/coredns/coredns/plugin"
|
||||||
@@ -120,3 +121,22 @@ func BenchmarkCoreServeDNS(b *testing.B) {
|
|||||||
s.ServeDNS(ctx, w, m)
|
s.ServeDNS(ctx, w, m)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Validates Stop is idempotent and safe under concurrent calls.
|
||||||
|
func TestStopIsIdempotent(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
s := &Server{}
|
||||||
|
s.dnsWg.Add(1)
|
||||||
|
|
||||||
|
const n = 10
|
||||||
|
var wg sync.WaitGroup
|
||||||
|
wg.Add(n)
|
||||||
|
for range n {
|
||||||
|
go func() {
|
||||||
|
defer wg.Done()
|
||||||
|
_ = s.Stop()
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
wg.Wait()
|
||||||
|
}
|
||||||
|
|||||||
@@ -105,6 +105,10 @@ func hook(event caddy.EventName, info any) error {
|
|||||||
// now lets consider that plugin will not be reload, unless appear in next config file
|
// now lets consider that plugin will not be reload, unless appear in next config file
|
||||||
// change status of usage will be reset in setup if the plugin appears in config file
|
// change status of usage will be reset in setup if the plugin appears in config file
|
||||||
r.setUsage(maybeUsed)
|
r.setUsage(maybeUsed)
|
||||||
|
// If shutdown is in progress, avoid attempting a restart.
|
||||||
|
if shutdownRequested(r.quit) {
|
||||||
|
return
|
||||||
|
}
|
||||||
_, err := instance.Restart(corefile)
|
_, err := instance.Restart(corefile)
|
||||||
reloadInfo.WithLabelValues("sha512", hex.EncodeToString(sha512sum[:])).Set(1)
|
reloadInfo.WithLabelValues("sha512", hex.EncodeToString(sha512sum[:])).Set(1)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -126,3 +130,14 @@ func hook(event caddy.EventName, info any) error {
|
|||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// shutdownRequested reports whether a shutdown has been requested via quit channel.
|
||||||
|
// helps with unit testing of the shutdown gate logic.
|
||||||
|
func shutdownRequested(quit <-chan bool) bool {
|
||||||
|
select {
|
||||||
|
case <-quit:
|
||||||
|
return true
|
||||||
|
default:
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
50
plugin/reload/reload_test.go
Normal file
50
plugin/reload/reload_test.go
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
package reload
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/coredns/caddy"
|
||||||
|
)
|
||||||
|
|
||||||
|
// fakeInput implements caddy.Input for testing parse().
|
||||||
|
type fakeInput struct {
|
||||||
|
p string
|
||||||
|
b []byte
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f fakeInput) ServerType() string { return "dns" }
|
||||||
|
func (f fakeInput) Body() []byte { return f.b }
|
||||||
|
func (f fakeInput) Path() string { return f.p }
|
||||||
|
|
||||||
|
// TestParseInvalidCorefile ensures parse returns an error for invalid Corefile syntax.
|
||||||
|
func TestParseInvalidCorefile(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
broken := fakeInput{p: "Corefile", b: []byte(". { errors\n")}
|
||||||
|
if _, err := parse(broken); err == nil {
|
||||||
|
t.Fatalf("expected parse error for invalid Corefile, got nil")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestShutdownGate ensures the shutdown gate helper recognizes when shutdown is requested.
|
||||||
|
func TestShutdownGate(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
q := make(chan bool, 1)
|
||||||
|
if shutdownRequested(q) {
|
||||||
|
t.Fatalf("expected no shutdown before signal")
|
||||||
|
}
|
||||||
|
q <- true
|
||||||
|
if !shutdownRequested(q) {
|
||||||
|
t.Fatalf("expected shutdown after signal")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestHookIgnoresNonStartupEvent ensures hook is a no-op for non-startup events.
|
||||||
|
func TestHookIgnoresNonStartupEvent(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
if err := hook(caddy.EventName("not-startup"), nil); err != nil {
|
||||||
|
t.Fatalf("expected no error for non-startup event, got %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -20,7 +20,7 @@ func init() { plugin.Register("reload", setup) }
|
|||||||
// channel for QUIT is never changed in purpose.
|
// channel for QUIT is never changed in purpose.
|
||||||
// WARNING: this data may be unsync after an invalid attempt of reload Corefile.
|
// WARNING: this data may be unsync after an invalid attempt of reload Corefile.
|
||||||
var (
|
var (
|
||||||
r = reload{dur: defaultInterval, u: unused, quit: make(chan bool)}
|
r = reload{dur: defaultInterval, u: unused, quit: make(chan bool, 1)}
|
||||||
once, shutOnce sync.Once
|
once, shutOnce sync.Once
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ import (
|
|||||||
"net/http"
|
"net/http"
|
||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/coredns/caddy"
|
"github.com/coredns/caddy"
|
||||||
"github.com/coredns/coredns/core/dnsserver"
|
"github.com/coredns/coredns/core/dnsserver"
|
||||||
@@ -380,6 +381,59 @@ func TestReloadUnreadyPlugin(t *testing.T) {
|
|||||||
c1.Stop()
|
c1.Stop()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestReloadConcurrentRestartAndStop ensures there is no deadlock when a restart
|
||||||
|
// races with a shutdown (issue #7314).
|
||||||
|
func TestReloadConcurrentRestartAndStop(t *testing.T) {
|
||||||
|
corefileA := `.:0 {
|
||||||
|
reload 2s 1s
|
||||||
|
whoami
|
||||||
|
}`
|
||||||
|
corefileB := `.:0 {
|
||||||
|
reload 2s 1s
|
||||||
|
whoami
|
||||||
|
# change to trigger different config
|
||||||
|
}`
|
||||||
|
|
||||||
|
c, err := CoreDNSServer(corefileA)
|
||||||
|
if err != nil {
|
||||||
|
if strings.Contains(err.Error(), inUse) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
t.Fatalf("Could not start CoreDNS instance: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
restartErr := make(chan error, 1)
|
||||||
|
stopDone := make(chan struct{})
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
_, err := c.Restart(NewInput(corefileB))
|
||||||
|
restartErr <- err
|
||||||
|
}()
|
||||||
|
|
||||||
|
// Small delay to increase overlap window
|
||||||
|
time.Sleep(50 * time.Millisecond)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
c.Stop()
|
||||||
|
close(stopDone)
|
||||||
|
}()
|
||||||
|
|
||||||
|
// Both operations should complete promptly; if not, we may be deadlocked.
|
||||||
|
select {
|
||||||
|
case <-stopDone:
|
||||||
|
// ok
|
||||||
|
case <-time.After(5 * time.Second):
|
||||||
|
t.Fatalf("Stop did not complete in time (possible deadlock)")
|
||||||
|
}
|
||||||
|
select {
|
||||||
|
case <-restartErr:
|
||||||
|
// ok: restart either succeeded or returned an error
|
||||||
|
// we only care about not hanging
|
||||||
|
case <-time.After(5 * time.Second):
|
||||||
|
t.Fatalf("Restart did not complete in time (possible deadlock)")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
type unready struct {
|
type unready struct {
|
||||||
next plugin.Handler
|
next plugin.Handler
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user