mirror of
				https://github.com/coredns/coredns.git
				synced 2025-10-31 02:03:20 -04:00 
			
		
		
		
	fix: prevent SIGTERM/reload deadlock (#7562)
This commit is contained in:
		| @@ -50,6 +50,10 @@ type Server struct { | ||||
| 	classChaos   bool                 // allow non-INET class queries | ||||
|  | ||||
| 	tsigSecret map[string]string | ||||
|  | ||||
| 	// Ensure Stop is idempotent when invoked concurrently (e.g., during reload and SIGTERM). | ||||
| 	stopOnce   sync.Once | ||||
| 	wgDoneOnce sync.Once | ||||
| } | ||||
|  | ||||
| // MetadataCollector is a plugin that can retrieve metadata functions from all metadata providing plugins | ||||
| @@ -212,33 +216,37 @@ func (s *Server) ListenPacket() (net.PacketConn, error) { | ||||
| // immediately. | ||||
| // This implements Caddy.Stopper interface. | ||||
| func (s *Server) Stop() (err error) { | ||||
| 	if runtime.GOOS != "windows" { | ||||
| 		// force connections to close after timeout | ||||
| 		done := make(chan struct{}) | ||||
| 		go func() { | ||||
| 			s.dnsWg.Done() // decrement our initial increment used as a barrier | ||||
| 			s.dnsWg.Wait() | ||||
| 			close(done) | ||||
| 		}() | ||||
| 	var onceErr error | ||||
| 	s.stopOnce.Do(func() { | ||||
| 		if runtime.GOOS != "windows" { | ||||
| 			// force connections to close after timeout | ||||
| 			done := make(chan struct{}) | ||||
| 			go func() { | ||||
| 				// decrement our initial increment used as a barrier, but only once | ||||
| 				s.wgDoneOnce.Do(func() { s.dnsWg.Done() }) | ||||
| 				s.dnsWg.Wait() | ||||
| 				close(done) | ||||
| 			}() | ||||
|  | ||||
| 		// Wait for remaining connections to finish or | ||||
| 		// force them all to close after timeout | ||||
| 		select { | ||||
| 		case <-time.After(s.graceTimeout): | ||||
| 		case <-done: | ||||
| 			// Wait for remaining connections to finish or | ||||
| 			// force them all to close after timeout | ||||
| 			select { | ||||
| 			case <-time.After(s.graceTimeout): | ||||
| 			case <-done: | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	// Close the listener now; this stops the server without delay | ||||
| 	s.m.Lock() | ||||
| 	for _, s1 := range s.server { | ||||
| 		// We might not have started and initialized the full set of servers | ||||
| 		if s1 != nil { | ||||
| 			err = s1.Shutdown() | ||||
| 		// Close the listener now; this stops the server without delay | ||||
| 		s.m.Lock() | ||||
| 		for _, s1 := range s.server { | ||||
| 			// We might not have started and initialized the full set of servers | ||||
| 			if s1 != nil { | ||||
| 				onceErr = s1.Shutdown() | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 	s.m.Unlock() | ||||
| 	return | ||||
| 		s.m.Unlock() | ||||
| 	}) | ||||
| 	return onceErr | ||||
| } | ||||
|  | ||||
| // Address together with Stop() implement caddy.GracefulServer. | ||||
|   | ||||
| @@ -2,6 +2,7 @@ package dnsserver | ||||
|  | ||||
| import ( | ||||
| 	"context" | ||||
| 	"sync" | ||||
| 	"testing" | ||||
|  | ||||
| 	"github.com/coredns/coredns/plugin" | ||||
| @@ -120,3 +121,22 @@ func BenchmarkCoreServeDNS(b *testing.B) { | ||||
| 		s.ServeDNS(ctx, w, m) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // Validates Stop is idempotent and safe under concurrent calls. | ||||
| func TestStopIsIdempotent(t *testing.T) { | ||||
| 	t.Parallel() | ||||
|  | ||||
| 	s := &Server{} | ||||
| 	s.dnsWg.Add(1) | ||||
|  | ||||
| 	const n = 10 | ||||
| 	var wg sync.WaitGroup | ||||
| 	wg.Add(n) | ||||
| 	for range n { | ||||
| 		go func() { | ||||
| 			defer wg.Done() | ||||
| 			_ = s.Stop() | ||||
| 		}() | ||||
| 	} | ||||
| 	wg.Wait() | ||||
| } | ||||
|   | ||||
| @@ -105,6 +105,10 @@ func hook(event caddy.EventName, info any) error { | ||||
| 					// now lets consider that plugin will not be reload, unless appear in next config file | ||||
| 					// change status of usage will be reset in setup if the plugin appears in config file | ||||
| 					r.setUsage(maybeUsed) | ||||
| 					// If shutdown is in progress, avoid attempting a restart. | ||||
| 					if shutdownRequested(r.quit) { | ||||
| 						return | ||||
| 					} | ||||
| 					_, err := instance.Restart(corefile) | ||||
| 					reloadInfo.WithLabelValues("sha512", hex.EncodeToString(sha512sum[:])).Set(1) | ||||
| 					if err != nil { | ||||
| @@ -126,3 +130,14 @@ func hook(event caddy.EventName, info any) error { | ||||
|  | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| // shutdownRequested reports whether a shutdown has been requested via quit channel. | ||||
| // helps with unit testing of the shutdown gate logic. | ||||
| func shutdownRequested(quit <-chan bool) bool { | ||||
| 	select { | ||||
| 	case <-quit: | ||||
| 		return true | ||||
| 	default: | ||||
| 		return false | ||||
| 	} | ||||
| } | ||||
|   | ||||
							
								
								
									
										50
									
								
								plugin/reload/reload_test.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										50
									
								
								plugin/reload/reload_test.go
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,50 @@ | ||||
| package reload | ||||
|  | ||||
| import ( | ||||
| 	"testing" | ||||
|  | ||||
| 	"github.com/coredns/caddy" | ||||
| ) | ||||
|  | ||||
| // fakeInput implements caddy.Input for testing parse(). | ||||
| type fakeInput struct { | ||||
| 	p string | ||||
| 	b []byte | ||||
| } | ||||
|  | ||||
| func (f fakeInput) ServerType() string { return "dns" } | ||||
| func (f fakeInput) Body() []byte       { return f.b } | ||||
| func (f fakeInput) Path() string       { return f.p } | ||||
|  | ||||
| // TestParseInvalidCorefile ensures parse returns an error for invalid Corefile syntax. | ||||
| func TestParseInvalidCorefile(t *testing.T) { | ||||
| 	t.Parallel() | ||||
|  | ||||
| 	broken := fakeInput{p: "Corefile", b: []byte(". { errors\n")} | ||||
| 	if _, err := parse(broken); err == nil { | ||||
| 		t.Fatalf("expected parse error for invalid Corefile, got nil") | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // TestShutdownGate ensures the shutdown gate helper recognizes when shutdown is requested. | ||||
| func TestShutdownGate(t *testing.T) { | ||||
| 	t.Parallel() | ||||
|  | ||||
| 	q := make(chan bool, 1) | ||||
| 	if shutdownRequested(q) { | ||||
| 		t.Fatalf("expected no shutdown before signal") | ||||
| 	} | ||||
| 	q <- true | ||||
| 	if !shutdownRequested(q) { | ||||
| 		t.Fatalf("expected shutdown after signal") | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // TestHookIgnoresNonStartupEvent ensures hook is a no-op for non-startup events. | ||||
| func TestHookIgnoresNonStartupEvent(t *testing.T) { | ||||
| 	t.Parallel() | ||||
|  | ||||
| 	if err := hook(caddy.EventName("not-startup"), nil); err != nil { | ||||
| 		t.Fatalf("expected no error for non-startup event, got %v", err) | ||||
| 	} | ||||
| } | ||||
| @@ -20,7 +20,7 @@ func init() { plugin.Register("reload", setup) } | ||||
| // channel for QUIT is never changed in purpose. | ||||
| // WARNING: this data may be unsync after an invalid attempt of reload Corefile. | ||||
| var ( | ||||
| 	r              = reload{dur: defaultInterval, u: unused, quit: make(chan bool)} | ||||
| 	r              = reload{dur: defaultInterval, u: unused, quit: make(chan bool, 1)} | ||||
| 	once, shutOnce sync.Once | ||||
| ) | ||||
|  | ||||
|   | ||||
| @@ -8,6 +8,7 @@ import ( | ||||
| 	"net/http" | ||||
| 	"strings" | ||||
| 	"testing" | ||||
| 	"time" | ||||
|  | ||||
| 	"github.com/coredns/caddy" | ||||
| 	"github.com/coredns/coredns/core/dnsserver" | ||||
| @@ -380,6 +381,59 @@ func TestReloadUnreadyPlugin(t *testing.T) { | ||||
| 	c1.Stop() | ||||
| } | ||||
|  | ||||
| // TestReloadConcurrentRestartAndStop ensures there is no deadlock when a restart | ||||
| // races with a shutdown (issue #7314). | ||||
| func TestReloadConcurrentRestartAndStop(t *testing.T) { | ||||
| 	corefileA := `.:0 { | ||||
| 		reload 2s 1s | ||||
| 		whoami | ||||
| 	}` | ||||
| 	corefileB := `.:0 { | ||||
| 		reload 2s 1s | ||||
| 		whoami | ||||
| 		# change to trigger different config | ||||
| 	}` | ||||
|  | ||||
| 	c, err := CoreDNSServer(corefileA) | ||||
| 	if err != nil { | ||||
| 		if strings.Contains(err.Error(), inUse) { | ||||
| 			return | ||||
| 		} | ||||
| 		t.Fatalf("Could not start CoreDNS instance: %v", err) | ||||
| 	} | ||||
|  | ||||
| 	restartErr := make(chan error, 1) | ||||
| 	stopDone := make(chan struct{}) | ||||
|  | ||||
| 	go func() { | ||||
| 		_, err := c.Restart(NewInput(corefileB)) | ||||
| 		restartErr <- err | ||||
| 	}() | ||||
|  | ||||
| 	// Small delay to increase overlap window | ||||
| 	time.Sleep(50 * time.Millisecond) | ||||
|  | ||||
| 	go func() { | ||||
| 		c.Stop() | ||||
| 		close(stopDone) | ||||
| 	}() | ||||
|  | ||||
| 	// Both operations should complete promptly; if not, we may be deadlocked. | ||||
| 	select { | ||||
| 	case <-stopDone: | ||||
| 		// ok | ||||
| 	case <-time.After(5 * time.Second): | ||||
| 		t.Fatalf("Stop did not complete in time (possible deadlock)") | ||||
| 	} | ||||
| 	select { | ||||
| 	case <-restartErr: | ||||
| 		// ok: restart either succeeded or returned an error | ||||
| 		// we only care about not hanging | ||||
| 	case <-time.After(5 * time.Second): | ||||
| 		t.Fatalf("Restart did not complete in time (possible deadlock)") | ||||
| 	} | ||||
| } | ||||
|  | ||||
| type unready struct { | ||||
| 	next plugin.Handler | ||||
| } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user