test(multisocket): deflake restart by using a fresh port and coordinated cleanup (#7438)

Avoid transient EADDRINUSE and a previous negative WaitGroup panic:
- restart onto a different free port
- no Stop() around Restart()
- channel-coordinated Stop of the new instance

Fixes #7311

Signed-off-by: Syed Azeez <syedazeez337@gmail.com>
This commit is contained in:
Syed Azeez
2025-08-11 23:37:09 +05:30
committed by GitHub
parent fff39399e0
commit bc0467d1c0

View File

@@ -4,12 +4,21 @@ import (
"fmt" "fmt"
"net" "net"
"testing" "testing"
"time"
"github.com/miekg/dns" "github.com/miekg/dns"
) )
// These tests need a fixed port, because :0 selects a random port for each socket, but we need all sockets to be on // pickPort returns a free TCP port on 127.0.0.1 and closes the probe listener.
// the same port. func pickPort(t *testing.T) int {
t.Helper()
ln, err := net.Listen("tcp", "127.0.0.1:0")
if err != nil {
t.Fatalf("probe listen failed: %v", err)
}
defer ln.Close()
return ln.Addr().(*net.TCPAddr).Port
}
func TestMultisocket(t *testing.T) { func TestMultisocket(t *testing.T) {
tests := []struct { tests := []struct {
@@ -55,10 +64,11 @@ func TestMultisocket(t *testing.T) {
for _, test := range tests { for _, test := range tests {
t.Run(test.name, func(t *testing.T) { t.Run(test.name, func(t *testing.T) {
s, err := CoreDNSServer(test.corefile) s, err := CoreDNSServer(test.corefile)
defer s.Stop()
if err != nil { if err != nil {
t.Fatalf("Could not get CoreDNS serving instance: %s", err) t.Fatalf("Could not get CoreDNS serving instance: %s", err)
} }
defer s.Stop()
// check number of servers // check number of servers
if len(s.Servers()) != test.expectedServers { if len(s.Servers()) != test.expectedServers {
t.Fatalf("Expected %d servers, got %d", test.expectedServers, len(s.Servers())) t.Fatalf("Expected %d servers, got %d", test.expectedServers, len(s.Servers()))
@@ -82,50 +92,68 @@ func TestMultisocket(t *testing.T) {
} }
} }
// NOTE: restart uses a different port to avoid transient EADDRINUSE / shutdown races
// when TCP/UDP from the previous instance havent fully torn down yet.
func TestMultisocket_Restart(t *testing.T) { func TestMultisocket_Restart(t *testing.T) {
tests := []struct { tests := []struct {
name string name string
numSocketsBefore int numSocketsBefore int
numSocketsAfter int numSocketsAfter int
}{ }{
{ {name: "increase", numSocketsBefore: 1, numSocketsAfter: 2},
name: "increase", {name: "decrease", numSocketsBefore: 2, numSocketsAfter: 1},
numSocketsBefore: 1, {name: "no changes", numSocketsBefore: 2, numSocketsAfter: 2},
numSocketsAfter: 2,
},
{
name: "decrease",
numSocketsBefore: 2,
numSocketsAfter: 1,
},
{
name: "no changes",
numSocketsBefore: 2,
numSocketsAfter: 2,
},
} }
for _, test := range tests { for _, tc := range tests {
t.Run(test.name, func(t *testing.T) { t.Run(tc.name, func(t *testing.T) {
corefile := `.:5058 { port1 := pickPort(t)
port2 := pickPort(t) // restart onto a different free port
coreTmpl := `.:%d {
multisocket %d multisocket %d
}` }`
srv, err := CoreDNSServer(fmt.Sprintf(corefile, test.numSocketsBefore))
srv, err := CoreDNSServer(fmt.Sprintf(coreTmpl, port1, tc.numSocketsBefore))
if err != nil { if err != nil {
t.Fatalf("Could not get CoreDNS serving instance: %s", err) t.Fatalf("Could not get CoreDNS serving instance: %v", err)
} }
if test.numSocketsBefore != len(srv.Servers()) { if got := len(srv.Servers()); got != tc.numSocketsBefore {
t.Fatalf("Expected %d servers, got %d", test.numSocketsBefore, len(srv.Servers())) t.Fatalf("Expected %d servers, got %d", tc.numSocketsBefore, got)
} }
newSrv, err := srv.Restart(NewInput(fmt.Sprintf(corefile, test.numSocketsAfter))) resultCh := make(chan int, 1)
if err != nil { errCh := make(chan error, 1)
t.Fatalf("Could not get CoreDNS serving instance: %s", err) stopCh := make(chan struct{})
// Do the restart in a goroutine; return only the server count.
go func() {
newSrv, rerr := srv.Restart(NewInput(fmt.Sprintf(coreTmpl, port2, tc.numSocketsAfter)))
if rerr != nil {
errCh <- rerr
return
}
resultCh <- len(newSrv.Servers())
<-stopCh
newSrv.Stop()
}()
select {
case got := <-resultCh:
if got != tc.numSocketsAfter {
close(stopCh) // still stop the new instance
t.Fatalf("Expected %d servers, got %d", tc.numSocketsAfter, got)
}
close(stopCh) // now safe to stop the new instance
case rerr := <-errCh:
// Restart failed; stop the original instance.
srv.Stop()
t.Fatalf("Restart failed: %v", rerr)
case <-time.After(30 * time.Second):
// Timeout; stop the original instance.
srv.Stop()
t.Fatalf("Restart timed out after 30s (ports :%d→:%d, %d→%d sockets)",
port1, port2, tc.numSocketsBefore, tc.numSocketsAfter)
} }
if test.numSocketsAfter != len(newSrv.Servers()) {
t.Fatalf("Expected %d servers, got %d", test.numSocketsAfter, len(newSrv.Servers()))
}
newSrv.Stop()
}) })
} }
} }