mirror of
https://github.com/coredns/coredns.git
synced 2025-11-12 15:02:17 -05:00
fix(dnstap): Better error handling (redial & logging) when Dnstap is busy (#7619)
* Fix dnstap redial & improve logging Signed-off-by: xyang378 <xyang378@bloomberg.net> * fix CR comments Signed-off-by: xyang378 <xyang378@bloomberg.net> * redial at interval Signed-off-by: xyang378 <xyang378@bloomberg.net> * CR comments & lint Signed-off-by: xyang378 <xyang378@bloomberg.net> CR comment * fix lint Signed-off-by: xyang378 <xyang378@bloomberg.net> --------- Signed-off-by: xyang378 <xyang378@bloomberg.net>
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
package dnstap
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net"
|
||||
"sync"
|
||||
"testing"
|
||||
@@ -10,6 +11,7 @@ import (
|
||||
|
||||
tap "github.com/dnstap/golang-dnstap"
|
||||
fs "github.com/farsightsec/golang-framestream"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -17,6 +19,16 @@ var (
|
||||
tmsg = tap.Dnstap{Type: &msgType}
|
||||
)
|
||||
|
||||
// MockLogger is a logger test double. It records how many warnings were
// emitted and accumulates their formatted text so tests can assert on both.
type MockLogger struct {
	// WarnCount is the number of Warningf calls made so far.
	WarnCount int
	// WarnLog is every formatted warning appended back to back, with no
	// separator inserted between messages.
	WarnLog string
}

// Warningf counts the call, renders format with v using fmt.Sprintf, and
// appends the rendered text to WarnLog.
func (l *MockLogger) Warningf(format string, v ...any) {
	l.WarnCount++
	rendered := fmt.Sprintf(format, v...)
	l.WarnLog = l.WarnLog + rendered
}
|
||||
|
||||
func accept(t *testing.T, l net.Listener, count int) {
|
||||
t.Helper()
|
||||
server, err := l.Accept()
|
||||
@@ -64,6 +76,7 @@ func TestTransport(t *testing.T) {
|
||||
dio := newIO(param[0], l.Addr().String(), 1, 1)
|
||||
dio.tcpTimeout = 10 * time.Millisecond
|
||||
dio.flushTimeout = 30 * time.Millisecond
|
||||
dio.errorCheckInterval = 50 * time.Millisecond
|
||||
dio.connect()
|
||||
|
||||
dio.Dnstap(&tmsg)
|
||||
@@ -93,6 +106,7 @@ func TestRace(t *testing.T) {
|
||||
dio := newIO("tcp", l.Addr().String(), 1, 1)
|
||||
dio.tcpTimeout = 10 * time.Millisecond
|
||||
dio.flushTimeout = 30 * time.Millisecond
|
||||
dio.errorCheckInterval = 50 * time.Millisecond
|
||||
dio.connect()
|
||||
defer dio.close()
|
||||
|
||||
@@ -108,12 +122,131 @@ func TestRace(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestReconnect(t *testing.T) {
|
||||
count := 5
|
||||
t.Run("ConnectedOnStart", func(t *testing.T) {
|
||||
// GIVEN
|
||||
// TCP connection available before DnsTap start up
|
||||
// DnsTap successfully established output connection on start up
|
||||
l, err := reuseport.Listen("tcp", ":0")
|
||||
if err != nil {
|
||||
t.Fatalf("Cannot start listener: %s", err)
|
||||
}
|
||||
|
||||
l, err := reuseport.Listen("tcp", ":0")
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
accept(t, l, 1)
|
||||
wg.Done()
|
||||
}()
|
||||
|
||||
addr := l.Addr().String()
|
||||
logger := MockLogger{}
|
||||
dio := newIO("tcp", addr, 1, 1)
|
||||
dio.tcpTimeout = 10 * time.Millisecond
|
||||
dio.flushTimeout = 30 * time.Millisecond
|
||||
dio.errorCheckInterval = 50 * time.Millisecond
|
||||
dio.logger = &logger
|
||||
dio.connect()
|
||||
defer dio.close()
|
||||
|
||||
// WHEN
|
||||
// TCP connection closed when DnsTap is still running
|
||||
// TCP listener starts again on the same port
|
||||
// DnsTap send multiple messages
|
||||
dio.Dnstap(&tmsg)
|
||||
wg.Wait()
|
||||
|
||||
// Close listener
|
||||
l.Close()
|
||||
// And start TCP listener again on the same port
|
||||
l, err = reuseport.Listen("tcp", addr)
|
||||
if err != nil {
|
||||
t.Fatalf("Cannot start listener: %s", err)
|
||||
}
|
||||
defer l.Close()
|
||||
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
accept(t, l, 1)
|
||||
wg.Done()
|
||||
}()
|
||||
|
||||
messageCount := 5
|
||||
for range messageCount {
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
dio.Dnstap(&tmsg)
|
||||
}
|
||||
wg.Wait()
|
||||
|
||||
// THEN
|
||||
// DnsTap is able to reconnect
|
||||
// Messages can be sent eventually
|
||||
require.NotNil(t, dio.enc)
|
||||
require.Equal(t, 0, len(dio.queue))
|
||||
require.Less(t, logger.WarnCount, messageCount)
|
||||
})
|
||||
|
||||
t.Run("NotConnectedOnStart", func(t *testing.T) {
|
||||
// GIVEN
|
||||
// No TCP connection established at DnsTap start up
|
||||
l, err := reuseport.Listen("tcp", ":0")
|
||||
if err != nil {
|
||||
t.Fatalf("Cannot start listener: %s", err)
|
||||
}
|
||||
l.Close()
|
||||
|
||||
logger := MockLogger{}
|
||||
addr := l.Addr().String()
|
||||
dio := newIO("tcp", addr, 1, 1)
|
||||
dio.tcpTimeout = 10 * time.Millisecond
|
||||
dio.flushTimeout = 30 * time.Millisecond
|
||||
dio.errorCheckInterval = 50 * time.Millisecond
|
||||
dio.logger = &logger
|
||||
dio.connect()
|
||||
defer dio.close()
|
||||
|
||||
// WHEN
|
||||
// DnsTap is already running
|
||||
// TCP listener starts on DnsTap's configured port
|
||||
// DnsTap send multiple messages
|
||||
dio.Dnstap(&tmsg)
|
||||
|
||||
l, err = reuseport.Listen("tcp", addr)
|
||||
if err != nil {
|
||||
t.Fatalf("Cannot start listener: %s", err)
|
||||
}
|
||||
defer l.Close()
|
||||
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(1)
|
||||
messageCount := 5
|
||||
go func() {
|
||||
accept(t, l, messageCount)
|
||||
wg.Done()
|
||||
}()
|
||||
|
||||
for range messageCount {
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
dio.Dnstap(&tmsg)
|
||||
}
|
||||
wg.Wait()
|
||||
|
||||
// THEN
|
||||
// DnsTap is able to reconnect
|
||||
// Messages can be sent eventually
|
||||
require.NotNil(t, dio.enc)
|
||||
require.Equal(t, 0, len(dio.queue))
|
||||
require.Less(t, logger.WarnCount, messageCount)
|
||||
})
|
||||
}
|
||||
|
||||
func TestFullQueueWriteFail(t *testing.T) {
|
||||
// GIVEN
|
||||
// DnsTap I/O with a small queue
|
||||
l, err := reuseport.Listen("unix", "dn2stap.sock")
|
||||
if err != nil {
|
||||
t.Fatalf("Cannot start listener: %s", err)
|
||||
}
|
||||
defer l.Close()
|
||||
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(1)
|
||||
@@ -122,35 +255,25 @@ func TestReconnect(t *testing.T) {
|
||||
wg.Done()
|
||||
}()
|
||||
|
||||
addr := l.Addr().String()
|
||||
dio := newIO("tcp", addr, 1, 1)
|
||||
dio.tcpTimeout = 10 * time.Millisecond
|
||||
dio.flushTimeout = 30 * time.Millisecond
|
||||
logger := MockLogger{}
|
||||
dio := newIO("unix", l.Addr().String(), 1, 1)
|
||||
dio.flushTimeout = 500 * time.Millisecond
|
||||
dio.errorCheckInterval = 50 * time.Millisecond
|
||||
dio.logger = &logger
|
||||
dio.queue = make(chan *tap.Dnstap, 1)
|
||||
dio.connect()
|
||||
defer dio.close()
|
||||
|
||||
dio.Dnstap(&tmsg)
|
||||
|
||||
wg.Wait()
|
||||
|
||||
// Close listener
|
||||
l.Close()
|
||||
// And start TCP listener again on the same port
|
||||
l, err = reuseport.Listen("tcp", addr)
|
||||
if err != nil {
|
||||
t.Fatalf("Cannot start listener: %s", err)
|
||||
}
|
||||
defer l.Close()
|
||||
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
accept(t, l, 1)
|
||||
wg.Done()
|
||||
}()
|
||||
|
||||
// WHEN
|
||||
// messages overwhelm the queue
|
||||
count := 100
|
||||
for range count {
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
dio.Dnstap(&tmsg)
|
||||
}
|
||||
wg.Wait()
|
||||
|
||||
// THEN
|
||||
// Dropped messages are logged
|
||||
require.NotEqual(t, 0, logger.WarnCount)
|
||||
require.Contains(t, logger.WarnLog, "Dropped dnstap messages")
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user