// Source: coredns/plugin/pkg/proxy/persistent_test.go (Go, 256 lines, 5.3 KiB)

package proxy
import (
"runtime"
"testing"
"time"
"github.com/coredns/coredns/plugin/pkg/dnstest"
"github.com/miekg/dns"
)
func TestCached(t *testing.T) {
s := dnstest.NewServer(func(w dns.ResponseWriter, r *dns.Msg) {
ret := new(dns.Msg)
ret.SetReply(r)
w.WriteMsg(ret)
})
defer s.Close()
tr := newTransport("TestCached", s.Addr)
tr.Start()
defer tr.Stop()
c1, cache1, _ := tr.Dial("udp")
c2, cache2, _ := tr.Dial("udp")
if cache1 || cache2 {
t.Errorf("Expected non-cached connection")
}
tr.Yield(c1)
tr.Yield(c2)
c3, cached3, _ := tr.Dial("udp")
if !cached3 {
t.Error("Expected cached connection (c3)")
}
perf(proxy): use mutex-based connection pool (#7790) * perf(proxy): use mutex-based connection pool The proxy package (used for example by the forward plugin) utilized an actor model where a single connManager goroutine managed connection pooling via unbuffered channels (dial, yield, ret). This design serialized all connection acquisition and release operations through a single goroutine, creating a bottleneck under high concurrency. This was observable as a performance degradation when using a single upstream backend compared to multiple backends (which sharded the bottleneck). Changes: - Removed dial, yield, and ret channels from the Transport struct. - Removed the connManager goroutine's request processing loop. - Implemented Dial() and Yield() using a sync.Mutex to protect the connection slice, allowing for fast concurrent access without context switching. - Downgraded connManager to a simple background cleanup loop that only handles connection expiration on a ticker. - Updated plugin/pkg/proxy/connect.go to use direct method calls instead of channel sends. - Updated tests to reflect the removal of internal channels. Benchmarks show that this change eliminates the single-backend bottleneck. Now a single upstream backend performs on par with multiple backends, and overall throughput is improved. The implementation aligns with standard Go patterns for connection pooling (e.g., net/http.Transport). Signed-off-by: Ville Vesilehto <ville@vesilehto.fi> * fix: address PR review for persistent.go - Named mutex field instead of embedding, to not expose Lock() and Unlock() - Move stop check outside of lock in Yield() - Close() without a separate goroutine - Change stop channel to struct Signed-off-by: Ville Vesilehto <ville@vesilehto.fi> * fix: address code review feedback for conn pool - Switch from LIFO to FIFO connection selection for source port diversity, reducing DNS cache poisoning risk (RFC 5452). 
- Remove "clear entire cache" optimization as it was LIFO-specific. FIFO naturally iterates and skips expired connections. - Remove all goroutines for closing connections; collect connections while holding lock, close synchronously after releasing lock. Signed-off-by: Ville Vesilehto <ville@vesilehto.fi> * fix: remove unused error consts No longer utilised after refactoring the channel based approach. Signed-off-by: Ville Vesilehto <ville@vesilehto.fi> * feat(forward): add max_idle_conns option Add configurable connection pool limit for the forward plugin via the max_idle_conns Corefile option. Changes: - Add SetMaxIdleConns to proxy - Add maxIdleConns field to Forward struct - Add max_idle_conns parsing in forward plugin setup - Apply setting to each proxy during configuration - Update forward plugin README with new option By default the value is 0 (unbounded). When set, excess connections returned to the pool are closed immediately rather than cached. Also add a yield related test. Signed-off-by: Ville Vesilehto <ville@vesilehto.fi> * chore(proxy): simple Dial by closing conns inline Remove toClose slice collection to reduce complexity. Instead close expired connections directly while iterating. Reduces complexity with negligible lock-time impact. Signed-off-by: Ville Vesilehto <ville@vesilehto.fi> * chore: fewer explicit Unlock calls Cleaner and less chance of forgetting to unlock on new possible code paths. Signed-off-by: Ville Vesilehto <ville@vesilehto.fi> --------- Signed-off-by: Ville Vesilehto <ville@vesilehto.fi>
2026-01-14 03:49:46 +02:00
// FIFO: first yielded (c1) should be first out
if c1 != c3 {
t.Error("Expected c1 == c3 (FIFO order)")
}
tr.Yield(c3)
// dial another protocol
c4, cached4, _ := tr.Dial("tcp")
if cached4 {
t.Errorf("Expected non-cached connection (c4)")
}
tr.Yield(c4)
}
// TestCleanupByTimer checks that connections which have sat in the pool for
// longer than the configured expiry (100ms here) are not handed out again by
// Dial.
func TestCleanupByTimer(t *testing.T) {
	s := dnstest.NewServer(func(w dns.ResponseWriter, r *dns.Msg) {
		ret := new(dns.Msg)
		ret.SetReply(r)
		w.WriteMsg(ret)
	})
	defer s.Close()

	tr := newTransport("TestCleanupByTimer", s.Addr)
	tr.SetExpire(100 * time.Millisecond)
	tr.Start()
	defer tr.Stop()

	c1, _, err := tr.Dial("udp")
	if err != nil {
		t.Fatalf("Failed to dial c1: %v", err)
	}
	c2, _, err := tr.Dial("udp")
	if err != nil {
		t.Fatalf("Failed to dial c2: %v", err)
	}

	// Yield the two connections 10ms apart, then wait past the 100ms expiry
	// so both are stale by the time of the next dial.
	tr.Yield(c1)
	time.Sleep(10 * time.Millisecond)
	tr.Yield(c2)
	time.Sleep(120 * time.Millisecond)

	c3, cached, err := tr.Dial("udp")
	if err != nil {
		t.Fatalf("Failed to dial c3: %v", err)
	}
	if cached {
		t.Error("Expected non-cached connection (c3)")
	}

	// Same again with a single connection: yield, outlive the expiry, redial.
	tr.Yield(c3)
	time.Sleep(120 * time.Millisecond)

	c4, cached, err := tr.Dial("udp")
	if err != nil {
		t.Fatalf("Failed to dial c4: %v", err)
	}
	if cached {
		t.Error("Expected non-cached connection (c4)")
	}
	tr.Yield(c4)
}
// TestCleanupAll seeds the UDP pool of an unstarted Transport with three
// freshly dialed connections and checks that cleanup(true) leaves the pool
// empty.
func TestCleanupAll(t *testing.T) {
	s := dnstest.NewServer(func(w dns.ResponseWriter, r *dns.Msg) {
		ret := new(dns.Msg)
		ret.SetReply(r)
		w.WriteMsg(ret)
	})
	defer s.Close()

	tr := newTransport("TestCleanupAll", s.Addr)

	// Dial directly instead of going through tr.Dial so the pool can be
	// populated by hand; a failed dial aborts the test with its cause rather
	// than placing a nil connection in the pool.
	const want = 3
	pool := make([]*persistConn, 0, want)
	for i := range want {
		c, err := dns.DialTimeout("udp", tr.addr, maxDialTimeout)
		if err != nil {
			t.Fatalf("Failed to dial connection %d: %v", i+1, err)
		}
		pool = append(pool, &persistConn{c, time.Now()})
	}
	tr.conns[typeUDP] = pool

	if len(tr.conns[typeUDP]) != want {
		t.Error("Expected 3 connections")
	}

	tr.cleanup(true)

	if len(tr.conns[typeUDP]) > 0 {
		t.Error("Expected no cached connections")
	}
}
// Blame annotation (commit dated 2026-01-14 03:49:46 +02:00):
//
// perf(proxy): use mutex-based connection pool (#7790)
//
// * perf(proxy): use mutex-based connection pool
//
//   The proxy package (used for example by the forward plugin) utilized an
//   actor model where a single connManager goroutine managed connection
//   pooling via unbuffered channels (dial, yield, ret). This design serialized
//   all connection acquisition and release operations through a single
//   goroutine, creating a bottleneck under high concurrency. This was
//   observable as a performance degradation when using a single upstream
//   backend compared to multiple backends (which sharded the bottleneck).
//
//   Changes:
//   - Removed dial, yield, and ret channels from the Transport struct.
//   - Removed the connManager goroutine's request processing loop.
//   - Implemented Dial() and Yield() using a sync.Mutex to protect the
//     connection slice, allowing for fast concurrent access without context
//     switching.
//   - Downgraded connManager to a simple background cleanup loop that only
//     handles connection expiration on a ticker.
//   - Updated plugin/pkg/proxy/connect.go to use direct method calls instead
//     of channel sends.
//   - Updated tests to reflect the removal of internal channels.
//
//   Benchmarks show that this change eliminates the single-backend
//   bottleneck. Now a single upstream backend performs on par with multiple
//   backends, and overall throughput is improved. The implementation aligns
//   with standard Go patterns for connection pooling (e.g.,
//   net/http.Transport).
//
//   Signed-off-by: Ville Vesilehto <ville@vesilehto.fi>
//
// * fix: address PR review for persistent.go
//
//   - Named mutex field instead of embedding, to not expose Lock() and
//     Unlock()
//   - Move stop check outside of lock in Yield()
//   - Close() without a separate goroutine
//   - Change stop channel to struct
//
//   Signed-off-by: Ville Vesilehto <ville@vesilehto.fi>
//
// * fix: address code review feedback for conn pool
//
//   - Switch from LIFO to FIFO connection selection for source port
//     diversity, reducing DNS cache poisoning risk (RFC 5452).
//   - Remove "clear entire cache" optimization as it was LIFO-specific. FIFO
//     naturally iterates and skips expired connections.
//   - Remove all goroutines for closing connections; collect connections
//     while holding lock, close synchronously after releasing lock.
//
//   Signed-off-by: Ville Vesilehto <ville@vesilehto.fi>
//
// * fix: remove unused error consts
//
//   No longer utilised after refactoring the channel based approach.
//
//   Signed-off-by: Ville Vesilehto <ville@vesilehto.fi>
//
// * feat(forward): add max_idle_conns option
//
//   Add configurable connection pool limit for the forward plugin via the
//   max_idle_conns Corefile option.
//
//   Changes:
//   - Add SetMaxIdleConns to proxy
//   - Add maxIdleConns field to Forward struct
//   - Add max_idle_conns parsing in forward plugin setup
//   - Apply setting to each proxy during configuration
//   - Update forward plugin README with new option
//
//   By default the value is 0 (unbounded). When set, excess connections
//   returned to the pool are closed immediately rather than cached.
//
//   Also add a yield related test.
//
//   Signed-off-by: Ville Vesilehto <ville@vesilehto.fi>
//
// * chore(proxy): simple Dial by closing conns inline
//
//   Remove toClose slice collection to reduce complexity. Instead close
//   expired connections directly while iterating. Reduces complexity with
//   negligible lock-time impact.
//
//   Signed-off-by: Ville Vesilehto <ville@vesilehto.fi>
//
// * chore: fewer explicit Unlock calls
//
//   Cleaner and less chance of forgetting to unlock on new possible code
//   paths.
//
//   Signed-off-by: Ville Vesilehto <ville@vesilehto.fi>
//
// ---------
//
// Signed-off-by: Ville Vesilehto <ville@vesilehto.fi>

// TestMaxIdleConns checks that with SetMaxIdleConns(2) the UDP pool keeps at
// most two yielded connections, and that those two come back from Dial in the
// order they were yielded.
func TestMaxIdleConns(t *testing.T) {
	s := dnstest.NewServer(func(w dns.ResponseWriter, r *dns.Msg) {
		ret := new(dns.Msg)
		ret.SetReply(r)
		w.WriteMsg(ret)
	})
	defer s.Close()

	tr := newTransport("TestMaxIdleConns", s.Addr)
	tr.SetMaxIdleConns(2) // Limit to 2 connections per type
	tr.Start()
	defer tr.Stop()

	// Dial 3 connections
	c1, _, err := tr.Dial("udp")
	if err != nil {
		t.Fatalf("Failed to dial c1: %v", err)
	}
	c2, _, err := tr.Dial("udp")
	if err != nil {
		t.Fatalf("Failed to dial c2: %v", err)
	}
	c3, _, err := tr.Dial("udp")
	if err != nil {
		t.Fatalf("Failed to dial c3: %v", err)
	}

	// Yield all 3
	tr.Yield(c1)
	tr.Yield(c2)
	tr.Yield(c3) // This should be discarded (pool full)

	// Check pool size is capped at 2
	tr.mu.Lock()
	poolSize := len(tr.conns[typeUDP])
	tr.mu.Unlock()
	if poolSize != 2 {
		t.Errorf("Expected pool size 2, got %d", poolSize)
	}

	// Verify we get the first 2 back (FIFO)
	d1, cached1, err := tr.Dial("udp")
	if err != nil {
		t.Fatalf("Failed to dial d1: %v", err)
	}
	d2, cached2, err := tr.Dial("udp")
	if err != nil {
		t.Fatalf("Failed to dial d2: %v", err)
	}
	d3, cached3, err := tr.Dial("udp")
	if err != nil {
		t.Fatalf("Failed to dial d3: %v", err)
	}

	if !cached1 || !cached2 {
		t.Error("Expected first 2 dials to be cached")
	}
	if cached3 {
		t.Error("Expected 3rd dial to be non-cached (pool was limited to 2)")
	}
	if d1 != c1 || d2 != c2 {
		t.Error("Expected FIFO order: d1==c1, d2==c2")
	}

	// Hand every connection back instead of abandoning it, as the other tests
	// in this file do. NOTE(review): presumably Yield/Stop close whatever is
	// not kept in the pool — confirm against persistent.go.
	tr.Yield(d1)
	tr.Yield(d2)
	tr.Yield(d3)
}
// TestMaxIdleConnsUnlimited checks that when SetMaxIdleConns is never called,
// all five yielded connections remain in the UDP pool.
func TestMaxIdleConnsUnlimited(t *testing.T) {
	s := dnstest.NewServer(func(w dns.ResponseWriter, r *dns.Msg) {
		ret := new(dns.Msg)
		ret.SetReply(r)
		w.WriteMsg(ret)
	})
	defer s.Close()

	tr := newTransport("TestMaxIdleConnsUnlimited", s.Addr)
	// maxIdleConns defaults to 0 (unlimited)
	tr.Start()
	defer tr.Stop()

	// Dial and yield 5 connections
	conns := make([]*persistConn, 5)
	for i := range conns {
		c, _, err := tr.Dial("udp")
		if err != nil {
			// Stop here: yielding a connection that was never established
			// would make the pool-size assertion below meaningless.
			t.Fatalf("Failed to dial connection %d: %v", i+1, err)
		}
		conns[i] = c
	}
	for _, c := range conns {
		tr.Yield(c)
	}

	// Check all 5 are in pool
	tr.mu.Lock()
	poolSize := len(tr.conns[typeUDP])
	tr.mu.Unlock()
	if poolSize != 5 {
		t.Errorf("Expected pool size 5 (unlimited), got %d", poolSize)
	}
}
func TestYieldAfterStop(t *testing.T) {
s := dnstest.NewServer(func(w dns.ResponseWriter, r *dns.Msg) {
ret := new(dns.Msg)
ret.SetReply(r)
w.WriteMsg(ret)
})
defer s.Close()
tr := newTransport("TestYieldAfterStop", s.Addr)
tr.Start()
// Dial a connection while transport is running
c1, _, err := tr.Dial("udp")
if err != nil {
t.Fatalf("Failed to dial: %v", err)
}
// Stop the transport
tr.Stop()
// Give cleanup goroutine time to exit
time.Sleep(50 * time.Millisecond)
// Yield the connection after stop - should close it, not pool it
tr.Yield(c1)
// Verify pool is empty (connection was closed, not added)
tr.mu.Lock()
poolSize := len(tr.conns[typeUDP])
tr.mu.Unlock()
if poolSize != 0 {
t.Errorf("Expected pool size 0 after stop, got %d", poolSize)
}
}
func BenchmarkYield(b *testing.B) {
s := dnstest.NewServer(func(w dns.ResponseWriter, r *dns.Msg) {
ret := new(dns.Msg)
ret.SetReply(r)
w.WriteMsg(ret)
})
defer s.Close()
tr := newTransport("BenchmarkYield", s.Addr)
tr.Start()
defer tr.Stop()
c, _, _ := tr.Dial("udp")
b.ReportAllocs()
for b.Loop() {
tr.Yield(c)
perf(proxy): use mutex-based connection pool (#7790) * perf(proxy): use mutex-based connection pool The proxy package (used for example by the forward plugin) utilized an actor model where a single connManager goroutine managed connection pooling via unbuffered channels (dial, yield, ret). This design serialized all connection acquisition and release operations through a single goroutine, creating a bottleneck under high concurrency. This was observable as a performance degradation when using a single upstream backend compared to multiple backends (which sharded the bottleneck). Changes: - Removed dial, yield, and ret channels from the Transport struct. - Removed the connManager goroutine's request processing loop. - Implemented Dial() and Yield() using a sync.Mutex to protect the connection slice, allowing for fast concurrent access without context switching. - Downgraded connManager to a simple background cleanup loop that only handles connection expiration on a ticker. - Updated plugin/pkg/proxy/connect.go to use direct method calls instead of channel sends. - Updated tests to reflect the removal of internal channels. Benchmarks show that this change eliminates the single-backend bottleneck. Now a single upstream backend performs on par with multiple backends, and overall throughput is improved. The implementation aligns with standard Go patterns for connection pooling (e.g., net/http.Transport). Signed-off-by: Ville Vesilehto <ville@vesilehto.fi> * fix: address PR review for persistent.go - Named mutex field instead of embedding, to not expose Lock() and Unlock() - Move stop check outside of lock in Yield() - Close() without a separate goroutine - Change stop channel to struct Signed-off-by: Ville Vesilehto <ville@vesilehto.fi> * fix: address code review feedback for conn pool - Switch from LIFO to FIFO connection selection for source port diversity, reducing DNS cache poisoning risk (RFC 5452). 
- Remove "clear entire cache" optimization as it was LIFO-specific. FIFO naturally iterates and skips expired connections. - Remove all goroutines for closing connections; collect connections while holding lock, close synchronously after releasing lock. Signed-off-by: Ville Vesilehto <ville@vesilehto.fi> * fix: remove unused error consts No longer utilised after refactoring the channel based approach. Signed-off-by: Ville Vesilehto <ville@vesilehto.fi> * feat(forward): add max_idle_conns option Add configurable connection pool limit for the forward plugin via the max_idle_conns Corefile option. Changes: - Add SetMaxIdleConns to proxy - Add maxIdleConns field to Forward struct - Add max_idle_conns parsing in forward plugin setup - Apply setting to each proxy during configuration - Update forward plugin README with new option By default the value is 0 (unbounded). When set, excess connections returned to the pool are closed immediately rather than cached. Also add a yield related test. Signed-off-by: Ville Vesilehto <ville@vesilehto.fi> * chore(proxy): simple Dial by closing conns inline Remove toClose slice collection to reduce complexity. Instead close expired connections directly while iterating. Reduces complexity with negligible lock-time impact. Signed-off-by: Ville Vesilehto <ville@vesilehto.fi> * chore: fewer explicit Unlock calls Cleaner and less chance of forgetting to unlock on new possible code paths. Signed-off-by: Ville Vesilehto <ville@vesilehto.fi> --------- Signed-off-by: Ville Vesilehto <ville@vesilehto.fi>
2026-01-14 03:49:46 +02:00
// Simulate FIFO consumption: remove from front
tr.mu.Lock()
if len(tr.conns[typeUDP]) > 0 {
tr.conns[typeUDP] = tr.conns[typeUDP][1:]
}
perf(proxy): use mutex-based connection pool (#7790) * perf(proxy): use mutex-based connection pool The proxy package (used for example by the forward plugin) utilized an actor model where a single connManager goroutine managed connection pooling via unbuffered channels (dial, yield, ret). This design serialized all connection acquisition and release operations through a single goroutine, creating a bottleneck under high concurrency. This was observable as a performance degradation when using a single upstream backend compared to multiple backends (which sharded the bottleneck). Changes: - Removed dial, yield, and ret channels from the Transport struct. - Removed the connManager goroutine's request processing loop. - Implemented Dial() and Yield() using a sync.Mutex to protect the connection slice, allowing for fast concurrent access without context switching. - Downgraded connManager to a simple background cleanup loop that only handles connection expiration on a ticker. - Updated plugin/pkg/proxy/connect.go to use direct method calls instead of channel sends. - Updated tests to reflect the removal of internal channels. Benchmarks show that this change eliminates the single-backend bottleneck. Now a single upstream backend performs on par with multiple backends, and overall throughput is improved. The implementation aligns with standard Go patterns for connection pooling (e.g., net/http.Transport). Signed-off-by: Ville Vesilehto <ville@vesilehto.fi> * fix: address PR review for persistent.go - Named mutex field instead of embedding, to not expose Lock() and Unlock() - Move stop check outside of lock in Yield() - Close() without a separate goroutine - Change stop channel to struct Signed-off-by: Ville Vesilehto <ville@vesilehto.fi> * fix: address code review feedback for conn pool - Switch from LIFO to FIFO connection selection for source port diversity, reducing DNS cache poisoning risk (RFC 5452). 
- Remove "clear entire cache" optimization as it was LIFO-specific. FIFO naturally iterates and skips expired connections. - Remove all goroutines for closing connections; collect connections while holding lock, close synchronously after releasing lock. Signed-off-by: Ville Vesilehto <ville@vesilehto.fi> * fix: remove unused error consts No longer utilised after refactoring the channel based approach. Signed-off-by: Ville Vesilehto <ville@vesilehto.fi> * feat(forward): add max_idle_conns option Add configurable connection pool limit for the forward plugin via the max_idle_conns Corefile option. Changes: - Add SetMaxIdleConns to proxy - Add maxIdleConns field to Forward struct - Add max_idle_conns parsing in forward plugin setup - Apply setting to each proxy during configuration - Update forward plugin README with new option By default the value is 0 (unbounded). When set, excess connections returned to the pool are closed immediately rather than cached. Also add a yield related test. Signed-off-by: Ville Vesilehto <ville@vesilehto.fi> * chore(proxy): simple Dial by closing conns inline Remove toClose slice collection to reduce complexity. Instead close expired connections directly while iterating. Reduces complexity with negligible lock-time impact. Signed-off-by: Ville Vesilehto <ville@vesilehto.fi> * chore: fewer explicit Unlock calls Cleaner and less chance of forgetting to unlock on new possible code paths. Signed-off-by: Ville Vesilehto <ville@vesilehto.fi> --------- Signed-off-by: Ville Vesilehto <ville@vesilehto.fi>
2026-01-14 03:49:46 +02:00
tr.mu.Unlock()
runtime.Gosched()
}
}