Files
coredns/test/proxy_test.go

154 lines
3.9 KiB
Go
Raw Normal View History

package test
import (
perf(proxy): use mutex-based connection pool (#7790) * perf(proxy): use mutex-based connection pool The proxy package (used for example by the forward plugin) utilized an actor model where a single connManager goroutine managed connection pooling via unbuffered channels (dial, yield, ret). This design serialized all connection acquisition and release operations through a single goroutine, creating a bottleneck under high concurrency. This was observable as a performance degradation when using a single upstream backend compared to multiple backends (which sharded the bottleneck). Changes: - Removed dial, yield, and ret channels from the Transport struct. - Removed the connManager goroutine's request processing loop. - Implemented Dial() and Yield() using a sync.Mutex to protect the connection slice, allowing for fast concurrent access without context switching. - Downgraded connManager to a simple background cleanup loop that only handles connection expiration on a ticker. - Updated plugin/pkg/proxy/connect.go to use direct method calls instead of channel sends. - Updated tests to reflect the removal of internal channels. Benchmarks show that this change eliminates the single-backend bottleneck. Now a single upstream backend performs on par with multiple backends, and overall throughput is improved. The implementation aligns with standard Go patterns for connection pooling (e.g., net/http.Transport). Signed-off-by: Ville Vesilehto <ville@vesilehto.fi> * fix: address PR review for persistent.go - Named mutex field instead of embedding, to not expose Lock() and Unlock() - Move stop check outside of lock in Yield() - Close() without a separate goroutine - Change stop channel to struct Signed-off-by: Ville Vesilehto <ville@vesilehto.fi> * fix: address code review feedback for conn pool - Switch from LIFO to FIFO connection selection for source port diversity, reducing DNS cache poisoning risk (RFC 5452). 
- Remove "clear entire cache" optimization as it was LIFO-specific. FIFO naturally iterates and skips expired connections. - Remove all goroutines for closing connections; collect connections while holding lock, close synchronously after releasing lock. Signed-off-by: Ville Vesilehto <ville@vesilehto.fi> * fix: remove unused error consts No longer utilised after refactoring the channel based approach. Signed-off-by: Ville Vesilehto <ville@vesilehto.fi> * feat(forward): add max_idle_conns option Add configurable connection pool limit for the forward plugin via the max_idle_conns Corefile option. Changes: - Add SetMaxIdleConns to proxy - Add maxIdleConns field to Forward struct - Add max_idle_conns parsing in forward plugin setup - Apply setting to each proxy during configuration - Update forward plugin README with new option By default the value is 0 (unbounded). When set, excess connections returned to the pool are closed immediately rather than cached. Also add a yield related test. Signed-off-by: Ville Vesilehto <ville@vesilehto.fi> * chore(proxy): simple Dial by closing conns inline Remove toClose slice collection to reduce complexity. Instead close expired connections directly while iterating. Reduces complexity with negligible lock-time impact. Signed-off-by: Ville Vesilehto <ville@vesilehto.fi> * chore: fewer explicit Unlock calls Cleaner and less chance of forgetting to unlock on new possible code paths. Signed-off-by: Ville Vesilehto <ville@vesilehto.fi> --------- Signed-off-by: Ville Vesilehto <ville@vesilehto.fi>
2026-01-14 03:49:46 +02:00
"context"
"fmt"
"net"
"testing"
perf(proxy): use mutex-based connection pool (#7790) * perf(proxy): use mutex-based connection pool The proxy package (used for example by the forward plugin) utilized an actor model where a single connManager goroutine managed connection pooling via unbuffered channels (dial, yield, ret). This design serialized all connection acquisition and release operations through a single goroutine, creating a bottleneck under high concurrency. This was observable as a performance degradation when using a single upstream backend compared to multiple backends (which sharded the bottleneck). Changes: - Removed dial, yield, and ret channels from the Transport struct. - Removed the connManager goroutine's request processing loop. - Implemented Dial() and Yield() using a sync.Mutex to protect the connection slice, allowing for fast concurrent access without context switching. - Downgraded connManager to a simple background cleanup loop that only handles connection expiration on a ticker. - Updated plugin/pkg/proxy/connect.go to use direct method calls instead of channel sends. - Updated tests to reflect the removal of internal channels. Benchmarks show that this change eliminates the single-backend bottleneck. Now a single upstream backend performs on par with multiple backends, and overall throughput is improved. The implementation aligns with standard Go patterns for connection pooling (e.g., net/http.Transport). Signed-off-by: Ville Vesilehto <ville@vesilehto.fi> * fix: address PR review for persistent.go - Named mutex field instead of embedding, to not expose Lock() and Unlock() - Move stop check outside of lock in Yield() - Close() without a separate goroutine - Change stop channel to struct Signed-off-by: Ville Vesilehto <ville@vesilehto.fi> * fix: address code review feedback for conn pool - Switch from LIFO to FIFO connection selection for source port diversity, reducing DNS cache poisoning risk (RFC 5452). 
- Remove "clear entire cache" optimization as it was LIFO-specific. FIFO naturally iterates and skips expired connections. - Remove all goroutines for closing connections; collect connections while holding lock, close synchronously after releasing lock. Signed-off-by: Ville Vesilehto <ville@vesilehto.fi> * fix: remove unused error consts No longer utilised after refactoring the channel based approach. Signed-off-by: Ville Vesilehto <ville@vesilehto.fi> * feat(forward): add max_idle_conns option Add configurable connection pool limit for the forward plugin via the max_idle_conns Corefile option. Changes: - Add SetMaxIdleConns to proxy - Add maxIdleConns field to Forward struct - Add max_idle_conns parsing in forward plugin setup - Apply setting to each proxy during configuration - Update forward plugin README with new option By default the value is 0 (unbounded). When set, excess connections returned to the pool are closed immediately rather than cached. Also add a yield related test. Signed-off-by: Ville Vesilehto <ville@vesilehto.fi> * chore(proxy): simple Dial by closing conns inline Remove toClose slice collection to reduce complexity. Instead close expired connections directly while iterating. Reduces complexity with negligible lock-time impact. Signed-off-by: Ville Vesilehto <ville@vesilehto.fi> * chore: fewer explicit Unlock calls Cleaner and less chance of forgetting to unlock on new possible code paths. Signed-off-by: Ville Vesilehto <ville@vesilehto.fi> --------- Signed-off-by: Ville Vesilehto <ville@vesilehto.fi>
2026-01-14 03:49:46 +02:00
"github.com/coredns/coredns/plugin/forward"
"github.com/coredns/coredns/plugin/pkg/dnstest"
"github.com/coredns/coredns/plugin/pkg/proxy"
"github.com/coredns/coredns/plugin/test"
"github.com/miekg/dns"
)
func TestLookupProxy(t *testing.T) {
t.Parallel()
name, rm, err := test.TempFile(".", exampleOrg)
if err != nil {
t.Fatalf("Failed to create zone: %s", err)
}
defer rm()
corefile := `example.org:0 {
file ` + name + `
}`
i, udp, _, err := CoreDNSServerAndPorts(corefile)
if err != nil {
t.Fatalf("Could not get CoreDNS serving instance: %s", err)
}
defer i.Stop()
Default to upstream to self (#2436) * Default to upstream to self This is a backwards incompatible change. This is a massive (cleanup) PR where we default to resolving external names by the coredns process itself, instead of directly forwarding them to some upstream. This ignores any arguments `upstream` may have had and makes it depend on proxy/forward configuration in the Corefile. This allows resolved upstream names to be cached and we have better healthchecking of the upstreams. It also means there is only one way to resolve names, by either using the proxy or forward plugin. The proxy/forward lookup.go functions have been removed. This also lessen the dependency on proxy, meaning deprecating proxy will become easier. Some tests have been removed as well, or moved to the top-level test directory as they now require a full coredns process instead of just the plugin. For the etcd plugin, the entire StubZone resolving is *dropped*! This was a hacky (but working) solution to say the least. If someone cares deeply it can be brought back (maybe)? The pkg/upstream is now very small and almost does nothing. Also the New() function was changed to return a pointer to upstream.Upstream. It also returns only one parameter, so any stragglers using it will encounter a compile error. All documentation has been adapted. This affected the following plugins: * etcd * file * auto * secondary * federation * template * route53 A followup PR will make any upstream directives with arguments an error, right now they are ignored. Signed-off-by: Miek Gieben <miek@miek.nl> * Fix etcd build - probably still fails unit test Signed-off-by: Miek Gieben <miek@miek.nl> * Slightly smarter lookup check in upstream Signed-off-by: Miek Gieben <miek@miek.nl> * Compilez Signed-off-by: Miek Gieben <miek@miek.nl>
2019-01-13 16:54:49 +00:00
m := new(dns.Msg)
m.SetQuestion("example.org.", dns.TypeA)
resp, err := dns.Exchange(m, udp)
if err != nil {
t.Fatal("Expected to receive reply, but didn't")
}
// expect answer section with A record in it
if len(resp.Answer) == 0 {
t.Fatalf("Expected to at least one RR in the answer section, got none: %s", resp)
}
if resp.Answer[0].Header().Rrtype != dns.TypeA {
t.Errorf("Expected RR to A, got: %d", resp.Answer[0].Header().Rrtype)
}
if resp.Answer[0].(*dns.A).A.String() != "127.0.0.1" {
t.Errorf("Expected 127.0.0.1, got: %s", resp.Answer[0].(*dns.A).A.String())
}
}
func BenchmarkProxyLookup(b *testing.B) {
t := new(testing.T)
2016-10-08 15:07:07 +01:00
name, rm, err := test.TempFile(".", exampleOrg)
if err != nil {
t.Fatalf("Failed to created zone: %s", err)
}
defer rm()
corefile := `example.org:0 {
file ` + name + `
}`
i, err := CoreDNSServer(corefile)
if err != nil {
t.Fatalf("Could not get CoreDNS serving instance: %s", err)
}
udp, _ := CoreDNSServerPorts(i, 0)
if udp == "" {
t.Fatalf("Could not get udp listening port")
}
defer i.Stop()
Default to upstream to self (#2436) * Default to upstream to self This is a backwards incompatible change. This is a massive (cleanup) PR where we default to resolving external names by the coredns process itself, instead of directly forwarding them to some upstream. This ignores any arguments `upstream` may have had and makes it depend on proxy/forward configuration in the Corefile. This allows resolved upstream names to be cached and we have better healthchecking of the upstreams. It also means there is only one way to resolve names, by either using the proxy or forward plugin. The proxy/forward lookup.go functions have been removed. This also lessen the dependency on proxy, meaning deprecating proxy will become easier. Some tests have been removed as well, or moved to the top-level test directory as they now require a full coredns process instead of just the plugin. For the etcd plugin, the entire StubZone resolving is *dropped*! This was a hacky (but working) solution to say the least. If someone cares deeply it can be brought back (maybe)? The pkg/upstream is now very small and almost does nothing. Also the New() function was changed to return a pointer to upstream.Upstream. It also returns only one parameter, so any stragglers using it will encounter a compile error. All documentation has been adapted. This affected the following plugins: * etcd * file * auto * secondary * federation * template * route53 A followup PR will make any upstream directives with arguments an error, right now they are ignored. Signed-off-by: Miek Gieben <miek@miek.nl> * Fix etcd build - probably still fails unit test Signed-off-by: Miek Gieben <miek@miek.nl> * Slightly smarter lookup check in upstream Signed-off-by: Miek Gieben <miek@miek.nl> * Compilez Signed-off-by: Miek Gieben <miek@miek.nl>
2019-01-13 16:54:49 +00:00
m := new(dns.Msg)
m.SetQuestion("example.org.", dns.TypeA)
for b.Loop() {
Default to upstream to self (#2436) * Default to upstream to self This is a backwards incompatible change. This is a massive (cleanup) PR where we default to resolving external names by the coredns process itself, instead of directly forwarding them to some upstream. This ignores any arguments `upstream` may have had and makes it depend on proxy/forward configuration in the Corefile. This allows resolved upstream names to be cached and we have better healthchecking of the upstreams. It also means there is only one way to resolve names, by either using the proxy or forward plugin. The proxy/forward lookup.go functions have been removed. This also lessen the dependency on proxy, meaning deprecating proxy will become easier. Some tests have been removed as well, or moved to the top-level test directory as they now require a full coredns process instead of just the plugin. For the etcd plugin, the entire StubZone resolving is *dropped*! This was a hacky (but working) solution to say the least. If someone cares deeply it can be brought back (maybe)? The pkg/upstream is now very small and almost does nothing. Also the New() function was changed to return a pointer to upstream.Upstream. It also returns only one parameter, so any stragglers using it will encounter a compile error. All documentation has been adapted. This affected the following plugins: * etcd * file * auto * secondary * federation * template * route53 A followup PR will make any upstream directives with arguments an error, right now they are ignored. Signed-off-by: Miek Gieben <miek@miek.nl> * Fix etcd build - probably still fails unit test Signed-off-by: Miek Gieben <miek@miek.nl> * Slightly smarter lookup check in upstream Signed-off-by: Miek Gieben <miek@miek.nl> * Compilez Signed-off-by: Miek Gieben <miek@miek.nl>
2019-01-13 16:54:49 +00:00
if _, err := dns.Exchange(m, udp); err != nil {
b.Fatal("Expected to receive reply, but didn't")
}
}
}
perf(proxy): use mutex-based connection pool (#7790) * perf(proxy): use mutex-based connection pool The proxy package (used for example by the forward plugin) utilized an actor model where a single connManager goroutine managed connection pooling via unbuffered channels (dial, yield, ret). This design serialized all connection acquisition and release operations through a single goroutine, creating a bottleneck under high concurrency. This was observable as a performance degradation when using a single upstream backend compared to multiple backends (which sharded the bottleneck). Changes: - Removed dial, yield, and ret channels from the Transport struct. - Removed the connManager goroutine's request processing loop. - Implemented Dial() and Yield() using a sync.Mutex to protect the connection slice, allowing for fast concurrent access without context switching. - Downgraded connManager to a simple background cleanup loop that only handles connection expiration on a ticker. - Updated plugin/pkg/proxy/connect.go to use direct method calls instead of channel sends. - Updated tests to reflect the removal of internal channels. Benchmarks show that this change eliminates the single-backend bottleneck. Now a single upstream backend performs on par with multiple backends, and overall throughput is improved. The implementation aligns with standard Go patterns for connection pooling (e.g., net/http.Transport). Signed-off-by: Ville Vesilehto <ville@vesilehto.fi> * fix: address PR review for persistent.go - Named mutex field instead of embedding, to not expose Lock() and Unlock() - Move stop check outside of lock in Yield() - Close() without a separate goroutine - Change stop channel to struct Signed-off-by: Ville Vesilehto <ville@vesilehto.fi> * fix: address code review feedback for conn pool - Switch from LIFO to FIFO connection selection for source port diversity, reducing DNS cache poisoning risk (RFC 5452). 
- Remove "clear entire cache" optimization as it was LIFO-specific. FIFO naturally iterates and skips expired connections. - Remove all goroutines for closing connections; collect connections while holding lock, close synchronously after releasing lock. Signed-off-by: Ville Vesilehto <ville@vesilehto.fi> * fix: remove unused error consts No longer utilised after refactoring the channel based approach. Signed-off-by: Ville Vesilehto <ville@vesilehto.fi> * feat(forward): add max_idle_conns option Add configurable connection pool limit for the forward plugin via the max_idle_conns Corefile option. Changes: - Add SetMaxIdleConns to proxy - Add maxIdleConns field to Forward struct - Add max_idle_conns parsing in forward plugin setup - Apply setting to each proxy during configuration - Update forward plugin README with new option By default the value is 0 (unbounded). When set, excess connections returned to the pool are closed immediately rather than cached. Also add a yield related test. Signed-off-by: Ville Vesilehto <ville@vesilehto.fi> * chore(proxy): simple Dial by closing conns inline Remove toClose slice collection to reduce complexity. Instead close expired connections directly while iterating. Reduces complexity with negligible lock-time impact. Signed-off-by: Ville Vesilehto <ville@vesilehto.fi> * chore: fewer explicit Unlock calls Cleaner and less chance of forgetting to unlock on new possible code paths. Signed-off-by: Ville Vesilehto <ville@vesilehto.fi> --------- Signed-off-by: Ville Vesilehto <ville@vesilehto.fi>
2026-01-14 03:49:46 +02:00
// BenchmarkProxyWithMultipleBackends runs concurrent queries through the
// forward plugin configured with 1, 2 and 3 proxies, so the throughput of the
// different backend counts can be compared. Every proxy points at the same
// dummy upstream server.
func BenchmarkProxyWithMultipleBackends(b *testing.B) {
	// Dummy upstream: answers every request with a bare reply message.
	upstream := dnstest.NewServer(func(w dns.ResponseWriter, r *dns.Msg) {
		reply := new(dns.Msg)
		reply.SetReply(r)
		w.WriteMsg(reply)
	})
	defer upstream.Close()

	// newQuery builds a fresh A question for example.org.
	newQuery := func() *dns.Msg {
		q := new(dns.Msg)
		q.SetQuestion("example.org.", dns.TypeA)
		return q
	}

	for _, backends := range []int{1, 2, 3} {
		b.Run(fmt.Sprintf("%d-Backends", backends), func(b *testing.B) {
			fwd := forward.New()
			fwd.SetProxyOptions(proxy.Options{PreferUDP: true})

			pool := make([]*proxy.Proxy, 0, backends)
			for idx := range backends {
				px := proxy.NewProxy(fmt.Sprintf("proxy-%d", idx), upstream.Addr, "dns")
				fwd.SetProxy(px)
				pool = append(pool, px)
			}
			// Stop every proxy created for this sub-benchmark when it ends.
			defer func() {
				for _, px := range pool {
					px.Stop()
				}
			}()

			// Pre-warm connections
			warmCtx := context.Background()
			warmQuery := newQuery()
			sink := &benchmarkResponseWriter{}
			for range backends * 10 {
				fwd.ServeDNS(warmCtx, sink, warmQuery)
			}

			b.ResetTimer()
			b.ReportAllocs()
			b.RunParallel(func(pb *testing.PB) {
				// Each worker goroutine gets its own message, context and writer.
				query := newQuery()
				ctx := context.Background()
				out := &benchmarkResponseWriter{}
				for pb.Next() {
					// forward plugin handles selection via its policy (default random)
					fwd.ServeDNS(ctx, out, query)
				}
			})
		})
	}
}
// benchmarkResponseWriter is a no-op response writer for the benchmarks in
// this file: every method discards its input and reports success.
type benchmarkResponseWriter struct{}

// LocalAddr returns nil; the writer is not bound to an address.
func (w *benchmarkResponseWriter) LocalAddr() net.Addr { return nil }

// RemoteAddr returns nil; the writer has no peer.
func (w *benchmarkResponseWriter) RemoteAddr() net.Addr { return nil }

// WriteMsg discards the message and reports success.
func (w *benchmarkResponseWriter) WriteMsg(_ *dns.Msg) error { return nil }

// Write discards p and reports that all of it was written.
func (w *benchmarkResponseWriter) Write(p []byte) (int, error) { return len(p), nil }

// Close does nothing and returns nil.
func (w *benchmarkResponseWriter) Close() error { return nil }

// TsigStatus always returns nil.
func (w *benchmarkResponseWriter) TsigStatus() error { return nil }

// TsigTimersOnly is a no-op.
func (w *benchmarkResponseWriter) TsigTimersOnly(bool) {}

// Hijack is a no-op.
func (w *benchmarkResponseWriter) Hijack() {}