coredns/plugin/pkg/proxy/connect.go
Ville Vesilehto f3983c1111 perf(proxy): use mutex-based connection pool (#7790)
* perf(proxy): use mutex-based connection pool

The proxy package (used for example by the forward plugin) utilized
an actor model where a single connManager goroutine managed
connection pooling via unbuffered channels (dial, yield, ret). This
design serialized all connection acquisition and release operations
through a single goroutine, creating a bottleneck under high
concurrency. This was observable as a performance degradation when
using a single upstream backend compared to multiple backends
(which sharded the bottleneck).

Changes:
- Removed dial, yield, and ret channels from the Transport struct.
- Removed the connManager goroutine's request processing loop.
- Implemented Dial() and Yield() using a sync.Mutex to protect the
  connection slice, allowing for fast concurrent access without
  context switching.
- Downgraded connManager to a simple background cleanup loop that
  only handles connection expiration on a ticker.
- Updated plugin/pkg/proxy/connect.go to use direct method calls
  instead of channel sends.
- Updated tests to reflect the removal of internal channels.

Benchmarks show that this change eliminates the single-backend
bottleneck. Now a single upstream backend performs on par with
multiple backends, and overall throughput is improved.

The implementation aligns with standard Go patterns for connection
pooling (e.g., net/http.Transport).
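
As a rough illustration, a mutex-protected Yield consistent with the description
above might look like the sketch below. Field and method names (stop, mu, conns,
used, transportTypeFromConn) are taken from this file and the commit text; the
actual implementation lives in plugin/pkg/proxy/persistent.go, which is not part
of this file.

  // Sketch only, not the actual persistent.go code.
  func (t *Transport) Yield(pc *persistConn) {
    // Check for a stopped transport outside the lock; a stopped
    // transport closes the connection instead of pooling it.
    select {
    case <-t.stop:
      pc.c.Close()
      return
    default:
    }

    pc.used = time.Now()

    t.mu.Lock()
    defer t.mu.Unlock()
    transtype := t.transportTypeFromConn(pc)
    t.conns[transtype] = append(t.conns[transtype], pc)
  }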

Signed-off-by: Ville Vesilehto <ville@vesilehto.fi>

* fix: address PR review for persistent.go

- Name the mutex field instead of embedding it, so Lock() and Unlock()
  are not exposed on Transport (see the sketch below)
- Move the stop check outside of the lock in Yield()
- Close() without spawning a separate goroutine
- Change the stop channel's element type to struct{}
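
A standalone illustration of the embedding-versus-named-field point (generic
types, not the actual persistent.go structs):

  package main

  import "sync"

  // Embedded: sync.Mutex's Lock and Unlock become part of the type's method set.
  type poolEmbedded struct {
    sync.Mutex
    conns []string
  }

  // Named field: locking stays an internal detail of the owning package.
  type poolNamed struct {
    mu    sync.Mutex
    conns []string
  }

  func main() {
    var p poolEmbedded
    p.Lock() // callers can lock the pool directly, which is usually undesirable
    p.Unlock()

    var q poolNamed
    q.mu.Lock() // only code in the defining package touches the mutex
    q.conns = append(q.conns, "example")
    q.mu.Unlock()
  }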

Signed-off-by: Ville Vesilehto <ville@vesilehto.fi>

* fix: address code review feedback for conn pool

- Switch from LIFO to FIFO connection selection for source port
  diversity, reducing DNS cache poisoning risk (RFC 5452); the two
  orders are contrasted in the sketch below.
- Remove the "clear entire cache" optimization, as it was LIFO-specific.
  FIFO naturally iterates over and skips expired connections.
- Remove all goroutines for closing connections; collect connections
  while holding the lock, then close them synchronously after releasing it.
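
Schematically, the two selection orders differ as follows (helper names are
illustrative; the real FIFO loop is visible in Dial in the file below):

  // lifoTake (old behaviour): reuse the most recently yielded element,
  // which keeps hitting the same socket and source port.
  func lifoTake[T any](pool []T) (T, []T) {
    return pool[len(pool)-1], pool[:len(pool)-1]
  }

  // fifoTake (new behaviour): take the oldest element first, rotating
  // through pooled sockets and therefore their source ports (RFC 5452).
  func fifoTake[T any](pool []T) (T, []T) {
    return pool[0], pool[1:]
  }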

Signed-off-by: Ville Vesilehto <ville@vesilehto.fi>

* fix: remove unused error consts

No longer used after refactoring away the channel-based approach.

Signed-off-by: Ville Vesilehto <ville@vesilehto.fi>

* feat(forward): add max_idle_conns option

Add configurable connection pool limit for the forward plugin via
the max_idle_conns Corefile option.

Changes:
- Add SetMaxIdleConns to proxy
- Add maxIdleConns field to Forward struct
- Add max_idle_conns parsing in forward plugin setup
- Apply setting to each proxy during configuration
- Update forward plugin README with new option

By default the value is 0 (unbounded). When set, excess
connections returned to the pool are closed immediately
rather than cached.

Also add a Yield-related test.
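
For example, a Corefile using the new option could look like the snippet
below; the upstream addresses and the limit of 256 are purely illustrative:

  . {
      forward . 8.8.8.8 8.8.4.4 {
          max_idle_conns 256
      }
  }

Because the default of 0 keeps the previous unbounded behaviour, existing
configurations are unaffected.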

Signed-off-by: Ville Vesilehto <ville@vesilehto.fi>

* chore(proxy): simple Dial by closing conns inline

Remove the toClose slice collection and instead close expired connections
directly while iterating. This reduces complexity with negligible impact
on lock hold time.

Signed-off-by: Ville Vesilehto <ville@vesilehto.fi>

* chore: fewer explicit Unlock calls

Cleaner, and less chance of forgetting to unlock on newly added
code paths.

Signed-off-by: Ville Vesilehto <ville@vesilehto.fi>

---------

Signed-off-by: Ville Vesilehto <ville@vesilehto.fi>
2026-01-13 17:49:46 -08:00


// Package proxy implements a forwarding proxy with connection caching.
// It manages a pool of upstream connections (UDP and TCP) to reuse them for subsequent requests,
// reducing latency and handshake overhead. It supports in-band health checking.
package proxy

import (
	"context"
	"errors"
	"io"
	"strconv"
	"strings"
	"sync/atomic"
	"time"

	"github.com/coredns/coredns/request"
	"github.com/miekg/dns"
)

const (
	// ErrTransportStopped is the error message returned when Dial is called after the transport has been stopped.
	ErrTransportStopped = "proxy: transport stopped"
)

// limitTimeout is a utility function to auto-tune timeout values.
// The average observed time is moved towards the last observed delay, moderated by a weight.
// The next timeout to use will be double the computed average, limited by the min and max bounds.
func limitTimeout(currentAvg *int64, minValue time.Duration, maxValue time.Duration) time.Duration {
	rt := time.Duration(atomic.LoadInt64(currentAvg))
	if rt < minValue {
		return minValue
	}
	if rt < maxValue/2 {
		return 2 * rt
	}
	return maxValue
}

// averageTimeout moves the cumulative average towards the last observed duration, moderated by weight.
func averageTimeout(currentAvg *int64, observedDuration time.Duration, weight int64) {
	dt := time.Duration(atomic.LoadInt64(currentAvg))
	atomic.AddInt64(currentAvg, int64(observedDuration-dt)/weight)
}

func (t *Transport) dialTimeout() time.Duration {
	return limitTimeout(&t.avgDialTime, minDialTimeout, maxDialTimeout)
}

func (t *Transport) updateDialTimeout(newDialTime time.Duration) {
	averageTimeout(&t.avgDialTime, newDialTime, cumulativeAvgWeight)
}

// Dial dials the address configured in transport, potentially reusing a connection or creating a new one.
func (t *Transport) Dial(proto string) (*persistConn, bool, error) {
	// If TLS has been configured, use it.
	if t.tlsConfig != nil {
		proto = "tcp-tls"
	}

	// Check whether the transport has been stopped before attempting to dial.
	select {
	case <-t.stop:
		return nil, false, errors.New(ErrTransportStopped)
	default:
	}

	transtype := stringToTransportType(proto)

	t.mu.Lock()
	// FIFO: take the oldest connection (front of the slice) for source port diversity.
	for len(t.conns[transtype]) > 0 {
		pc := t.conns[transtype][0]
		t.conns[transtype] = t.conns[transtype][1:]
		if time.Since(pc.used) > t.expire {
			// Expired: close it inline and keep scanning.
			pc.c.Close()
			continue
		}
		t.mu.Unlock()
		connCacheHitsCount.WithLabelValues(t.proxyName, t.addr, proto).Add(1)
		return pc, true, nil
	}
	t.mu.Unlock()

	connCacheMissesCount.WithLabelValues(t.proxyName, t.addr, proto).Add(1)

	reqTime := time.Now()
	timeout := t.dialTimeout()
	if proto == "tcp-tls" {
		conn, err := dns.DialTimeoutWithTLS("tcp", t.addr, t.tlsConfig, timeout)
		t.updateDialTimeout(time.Since(reqTime))
		return &persistConn{c: conn}, false, err
	}
	conn, err := dns.DialTimeout(proto, t.addr, timeout)
	t.updateDialTimeout(time.Since(reqTime))
	return &persistConn{c: conn}, false, err
}

// Connect selects an upstream, sends the request and waits for a response.
func (p *Proxy) Connect(ctx context.Context, state request.Request, opts Options) (*dns.Msg, error) {
	start := time.Now()

	var proto string
	switch {
	case opts.ForceTCP: // TCP flag has precedence over UDP flag
		proto = "tcp"
	case opts.PreferUDP:
		proto = "udp"
	default:
		proto = state.Proto()
	}

	pc, cached, err := p.transport.Dial(proto)
	if err != nil {
		return nil, err
	}

	// Set buffer size correctly for this client.
	pc.c.UDPSize = max(uint16(state.Size()), 512) // #nosec G115 -- UDP size fits in uint16

	pc.c.SetWriteDeadline(time.Now().Add(maxTimeout))
	// Record the original Id before sending upstream, and restore it when we are done.
	originId := state.Req.Id
	state.Req.Id = dns.Id()
	defer func() {
		state.Req.Id = originId
	}()

	if err := pc.c.WriteMsg(state.Req); err != nil {
		pc.c.Close() // not giving it back
		if err == io.EOF && cached {
			return nil, ErrCachedClosed
		}
		return nil, err
	}

	var ret *dns.Msg
	pc.c.SetReadDeadline(time.Now().Add(p.readTimeout))
	for {
		ret, err = pc.c.ReadMsg()
		if err != nil {
			if ret != nil && (state.Req.Id == ret.Id) && p.transport.transportTypeFromConn(pc) == typeUDP && shouldTruncateResponse(err) {
				// For UDP, if the error is an overflow, we probably have an upstream misbehaving in some way
				// (e.g. sending >512 byte responses without an eDNS0 OPT RR).
				// Instead of returning an error, return an empty response with the TC bit set. This will make the
				// client retry over TCP (if that's supported) or at least receive a clean
				// error. The connection is still good, so we break before the close.

				// Truncate the response.
				ret = truncateResponse(ret)
				break
			}

			pc.c.Close() // not giving it back
			if err == io.EOF && cached {
				return nil, ErrCachedClosed
			}
			// Restore the original Id before returning the (possibly partial) response.
			if ret != nil {
				ret.Id = originId
			}
			return ret, err
		}
		// Drop out-of-order responses.
		if state.Req.Id == ret.Id {
			break
		}
	}

	// Restore the original Id after the upstream exchange.
	ret.Id = originId

	p.transport.Yield(pc)

	rc, ok := dns.RcodeToString[ret.Rcode]
	if !ok {
		rc = strconv.Itoa(ret.Rcode)
	}

	requestDuration.WithLabelValues(p.proxyName, p.addr, rc).Observe(time.Since(start).Seconds())

	return ret, nil
}

const cumulativeAvgWeight = 4

// shouldTruncateResponse determines whether a UDP read error means the response
// should be returned truncated instead of reported as an error.
func shouldTruncateResponse(err error) bool {
	// This handles a scenario in which the upstream sets the TC bit but doesn't truncate the response,
	// and we get dns.ErrBuf instead of an overflow error.
	if _, isDNSErr := err.(*dns.Error); isDNSErr && errors.Is(err, dns.ErrBuf) {
		return true
	} else if strings.Contains(err.Error(), "overflow") {
		return true
	}
	return false
}

// truncateResponse returns an empty response with the TC (truncated) bit set.
func truncateResponse(response *dns.Msg) *dns.Msg {
	// Clear out the Answer, Extra, and Ns sections.
	response.Answer = nil
	response.Extra = nil
	response.Ns = nil

	// Set the TC bit to indicate truncation.
	response.Truncated = true
	return response
}