mirror of
https://github.com/coredns/coredns.git
synced 2026-03-11 08:13:12 -04:00
plugin/forward: add max_age option to enforce an absolute connection lifetime (#7903)
* plugin/pkg/proxy: add max_age for per-connection lifetime cap

  Introduce a max_age setting on Transport that closes connections based on creation time, independent of idle-timeout (expire).

  Background: PR #7790 changed the connection pool from LIFO to FIFO for source-port diversity. Under FIFO, every connection is cycled through the pool and its used timestamp is refreshed continuously. When request rate is high enough that pool_size / request_rate < expire, no connection ever becomes idle and expire never fires. This prevents CoreDNS from opening new connections to upstreams that scale out (e.g. new Kubernetes pods behind a ClusterIP service with conntrack pinning).

  max_age addresses this by enforcing an absolute upper bound on connection lifetime regardless of activity:

  - persistConn gains a created field set at dial time.
  - Transport gains maxAge (default 0 = unlimited, preserving existing behaviour).
  - Dial(): rejects cached connections whose creation age exceeds max_age.
  - cleanup(): when maxAge > 0, uses a linear scan over both idle-timeout and max-age predicates; when maxAge == 0, preserves the original binary-search path on used time (sorted by FIFO insertion order).
  - Both hot paths pre-compute the deadline outside any inner loop to avoid repeated time.Now() calls.

  Tests added:

  - TestMaxAgeExpireByCreation: connection with old created but fresh used must be rejected even though idle-timeout would pass.
  - TestMaxAgeFIFORotation: three FIFO-rotated connections (old created, fresh used) must all be rejected, confirming that continuous rotation cannot prevent max-age expiry.

  Signed-off-by: cangming <cangming@cangming.app>

* plugin/forward: add max_age option

  Expose Transport.SetMaxAge through the forward plugin so operators can set an absolute upper bound on connection lifetime via the Corefile.

  Usage:

      forward . 1.2.3.4 {
          max_age 30s
      }

  Default is 0 (unlimited), which preserves existing behaviour. A positive value causes connections older than max_age to be closed and re-dialled on the next request, ensuring CoreDNS reconnects to newly scaled-out upstream pods even under sustained load where the idle timeout (expire) would never fire.

  If max_age is set, it must not be less than expire; the parser rejects this combination at startup with a clear error message.

  Signed-off-by: cangming <cangming@cangming.app>

---------

Signed-off-by: cangming <cangming@cangming.app>
This commit is contained in:
@@ -65,6 +65,11 @@ func (t *Transport) Dial(proto string) (*persistConn, bool, error) {
|
||||
transtype := stringToTransportType(proto)
|
||||
|
||||
t.mu.Lock()
|
||||
// Pre-compute max-age deadline outside the loop to avoid repeated time.Now() calls.
|
||||
var maxAgeDeadline time.Time
|
||||
if t.maxAge > 0 {
|
||||
maxAgeDeadline = time.Now().Add(-t.maxAge)
|
||||
}
|
||||
// FIFO: take the oldest conn (front of slice) for source port diversity
|
||||
for len(t.conns[transtype]) > 0 {
|
||||
pc := t.conns[transtype][0]
|
||||
@@ -73,6 +78,10 @@ func (t *Transport) Dial(proto string) (*persistConn, bool, error) {
|
||||
pc.c.Close()
|
||||
continue
|
||||
}
|
||||
if !maxAgeDeadline.IsZero() && pc.created.Before(maxAgeDeadline) {
|
||||
pc.c.Close()
|
||||
continue
|
||||
}
|
||||
t.mu.Unlock()
|
||||
connCacheHitsCount.WithLabelValues(t.proxyName, t.addr, proto).Add(1)
|
||||
return pc, true, nil
|
||||
@@ -86,11 +95,11 @@ func (t *Transport) Dial(proto string) (*persistConn, bool, error) {
|
||||
if proto == "tcp-tls" {
|
||||
conn, err := dns.DialTimeoutWithTLS("tcp", t.addr, t.tlsConfig, timeout)
|
||||
t.updateDialTimeout(time.Since(reqTime))
|
||||
return &persistConn{c: conn}, false, err
|
||||
return &persistConn{c: conn, created: time.Now()}, false, err
|
||||
}
|
||||
conn, err := dns.DialTimeout(proto, t.addr, timeout)
|
||||
t.updateDialTimeout(time.Since(reqTime))
|
||||
return &persistConn{c: conn}, false, err
|
||||
return &persistConn{c: conn, created: time.Now()}, false, err
|
||||
}
|
||||
|
||||
// Connect selects an upstream, sends the request and waits for a response.
|
||||
|
||||
Reference in New Issue
Block a user