mirror of
https://github.com/coredns/coredns.git
synced 2026-03-11 16:15:05 -04:00
plugin/forward: add max_age option to enforce an absolute connection lifetime (#7903)
* plugin/pkg/proxy: add max_age for per-connection lifetime cap Introduce a max_age setting on Transport that closes connections based on creation time, independent of idle-timeout (expire). Background: PR #7790 changed the connection pool from LIFO to FIFO for source-port diversity. Under FIFO, every connection is cycled through the pool and its used timestamp is refreshed continuously. When request rate is high enough that pool_size / request_rate < expire, no connection ever becomes idle and expire never fires. This prevents CoreDNS from opening new connections to upstreams that scale out (e.g. new Kubernetes pods behind a ClusterIP service with conntrack pinning). max_age addresses this by enforcing an absolute upper bound on connection lifetime regardless of activity: - persistConn gains a created field set at dial time. - Transport gains maxAge (default 0 = unlimited, preserving existing behaviour). - Dial(): rejects cached connections whose creation age exceeds max_age. - cleanup(): when maxAge > 0, uses a linear scan over both idle-timeout and max-age predicates; when maxAge == 0, preserves the original binary-search path on used time (sorted by FIFO insertion order). - Both hot paths pre-compute the deadline outside any inner loop to avoid repeated time.Now() calls. Tests added: - TestMaxAgeExpireByCreation: connection with old created but fresh used must be rejected even though idle-timeout would pass. - TestMaxAgeFIFORotation: three FIFO-rotated connections (old created, fresh used) must all be rejected, confirming that continuous rotation cannot prevent max-age expiry. Signed-off-by: cangming <cangming@cangming.app> * plugin/forward: add max_age option Expose Transport.SetMaxAge through the forward plugin so operators can set an absolute upper bound on connection lifetime via the Corefile. Usage: forward . 1.2.3.4 { max_age 30s } Default is 0 (unlimited), which preserves existing behaviour. 
A positive value causes connections older than max_age to be closed and re-dialled on the next request, ensuring CoreDNS reconnects to newly scaled-out upstream pods even under sustained load where the idle timeout (expire) would never fire. If max_age is set, it must not be less than expire; the parser rejects this combination at startup with a clear error message. Signed-off-by: cangming <cangming@cangming.app> --------- Signed-off-by: cangming <cangming@cangming.app>
This commit is contained in:
@@ -9,17 +9,19 @@ import (
|
||||
"github.com/miekg/dns"
|
||||
)
|
||||
|
||||
// a persistConn hold the dns.Conn and the last used time.
|
||||
// a persistConn holds the dns.Conn, its creation time, and the last used time.
|
||||
type persistConn struct {
|
||||
c *dns.Conn
|
||||
used time.Time
|
||||
c *dns.Conn
|
||||
created time.Time
|
||||
used time.Time
|
||||
}
|
||||
|
||||
// Transport holds the persistent cache.
|
||||
type Transport struct {
|
||||
avgDialTime int64 // kind of average time of dial time
|
||||
conns [typeTotalCount][]*persistConn // Buckets for udp, tcp and tcp-tls.
|
||||
expire time.Duration // After this duration a connection is expired.
|
||||
expire time.Duration // After this duration an idle connection is expired.
|
||||
maxAge time.Duration // After this duration a connection is closed regardless of activity; 0 means unlimited.
|
||||
maxIdleConns int // Max idle connections per transport type; 0 means unlimited.
|
||||
addr string
|
||||
tlsConfig *tls.Config
|
||||
@@ -68,7 +70,13 @@ func (t *Transport) cleanup(all bool) {
|
||||
var toClose []*persistConn
|
||||
|
||||
t.mu.Lock()
|
||||
staleTime := time.Now().Add(-t.expire)
|
||||
now := time.Now()
|
||||
staleTime := now.Add(-t.expire)
|
||||
// Pre-compute max-age deadline outside the loop to avoid repeated time.Now() calls.
|
||||
var maxAgeDeadline time.Time
|
||||
if t.maxAge > 0 {
|
||||
maxAgeDeadline = now.Add(-t.maxAge)
|
||||
}
|
||||
for transtype, stack := range t.conns {
|
||||
if len(stack) == 0 {
|
||||
continue
|
||||
@@ -78,10 +86,26 @@ func (t *Transport) cleanup(all bool) {
|
||||
toClose = append(toClose, stack...)
|
||||
continue
|
||||
}
|
||||
if stack[0].used.After(staleTime) {
|
||||
|
||||
// When max-age is set, use a linear scan to evaluate both the idle-timeout
|
||||
// (expire, based on last-used time) and the max-age (based on creation time).
|
||||
if t.maxAge > 0 {
|
||||
var alive []*persistConn
|
||||
for _, pc := range stack {
|
||||
if !pc.used.After(staleTime) || pc.created.Before(maxAgeDeadline) {
|
||||
toClose = append(toClose, pc)
|
||||
} else {
|
||||
alive = append(alive, pc)
|
||||
}
|
||||
}
|
||||
t.conns[transtype] = alive
|
||||
continue
|
||||
}
|
||||
|
||||
// Original expire-only path: connections are sorted by "used"; use binary search.
|
||||
if stack[0].used.After(staleTime) {
|
||||
continue
|
||||
}
|
||||
// connections in stack are sorted by "used"
|
||||
good := sort.Search(len(stack), func(i int) bool {
|
||||
return stack[i].used.After(staleTime)
|
||||
@@ -130,6 +154,10 @@ func (t *Transport) Stop() { close(t.stop) }
|
||||
// SetExpire sets the connection expire time in transport.
|
||||
func (t *Transport) SetExpire(expire time.Duration) {
	// Stored as-is; cleanup subtracts this duration from the current time to find stale connections.
	t.expire = expire
}
|
||||
|
||||
// SetMaxAge sets the maximum lifetime of a connection regardless of activity.
|
||||
// A value of 0 (default) disables max-age; connections are then only closed by expire (idle-timeout).
|
||||
func (t *Transport) SetMaxAge(maxAge time.Duration) {
	// Stored as-is; cleanup only applies the creation-time check when this value is greater than zero.
	t.maxAge = maxAge
}
|
||||
|
||||
// SetMaxIdleConns sets the maximum idle connections per transport type.
|
||||
// A value of 0 means unlimited (default).
|
||||
func (t *Transport) SetMaxIdleConns(n int) {
	// Stored as-is; no validation is performed here.
	t.maxIdleConns = n
}
|
||||
|
||||
Reference in New Issue
Block a user