mirror of
https://github.com/coredns/coredns.git
synced 2026-03-11 00:03:10 -04:00
* plugin/pkg/proxy: add max_age for per-connection lifetime cap Introduce a max_age setting on Transport that closes connections based on creation time, independent of idle-timeout (expire). Background: PR #7790 changed the connection pool from LIFO to FIFO for source-port diversity. Under FIFO, every connection is cycled through the pool and its used timestamp is refreshed continuously. When request rate is high enough that pool_size / request_rate < expire, no connection ever becomes idle and expire never fires. This prevents CoreDNS from opening new connections to upstreams that scale out (e.g. new Kubernetes pods behind a ClusterIP service with conntrack pinning). max_age addresses this by enforcing an absolute upper bound on connection lifetime regardless of activity: - persistConn gains a created field set at dial time. - Transport gains maxAge (default 0 = unlimited, preserving existing behaviour). - Dial(): rejects cached connections whose creation age exceeds max_age. - cleanup(): when maxAge > 0, uses a linear scan over both idle-timeout and max-age predicates; when maxAge == 0, preserves the original binary-search path on used time (sorted by FIFO insertion order). - Both hot paths pre-compute the deadline outside any inner loop to avoid repeated time.Now() calls. Tests added: - TestMaxAgeExpireByCreation: connection with old created but fresh used must be rejected even though idle-timeout would pass. - TestMaxAgeFIFORotation: three FIFO-rotated connections (old created, fresh used) must all be rejected, confirming that continuous rotation cannot prevent max-age expiry. Signed-off-by: cangming <cangming@cangming.app> * plugin/forward: add max_age option Expose Transport.SetMaxAge through the forward plugin so operators can set an absolute upper bound on connection lifetime via the Corefile. Usage: forward . 1.2.3.4 { max_age 30s } Default is 0 (unlimited), which preserves existing behaviour. 
A positive value causes connections older than max_age to be closed and re-dialled on the next request, ensuring CoreDNS reconnects to newly scaled-out upstream pods even under sustained load where the idle timeout (expire) would never fire. If max_age is set, it must not be less than expire; the parser rejects this combination at startup with a clear error message. Signed-off-by: cangming <cangming@cangming.app> --------- Signed-off-by: cangming <cangming@cangming.app>
124 lines
2.9 KiB
Go
124 lines
2.9 KiB
Go
package proxy
|
|
|
|
import (
|
|
"crypto/tls"
|
|
"runtime"
|
|
"sync/atomic"
|
|
"time"
|
|
|
|
"github.com/coredns/coredns/plugin/pkg/log"
|
|
"github.com/coredns/coredns/plugin/pkg/up"
|
|
)
|
|
|
|
// Proxy defines an upstream host.
|
|
type Proxy struct {
|
|
fails uint32
|
|
addr string
|
|
proxyName string
|
|
|
|
transport *Transport
|
|
|
|
readTimeout time.Duration
|
|
|
|
// health checking
|
|
probe *up.Probe
|
|
health HealthChecker
|
|
}
|
|
|
|
// NewProxy returns a new proxy.
|
|
func NewProxy(proxyName, addr, trans string) *Proxy {
|
|
p := &Proxy{
|
|
addr: addr,
|
|
fails: 0,
|
|
probe: up.New(),
|
|
readTimeout: 2 * time.Second,
|
|
transport: newTransport(proxyName, addr),
|
|
health: NewHealthChecker(proxyName, trans, true, "."),
|
|
proxyName: proxyName,
|
|
}
|
|
|
|
runtime.SetFinalizer(p, (*Proxy).finalizer)
|
|
return p
|
|
}
|
|
|
|
// Addr returns the upstream address stored in this proxy.
func (p *Proxy) Addr() string {
	return p.addr
}
|
|
|
|
// SetTLSConfig sets the TLS config in the lower p.transport and in the healthchecking client.
|
|
func (p *Proxy) SetTLSConfig(cfg *tls.Config) {
|
|
p.transport.SetTLSConfig(cfg)
|
|
p.health.SetTLSConfig(cfg)
|
|
}
|
|
|
|
// SetExpire sets the expire duration in the lower p.transport.
|
|
func (p *Proxy) SetExpire(expire time.Duration) { p.transport.SetExpire(expire) }
|
|
|
|
// SetMaxAge sets the maximum connection lifetime in the lower p.transport.
|
|
// A value of 0 (default) disables max-age.
|
|
func (p *Proxy) SetMaxAge(maxAge time.Duration) { p.transport.SetMaxAge(maxAge) }
|
|
|
|
// SetMaxIdleConns sets the maximum idle connections per transport type.
|
|
// A value of 0 means unlimited (default).
|
|
func (p *Proxy) SetMaxIdleConns(n int) { p.transport.SetMaxIdleConns(n) }
|
|
|
|
func (p *Proxy) GetHealthchecker() HealthChecker {
|
|
return p.health
|
|
}
|
|
|
|
func (p *Proxy) GetTransport() *Transport {
|
|
return p.transport
|
|
}
|
|
|
|
func (p *Proxy) Fails() uint32 {
|
|
return atomic.LoadUint32(&p.fails)
|
|
}
|
|
|
|
// Healthcheck kicks of a round of health checks for this proxy.
|
|
func (p *Proxy) Healthcheck() {
|
|
if p.health == nil {
|
|
log.Warning("No healthchecker")
|
|
return
|
|
}
|
|
|
|
p.probe.Do(func() error {
|
|
return p.health.Check(p)
|
|
})
|
|
}
|
|
|
|
// Down returns true if this proxy is down, i.e. has *more* fails than maxfails.
|
|
func (p *Proxy) Down(maxfails uint32) bool {
|
|
if maxfails == 0 {
|
|
return false
|
|
}
|
|
|
|
fails := atomic.LoadUint32(&p.fails)
|
|
return fails > maxfails
|
|
}
|
|
|
|
// Stop close stops the health checking goroutine.
|
|
func (p *Proxy) Stop() { p.probe.Stop() }
|
|
// finalizer stops the transport. NewProxy registers it with
// runtime.SetFinalizer, so it runs once the Proxy becomes unreachable.
func (p *Proxy) finalizer() {
	p.transport.Stop()
}
|
|
|
|
// Start starts the proxy's healthchecking.
|
|
func (p *Proxy) Start(duration time.Duration) {
|
|
p.probe.Start(duration)
|
|
p.transport.Start()
|
|
}
|
|
|
|
func (p *Proxy) SetReadTimeout(duration time.Duration) {
|
|
p.readTimeout = duration
|
|
}
|
|
|
|
// incrementFails increments the number of fails safely.
|
|
func (p *Proxy) incrementFails() {
|
|
curVal := atomic.LoadUint32(&p.fails)
|
|
if curVal > curVal+1 {
|
|
// overflow occurred, do not update the counter again
|
|
return
|
|
}
|
|
atomic.AddUint32(&p.fails, 1)
|
|
}
|
|
|
|
// maxTimeout is a fixed two-second duration. It is not referenced in this
// file. NOTE(review): presumably an upper bound on waiting for an upstream;
// confirm against its users elsewhere in the package.
const maxTimeout = 2 * time.Second
|