mirror of
				https://github.com/coredns/coredns.git
				synced 2025-10-31 18:23:13 -04:00 
			
		
		
		
	Plugin/Forward - autotune the dialTimeout for connection (#1852)
* - implement an auto-tunable dialTimeout for fallback. * - fix gofmt * - factorized timeout computation with readTimeout / updated readme / * - fix comment
This commit is contained in:
		
				
					committed by
					
						 Miek Gieben
						Miek Gieben
					
				
			
			
				
	
			
			
			
						parent
						
							6f865a9de7
						
					
				
				
					commit
					70c957d885
				
			| @@ -82,6 +82,10 @@ forward FROM TO... { | |||||||
| Also note the TLS config is "global" for the whole forwarding proxy if you need a different | Also note the TLS config is "global" for the whole forwarding proxy if you need a different | ||||||
| `tls-name` for different upstreams you're out of luck. | `tls-name` for different upstreams you're out of luck. | ||||||
|  |  | ||||||
|  | On each endpoint, the communication timeouts start at default values and are automatically tuned depending on early results. | ||||||
|  | - dialTimeout by default is 30 sec, and can decrease automatically down to 100ms | ||||||
|  | - readTimeout by default is 2 sec, and can decrease automatically down to 10ms | ||||||
|  |  | ||||||
| ## Metrics | ## Metrics | ||||||
|  |  | ||||||
| If monitoring is enabled (via the *prometheus* directive) then the following metrics are exported: | If monitoring is enabled (via the *prometheus* directive) then the following metrics are exported: | ||||||
|   | |||||||
| @@ -16,21 +16,65 @@ import ( | |||||||
| 	"github.com/miekg/dns" | 	"github.com/miekg/dns" | ||||||
| ) | ) | ||||||
|  |  | ||||||
| func (p *Proxy) readTimeout() time.Duration { | // limitTimeout is a utility function to auto-tune timeout values | ||||||
| 	rtt := time.Duration(atomic.LoadInt64(&p.avgRtt)) | // average observed time is moved towards the last observed delay moderated by a weight | ||||||
|  | // next timeout to use will be the double of the computed average, limited by min and max frame. | ||||||
|  | func limitTimeout(currentAvg *int64, minValue time.Duration, maxValue time.Duration) time.Duration { | ||||||
|  | 	rt := time.Duration(atomic.LoadInt64(currentAvg)) | ||||||
|  | 	if rt < minValue { | ||||||
|  | 		return minValue | ||||||
|  | 	} | ||||||
|  | 	if rt < maxValue/2 { | ||||||
|  | 		return 2 * rt | ||||||
|  | 	} | ||||||
|  | 	return maxValue | ||||||
|  | } | ||||||
|  |  | ||||||
| 	if rtt < minTimeout { | func averageTimeout(currentAvg *int64, observedDuration time.Duration, weight int64) { | ||||||
| 		return minTimeout | 	dt := time.Duration(atomic.LoadInt64(currentAvg)) | ||||||
|  | 	atomic.AddInt64(currentAvg, int64(observedDuration-dt)/weight) | ||||||
| } | } | ||||||
| 	if rtt < maxTimeout/2 { |  | ||||||
| 		return 2 * rtt | func (t *transport) dialTimeout() time.Duration { | ||||||
|  | 	return limitTimeout(&t.avgDialTime, minDialTimeout, maxDialTimeout) | ||||||
| } | } | ||||||
| 	return maxTimeout |  | ||||||
|  | func (t *transport) updateDialTimeout(newDialTime time.Duration) { | ||||||
|  | 	averageTimeout(&t.avgDialTime, newDialTime, cumulativeAvgWeight) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Dial dials the address configured in transport, potentially reusing a connection or creating a new one. | ||||||
|  | func (t *transport) Dial(proto string) (*dns.Conn, bool, error) { | ||||||
|  | 	// If tls has been configured; use it. | ||||||
|  | 	if t.tlsConfig != nil { | ||||||
|  | 		proto = "tcp-tls" | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	t.dial <- proto | ||||||
|  | 	c := <-t.ret | ||||||
|  |  | ||||||
|  | 	if c != nil { | ||||||
|  | 		return c, true, nil | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	reqTime := time.Now() | ||||||
|  | 	timeout := t.dialTimeout() | ||||||
|  | 	if proto == "tcp-tls" { | ||||||
|  | 		conn, err := dns.DialTimeoutWithTLS("tcp", t.addr, t.tlsConfig, timeout) | ||||||
|  | 		t.updateDialTimeout(time.Since(reqTime)) | ||||||
|  | 		return conn, false, err | ||||||
|  | 	} | ||||||
|  | 	conn, err := dns.DialTimeout(proto, t.addr, timeout) | ||||||
|  | 	t.updateDialTimeout(time.Since(reqTime)) | ||||||
|  | 	return conn, false, err | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (p *Proxy) readTimeout() time.Duration { | ||||||
|  | 	return limitTimeout(&p.avgRtt, minTimeout, maxTimeout) | ||||||
| } | } | ||||||
|  |  | ||||||
| func (p *Proxy) updateRtt(newRtt time.Duration) { | func (p *Proxy) updateRtt(newRtt time.Duration) { | ||||||
| 	rtt := time.Duration(atomic.LoadInt64(&p.avgRtt)) | 	averageTimeout(&p.avgRtt, newRtt, cumulativeAvgWeight) | ||||||
| 	atomic.AddInt64(&p.avgRtt, int64((newRtt-rtt)/rttCount)) |  | ||||||
| } | } | ||||||
|  |  | ||||||
| // Connect selects an upstream, sends the request and waits for a response. | // Connect selects an upstream, sends the request and waits for a response. | ||||||
| @@ -92,4 +136,4 @@ func (p *Proxy) Connect(ctx context.Context, state request.Request, forceTCP, me | |||||||
| 	return ret, nil | 	return ret, nil | ||||||
| } | } | ||||||
|  |  | ||||||
| const rttCount = 4 | const cumulativeAvgWeight = 4 | ||||||
|   | |||||||
| @@ -17,6 +17,7 @@ type persistConn struct { | |||||||
|  |  | ||||||
| // transport holds the persistent cache. | // transport holds the persistent cache. | ||||||
| type transport struct { | type transport struct { | ||||||
|  | 	avgDialTime int64                     // kind of average time of dial time | ||||||
| 	conns       map[string][]*persistConn //  Buckets for udp, tcp and tcp-tls. | 	conns       map[string][]*persistConn //  Buckets for udp, tcp and tcp-tls. | ||||||
| 	expire      time.Duration             // After this duration a connection is expired. | 	expire      time.Duration             // After this duration a connection is expired. | ||||||
| 	addr        string | 	addr        string | ||||||
| @@ -30,6 +31,7 @@ type transport struct { | |||||||
|  |  | ||||||
| func newTransport(addr string, tlsConfig *tls.Config) *transport { | func newTransport(addr string, tlsConfig *tls.Config) *transport { | ||||||
| 	t := &transport{ | 	t := &transport{ | ||||||
|  | 		avgDialTime: int64(defaultDialTimeout / 2), | ||||||
| 		conns:       make(map[string][]*persistConn), | 		conns:       make(map[string][]*persistConn), | ||||||
| 		expire:      defaultExpire, | 		expire:      defaultExpire, | ||||||
| 		addr:        addr, | 		addr:        addr, | ||||||
| @@ -141,28 +143,6 @@ func (t *transport) cleanup(all bool) { | |||||||
| 	} | 	} | ||||||
| } | } | ||||||
|  |  | ||||||
| // Dial dials the address configured in transport, potentially reusing a connection or creating a new one. |  | ||||||
| func (t *transport) Dial(proto string) (*dns.Conn, bool, error) { |  | ||||||
| 	// If tls has been configured; use it. |  | ||||||
| 	if t.tlsConfig != nil { |  | ||||||
| 		proto = "tcp-tls" |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	t.dial <- proto |  | ||||||
| 	c := <-t.ret |  | ||||||
|  |  | ||||||
| 	if c != nil { |  | ||||||
| 		return c, true, nil |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	if proto == "tcp-tls" { |  | ||||||
| 		conn, err := dns.DialTimeoutWithTLS("tcp", t.addr, t.tlsConfig, dialTimeout) |  | ||||||
| 		return conn, false, err |  | ||||||
| 	} |  | ||||||
| 	conn, err := dns.DialTimeout(proto, t.addr, dialTimeout) |  | ||||||
| 	return conn, false, err |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Yield returns the connection to transport for reuse. | // Yield returns the connection to transport for reuse. | ||||||
| func (t *transport) Yield(c *dns.Conn) { t.yield <- c } | func (t *transport) Yield(c *dns.Conn) { t.yield <- c } | ||||||
|  |  | ||||||
| @@ -178,4 +158,9 @@ func (t *transport) SetExpire(expire time.Duration) { t.expire = expire } | |||||||
| // SetTLSConfig sets the TLS config in transport. | // SetTLSConfig sets the TLS config in transport. | ||||||
| func (t *transport) SetTLSConfig(cfg *tls.Config) { t.tlsConfig = cfg } | func (t *transport) SetTLSConfig(cfg *tls.Config) { t.tlsConfig = cfg } | ||||||
|  |  | ||||||
| const defaultExpire = 10 * time.Second | const ( | ||||||
|  | 	defaultExpire      = 10 * time.Second | ||||||
|  | 	minDialTimeout     = 100 * time.Millisecond | ||||||
|  | 	maxDialTimeout     = 30 * time.Second | ||||||
|  | 	defaultDialTimeout = 30 * time.Second | ||||||
|  | ) | ||||||
|   | |||||||
| @@ -140,9 +140,9 @@ func TestCleanupAll(t *testing.T) { | |||||||
|  |  | ||||||
| 	tr := newTransport(s.Addr, nil /* no TLS */) | 	tr := newTransport(s.Addr, nil /* no TLS */) | ||||||
|  |  | ||||||
| 	c1, _ := dns.DialTimeout("udp", tr.addr, dialTimeout) | 	c1, _ := dns.DialTimeout("udp", tr.addr, defaultDialTimeout) | ||||||
| 	c2, _ := dns.DialTimeout("udp", tr.addr, dialTimeout) | 	c2, _ := dns.DialTimeout("udp", tr.addr, defaultDialTimeout) | ||||||
| 	c3, _ := dns.DialTimeout("udp", tr.addr, dialTimeout) | 	c3, _ := dns.DialTimeout("udp", tr.addr, defaultDialTimeout) | ||||||
|  |  | ||||||
| 	tr.conns["udp"] = []*persistConn{ | 	tr.conns["udp"] = []*persistConn{ | ||||||
| 		{c1, time.Now()}, | 		{c1, time.Now()}, | ||||||
|   | |||||||
| @@ -106,7 +106,6 @@ func (p *Proxy) start(duration time.Duration) { | |||||||
| } | } | ||||||
|  |  | ||||||
| const ( | const ( | ||||||
| 	dialTimeout = 4 * time.Second |  | ||||||
| 	timeout    = 2 * time.Second | 	timeout    = 2 * time.Second | ||||||
| 	maxTimeout = 2 * time.Second | 	maxTimeout = 2 * time.Second | ||||||
| 	minTimeout = 10 * time.Millisecond | 	minTimeout = 10 * time.Millisecond | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user