mirror of
				https://github.com/coredns/coredns.git
				synced 2025-10-31 10:13:14 -04:00 
			
		
		
		
	plugin/cache: Add refresh mode setting to serve_stale (#5131)
This PR adds an optional REFRESH_MODE parameter on the serve_stale configuration directive of the cache plugin, which verifies that the upstream is still unavailable before returning stale entries. Signed-off-by: Antoine Tollenaere <atollena@gmail.com>
This commit is contained in:
		
				
					committed by
					
						 GitHub
						GitHub
					
				
			
			
				
	
			
			
			
						parent
						
							c3572fdb30
						
					
				
				
					commit
					66f2ac7568
				
			
							
								
								
									
										9
									
								
								plugin/cache/README.md
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										9
									
								
								plugin/cache/README.md
									
									
									
									
										vendored
									
									
								
							| @@ -37,7 +37,7 @@ cache [TTL] [ZONES...] { | ||||
|     success CAPACITY [TTL] [MINTTL] | ||||
|     denial CAPACITY [TTL] [MINTTL] | ||||
|     prefetch AMOUNT [[DURATION] [PERCENTAGE%]] | ||||
|     serve_stale [DURATION] | ||||
|     serve_stale [DURATION] [REFRESH_MODE] | ||||
| } | ||||
| ~~~ | ||||
|  | ||||
| @@ -57,7 +57,12 @@ cache [TTL] [ZONES...] { | ||||
| * `serve_stale`, when serve\_stale is set, cache always will serve an expired entry to a client if there is one | ||||
|   available.  When this happens, cache will attempt to refresh the cache entry after sending the expired cache | ||||
|   entry to the client. The responses have a TTL of 0. **DURATION** is how far back to consider | ||||
|   stale responses as fresh. The default duration is 1h. | ||||
|   stale responses as fresh. The default duration is 1h. **REFRESH_MODE** controls when the attempt to refresh | ||||
|   the cache happens. `verified` will first verify that an entry is still unavailable from the source before sending | ||||
|   the stale response to the client. `immediate` will immediately send the expired response to the client before | ||||
|   checking to see if the entry is available from the source. **REFRESH_MODE** defaults to `immediate`. Setting this | ||||
|   value to `verified` can lead to increased latency when serving stale responses, but will prevent stale entries | ||||
|   from ever being served if an updated response can be retrieved from the source. | ||||
|  | ||||
| ## Capacity and Eviction | ||||
|  | ||||
|   | ||||
							
								
								
									
										31
									
								
								plugin/cache/cache.go
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										31
									
								
								plugin/cache/cache.go
									
									
									
									
										vendored
									
									
								
							| @@ -38,7 +38,9 @@ type Cache struct { | ||||
| 	duration   time.Duration | ||||
| 	percentage int | ||||
|  | ||||
| 	staleUpTo time.Duration | ||||
| 	// Stale serve | ||||
| 	staleUpTo   time.Duration | ||||
| 	verifyStale bool | ||||
|  | ||||
| 	// Testing. | ||||
| 	now func() time.Time | ||||
| @@ -227,6 +229,33 @@ func (w *ResponseWriter) Write(buf []byte) (int, error) { | ||||
| 	return n, err | ||||
| } | ||||
|  | ||||
| // verifyStaleResponseWriter is a response writer that only writes messages if they should replace a | ||||
| // stale cache entry, and otherwise discards them. | ||||
| type verifyStaleResponseWriter struct { | ||||
| 	*ResponseWriter | ||||
| 	refreshed bool // set to true if the last WriteMsg wrote to ResponseWriter, false otherwise. | ||||
| } | ||||
|  | ||||
| // newVerifyStaleResponseWriter returns a ResponseWriter to be used when verifying stale cache | ||||
| // entries. It only forward writes if an entry was successfully refreshed according to RFC8767, | ||||
| // section 4 (response is NoError or NXDomain), and ignores any other response. | ||||
| func newVerifyStaleResponseWriter(w *ResponseWriter) *verifyStaleResponseWriter { | ||||
| 	return &verifyStaleResponseWriter{ | ||||
| 		w, | ||||
| 		false, | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // WriteMsg implements the dns.ResponseWriter interface. | ||||
| func (w *verifyStaleResponseWriter) WriteMsg(res *dns.Msg) error { | ||||
| 	w.refreshed = false | ||||
| 	if res.Rcode == dns.RcodeSuccess || res.Rcode == dns.RcodeNameError { | ||||
| 		w.refreshed = true | ||||
| 		return w.ResponseWriter.WriteMsg(res) // stores to the cache and send to client | ||||
| 	} | ||||
| 	return nil // else discard | ||||
| } | ||||
|  | ||||
| const ( | ||||
| 	maxTTL  = dnsutil.MaximumDefaulTTL | ||||
| 	minTTL  = dnsutil.MinimalDefaultTTL | ||||
|   | ||||
							
								
								
									
										90
									
								
								plugin/cache/cache_test.go
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										90
									
								
								plugin/cache/cache_test.go
									
									
									
									
										vendored
									
									
								
							| @@ -266,7 +266,7 @@ func TestServeFromStaleCache(t *testing.T) { | ||||
| 	req.SetQuestion("cached.org.", dns.TypeA) | ||||
| 	ctx := context.TODO() | ||||
|  | ||||
| 	// Cache example.org. | ||||
| 	// Cache cached.org. with 60s TTL | ||||
| 	rec := dnstest.NewRecorder(&test.ResponseWriter{}) | ||||
| 	c.staleUpTo = 1 * time.Hour | ||||
| 	c.ServeDNS(ctx, rec, req) | ||||
| @@ -304,6 +304,80 @@ func TestServeFromStaleCache(t *testing.T) { | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func TestServeFromStaleCacheFetchVerify(t *testing.T) { | ||||
| 	c := New() | ||||
| 	c.Next = ttlBackend(120) | ||||
|  | ||||
| 	req := new(dns.Msg) | ||||
| 	req.SetQuestion("cached.org.", dns.TypeA) | ||||
| 	ctx := context.TODO() | ||||
|  | ||||
| 	// Cache cached.org. with 120s TTL | ||||
| 	rec := dnstest.NewRecorder(&test.ResponseWriter{}) | ||||
| 	c.staleUpTo = 1 * time.Hour | ||||
| 	c.verifyStale = true | ||||
| 	c.ServeDNS(ctx, rec, req) | ||||
| 	if c.pcache.Len() != 1 { | ||||
| 		t.Fatalf("Msg with > 0 TTL should have been cached") | ||||
| 	} | ||||
|  | ||||
| 	tests := []struct { | ||||
| 		name          string | ||||
| 		upstreamRCode int | ||||
| 		upstreamTtl   int | ||||
| 		futureMinutes int | ||||
| 		expectedRCode int | ||||
| 		expectedTtl   int | ||||
| 	}{ | ||||
| 		// After 1 minutes of initial TTL, we should see a cached response | ||||
| 		{"cached.org.", dns.RcodeSuccess, 200, 1, dns.RcodeSuccess, 60}, // ttl = 120 - 60 -- not refreshed | ||||
|  | ||||
| 		// After the 2 more minutes, we should see upstream responses because upstream is available | ||||
| 		{"cached.org.", dns.RcodeSuccess, 200, 3, dns.RcodeSuccess, 200}, | ||||
|  | ||||
| 		// After the TTL expired, if the server fails we should get the cached entry | ||||
| 		{"cached.org.", dns.RcodeServerFailure, 200, 7, dns.RcodeSuccess, 0}, | ||||
|  | ||||
| 		// After 1 more minutes, if the server serves nxdomain we should see them (despite being within the serve stale period) | ||||
| 		{"cached.org.", dns.RcodeNameError, 150, 8, dns.RcodeNameError, 150}, | ||||
| 	} | ||||
|  | ||||
| 	for i, tt := range tests { | ||||
| 		rec := dnstest.NewRecorder(&test.ResponseWriter{}) | ||||
| 		c.now = func() time.Time { return time.Now().Add(time.Duration(tt.futureMinutes) * time.Minute) } | ||||
|  | ||||
| 		if tt.upstreamRCode == dns.RcodeSuccess { | ||||
| 			c.Next = ttlBackend(tt.upstreamTtl) | ||||
| 		} else if tt.upstreamRCode == dns.RcodeServerFailure { | ||||
| 			// Make upstream fail, should now rely on cache during the c.staleUpTo period | ||||
| 			c.Next = servFailBackend(tt.upstreamTtl) | ||||
| 		} else if tt.upstreamRCode == dns.RcodeNameError { | ||||
| 			c.Next = nxDomainBackend(tt.upstreamTtl) | ||||
| 		} else { | ||||
| 			t.Fatal("upstream code not implemented") | ||||
| 		} | ||||
|  | ||||
| 		r := req.Copy() | ||||
| 		r.SetQuestion(tt.name, dns.TypeA) | ||||
| 		ret, _ := c.ServeDNS(ctx, rec, r) | ||||
| 		if ret != tt.expectedRCode { | ||||
| 			t.Errorf("Test %d: expected rcode=%v, got rcode=%v", i, tt.expectedRCode, ret) | ||||
| 			continue | ||||
| 		} | ||||
| 		if ret == dns.RcodeSuccess { | ||||
| 			recTtl := rec.Msg.Answer[0].Header().Ttl | ||||
| 			if tt.expectedTtl != int(recTtl) { | ||||
| 				t.Errorf("Test %d: expected TTL=%d, got TTL=%d", i, tt.expectedTtl, recTtl) | ||||
| 			} | ||||
| 		} else if ret == dns.RcodeNameError { | ||||
| 			soaTtl := rec.Msg.Ns[0].Header().Ttl | ||||
| 			if tt.expectedTtl != int(soaTtl) { | ||||
| 				t.Errorf("Test %d: expected TTL=%d, got TTL=%d", i, tt.expectedTtl, soaTtl) | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func TestNegativeStaleMaskingPositiveCache(t *testing.T) { | ||||
| 	c := New() | ||||
| 	c.staleUpTo = time.Minute * 10 | ||||
| @@ -454,6 +528,20 @@ func ttlBackend(ttl int) plugin.Handler { | ||||
| 	}) | ||||
| } | ||||
|  | ||||
| func servFailBackend(ttl int) plugin.Handler { | ||||
| 	return plugin.HandlerFunc(func(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (int, error) { | ||||
| 		m := new(dns.Msg) | ||||
| 		m.SetReply(r) | ||||
| 		m.Response, m.RecursionAvailable = true, true | ||||
|  | ||||
| 		m.Ns = []dns.RR{test.SOA(fmt.Sprintf("example.org. %d IN	SOA	sns.dns.icann.org. noc.dns.icann.org. 2016082540 7200 3600 1209600 3600", ttl))} | ||||
|  | ||||
| 		m.MsgHdr.Rcode = dns.RcodeServerFailure | ||||
| 		w.WriteMsg(m) | ||||
| 		return dns.RcodeServerFailure, nil | ||||
| 	}) | ||||
| } | ||||
|  | ||||
| func TestComputeTTL(t *testing.T) { | ||||
| 	tests := []struct { | ||||
| 		msgTTL      time.Duration | ||||
|   | ||||
							
								
								
									
										24
									
								
								plugin/cache/handler.go
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										24
									
								
								plugin/cache/handler.go
									
									
									
									
										vendored
									
									
								
							| @@ -35,19 +35,29 @@ func (c *Cache) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) | ||||
|  | ||||
| 	ttl := 0 | ||||
| 	i := c.getIgnoreTTL(now, state, server) | ||||
| 	if i != nil { | ||||
| 		ttl = i.ttl(now) | ||||
| 	} | ||||
| 	if i == nil { | ||||
| 		crr := &ResponseWriter{ResponseWriter: w, Cache: c, state: state, server: server, do: do} | ||||
| 		return c.doRefresh(ctx, state, crr) | ||||
| 	} | ||||
| 	ttl = i.ttl(now) | ||||
| 	if ttl < 0 { | ||||
| 		servedStale.WithLabelValues(server, c.zonesMetricLabel).Inc() | ||||
| 		// serve stale behavior | ||||
| 		if c.verifyStale { | ||||
| 			crr := &ResponseWriter{ResponseWriter: w, Cache: c, state: state, server: server, do: do} | ||||
| 			cw := newVerifyStaleResponseWriter(crr) | ||||
| 			ret, err := c.doRefresh(ctx, state, cw) | ||||
| 			if cw.refreshed { | ||||
| 				return ret, err | ||||
| 			} | ||||
| 		} | ||||
|  | ||||
| 		// Adjust the time to get a 0 TTL in the reply built from a stale item. | ||||
| 		now = now.Add(time.Duration(ttl) * time.Second) | ||||
| 		cw := newPrefetchResponseWriter(server, state, c) | ||||
| 		go c.doPrefetch(ctx, state, cw, i, now) | ||||
| 		if !c.verifyStale { | ||||
| 			cw := newPrefetchResponseWriter(server, state, c) | ||||
| 			go c.doPrefetch(ctx, state, cw, i, now) | ||||
| 		} | ||||
| 		servedStale.WithLabelValues(server, c.zonesMetricLabel).Inc() | ||||
| 	} else if c.shouldPrefetch(i, now) { | ||||
| 		cw := newPrefetchResponseWriter(server, state, c) | ||||
| 		go c.doPrefetch(ctx, state, cw, i, now) | ||||
| @@ -70,7 +80,7 @@ func (c *Cache) doPrefetch(ctx context.Context, state request.Request, cw *Respo | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func (c *Cache) doRefresh(ctx context.Context, state request.Request, cw *ResponseWriter) (int, error) { | ||||
| func (c *Cache) doRefresh(ctx context.Context, state request.Request, cw dns.ResponseWriter) (int, error) { | ||||
| 	if !state.Do() { | ||||
| 		setDo(state.Req) | ||||
| 	} | ||||
|   | ||||
							
								
								
									
										12
									
								
								plugin/cache/setup.go
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										12
									
								
								plugin/cache/setup.go
									
									
									
									
										vendored
									
									
								
							| @@ -166,11 +166,11 @@ func cacheParse(c *caddy.Controller) (*Cache, error) { | ||||
|  | ||||
| 			case "serve_stale": | ||||
| 				args := c.RemainingArgs() | ||||
| 				if len(args) > 1 { | ||||
| 				if len(args) > 2 { | ||||
| 					return nil, c.ArgErr() | ||||
| 				} | ||||
| 				ca.staleUpTo = 1 * time.Hour | ||||
| 				if len(args) == 1 { | ||||
| 				if len(args) > 0 { | ||||
| 					d, err := time.ParseDuration(args[0]) | ||||
| 					if err != nil { | ||||
| 						return nil, err | ||||
| @@ -180,6 +180,14 @@ func cacheParse(c *caddy.Controller) (*Cache, error) { | ||||
| 					} | ||||
| 					ca.staleUpTo = d | ||||
| 				} | ||||
| 				ca.verifyStale = false | ||||
| 				if len(args) > 1 { | ||||
| 					mode := strings.ToLower(args[1]) | ||||
| 					if mode != "immediate" && mode != "verify" { | ||||
| 						return nil, fmt.Errorf("invalid value for serve_stale refresh mode: %s", mode) | ||||
| 					} | ||||
| 					ca.verifyStale = mode == "verify" | ||||
| 				} | ||||
| 			default: | ||||
| 				return nil, c.ArgErr() | ||||
| 			} | ||||
|   | ||||
							
								
								
									
										29
									
								
								plugin/cache/setup_test.go
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										29
									
								
								plugin/cache/setup_test.go
									
									
									
									
										vendored
									
									
								
							| @@ -117,20 +117,25 @@ func TestSetup(t *testing.T) { | ||||
|  | ||||
| func TestServeStale(t *testing.T) { | ||||
| 	tests := []struct { | ||||
| 		input     string | ||||
| 		shouldErr bool | ||||
| 		staleUpTo time.Duration | ||||
| 		input       string | ||||
| 		shouldErr   bool | ||||
| 		staleUpTo   time.Duration | ||||
| 		verifyStale bool | ||||
| 	}{ | ||||
| 		{"serve_stale", false, 1 * time.Hour}, | ||||
| 		{"serve_stale 20m", false, 20 * time.Minute}, | ||||
| 		{"serve_stale 1h20m", false, 80 * time.Minute}, | ||||
| 		{"serve_stale 0m", false, 0}, | ||||
| 		{"serve_stale 0", false, 0}, | ||||
| 		{"serve_stale", false, 1 * time.Hour, false}, | ||||
| 		{"serve_stale 20m", false, 20 * time.Minute, false}, | ||||
| 		{"serve_stale 1h20m", false, 80 * time.Minute, false}, | ||||
| 		{"serve_stale 0m", false, 0, false}, | ||||
| 		{"serve_stale 0", false, 0, false}, | ||||
| 		{"serve_stale 0 verify", false, 0, true}, | ||||
| 		{"serve_stale 0 immediate", false, 0, false}, | ||||
| 		{"serve_stale 0 VERIFY", false, 0, true}, | ||||
| 		// fails | ||||
| 		{"serve_stale 20", true, 0}, | ||||
| 		{"serve_stale -20m", true, 0}, | ||||
| 		{"serve_stale aa", true, 0}, | ||||
| 		{"serve_stale 1m nono", true, 0}, | ||||
| 		{"serve_stale 20", true, 0, false}, | ||||
| 		{"serve_stale -20m", true, 0, false}, | ||||
| 		{"serve_stale aa", true, 0, false}, | ||||
| 		{"serve_stale 1m nono", true, 0, false}, | ||||
| 		{"serve_stale 0 after nono", true, 0, false}, | ||||
| 	} | ||||
| 	for i, test := range tests { | ||||
| 		c := caddy.NewTestController("dns", fmt.Sprintf("cache {\n%s\n}", test.input)) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user