mirror of
https://github.com/coredns/coredns.git
synced 2026-04-05 03:35:33 -04:00
Add an atomic.Bool to singleflight prefetching (#7963)
Also updated the plugin README to document single-flighting.
Signed-off-by: Ryan Brewster <rpb@anthropic.com>
This commit is contained in:
5
plugin/cache/README.md
vendored
5
plugin/cache/README.md
vendored
@@ -60,6 +60,9 @@ cache [TTL] [ZONES...] {
|
||||
**DURATION** defaults to 1m. Prefetching will happen when the TTL drops below **PERCENTAGE**,
|
||||
which defaults to `10%`, or at the latest 1 second before TTL expiration. Values should be in the range `[10%, 90%]`.
|
||||
Note the percent sign is mandatory. **PERCENTAGE** is treated as an `int`.
|
||||
Concurrent requests that trigger a prefetch for the same cache entry dispatch at most one
|
||||
background fetch, so prefetch load scales with the number of distinct eligible entries rather
|
||||
than request rate.
|
||||
* `serve_stale`, when serve\_stale is set, cache will always serve an expired entry to a client if there is one
|
||||
available as long as it has not been expired for longer than **DURATION** (default 1 hour). By default, the _cache_ plugin will
|
||||
attempt to refresh the cache entry after sending the expired cache entry to the client. The
|
||||
@@ -69,6 +72,8 @@ cache [TTL] [ZONES...] {
|
||||
checking to see if the entry is available from the source. **REFRESH_MODE** defaults to `immediate`. Setting this
|
||||
value to `verify` can lead to increased latency when serving stale responses, but will prevent stale entries
|
||||
from ever being served if an updated response can be retrieved from the source.
|
||||
In `immediate` mode, concurrent requests for the same expired entry dispatch at most one
|
||||
background refresh.
|
||||
* `servfail` cache SERVFAIL responses for **DURATION**. Setting **DURATION** to 0 will disable caching of SERVFAIL
|
||||
responses. If this option is not set, SERVFAIL responses will be cached for 5 seconds. **DURATION** may not be
|
||||
greater than 5 minutes.
|
||||
|
||||
20
plugin/cache/handler.go
vendored
20
plugin/cache/handler.go
vendored
@@ -55,13 +55,11 @@ func (c *Cache) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
|
||||
// Adjust the time to get a 0 TTL in the reply built from a stale item.
|
||||
now = now.Add(time.Duration(ttl) * time.Second)
|
||||
if !c.verifyStale {
|
||||
cw := newPrefetchResponseWriter(server, rc, do, cd, c)
|
||||
go c.doPrefetch(ctx, cw, i, now)
|
||||
c.tryPrefetch(ctx, i, server, rc, do, cd, now)
|
||||
}
|
||||
servedStale.WithLabelValues(server, c.zonesMetricLabel, c.viewMetricLabel).Inc()
|
||||
} else if c.shouldPrefetch(i, now) {
|
||||
cw := newPrefetchResponseWriter(server, rc, do, cd, c)
|
||||
go c.doPrefetch(ctx, cw, i, now)
|
||||
c.tryPrefetch(ctx, i, server, rc, do, cd, now)
|
||||
}
|
||||
|
||||
if i.wildcard != "" {
|
||||
@@ -91,6 +89,20 @@ func wildcardFunc(ctx context.Context) func() string {
|
||||
}
|
||||
}
|
||||
|
||||
// tryPrefetch dispatches a background prefetch for i if one is not already in
|
||||
// flight. The CAS on i.refreshing ensures at most one prefetch goroutine per
|
||||
// item, so prefetch load scales with distinct stale keys rather than QPS.
|
||||
func (c *Cache) tryPrefetch(ctx context.Context, i *item, server string, req *dns.Msg, do, cd bool, now time.Time) {
|
||||
if !i.refreshing.CompareAndSwap(false, true) {
|
||||
return
|
||||
}
|
||||
cw := newPrefetchResponseWriter(server, req, do, cd, c)
|
||||
go func() {
|
||||
defer i.refreshing.Store(false)
|
||||
c.doPrefetch(ctx, cw, i, now)
|
||||
}()
|
||||
}
|
||||
|
||||
func (c *Cache) doPrefetch(ctx context.Context, cw *ResponseWriter, i *item, now time.Time) {
|
||||
// Use a fresh metadata map to avoid concurrent writes to the original request's metadata.
|
||||
ctx = metadata.ContextWithMetadata(ctx)
|
||||
|
||||
9
plugin/cache/item.go
vendored
9
plugin/cache/item.go
vendored
@@ -2,6 +2,7 @@ package cache
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/coredns/coredns/plugin/cache/freq"
|
||||
@@ -25,6 +26,14 @@ type item struct {
|
||||
stored time.Time
|
||||
|
||||
*freq.Freq
|
||||
|
||||
// refreshing is set via CAS when a prefetch goroutine is dispatched for
|
||||
// this item and cleared when it returns, bounding in-flight prefetches
|
||||
// per item to one. A successful prefetch replaces this item in the cache
|
||||
// with a new one (zero-valued refreshing); the deferred clear matters
|
||||
// only when the prefetch fails and this item remains cached, so the next
|
||||
// hit can retry.
|
||||
refreshing atomic.Bool
|
||||
}
|
||||
|
||||
func newItem(m *dns.Msg, now time.Time, d time.Duration) *item {
|
||||
|
||||
83
plugin/cache/prefetch_test.go
vendored
83
plugin/cache/prefetch_test.go
vendored
@@ -3,6 +3,8 @@ package cache
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
@@ -209,6 +211,87 @@ type verification struct {
|
||||
fetch bool
|
||||
}
|
||||
|
||||
// TestPrefetchDedup verifies that concurrent hits on a single cache item
|
||||
// dispatch at most one prefetch goroutine, on both the serve_stale and
|
||||
// shouldPrefetch paths. See https://github.com/coredns/coredns/issues/7904.
|
||||
func TestPrefetchDedup(t *testing.T) {
|
||||
for _, tc := range []struct {
|
||||
name string
|
||||
staleUpTo time.Duration
|
||||
prefetch int
|
||||
percentage int
|
||||
hitAt time.Duration // all concurrent hits land here
|
||||
}{
|
||||
{name: "serve_stale", staleUpTo: time.Hour, hitAt: 110 * time.Second},
|
||||
{name: "prefetch", prefetch: 1, percentage: 50, hitAt: 70 * time.Second},
|
||||
} {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
const N = 200
|
||||
var upstream atomic.Int32
|
||||
release := make(chan struct{})
|
||||
done := make(chan struct{}, N)
|
||||
|
||||
c := New()
|
||||
c.staleUpTo = tc.staleUpTo
|
||||
c.prefetch = tc.prefetch
|
||||
c.percentage = tc.percentage
|
||||
c.duration = time.Minute
|
||||
c.Next = plugin.HandlerFunc(func(_ context.Context, w dns.ResponseWriter, r *dns.Msg) (int, error) {
|
||||
n := upstream.Add(1)
|
||||
if n > 1 {
|
||||
// Block the prefetch so all concurrent hits race against
|
||||
// the in-flight flag, not a completed refresh.
|
||||
<-release
|
||||
defer func() { done <- struct{}{} }()
|
||||
}
|
||||
m := new(dns.Msg)
|
||||
m.SetReply(r)
|
||||
m.Response = true
|
||||
m.Answer = []dns.RR{test.A("dedup.example.org. 100 IN A 127.0.0.1")}
|
||||
w.WriteMsg(m)
|
||||
return dns.RcodeSuccess, nil
|
||||
})
|
||||
|
||||
t0 := time.Now().UTC()
|
||||
c.now = func() time.Time { return t0 }
|
||||
req := new(dns.Msg)
|
||||
req.SetQuestion("dedup.example.org.", dns.TypeA)
|
||||
c.ServeDNS(context.TODO(), &test.ResponseWriter{}, req)
|
||||
if upstream.Load() != 1 {
|
||||
t.Fatalf("initial populate: want 1 upstream call, got %d", upstream.Load())
|
||||
}
|
||||
|
||||
// Fire N concurrent hits while the item is prefetch-eligible.
|
||||
// Without dedup each would spawn its own prefetch goroutine;
|
||||
// with dedup only the CAS winner spawns one, and the remaining
|
||||
// N-1 hits serve from cache without touching upstream.
|
||||
c.now = func() time.Time { return t0.Add(tc.hitAt) }
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(N)
|
||||
for range N {
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
req := new(dns.Msg)
|
||||
req.SetQuestion("dedup.example.org.", dns.TypeA)
|
||||
c.ServeDNS(context.TODO(), &test.ResponseWriter{}, req)
|
||||
}()
|
||||
}
|
||||
wg.Wait()
|
||||
|
||||
close(release)
|
||||
select {
|
||||
case <-done:
|
||||
case <-time.After(2 * time.Second):
|
||||
t.Fatal("prefetch goroutine never completed")
|
||||
}
|
||||
|
||||
if got := upstream.Load(); got != 2 {
|
||||
t.Fatalf("want exactly 2 upstream calls (populate + 1 deduped prefetch), got %d", got)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// prefetchHandler is a fake plugin implementation which returns a single A
|
||||
// record with the given qname and ttl. The returned IP address starts at
|
||||
// 127.0.0.1 and is incremented on every request.
|
||||
|
||||
Reference in New Issue
Block a user