add zones label to cache metrics (#5124)

* add zones to cache metrics

Signed-off-by: Elijah Andrews <elijahcandrews@gmail.com>
Author: Elijah Andrews
Date: 2022-02-14 12:10:30 -05:00
Committed by: GitHub
Parent: d97dbbef61
Commit: 80195c399f

5 changed files with 35 additions and 31 deletions

@@ -73,14 +73,14 @@ Entries with 0 TTL will remain in the cache until randomly evicted when the shar
 If monitoring is enabled (via the *prometheus* plugin) then the following metrics are exported:

-* `coredns_cache_entries{server, type}` - Total elements in the cache by cache type.
-* `coredns_cache_hits_total{server, type}` - Counter of cache hits by cache type.
-* `coredns_cache_misses_total{server}` - Counter of cache misses. - Deprecated, derive misses from cache hits/requests counters.
-* `coredns_cache_requests_total{server}` - Counter of cache requests.
-* `coredns_cache_prefetch_total{server}` - Counter of times the cache has prefetched a cached item.
-* `coredns_cache_drops_total{server}` - Counter of responses excluded from the cache due to request/response question name mismatch.
-* `coredns_cache_served_stale_total{server}` - Counter of requests served from stale cache entries.
-* `coredns_cache_evictions_total{server, type}` - Counter of cache evictions.
+* `coredns_cache_entries{server, type, zones}` - Total elements in the cache by cache type.
+* `coredns_cache_hits_total{server, type, zones}` - Counter of cache hits by cache type.
+* `coredns_cache_misses_total{server, zones}` - Counter of cache misses. - Deprecated, derive misses from cache hits/requests counters.
+* `coredns_cache_requests_total{server, zones}` - Counter of cache requests.
+* `coredns_cache_prefetch_total{server, zones}` - Counter of times the cache has prefetched a cached item.
+* `coredns_cache_drops_total{server, zones}` - Counter of responses excluded from the cache due to request/response question name mismatch.
+* `coredns_cache_served_stale_total{server, zones}` - Counter of requests served from stale cache entries.
+* `coredns_cache_evictions_total{server, type, zones}` - Counter of cache evictions.

 Cache types are either "denial" or "success". `Server` is the server handling the request, see the
 prometheus plugin for documentation.
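As a hedged illustration of what the new label carries (the zone names below are assumed examples, not values taken from this change): a `cache` block serving `example.org` would export its cache series with `zones="example.org."`, and a block configured as `cache example.org example.net` would carry the comma-joined value `zones="example.org.,example.net."`, so hits, requests, and the derived misses can be broken down per configured zone set rather than only per server.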

plugin/cache/cache.go

@@ -21,6 +21,8 @@ type Cache struct {
 	Next  plugin.Handler
 	Zones []string

+	zonesMetricLabel string
+
 	ncache *cache.Cache
 	ncap   int
 	nttl   time.Duration
@@ -162,11 +164,11 @@ func (w *ResponseWriter) WriteMsg(res *dns.Msg) error {
 	if hasKey && duration > 0 {
 		if w.state.Match(res) {
 			w.set(res, key, mt, duration)
-			cacheSize.WithLabelValues(w.server, Success).Set(float64(w.pcache.Len()))
-			cacheSize.WithLabelValues(w.server, Denial).Set(float64(w.ncache.Len()))
+			cacheSize.WithLabelValues(w.server, Success, w.zonesMetricLabel).Set(float64(w.pcache.Len()))
+			cacheSize.WithLabelValues(w.server, Denial, w.zonesMetricLabel).Set(float64(w.ncache.Len()))
 		} else {
 			// Don't log it, but increment counter
-			cacheDrops.WithLabelValues(w.server).Inc()
+			cacheDrops.WithLabelValues(w.server, w.zonesMetricLabel).Inc()
 		}
 	}
@@ -195,7 +197,7 @@ func (w *ResponseWriter) set(m *dns.Msg, key uint64, mt response.Type, duration
 	case response.NoError, response.Delegation:
 		i := newItem(m, w.now(), duration)
 		if w.pcache.Add(key, i) {
-			evictions.WithLabelValues(w.server, Success).Inc()
+			evictions.WithLabelValues(w.server, Success, w.zonesMetricLabel).Inc()
 		}
 		// when pre-fetching, remove the negative cache entry if it exists
 		if w.prefetch {
@@ -205,7 +207,7 @@ func (w *ResponseWriter) set(m *dns.Msg, key uint64, mt response.Type, duration
 	case response.NameError, response.NoData, response.ServerError:
 		i := newItem(m, w.now(), duration)
 		if w.ncache.Add(key, i) {
-			evictions.WithLabelValues(w.server, Denial).Inc()
+			evictions.WithLabelValues(w.server, Denial, w.zonesMetricLabel).Inc()
 		}
 	case response.OtherError:


@@ -43,7 +43,7 @@ func (c *Cache) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
 		return c.doRefresh(ctx, state, crr)
 	}
 	if ttl < 0 {
-		servedStale.WithLabelValues(server).Inc()
+		servedStale.WithLabelValues(server, c.zonesMetricLabel).Inc()
 		// Adjust the time to get a 0 TTL in the reply built from a stale item.
 		now = now.Add(time.Duration(ttl) * time.Second)
 		cw := newPrefetchResponseWriter(server, state, c)
@@ -59,7 +59,7 @@ func (c *Cache) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
 }

 func (c *Cache) doPrefetch(ctx context.Context, state request.Request, cw *ResponseWriter, i *item, now time.Time) {
-	cachePrefetches.WithLabelValues(cw.server).Inc()
+	cachePrefetches.WithLabelValues(cw.server, c.zonesMetricLabel).Inc()
 	c.doRefresh(ctx, state, cw)

 	// When prefetching we loose the item i, and with it the frequency
@@ -91,41 +91,41 @@ func (c *Cache) Name() string { return "cache" }
 func (c *Cache) get(now time.Time, state request.Request, server string) (*item, bool) {
 	k := hash(state.Name(), state.QType())
-	cacheRequests.WithLabelValues(server).Inc()
+	cacheRequests.WithLabelValues(server, c.zonesMetricLabel).Inc()

 	if i, ok := c.ncache.Get(k); ok && i.(*item).ttl(now) > 0 {
-		cacheHits.WithLabelValues(server, Denial).Inc()
+		cacheHits.WithLabelValues(server, Denial, c.zonesMetricLabel).Inc()
 		return i.(*item), true
 	}

 	if i, ok := c.pcache.Get(k); ok && i.(*item).ttl(now) > 0 {
-		cacheHits.WithLabelValues(server, Success).Inc()
+		cacheHits.WithLabelValues(server, Success, c.zonesMetricLabel).Inc()
 		return i.(*item), true
 	}
-	cacheMisses.WithLabelValues(server).Inc()
+	cacheMisses.WithLabelValues(server, c.zonesMetricLabel).Inc()
 	return nil, false
 }

 // getIgnoreTTL unconditionally returns an item if it exists in the cache.
 func (c *Cache) getIgnoreTTL(now time.Time, state request.Request, server string) *item {
 	k := hash(state.Name(), state.QType())
-	cacheRequests.WithLabelValues(server).Inc()
+	cacheRequests.WithLabelValues(server, c.zonesMetricLabel).Inc()

 	if i, ok := c.ncache.Get(k); ok {
 		ttl := i.(*item).ttl(now)
 		if ttl > 0 || (c.staleUpTo > 0 && -ttl < int(c.staleUpTo.Seconds())) {
-			cacheHits.WithLabelValues(server, Denial).Inc()
+			cacheHits.WithLabelValues(server, Denial, c.zonesMetricLabel).Inc()
 			return i.(*item)
 		}
 	}
 	if i, ok := c.pcache.Get(k); ok {
 		ttl := i.(*item).ttl(now)
 		if ttl > 0 || (c.staleUpTo > 0 && -ttl < int(c.staleUpTo.Seconds())) {
-			cacheHits.WithLabelValues(server, Success).Inc()
+			cacheHits.WithLabelValues(server, Success, c.zonesMetricLabel).Inc()
 			return i.(*item)
 		}
 	}
-	cacheMisses.WithLabelValues(server).Inc()
+	cacheMisses.WithLabelValues(server, c.zonesMetricLabel).Inc()
 	return nil
 }


@@ -14,54 +14,54 @@ var (
Subsystem: "cache", Subsystem: "cache",
Name: "entries", Name: "entries",
Help: "The number of elements in the cache.", Help: "The number of elements in the cache.",
}, []string{"server", "type"}) }, []string{"server", "type", "zones"})
// cacheRequests is a counter of all requests through the cache. // cacheRequests is a counter of all requests through the cache.
cacheRequests = promauto.NewCounterVec(prometheus.CounterOpts{ cacheRequests = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: plugin.Namespace, Namespace: plugin.Namespace,
Subsystem: "cache", Subsystem: "cache",
Name: "requests_total", Name: "requests_total",
Help: "The count of cache requests.", Help: "The count of cache requests.",
}, []string{"server"}) }, []string{"server", "zones"})
// cacheHits is counter of cache hits by cache type. // cacheHits is counter of cache hits by cache type.
cacheHits = promauto.NewCounterVec(prometheus.CounterOpts{ cacheHits = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: plugin.Namespace, Namespace: plugin.Namespace,
Subsystem: "cache", Subsystem: "cache",
Name: "hits_total", Name: "hits_total",
Help: "The count of cache hits.", Help: "The count of cache hits.",
}, []string{"server", "type"}) }, []string{"server", "type", "zones"})
// cacheMisses is the counter of cache misses. - Deprecated // cacheMisses is the counter of cache misses. - Deprecated
cacheMisses = promauto.NewCounterVec(prometheus.CounterOpts{ cacheMisses = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: plugin.Namespace, Namespace: plugin.Namespace,
Subsystem: "cache", Subsystem: "cache",
Name: "misses_total", Name: "misses_total",
Help: "The count of cache misses. Deprecated, derive misses from cache hits/requests counters.", Help: "The count of cache misses. Deprecated, derive misses from cache hits/requests counters.",
}, []string{"server"}) }, []string{"server", "zones"})
// cachePrefetches is the number of time the cache has prefetched a cached item. // cachePrefetches is the number of time the cache has prefetched a cached item.
cachePrefetches = promauto.NewCounterVec(prometheus.CounterOpts{ cachePrefetches = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: plugin.Namespace, Namespace: plugin.Namespace,
Subsystem: "cache", Subsystem: "cache",
Name: "prefetch_total", Name: "prefetch_total",
Help: "The number of times the cache has prefetched a cached item.", Help: "The number of times the cache has prefetched a cached item.",
}, []string{"server"}) }, []string{"server", "zones"})
// cacheDrops is the number responses that are not cached, because the reply is malformed. // cacheDrops is the number responses that are not cached, because the reply is malformed.
cacheDrops = promauto.NewCounterVec(prometheus.CounterOpts{ cacheDrops = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: plugin.Namespace, Namespace: plugin.Namespace,
Subsystem: "cache", Subsystem: "cache",
Name: "drops_total", Name: "drops_total",
Help: "The number responses that are not cached, because the reply is malformed.", Help: "The number responses that are not cached, because the reply is malformed.",
}, []string{"server"}) }, []string{"server", "zones"})
// servedStale is the number of requests served from stale cache entries. // servedStale is the number of requests served from stale cache entries.
servedStale = promauto.NewCounterVec(prometheus.CounterOpts{ servedStale = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: plugin.Namespace, Namespace: plugin.Namespace,
Subsystem: "cache", Subsystem: "cache",
Name: "served_stale_total", Name: "served_stale_total",
Help: "The number of requests served from stale cache entries.", Help: "The number of requests served from stale cache entries.",
}, []string{"server"}) }, []string{"server", "zones"})
// evictions is the counter of cache evictions. // evictions is the counter of cache evictions.
evictions = promauto.NewCounterVec(prometheus.CounterOpts{ evictions = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: plugin.Namespace, Namespace: plugin.Namespace,
Subsystem: "cache", Subsystem: "cache",
Name: "evictions_total", Name: "evictions_total",
Help: "The count of cache evictions.", Help: "The count of cache evictions.",
}, []string{"server", "type"}) }, []string{"server", "type", "zones"})
) )
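A side note on why every call site in this diff must gain the extra argument: with prometheus/client_golang, `WithLabelValues` panics if the number of values does not match the variable labels declared on the vector. The standalone sketch below (metric names and label values are illustrative, not taken from a running CoreDNS) demonstrates that behavior.

```go
package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
)

func main() {
	// A counter vector mirroring the shape of cacheHits above.
	hits := prometheus.NewCounterVec(prometheus.CounterOpts{
		Namespace: "coredns",
		Subsystem: "cache",
		Name:      "hits_total",
		Help:      "The count of cache hits.",
	}, []string{"server", "type", "zones"})

	// Passing a value for every declared label works as expected.
	hits.WithLabelValues("dns://:53", "success", "example.org.").Inc()

	// Leaving out the new "zones" value panics with an inconsistent-cardinality
	// error, which is why each WithLabelValues call in the diff is updated.
	defer func() { fmt.Println("recovered:", recover()) }()
	hits.WithLabelValues("dns://:53", "success").Inc()
}
```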


@@ -4,6 +4,7 @@ import (
"errors" "errors"
"fmt" "fmt"
"strconv" "strconv"
"strings"
"time" "time"
"github.com/coredns/caddy" "github.com/coredns/caddy"
@@ -185,6 +186,7 @@ func cacheParse(c *caddy.Controller) (*Cache, error) {
 		}

 		ca.Zones = origins
+		ca.zonesMetricLabel = strings.Join(origins, ",")
 		ca.pcache = cache.New(ca.pcap)
 		ca.ncache = cache.New(ca.ncap)
 	}