mirror of
https://github.com/coredns/coredns.git
synced 2025-10-27 00:04:15 -04:00
middleware/metrics: cleanup (#355)
* middleware/metrics: add more metrics middleware/cache: Add metrics for number of elements in the cache. Also export the total size. Update README to detail the new metrics. middleware/metrics: Move metrics into subpackage called "vars". This breaks the import cycle and is cleaner. This allows vars.Report to be used in the dnsserver to log refused queries. middleware/metrics: tests Add tests to the metrics framework. The metrics/test subpackage allows scraping of the local server. Do a few test scrapes of the metrics that are defined in the metrics middleware. This also allows metrics integration tests to check if the caching and dnssec middleware export their metrics correctly. * update README * typos * fix tests
This commit is contained in:
@@ -41,3 +41,19 @@ func GetConfig(c *caddy.Controller) *Config {
|
||||
ctx.saveConfig(c.Key, &Config{})
|
||||
return GetConfig(c)
|
||||
}
|
||||
|
||||
// GetMiddleware returns the middleware handler that has been added to the config under name.
|
||||
// This is useful to inspect if a certain middleware is active in this server.
|
||||
// Note that this is order dependent and the order is defined in directives.go, i.e. if your middleware
|
||||
// comes before the middleware you are checking; it will not be there (yet).
|
||||
func GetMiddleware(c *caddy.Controller, name string) middleware.Handler {
|
||||
// TODO(miek): calling the handler h(nil) should be a noop...
|
||||
conf := GetConfig(c)
|
||||
for _, h := range conf.Middleware {
|
||||
x := h(nil)
|
||||
if name == x.Name() {
|
||||
return x
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -10,7 +10,9 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/miekg/coredns/middleware"
|
||||
"github.com/miekg/coredns/middleware/metrics/vars"
|
||||
"github.com/miekg/coredns/middleware/pkg/edns"
|
||||
"github.com/miekg/coredns/middleware/pkg/rcode"
|
||||
"github.com/miekg/coredns/request"
|
||||
|
||||
"github.com/miekg/dns"
|
||||
@@ -247,14 +249,16 @@ func (s *Server) OnStartupComplete() {
|
||||
}
|
||||
|
||||
// DefaultErrorFunc responds to a DNS request with an error.
|
||||
func DefaultErrorFunc(w dns.ResponseWriter, r *dns.Msg, rcode int) {
|
||||
func DefaultErrorFunc(w dns.ResponseWriter, r *dns.Msg, rc int) {
|
||||
state := request.Request{W: w, Req: r}
|
||||
|
||||
answer := new(dns.Msg)
|
||||
answer.SetRcode(r, rcode)
|
||||
answer.SetRcode(r, rc)
|
||||
|
||||
state.SizeAndDo(answer)
|
||||
|
||||
vars.Report(state, vars.Dropped, rcode.ToString(rc), answer.Len(), time.Now())
|
||||
|
||||
w.WriteMsg(answer)
|
||||
}
|
||||
|
||||
|
||||
@@ -36,7 +36,8 @@ TODO(miek): text here on how to hook up middleware.
|
||||
## Metrics
|
||||
|
||||
When exporting metrics the *Namespace* should be `middleware.Namespace` (="coredns"), and the
|
||||
*Subsystem* should be the name of the middleware.
|
||||
*Subsystem* should be the name of the middleware. The README.md for the middleware should then
|
||||
also contain a *Metrics* section detailing the metrics.
|
||||
|
||||
## Documentation
|
||||
|
||||
|
||||
@@ -8,6 +8,7 @@ import (
|
||||
|
||||
"github.com/miekg/coredns/middleware"
|
||||
"github.com/miekg/coredns/middleware/file"
|
||||
"github.com/miekg/coredns/middleware/metrics"
|
||||
"github.com/miekg/coredns/request"
|
||||
|
||||
"github.com/miekg/dns"
|
||||
@@ -20,6 +21,7 @@ type (
|
||||
Next middleware.Handler
|
||||
*Zones
|
||||
|
||||
metrics *metrics.Metrics
|
||||
loader
|
||||
}
|
||||
|
||||
@@ -97,3 +99,5 @@ func (a Auto) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (i
|
||||
w.WriteMsg(m)
|
||||
return dns.RcodeSuccess, nil
|
||||
}
|
||||
|
||||
// Name returns the name of this middleware, "auto".
func (a Auto) Name() string { return "auto" }
|
||||
|
||||
@@ -11,6 +11,7 @@ import (
|
||||
"github.com/miekg/coredns/core/dnsserver"
|
||||
"github.com/miekg/coredns/middleware"
|
||||
"github.com/miekg/coredns/middleware/file"
|
||||
"github.com/miekg/coredns/middleware/metrics"
|
||||
|
||||
"github.com/mholt/caddy"
|
||||
)
|
||||
@@ -28,10 +29,16 @@ func setup(c *caddy.Controller) error {
|
||||
return middleware.Error("auto", err)
|
||||
}
|
||||
|
||||
// If we have enabled prometheus we should add newly discovered zones to it.
|
||||
met := dnsserver.GetMiddleware(c, "prometheus")
|
||||
if met != nil {
|
||||
a.metrics = met.(*metrics.Metrics)
|
||||
}
|
||||
|
||||
walkChan := make(chan bool)
|
||||
|
||||
c.OnStartup(func() error {
|
||||
err := a.Zones.Walk(a.loader)
|
||||
err := a.Walk()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -43,7 +50,7 @@ func setup(c *caddy.Controller) error {
|
||||
case <-walkChan:
|
||||
return
|
||||
case <-ticker.C:
|
||||
a.Zones.Walk(a.loader)
|
||||
a.Walk()
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
@@ -13,26 +13,26 @@ import (
|
||||
)
|
||||
|
||||
// Walk will recursively walk the files under l.directory and add the ones that match l.re.
|
||||
func (z *Zones) Walk(l loader) error {
|
||||
func (a Auto) Walk() error {
|
||||
|
||||
// TODO(miek): should add something so that we don't stomp on each other.
|
||||
|
||||
toDelete := make(map[string]bool)
|
||||
for _, n := range z.Names() {
|
||||
for _, n := range a.Zones.Names() {
|
||||
toDelete[n] = true
|
||||
}
|
||||
|
||||
filepath.Walk(l.directory, func(path string, info os.FileInfo, err error) error {
|
||||
filepath.Walk(a.loader.directory, func(path string, info os.FileInfo, err error) error {
|
||||
if info.IsDir() {
|
||||
return nil
|
||||
}
|
||||
|
||||
match, origin := matches(l.re, info.Name(), l.template)
|
||||
match, origin := matches(a.loader.re, info.Name(), a.loader.template)
|
||||
if !match {
|
||||
return nil
|
||||
}
|
||||
|
||||
if _, ok := z.Z[origin]; ok {
|
||||
if _, ok := a.Zones.Z[origin]; ok {
|
||||
// we already have this zone
|
||||
toDelete[origin] = false
|
||||
return nil
|
||||
@@ -50,10 +50,14 @@ func (z *Zones) Walk(l loader) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
zo.NoReload = l.noReload
|
||||
zo.TransferTo = l.transferTo
|
||||
zo.NoReload = a.loader.noReload
|
||||
zo.TransferTo = a.loader.transferTo
|
||||
|
||||
z.Insert(zo, origin)
|
||||
a.Zones.Add(zo, origin)
|
||||
|
||||
if a.metrics != nil {
|
||||
a.metrics.AddZone(origin)
|
||||
}
|
||||
|
||||
zo.Notify()
|
||||
|
||||
@@ -68,7 +72,13 @@ func (z *Zones) Walk(l loader) error {
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
z.Delete(origin)
|
||||
|
||||
if a.metrics != nil {
|
||||
a.metrics.RemoveZone(origin)
|
||||
}
|
||||
|
||||
a.Zones.Remove(origin)
|
||||
|
||||
log.Printf("[INFO] Deleting zone `%s'", origin)
|
||||
}
|
||||
|
||||
|
||||
@@ -37,13 +37,16 @@ func TestWalk(t *testing.T) {
|
||||
template: `${1}`,
|
||||
}
|
||||
|
||||
z := &Zones{}
|
||||
a := Auto{
|
||||
loader: ldr,
|
||||
Zones: &Zones{},
|
||||
}
|
||||
|
||||
z.Walk(ldr)
|
||||
a.Walk()
|
||||
|
||||
// db.example.org and db.example.com should be here (created in createFiles)
|
||||
for _, name := range []string{"example.com.", "example.org."} {
|
||||
if _, ok := z.Z[name]; !ok {
|
||||
if _, ok := a.Zones.Z[name]; !ok {
|
||||
t.Errorf("%s should have been added", name)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -27,15 +27,18 @@ func TestWatcher(t *testing.T) {
|
||||
template: `${1}`,
|
||||
}
|
||||
|
||||
z := &Zones{}
|
||||
a := Auto{
|
||||
loader: ldr,
|
||||
Zones: &Zones{},
|
||||
}
|
||||
|
||||
z.Walk(ldr)
|
||||
a.Walk()
|
||||
|
||||
// example.org and example.com should exist
|
||||
if x := len(z.Z["example.org."].All()); x != 4 {
|
||||
if x := len(a.Zones.Z["example.org."].All()); x != 4 {
|
||||
t.Fatalf("expected 4 RRs, got %d", x)
|
||||
}
|
||||
if x := len(z.Z["example.com."].All()); x != 4 {
|
||||
if x := len(a.Zones.Z["example.com."].All()); x != 4 {
|
||||
t.Fatalf("expected 4 RRs, got %d", x)
|
||||
}
|
||||
|
||||
@@ -44,5 +47,6 @@ func TestWatcher(t *testing.T) {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
z.Walk(ldr)
|
||||
a.Walk()
|
||||
// TODO(miek): check
|
||||
}
|
||||
|
||||
@@ -40,9 +40,9 @@ func (z *Zones) Zones(name string) *file.Zone {
|
||||
return zo
|
||||
}
|
||||
|
||||
// Insert inserts a new zone into z. If zo.NoReload is false, the
|
||||
// Add adds a new zone into z. If zo.NoReload is false, the
|
||||
// reload goroutine is started.
|
||||
func (z *Zones) Insert(zo *file.Zone, name string) {
|
||||
func (z *Zones) Add(zo *file.Zone, name string) {
|
||||
z.Lock()
|
||||
|
||||
if z.Z == nil {
|
||||
@@ -51,14 +51,13 @@ func (z *Zones) Insert(zo *file.Zone, name string) {
|
||||
|
||||
z.Z[name] = zo
|
||||
z.names = append(z.names, name)
|
||||
|
||||
zo.Reload()
|
||||
|
||||
z.Unlock()
|
||||
}
|
||||
|
||||
// Delete removes the zone named name from z. It also stop the the zone's reload goroutine.
|
||||
func (z *Zones) Delete(name string) {
|
||||
// Remove removes the zone named name from z. It also stops the zone's reload goroutine.
|
||||
func (z *Zones) Remove(name string) {
|
||||
z.Lock()
|
||||
|
||||
if zo, ok := z.Z[name]; ok && !zo.NoReload {
|
||||
@@ -67,10 +66,11 @@ func (z *Zones) Delete(name string) {
|
||||
|
||||
delete(z.Z, name)
|
||||
|
||||
// just regenerate Names (might be bad if you have a lot of zones...)
|
||||
// TODO(miek): just regenerate Names (might be bad if you have a lot of zones...)
|
||||
z.names = []string{}
|
||||
for n := range z.Z {
|
||||
z.names = append(z.names, n)
|
||||
}
|
||||
|
||||
z.Unlock()
|
||||
}
|
||||
|
||||
14
middleware/cache/README.md
vendored
14
middleware/cache/README.md
vendored
@@ -35,24 +35,24 @@ There is a third category (`error`) but those responses are never cached.
|
||||
|
||||
The minimum TTL allowed on resource records is 5 seconds.
|
||||
|
||||
If monitoring is enabled (via the *prometheus* directive) then the following extra metrics are added:
|
||||
## Metrics
|
||||
|
||||
* coredns_cache_hit_count_total, and
|
||||
* coredns_cache_miss_count_total
|
||||
If monitoring is enabled (via the *prometheus* directive) then the following metrics are exported:
|
||||
|
||||
They both work on a per-zone basis and just count the hit and miss counts for each query.
|
||||
* coredns_cache_size_guage{type} - total elements in the cache, type is either "denial" or "success".
|
||||
* coredns_cache_capacity_gauge{type} - total capacity of the cache, type is either "denial" or "success".
|
||||
|
||||
## Examples
|
||||
|
||||
Enable caching for all zones, but cap everything to a TTL of 10 seconds:
|
||||
|
||||
~~~
|
||||
cache 10
|
||||
~~~
|
||||
|
||||
Enable caching for all zones, but cap everything to a TTL of 10 seconds.
|
||||
Proxy to Google Public DNS and only cache responses for example.org (or below).
|
||||
|
||||
~~~
|
||||
proxy . 8.8.8.8:53
|
||||
cache example.org
|
||||
~~~
|
||||
|
||||
Proxy to Google Public DNS and only cache responses for example.org (or below).
|
||||
|
||||
9
middleware/cache/cache.go
vendored
9
middleware/cache/cache.go
vendored
@@ -79,6 +79,9 @@ func (c *ResponseWriter) WriteMsg(res *dns.Msg) error {
|
||||
|
||||
if key != "" {
|
||||
c.set(res, key, mt, duration)
|
||||
|
||||
cacheSize.WithLabelValues(Success).Set(float64(c.pcache.Len()))
|
||||
cacheSize.WithLabelValues(Denial).Set(float64(c.ncache.Len()))
|
||||
}
|
||||
|
||||
setMsgTTL(res, uint32(duration.Seconds()))
|
||||
@@ -103,7 +106,6 @@ func (c *ResponseWriter) set(m *dns.Msg, key string, mt response.Type, duration
|
||||
|
||||
case response.OtherError:
|
||||
// don't cache these
|
||||
// TODO(miek): what do we do with these?
|
||||
default:
|
||||
log.Printf("[WARNING] Caching called with unknown classification: %d", mt)
|
||||
}
|
||||
@@ -122,4 +124,9 @@ const (
|
||||
minTTL = 5 * time.Second
|
||||
|
||||
defaultCap = 10000 // default capacity of the cache.
|
||||
|
||||
// Success is the class defined for positive caching.
|
||||
Success = "success"
|
||||
// Denial is the class defined for negative caching.
|
||||
Denial = "denial"
|
||||
)
|
||||
|
||||
26
middleware/cache/handler.go
vendored
26
middleware/cache/handler.go
vendored
@@ -30,17 +30,15 @@ func (c *Cache) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
|
||||
state.SizeAndDo(resp)
|
||||
w.WriteMsg(resp)
|
||||
|
||||
cacheHitCount.WithLabelValues(zone).Inc()
|
||||
|
||||
return dns.RcodeSuccess, nil
|
||||
}
|
||||
|
||||
cacheMissCount.WithLabelValues(zone).Inc()
|
||||
|
||||
crr := &ResponseWriter{w, c}
|
||||
return c.Next.ServeDNS(ctx, crr, r)
|
||||
}
|
||||
|
||||
// Name returns the name of this middleware, "cache".
func (c *Cache) Name() string { return "cache" }
|
||||
|
||||
func (c *Cache) get(qname string, qtype uint16, do bool) (*item, bool, bool) {
|
||||
k := rawKey(qname, qtype, do)
|
||||
|
||||
@@ -55,24 +53,24 @@ func (c *Cache) get(qname string, qtype uint16, do bool) (*item, bool, bool) {
|
||||
}
|
||||
|
||||
var (
|
||||
cacheHitCount = prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
cacheSize = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Namespace: middleware.Namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "hit_count_total",
|
||||
Help: "Counter of DNS requests that were found in the cache.",
|
||||
}, []string{"zone"})
|
||||
Name: "size_guage",
|
||||
Help: "Gauge of number of elements in the cache.",
|
||||
}, []string{"type"})
|
||||
|
||||
cacheMissCount = prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
cacheCapacity = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Namespace: middleware.Namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "miss_count_total",
|
||||
Help: "Counter of DNS requests that were not found in the cache.",
|
||||
}, []string{"zone"})
|
||||
Name: "capacity_gauge",
|
||||
Help: "Gauge of cache's capacity.",
|
||||
}, []string{"type"})
|
||||
)
|
||||
|
||||
const subsystem = "cache"
|
||||
|
||||
func init() {
|
||||
prometheus.MustRegister(cacheHitCount)
|
||||
prometheus.MustRegister(cacheMissCount)
|
||||
prometheus.MustRegister(cacheSize)
|
||||
prometheus.MustRegister(cacheCapacity)
|
||||
}
|
||||
|
||||
8
middleware/cache/setup.go
vendored
8
middleware/cache/setup.go
vendored
@@ -28,6 +28,10 @@ func setup(c *caddy.Controller) error {
|
||||
return ca
|
||||
})
|
||||
|
||||
// Export the capacity for the metrics. This only happens once, because this is a re-load change only.
|
||||
cacheCapacity.WithLabelValues(Success).Set(float64(ca.pcap))
|
||||
cacheCapacity.WithLabelValues(Denial).Set(float64(ca.ncap))
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -58,7 +62,7 @@ func cacheParse(c *caddy.Controller) (*Cache, error) {
|
||||
for c.NextBlock() {
|
||||
switch c.Val() {
|
||||
// first number is cap, second is a new ttl
|
||||
case "success":
|
||||
case Success:
|
||||
args := c.RemainingArgs()
|
||||
if len(args) == 0 {
|
||||
return nil, c.ArgErr()
|
||||
@@ -75,7 +79,7 @@ func cacheParse(c *caddy.Controller) (*Cache, error) {
|
||||
}
|
||||
ca.pttl = time.Duration(pttl) * time.Second
|
||||
}
|
||||
case "denial":
|
||||
case Denial:
|
||||
args := c.RemainingArgs()
|
||||
if len(args) == 0 {
|
||||
return nil, c.ArgErr()
|
||||
|
||||
@@ -51,6 +51,8 @@ func (c Chaos) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
// Name returns the name of this middleware, "chaos".
func (c Chaos) Name() string { return "chaos" }
|
||||
|
||||
func trim(s string) string {
|
||||
if len(s) < 256 {
|
||||
return s
|
||||
|
||||
@@ -34,9 +34,14 @@ dnssec [ZONES... ] {
|
||||
will be signed with all keys. Generating a key can be done with `dnssec-keygen`: `dnssec-keygen -a
|
||||
ECDSAP256SHA256 <zonename>`. A key created for zone *A* can be safely used for zone *B*.
|
||||
|
||||
|
||||
* `cache_capacity` indicates the capacity of the LRU cache. The dnssec middleware uses LRU cache to manage
|
||||
objects and the default capacity is 10000.
|
||||
|
||||
## Metrics
|
||||
|
||||
If monitoring is enabled (via the *prometheus* directive) then the following metrics are exported:
|
||||
|
||||
* coredns_dnssec_size_guage{type} - total elements in the cache, type is "signature".
|
||||
* coredns_dnssec_capacity_gauge{type} - total capacity of the cache, type is "signature".
|
||||
|
||||
## Examples
|
||||
|
||||
@@ -40,24 +40,26 @@ func (d Dnssec) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
|
||||
}
|
||||
|
||||
var (
|
||||
cacheHitCount = prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
cacheSize = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Namespace: middleware.Namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "hit_count_total",
|
||||
Help: "Counter of signatures that were found in the cache.",
|
||||
}, []string{"zone"})
|
||||
Name: "size_guage",
|
||||
Help: "Gauge of number of elements in the cache.",
|
||||
}, []string{"type"})
|
||||
|
||||
cacheMissCount = prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
cacheCapacity = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Namespace: middleware.Namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "miss_count_total",
|
||||
Help: "Counter of signatures that were not found in the cache.",
|
||||
}, []string{"zone"})
|
||||
Name: "capacity_gauge",
|
||||
Help: "Gauge of cache's capacity.",
|
||||
}, []string{"type"})
|
||||
)
|
||||
|
||||
// Name returns the name of this middleware, "dnssec".
func (d Dnssec) Name() string { return "dnssec" }
|
||||
|
||||
const subsystem = "dnssec"
|
||||
|
||||
func init() {
|
||||
prometheus.MustRegister(cacheHitCount)
|
||||
prometheus.MustRegister(cacheMissCount)
|
||||
prometheus.MustRegister(cacheSize)
|
||||
prometheus.MustRegister(cacheCapacity)
|
||||
}
|
||||
|
||||
@@ -30,6 +30,8 @@ func (d *ResponseWriter) WriteMsg(res *dns.Msg) error {
|
||||
|
||||
if state.Do() {
|
||||
res = d.d.Sign(state, zone, time.Now().UTC())
|
||||
|
||||
cacheSize.WithLabelValues("signature").Set(float64(d.d.cache.Len()))
|
||||
}
|
||||
state.SizeAndDo(res)
|
||||
|
||||
|
||||
@@ -32,6 +32,9 @@ func setup(c *caddy.Controller) error {
|
||||
return New(zones, keys, next, cache)
|
||||
})
|
||||
|
||||
// Export the capacity for the metrics. This only happens once, because this is a re-load change only.
|
||||
cacheCapacity.WithLabelValues("signature").Set(float64(capacity))
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
@@ -48,6 +48,8 @@ func (h errorHandler) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns
|
||||
return rcode, err
|
||||
}
|
||||
|
||||
// Name returns the name of this middleware, "errors".
func (h errorHandler) Name() string { return "errors" }
|
||||
|
||||
func (h errorHandler) recovery(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) {
|
||||
rec := recover()
|
||||
if rec == nil {
|
||||
|
||||
@@ -117,6 +117,8 @@ func (e *Etcd) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (
|
||||
return dns.RcodeSuccess, nil
|
||||
}
|
||||
|
||||
// Name returns the name of this middleware, "etcd".
func (e *Etcd) Name() string { return "etcd" }
|
||||
|
||||
// Err writes an error response to the client.
|
||||
func (e *Etcd) Err(zone string, rcode int, state request.Request, debug []msg.Service, err error, opt Options) (int, error) {
|
||||
m := new(dns.Msg)
|
||||
|
||||
@@ -110,6 +110,8 @@ func (f File) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (i
|
||||
return dns.RcodeSuccess, nil
|
||||
}
|
||||
|
||||
// Name returns the name of this middleware, "file".
func (f File) Name() string { return "file" }
|
||||
|
||||
// Parse parses the zone in filename and returns a new Zone or an error.
|
||||
func Parse(f io.Reader, origin, fileName string) (*Zone, error) {
|
||||
tokens := dns.ParseZone(f, dns.Fqdn(origin), fileName)
|
||||
|
||||
@@ -101,6 +101,8 @@ func (k Kubernetes) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.M
|
||||
return dns.RcodeSuccess, nil
|
||||
}
|
||||
|
||||
// Name returns the name of this middleware, "kubernetes".
func (k Kubernetes) Name() string { return "kubernetes" }
|
||||
|
||||
// Err writes an error response back to the client.
|
||||
func (k Kubernetes) Err(zone string, rcode int, state request.Request) (int, error) {
|
||||
m := new(dns.Msg)
|
||||
|
||||
@@ -18,3 +18,5 @@ func (rr RoundRobin) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.
|
||||
wrr := &RoundRobinResponseWriter{w}
|
||||
return rr.Next.ServeDNS(ctx, wrr, r)
|
||||
}
|
||||
|
||||
// Name returns the name of this middleware, "loadbalance".
func (rr RoundRobin) Name() string { return "loadbalance" }
|
||||
|
||||
@@ -6,7 +6,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/miekg/coredns/middleware"
|
||||
"github.com/miekg/coredns/middleware/metrics"
|
||||
"github.com/miekg/coredns/middleware/metrics/vars"
|
||||
"github.com/miekg/coredns/middleware/pkg/dnsrecorder"
|
||||
"github.com/miekg/coredns/middleware/pkg/rcode"
|
||||
"github.com/miekg/coredns/middleware/pkg/replacer"
|
||||
@@ -45,7 +45,7 @@ func (l Logger) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
|
||||
answer.SetRcode(r, rc)
|
||||
state.SizeAndDo(answer)
|
||||
|
||||
metrics.Report(state, metrics.Dropped, rcode.ToString(rc), answer.Len(), time.Now())
|
||||
vars.Report(state, vars.Dropped, rcode.ToString(rc), answer.Len(), time.Now())
|
||||
|
||||
w.WriteMsg(answer)
|
||||
}
|
||||
@@ -64,6 +64,8 @@ func (l Logger) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
|
||||
return l.Next.ServeDNS(ctx, w, r)
|
||||
}
|
||||
|
||||
// Name returns the name of this middleware, "log".
func (l Logger) Name() string { return "log" }
|
||||
|
||||
// Rule configures the logging middleware.
|
||||
type Rule struct {
|
||||
NameScope string
|
||||
|
||||
@@ -7,12 +7,10 @@ The following metrics are exported:
|
||||
|
||||
* coredns_dns_request_count_total{zone, proto, family}
|
||||
* coredns_dns_request_duration_milliseconds{zone}
|
||||
* coredns_dns_request_size_bytes{zone,, proto}
|
||||
* coredns_dns_request_transfer_size_bytes{zone,, proto}
|
||||
* coredns_dns_request_size_bytes{zone, proto}
|
||||
* coredns_dns_request_do_count_total{zone}
|
||||
* coredns_dns_request_type_count_total{zone, type}
|
||||
* coredns_dns_response_size_bytes{zone, proto}
|
||||
* coredns_dns_response_transfer_size_bytes{zone, proto}
|
||||
* coredns_dns_response_rcode_count_total{zone, rcode}
|
||||
|
||||
Each counter has a label `zone` which is the zonename used for the request/response.
|
||||
@@ -27,10 +25,7 @@ Extra labels used are:
|
||||
* The `response_rcode_count_total` has an extra label `rcode` which holds the rcode of the response.
|
||||
|
||||
If monitoring is enabled, queries that do not enter the middleware chain are exported under the fake
|
||||
domain "dropped" (without a closing dot).
|
||||
|
||||
Restarting CoreDNS will stop the monitoring. This is a bug. Also [this upstream
|
||||
Caddy bug](https://github.com/mholt/caddy/issues/675).
|
||||
name "dropped" (without a closing dot - this is never a valid domain name).
|
||||
|
||||
## Syntax
|
||||
|
||||
@@ -44,3 +39,9 @@ It optionally takes an address to which the metrics are exported; the default
|
||||
is `localhost:9153`. The metrics path is fixed to `/metrics`.
|
||||
|
||||
## Examples
|
||||
|
||||
Use an alternative address:
|
||||
|
||||
~~~
|
||||
prometheus localhost:9253
|
||||
~~~
|
||||
|
||||
@@ -1,9 +1,8 @@
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/miekg/coredns/middleware"
|
||||
"github.com/miekg/coredns/middleware/metrics/vars"
|
||||
"github.com/miekg/coredns/middleware/pkg/dnsrecorder"
|
||||
"github.com/miekg/coredns/middleware/pkg/rcode"
|
||||
"github.com/miekg/coredns/request"
|
||||
@@ -17,7 +16,7 @@ func (m *Metrics) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg
|
||||
state := request.Request{W: w, Req: r}
|
||||
|
||||
qname := state.QName()
|
||||
zone := middleware.Zones(m.ZoneNames).Matches(qname)
|
||||
zone := middleware.Zones(m.ZoneNames()).Matches(qname)
|
||||
if zone == "" {
|
||||
zone = "."
|
||||
}
|
||||
@@ -26,71 +25,9 @@ func (m *Metrics) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg
|
||||
rw := dnsrecorder.New(w)
|
||||
status, err := m.Next.ServeDNS(ctx, rw, r)
|
||||
|
||||
Report(state, zone, rcode.ToString(rw.Rcode), rw.Size, rw.Start)
|
||||
vars.Report(state, zone, rcode.ToString(rw.Rcode), rw.Size, rw.Start)
|
||||
|
||||
return status, err
|
||||
}
|
||||
|
||||
// Report is a plain reporting function that the server can use for REFUSED and other
|
||||
// queries that are turned down because they don't match any middleware.
|
||||
func Report(req request.Request, zone, rcode string, size int, start time.Time) {
|
||||
if requestCount == nil {
|
||||
// no metrics are enabled
|
||||
return
|
||||
}
|
||||
|
||||
// Proto and Family
|
||||
net := req.Proto()
|
||||
fam := "1"
|
||||
if req.Family() == 2 {
|
||||
fam = "2"
|
||||
}
|
||||
|
||||
typ := req.QType()
|
||||
|
||||
requestCount.WithLabelValues(zone, net, fam).Inc()
|
||||
requestDuration.WithLabelValues(zone).Observe(float64(time.Since(start) / time.Millisecond))
|
||||
|
||||
if req.Do() {
|
||||
requestDo.WithLabelValues(zone).Inc()
|
||||
}
|
||||
|
||||
if _, known := monitorType[typ]; known {
|
||||
requestType.WithLabelValues(zone, dns.Type(typ).String()).Inc()
|
||||
} else {
|
||||
requestType.WithLabelValues(zone, other).Inc()
|
||||
}
|
||||
|
||||
if typ == dns.TypeIXFR || typ == dns.TypeAXFR {
|
||||
responseTransferSize.WithLabelValues(zone, net).Observe(float64(size))
|
||||
requestTransferSize.WithLabelValues(zone, net).Observe(float64(req.Size()))
|
||||
} else {
|
||||
responseSize.WithLabelValues(zone, net).Observe(float64(size))
|
||||
requestSize.WithLabelValues(zone, net).Observe(float64(req.Size()))
|
||||
}
|
||||
|
||||
responseRcode.WithLabelValues(zone, rcode).Inc()
|
||||
}
|
||||
|
||||
var monitorType = map[uint16]bool{
|
||||
dns.TypeAAAA: true,
|
||||
dns.TypeA: true,
|
||||
dns.TypeCNAME: true,
|
||||
dns.TypeDNSKEY: true,
|
||||
dns.TypeDS: true,
|
||||
dns.TypeMX: true,
|
||||
dns.TypeNSEC3: true,
|
||||
dns.TypeNSEC: true,
|
||||
dns.TypeNS: true,
|
||||
dns.TypePTR: true,
|
||||
dns.TypeRRSIG: true,
|
||||
dns.TypeSOA: true,
|
||||
dns.TypeSRV: true,
|
||||
dns.TypeTXT: true,
|
||||
// Meta Qtypes
|
||||
dns.TypeIXFR: true,
|
||||
dns.TypeAXFR: true,
|
||||
dns.TypeANY: true,
|
||||
}
|
||||
|
||||
const other = "other"
|
||||
// Name returns the name of this middleware, "prometheus".
func (m *Metrics) Name() string { return "prometheus" }
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
// Package metrics implement a handler and middleware that provides Prometheus
|
||||
// metrics.
|
||||
// Package metrics implements a handler and middleware that provides Prometheus metrics.
|
||||
package metrics
|
||||
|
||||
import (
|
||||
@@ -9,37 +8,51 @@ import (
|
||||
"sync"
|
||||
|
||||
"github.com/miekg/coredns/middleware"
|
||||
"github.com/miekg/coredns/middleware/metrics/vars"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
var (
|
||||
requestCount *prometheus.CounterVec
|
||||
requestDuration *prometheus.HistogramVec
|
||||
requestSize *prometheus.HistogramVec
|
||||
requestTransferSize *prometheus.HistogramVec
|
||||
requestDo *prometheus.CounterVec
|
||||
requestType *prometheus.CounterVec
|
||||
|
||||
responseSize *prometheus.HistogramVec
|
||||
responseTransferSize *prometheus.HistogramVec
|
||||
responseRcode *prometheus.CounterVec
|
||||
)
|
||||
|
||||
// Metrics holds the prometheus configuration. The metrics' path is fixed to be /metrics
|
||||
type Metrics struct {
|
||||
Next middleware.Handler
|
||||
Addr string
|
||||
ln net.Listener
|
||||
mux *http.ServeMux
|
||||
Once sync.Once
|
||||
ZoneNames []string
|
||||
Next middleware.Handler
|
||||
Addr string
|
||||
ln net.Listener
|
||||
mux *http.ServeMux
|
||||
Once sync.Once
|
||||
|
||||
zoneNames []string
|
||||
zoneMap map[string]bool
|
||||
zoneMu sync.RWMutex
|
||||
}
|
||||
|
||||
// AddZone adds zone z to m.
|
||||
func (m *Metrics) AddZone(z string) {
|
||||
m.zoneMu.Lock()
|
||||
m.zoneMap[z] = true
|
||||
m.zoneNames = keys(m.zoneMap)
|
||||
m.zoneMu.Unlock()
|
||||
}
|
||||
|
||||
// RemoveZone remove zone z from m.
|
||||
func (m *Metrics) RemoveZone(z string) {
|
||||
m.zoneMu.Lock()
|
||||
delete(m.zoneMap, z)
|
||||
m.zoneNames = keys(m.zoneMap)
|
||||
m.zoneMu.Unlock()
|
||||
}
|
||||
|
||||
// ZoneNames returns the zones of m.
|
||||
func (m *Metrics) ZoneNames() []string {
|
||||
m.zoneMu.RLock()
|
||||
s := m.zoneNames
|
||||
m.zoneMu.RUnlock()
|
||||
return s
|
||||
}
|
||||
|
||||
// OnStartup sets up the metrics on startup.
|
||||
func (m *Metrics) OnStartup() error {
|
||||
m.Once.Do(func() {
|
||||
define()
|
||||
|
||||
ln, err := net.Listen("tcp", m.Addr)
|
||||
if err != nil {
|
||||
@@ -51,18 +64,16 @@ func (m *Metrics) OnStartup() error {
|
||||
|
||||
m.mux = http.NewServeMux()
|
||||
|
||||
prometheus.MustRegister(requestCount)
|
||||
prometheus.MustRegister(requestDuration)
|
||||
prometheus.MustRegister(requestSize)
|
||||
prometheus.MustRegister(requestTransferSize)
|
||||
prometheus.MustRegister(requestDo)
|
||||
prometheus.MustRegister(requestType)
|
||||
prometheus.MustRegister(vars.RequestCount)
|
||||
prometheus.MustRegister(vars.RequestDuration)
|
||||
prometheus.MustRegister(vars.RequestSize)
|
||||
prometheus.MustRegister(vars.RequestDo)
|
||||
prometheus.MustRegister(vars.RequestType)
|
||||
|
||||
prometheus.MustRegister(responseSize)
|
||||
prometheus.MustRegister(responseTransferSize)
|
||||
prometheus.MustRegister(responseRcode)
|
||||
prometheus.MustRegister(vars.ResponseSize)
|
||||
prometheus.MustRegister(vars.ResponseRcode)
|
||||
|
||||
m.mux.Handle(path, prometheus.Handler())
|
||||
m.mux.Handle("/metrics", prometheus.Handler())
|
||||
|
||||
go func() {
|
||||
http.Serve(m.ln, m.mux)
|
||||
@@ -79,79 +90,10 @@ func (m *Metrics) OnShutdown() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func define() {
|
||||
requestCount = prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Namespace: middleware.Namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "request_count_total",
|
||||
Help: "Counter of DNS requests made per zone, protocol and family.",
|
||||
}, []string{"zone", "proto", "family"})
|
||||
|
||||
requestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
||||
Namespace: middleware.Namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "request_duration_milliseconds",
|
||||
Buckets: append(prometheus.DefBuckets, []float64{50, 100, 200, 500, 1000, 2000, 3000, 4000, 5000}...),
|
||||
Help: "Histogram of the time (in milliseconds) each request took.",
|
||||
}, []string{"zone"})
|
||||
|
||||
requestSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
||||
Namespace: middleware.Namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "request_size_bytes",
|
||||
Help: "Size of the EDNS0 UDP buffer in bytes (64K for TCP).",
|
||||
Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3},
|
||||
}, []string{"zone", "proto"})
|
||||
|
||||
requestTransferSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
||||
Namespace: middleware.Namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "request_transfer_size_bytes",
|
||||
Help: "Size of the incoming zone transfer in bytes.",
|
||||
Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3},
|
||||
}, []string{"zone", "proto"})
|
||||
|
||||
requestDo = prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Namespace: middleware.Namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "request_do_count_total",
|
||||
Help: "Counter of DNS requests with DO bit set per zone.",
|
||||
}, []string{"zone"})
|
||||
|
||||
requestType = prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Namespace: middleware.Namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "request_type_count_total",
|
||||
Help: "Counter of DNS requests per type, per zone.",
|
||||
}, []string{"zone", "type"})
|
||||
|
||||
responseSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
||||
Namespace: middleware.Namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "response_size_bytes",
|
||||
Help: "Size of the returned response in bytes.",
|
||||
Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3},
|
||||
}, []string{"zone", "proto"})
|
||||
|
||||
responseTransferSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
||||
Namespace: middleware.Namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "response_transfer_size_bytes",
|
||||
Help: "Size of the returned zone transfer in bytes.",
|
||||
Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3},
|
||||
}, []string{"zone", "proto"})
|
||||
|
||||
responseRcode = prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Namespace: middleware.Namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "response_rcode_count_total",
|
||||
Help: "Counter of response status codes.",
|
||||
}, []string{"zone", "rcode"})
|
||||
// keys returns the keys of m as a slice. The order is unspecified because Go
// map iteration order is not defined. The result is never nil.
func keys(m map[string]bool) []string {
	names := make([]string, 0, len(m))
	for name := range m {
		names = append(names, name)
	}
	return names
}
|
||||
|
||||
const (
|
||||
// Dropped indicates we dropped the query before any handling. It has no closing dot, so it can not be a valid zone.
|
||||
Dropped = "dropped"
|
||||
subsystem = "dns"
|
||||
path = "/metrics"
|
||||
)
|
||||
|
||||
83
middleware/metrics/metrics_test.go
Normal file
83
middleware/metrics/metrics_test.go
Normal file
@@ -0,0 +1,83 @@
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/miekg/coredns/middleware"
|
||||
mtest "github.com/miekg/coredns/middleware/metrics/test"
|
||||
"github.com/miekg/coredns/middleware/pkg/dnsrecorder"
|
||||
"github.com/miekg/coredns/middleware/test"
|
||||
|
||||
"github.com/miekg/dns"
|
||||
"golang.org/x/net/context"
|
||||
)
|
||||
|
||||
func TestMetrics(t *testing.T) {
|
||||
met := &Metrics{Addr: Addr, zoneMap: make(map[string]bool)}
|
||||
if err := met.OnStartup(); err != nil {
|
||||
t.Fatalf("Failed to start metrics handler: %s", err)
|
||||
}
|
||||
defer met.OnShutdown()
|
||||
|
||||
met.AddZone("example.org.")
|
||||
|
||||
tests := []struct {
|
||||
next middleware.Handler
|
||||
qname string
|
||||
qtype uint16
|
||||
metric string
|
||||
expectedValue string
|
||||
}{
|
||||
// This all works because 1 bucket (1 zone, 1 type)
|
||||
{
|
||||
next: test.NextHandler(dns.RcodeSuccess, nil),
|
||||
qname: "example.org",
|
||||
metric: "coredns_dns_request_count_total",
|
||||
expectedValue: "1",
|
||||
},
|
||||
{
|
||||
next: test.NextHandler(dns.RcodeSuccess, nil),
|
||||
qname: "example.org",
|
||||
metric: "coredns_dns_request_count_total",
|
||||
expectedValue: "2",
|
||||
},
|
||||
{
|
||||
next: test.NextHandler(dns.RcodeSuccess, nil),
|
||||
qname: "example.org",
|
||||
metric: "coredns_dns_request_type_count_total",
|
||||
expectedValue: "3",
|
||||
},
|
||||
{
|
||||
next: test.NextHandler(dns.RcodeSuccess, nil),
|
||||
qname: "example.org",
|
||||
metric: "coredns_dns_response_rcode_count_total",
|
||||
expectedValue: "4",
|
||||
},
|
||||
}
|
||||
|
||||
ctx := context.TODO()
|
||||
|
||||
for i, tc := range tests {
|
||||
req := new(dns.Msg)
|
||||
if tc.qtype == 0 {
|
||||
tc.qtype = dns.TypeA
|
||||
}
|
||||
req.SetQuestion(dns.Fqdn(tc.qname), tc.qtype)
|
||||
met.Next = tc.next
|
||||
|
||||
rec := dnsrecorder.New(&test.ResponseWriter{})
|
||||
_, err := met.ServeDNS(ctx, rec, req)
|
||||
if err != nil {
|
||||
t.Fatalf("Test %d: Expected no error, but got %s", i, err)
|
||||
}
|
||||
|
||||
result := mtest.Scrape(t, "http://"+Addr+"/metrics")
|
||||
|
||||
if tc.expectedValue != "" {
|
||||
got, _ := mtest.MetricValue(tc.metric, result)
|
||||
if got != tc.expectedValue {
|
||||
t.Errorf("Test %d: Expected value %s for metrics %s, but got %s", i, tc.expectedValue, tc.metric, got)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -38,18 +38,17 @@ func setup(c *caddy.Controller) error {
|
||||
|
||||
func prometheusParse(c *caddy.Controller) (*Metrics, error) {
|
||||
var (
|
||||
met = &Metrics{Addr: addr}
|
||||
met = &Metrics{Addr: Addr, zoneMap: make(map[string]bool)}
|
||||
err error
|
||||
)
|
||||
|
||||
for c.Next() {
|
||||
if len(met.ZoneNames) > 0 {
|
||||
return met, c.Err("metrics: can only have one metrics module per server")
|
||||
if len(met.ZoneNames()) > 0 {
|
||||
return met, c.Err("can only have one metrics module per server")
|
||||
}
|
||||
met.ZoneNames = make([]string, len(c.ServerBlockKeys))
|
||||
copy(met.ZoneNames, c.ServerBlockKeys)
|
||||
for i := range met.ZoneNames {
|
||||
met.ZoneNames[i] = middleware.Host(met.ZoneNames[i]).Normalize()
|
||||
|
||||
for _, z := range c.ServerBlockKeys {
|
||||
met.AddZone(middleware.Host(z).Normalize())
|
||||
}
|
||||
args := c.RemainingArgs()
|
||||
|
||||
@@ -78,7 +77,7 @@ func prometheusParse(c *caddy.Controller) (*Metrics, error) {
|
||||
return met, e
|
||||
}
|
||||
default:
|
||||
return met, c.Errf("metrics: unknown item: %s", c.Val())
|
||||
return met, c.Errf("unknown item: %s", c.Val())
|
||||
}
|
||||
|
||||
}
|
||||
@@ -88,4 +87,4 @@ func prometheusParse(c *caddy.Controller) (*Metrics, error) {
|
||||
|
||||
var metricsOnce sync.Once
|
||||
|
||||
const addr = "localhost:9153"
|
||||
const Addr = "localhost:9153"
|
||||
|
||||
225
middleware/metrics/test/scrape.go
Normal file
225
middleware/metrics/test/scrape.go
Normal file
@@ -0,0 +1,225 @@
|
||||
// Adapted by Miek Gieben for CoreDNS testing.
|
||||
//
|
||||
// License from prom2json
|
||||
// Copyright 2014 Prometheus Team
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Package test will scrape a target and you can inspect the variables.
|
||||
// Basic usage:
|
||||
//
|
||||
//	result := Scrape(t, "http://localhost:9153/metrics")
|
||||
//	v, labels := MetricValue("coredns_cache_capacity_gauge", result)
|
||||
//
|
||||
package test
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"mime"
|
||||
"net/http"
|
||||
"testing"
|
||||
|
||||
"github.com/matttproud/golang_protobuf_extensions/pbutil"
|
||||
"github.com/prometheus/common/expfmt"
|
||||
|
||||
dto "github.com/prometheus/client_model/go"
|
||||
)
|
||||
|
||||
// MetricFamily holds a prometheus metric.
type MetricFamily struct {
	Name string `json:"name"`
	Help string `json:"help"`
	Type string `json:"type"`
	// Metrics holds one entry per series: a metric, a summary or a histogram.
	Metrics []interface{} `json:"metrics,omitempty"`
}

// metric is for all "single value" metrics.
type metric struct {
	Labels map[string]string `json:"labels,omitempty"`
	Value  string            `json:"value"`
}

// summary is one summary series: its labels, its quantile values and the
// sample count and sum, all rendered as strings.
type summary struct {
	Labels    map[string]string `json:"labels,omitempty"`
	Quantiles map[string]string `json:"quantiles,omitempty"`
	Count     string            `json:"count"`
	Sum       string            `json:"sum"`
}

// histogram is one histogram series: its labels, its bucket counts and the
// sample count and sum, all rendered as strings.
type histogram struct {
	Labels  map[string]string `json:"labels,omitempty"`
	Buckets map[string]string `json:"buckets,omitempty"`
	Count   string            `json:"count"`
	Sum     string            `json:"sum"`
}
|
||||
|
||||
// Scrape returns the all the vars a []*metricFamily.
|
||||
func Scrape(t *testing.T, url string) []*MetricFamily {
|
||||
mfChan := make(chan *dto.MetricFamily, 1024)
|
||||
|
||||
go fetchMetricFamilies(t, url, mfChan)
|
||||
|
||||
result := []*MetricFamily{}
|
||||
for mf := range mfChan {
|
||||
result = append(result, newMetricFamily(mf))
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// MetricValue returns the value associated with name as a string as well as the labels.
|
||||
// It only returns the first metrics of the slice.
|
||||
func MetricValue(name string, mfs []*MetricFamily) (string, map[string]string) {
|
||||
for _, mf := range mfs {
|
||||
if mf.Name == name {
|
||||
// Only works with Gauge and Counter...
|
||||
return mf.Metrics[0].(metric).Value, mf.Metrics[0].(metric).Labels
|
||||
}
|
||||
}
|
||||
return "", nil
|
||||
}
|
||||
|
||||
// MetricValueLabel returns the value for name *and* label *value*.
|
||||
func MetricValueLabel(name, label string, mfs []*MetricFamily) (string, map[string]string) {
|
||||
// bit hacky is this really handy...?
|
||||
for _, mf := range mfs {
|
||||
if mf.Name == name {
|
||||
for _, m := range mf.Metrics {
|
||||
for _, v := range m.(metric).Labels {
|
||||
if v == label {
|
||||
return m.(metric).Value, m.(metric).Labels
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
return "", nil
|
||||
}
|
||||
|
||||
func newMetricFamily(dtoMF *dto.MetricFamily) *MetricFamily {
|
||||
mf := &MetricFamily{
|
||||
Name: dtoMF.GetName(),
|
||||
Help: dtoMF.GetHelp(),
|
||||
Type: dtoMF.GetType().String(),
|
||||
Metrics: make([]interface{}, len(dtoMF.Metric)),
|
||||
}
|
||||
for i, m := range dtoMF.Metric {
|
||||
if dtoMF.GetType() == dto.MetricType_SUMMARY {
|
||||
mf.Metrics[i] = summary{
|
||||
Labels: makeLabels(m),
|
||||
Quantiles: makeQuantiles(m),
|
||||
Count: fmt.Sprint(m.GetSummary().GetSampleCount()),
|
||||
Sum: fmt.Sprint(m.GetSummary().GetSampleSum()),
|
||||
}
|
||||
} else if dtoMF.GetType() == dto.MetricType_HISTOGRAM {
|
||||
mf.Metrics[i] = histogram{
|
||||
Labels: makeLabels(m),
|
||||
Buckets: makeBuckets(m),
|
||||
Count: fmt.Sprint(m.GetHistogram().GetSampleCount()),
|
||||
Sum: fmt.Sprint(m.GetSummary().GetSampleSum()),
|
||||
}
|
||||
} else {
|
||||
mf.Metrics[i] = metric{
|
||||
Labels: makeLabels(m),
|
||||
Value: fmt.Sprint(value(m)),
|
||||
}
|
||||
}
|
||||
}
|
||||
return mf
|
||||
}
|
||||
|
||||
func value(m *dto.Metric) float64 {
|
||||
if m.Gauge != nil {
|
||||
return m.GetGauge().GetValue()
|
||||
}
|
||||
if m.Counter != nil {
|
||||
return m.GetCounter().GetValue()
|
||||
}
|
||||
if m.Untyped != nil {
|
||||
return m.GetUntyped().GetValue()
|
||||
}
|
||||
return 0.
|
||||
}
|
||||
|
||||
func makeLabels(m *dto.Metric) map[string]string {
|
||||
result := map[string]string{}
|
||||
for _, lp := range m.Label {
|
||||
result[lp.GetName()] = lp.GetValue()
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func makeQuantiles(m *dto.Metric) map[string]string {
|
||||
result := map[string]string{}
|
||||
for _, q := range m.GetSummary().Quantile {
|
||||
result[fmt.Sprint(q.GetQuantile())] = fmt.Sprint(q.GetValue())
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func makeBuckets(m *dto.Metric) map[string]string {
|
||||
result := map[string]string{}
|
||||
for _, b := range m.GetHistogram().Bucket {
|
||||
result[fmt.Sprint(b.GetUpperBound())] = fmt.Sprint(b.GetCumulativeCount())
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func fetchMetricFamilies(t *testing.T, url string, ch chan<- *dto.MetricFamily) {
|
||||
defer close(ch)
|
||||
req, err := http.NewRequest("GET", url, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("creating GET request for URL %q failed: %s", url, err)
|
||||
}
|
||||
req.Header.Add("Accept", acceptHeader)
|
||||
resp, err := http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
t.Fatalf("executing GET request for URL %q failed: %s", url, err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
t.Fatalf("GET request for URL %q returned HTTP status %s", url, resp.Status)
|
||||
}
|
||||
|
||||
mediatype, params, err := mime.ParseMediaType(resp.Header.Get("Content-Type"))
|
||||
if err == nil && mediatype == "application/vnd.google.protobuf" &&
|
||||
params["encoding"] == "delimited" &&
|
||||
params["proto"] == "io.prometheus.client.MetricFamily" {
|
||||
for {
|
||||
mf := &dto.MetricFamily{}
|
||||
if _, err = pbutil.ReadDelimited(resp.Body, mf); err != nil {
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
t.Fatalf("reading metric family protocol buffer failed: %s", err)
|
||||
}
|
||||
ch <- mf
|
||||
}
|
||||
} else {
|
||||
// We could do further content-type checks here, but the
|
||||
// fallback for now will anyway be the text format
|
||||
// version 0.0.4, so just go for it and see if it works.
|
||||
var parser expfmt.TextParser
|
||||
metricFamilies, err := parser.TextToMetricFamilies(resp.Body)
|
||||
if err != nil {
|
||||
t.Fatal("reading text format failed:", err)
|
||||
}
|
||||
for _, mf := range metricFamilies {
|
||||
ch <- mf
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// acceptHeader is the value of the Accept header sent with every scrape. It
// prefers the delimited protobuf MetricFamily encoding (q=0.7) over the text
// format version 0.0.4 (q=0.3).
const acceptHeader = `application/vnd.google.protobuf;proto=io.prometheus.client.MetricFamily;encoding=delimited;q=0.7,text/plain;version=0.0.4;q=0.3`
|
||||
62
middleware/metrics/vars/report.go
Normal file
62
middleware/metrics/vars/report.go
Normal file
@@ -0,0 +1,62 @@
|
||||
package vars
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/miekg/coredns/request"
|
||||
|
||||
"github.com/miekg/dns"
|
||||
)
|
||||
|
||||
// Report reports the metrics data associcated with request.
|
||||
func Report(req request.Request, zone, rcode string, size int, start time.Time) {
|
||||
// Proto and Family
|
||||
net := req.Proto()
|
||||
fam := "1"
|
||||
if req.Family() == 2 {
|
||||
fam = "2"
|
||||
}
|
||||
|
||||
typ := req.QType()
|
||||
|
||||
RequestCount.WithLabelValues(zone, net, fam).Inc()
|
||||
RequestDuration.WithLabelValues(zone).Observe(float64(time.Since(start) / time.Millisecond))
|
||||
|
||||
if req.Do() {
|
||||
RequestDo.WithLabelValues(zone).Inc()
|
||||
}
|
||||
|
||||
if _, known := monitorType[typ]; known {
|
||||
RequestType.WithLabelValues(zone, dns.Type(typ).String()).Inc()
|
||||
} else {
|
||||
RequestType.WithLabelValues(zone, other).Inc()
|
||||
}
|
||||
|
||||
ResponseSize.WithLabelValues(zone, net).Observe(float64(size))
|
||||
RequestSize.WithLabelValues(zone, net).Observe(float64(req.Size()))
|
||||
|
||||
ResponseRcode.WithLabelValues(zone, rcode).Inc()
|
||||
}
|
||||
|
||||
var monitorType = map[uint16]bool{
|
||||
dns.TypeAAAA: true,
|
||||
dns.TypeA: true,
|
||||
dns.TypeCNAME: true,
|
||||
dns.TypeDNSKEY: true,
|
||||
dns.TypeDS: true,
|
||||
dns.TypeMX: true,
|
||||
dns.TypeNSEC3: true,
|
||||
dns.TypeNSEC: true,
|
||||
dns.TypeNS: true,
|
||||
dns.TypePTR: true,
|
||||
dns.TypeRRSIG: true,
|
||||
dns.TypeSOA: true,
|
||||
dns.TypeSRV: true,
|
||||
dns.TypeTXT: true,
|
||||
// Meta Qtypes
|
||||
dns.TypeIXFR: true,
|
||||
dns.TypeAXFR: true,
|
||||
dns.TypeANY: true,
|
||||
}
|
||||
|
||||
// other is the "type" label value Report uses for query types that are not in monitorType.
const other = "other"
|
||||
68
middleware/metrics/vars/vars.go
Normal file
68
middleware/metrics/vars/vars.go
Normal file
@@ -0,0 +1,68 @@
|
||||
package vars
|
||||
|
||||
import (
|
||||
"github.com/miekg/coredns/middleware"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
var (
|
||||
RequestCount = prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Namespace: middleware.Namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "request_count_total",
|
||||
Help: "Counter of DNS requests made per zone, protocol and family.",
|
||||
}, []string{"zone", "proto", "family"})
|
||||
|
||||
RequestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
||||
Namespace: middleware.Namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "request_duration_milliseconds",
|
||||
Buckets: append(prometheus.DefBuckets, []float64{50, 100, 200, 500, 1000, 2000, 3000, 4000, 5000, 10000}...),
|
||||
Help: "Histogram of the time (in milliseconds) each request took.",
|
||||
}, []string{"zone"})
|
||||
|
||||
RequestSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
||||
Namespace: middleware.Namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "request_size_bytes",
|
||||
Help: "Size of the EDNS0 UDP buffer in bytes (64K for TCP).",
|
||||
Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3},
|
||||
}, []string{"zone", "proto"})
|
||||
|
||||
RequestDo = prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Namespace: middleware.Namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "request_do_count_total",
|
||||
Help: "Counter of DNS requests with DO bit set per zone.",
|
||||
}, []string{"zone"})
|
||||
|
||||
RequestType = prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Namespace: middleware.Namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "request_type_count_total",
|
||||
Help: "Counter of DNS requests per type, per zone.",
|
||||
}, []string{"zone", "type"})
|
||||
|
||||
ResponseSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
||||
Namespace: middleware.Namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "response_size_bytes",
|
||||
Help: "Size of the returned response in bytes.",
|
||||
Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3},
|
||||
}, []string{"zone", "proto"})
|
||||
|
||||
ResponseRcode = prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Namespace: middleware.Namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "response_rcode_count_total",
|
||||
Help: "Counter of response status codes.",
|
||||
}, []string{"zone", "rcode"})
|
||||
)
|
||||
|
||||
// subsystem is the metrics subsystem name used by every metric in this package.
const subsystem = "dns"

// Dropped indicates we dropped the query before any handling. It has no
// closing dot, so it can not be a valid zone.
const Dropped = "dropped"
|
||||
@@ -45,6 +45,7 @@ type (
|
||||
// chain by returning them unchanged.
|
||||
Handler interface {
|
||||
ServeDNS(context.Context, dns.ResponseWriter, *dns.Msg) (int, error)
|
||||
Name() string
|
||||
}
|
||||
|
||||
// HandlerFunc is a convenience type like dns.HandlerFunc, except
|
||||
@@ -58,6 +59,8 @@ func (f HandlerFunc) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.
|
||||
return f(ctx, w, r)
|
||||
}
|
||||
|
||||
// Name returns the fixed name "handlerfunc"; it provides the Name method of the Handler interface for HandlerFunc.
func (f HandlerFunc) Name() string { return "handlerfunc" }
|
||||
|
||||
// Error returns err with 'middleware/name: ' prefixed to it.
|
||||
func Error(name string, err error) error {
	// The literal prefix yields the same text as formatting "middleware" through %s.
	return fmt.Errorf("middleware/%s: %s", name, err)
}
|
||||
|
||||
|
||||
@@ -51,7 +51,7 @@ func (n Name) Normalize() string { return strings.ToLower(dns.Fqdn(string(n))) }
|
||||
type (
|
||||
// Host represents a host from the Corefile, may contain port.
|
||||
Host string // Host represents a host from the Corefile, may contain port.
|
||||
// Addr resprents an address in the Corefile.
|
||||
// Addr represents an address in the Corefile.
|
||||
Addr string // Addr resprents an address in the Corefile.
|
||||
)
|
||||
|
||||
|
||||
@@ -102,5 +102,7 @@ func (p Proxy) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (
|
||||
return p.Next.ServeDNS(ctx, w, r)
|
||||
}
|
||||
|
||||
// Name returns the fixed name "proxy" that identifies this handler.
func (p Proxy) Name() string { return "proxy" }
|
||||
|
||||
// defaultTimeout is the default networking timeout for DNS requests.
|
||||
const defaultTimeout = 5 * time.Second
|
||||
|
||||
@@ -52,6 +52,8 @@ func (rw Rewrite) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg
|
||||
return rw.Next.ServeDNS(ctx, w, r)
|
||||
}
|
||||
|
||||
// Name returns the fixed name "rewrite" that identifies this handler.
func (rw Rewrite) Name() string { return "rewrite" }
|
||||
|
||||
// Rule describes an internal location rewrite rule.
|
||||
type Rule interface {
|
||||
// Rewrite rewrites the internal location of the current request.
|
||||
|
||||
@@ -283,6 +283,7 @@ type (
|
||||
// Handler interface defines a middleware.
|
||||
Handler interface {
|
||||
ServeDNS(context.Context, dns.ResponseWriter, *dns.Msg) (int, error)
|
||||
Name() string
|
||||
}
|
||||
)
|
||||
|
||||
@@ -290,3 +291,5 @@ type (
|
||||
func (f HandlerFunc) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (int, error) {
|
||||
return f(ctx, w, r)
|
||||
}
|
||||
|
||||
// Name returns the fixed name "handlerfunc"; it provides the Name method of the Handler interface for HandlerFunc.
func (f HandlerFunc) Name() string { return "handlerfunc" }
|
||||
|
||||
@@ -55,3 +55,5 @@ func (wh Whoami) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
|
||||
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
// Name returns the fixed name "whoami" that identifies this handler.
func (wh Whoami) Name() string { return "whoami" }
|
||||
|
||||
@@ -1,12 +1,31 @@
|
||||
package test
|
||||
|
||||
import "testing"
|
||||
import (
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"os"
|
||||
"path"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/miekg/coredns/middleware/cache"
|
||||
"github.com/miekg/coredns/middleware/metrics"
|
||||
mtest "github.com/miekg/coredns/middleware/metrics/test"
|
||||
"github.com/miekg/coredns/middleware/metrics/vars"
|
||||
|
||||
"github.com/miekg/dns"
|
||||
)
|
||||
|
||||
// Start test server that has metrics enabled. Then tear it down again.
|
||||
func TestMetricsServer(t *testing.T) {
|
||||
corefile := `.:0 {
|
||||
corefile := `example.org:0 {
|
||||
chaos CoreDNS-001 miek@miek.nl
|
||||
prometheus localhost:0
|
||||
prometheus
|
||||
}
|
||||
|
||||
example.com:0 {
|
||||
proxy . 8.8.4.4:53
|
||||
prometheus
|
||||
}
|
||||
`
|
||||
srv, err := CoreDNSServer(corefile)
|
||||
@@ -15,3 +34,140 @@ func TestMetricsServer(t *testing.T) {
|
||||
}
|
||||
defer srv.Stop()
|
||||
}
|
||||
|
||||
func TestMetricsRefused(t *testing.T) {
|
||||
metricName := "coredns_dns_response_rcode_count_total"
|
||||
|
||||
corefile := `example.org:0 {
|
||||
proxy . 8.8.8.8:53
|
||||
prometheus
|
||||
}
|
||||
`
|
||||
srv, err := CoreDNSServer(corefile)
|
||||
if err != nil {
|
||||
t.Fatalf("Could not get CoreDNS serving instance: %s", err)
|
||||
}
|
||||
defer srv.Stop()
|
||||
|
||||
udp, _ := CoreDNSServerPorts(srv, 0)
|
||||
|
||||
m := new(dns.Msg)
|
||||
m.SetQuestion("google.com.", dns.TypeA)
|
||||
|
||||
if _, err = dns.Exchange(m, udp); err != nil {
|
||||
t.Fatalf("Could not send message: %s", err)
|
||||
}
|
||||
|
||||
data := mtest.Scrape(t, "http://"+metrics.Addr+"/metrics")
|
||||
got, labels := mtest.MetricValue(metricName, data)
|
||||
|
||||
if got != "1" {
|
||||
t.Errorf("Expected value %s for refused, but got %s", "1", got)
|
||||
}
|
||||
if labels["zone"] != vars.Dropped {
|
||||
t.Errorf("Expected zone value %s for refused, but got %s", vars.Dropped, labels["zone"])
|
||||
}
|
||||
if labels["rcode"] != "REFUSED" {
|
||||
t.Errorf("Expected zone value %s for refused, but got %s", "REFUSED", labels["rcode"])
|
||||
}
|
||||
}
|
||||
|
||||
func TestMetricsCache(t *testing.T) {
|
||||
metricName := "coredns_cache_size_guage"
|
||||
|
||||
corefile := `example.net:0 {
|
||||
proxy . 8.8.8.8:53
|
||||
prometheus
|
||||
cache
|
||||
}
|
||||
`
|
||||
srv, err := CoreDNSServer(corefile)
|
||||
if err != nil {
|
||||
t.Fatalf("Could not get CoreDNS serving instance: %s", err)
|
||||
}
|
||||
defer srv.Stop()
|
||||
|
||||
udp, _ := CoreDNSServerPorts(srv, 0)
|
||||
|
||||
m := new(dns.Msg)
|
||||
m.SetQuestion("www.example.net.", dns.TypeA)
|
||||
|
||||
if _, err = dns.Exchange(m, udp); err != nil {
|
||||
t.Fatalf("Could not send message: %s", err)
|
||||
}
|
||||
|
||||
data := mtest.Scrape(t, "http://"+metrics.Addr+"/metrics")
|
||||
// Get the value for the metrics where the one of the labels values matches "success"
|
||||
got, _ := mtest.MetricValueLabel(metricName, cache.Success, data)
|
||||
|
||||
if got != "1" {
|
||||
t.Errorf("Expected value %s for %s, but got %s", "1", metricName, got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMetricsAuto(t *testing.T) {
|
||||
tmpdir, err := ioutil.TempDir(os.TempDir(), "coredns")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// TODO(miek): Random port as string and use that later?
|
||||
corefile := `org:0 {
|
||||
auto {
|
||||
directory ` + tmpdir + ` db\.(.*) {1} 1
|
||||
}
|
||||
prometheus
|
||||
}
|
||||
`
|
||||
|
||||
i, err := CoreDNSServer(corefile)
|
||||
if err != nil {
|
||||
t.Fatalf("Could not get CoreDNS serving instance: %s", err)
|
||||
}
|
||||
|
||||
udp, _ := CoreDNSServerPorts(i, 0)
|
||||
if udp == "" {
|
||||
t.Fatalf("Could not get UDP listening port")
|
||||
}
|
||||
defer i.Stop()
|
||||
|
||||
log.SetOutput(ioutil.Discard)
|
||||
|
||||
// Write db.example.org to get example.org.
|
||||
if err = ioutil.WriteFile(path.Join(tmpdir, "db.example.org"), []byte(zoneContent), 0644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
// TODO(miek): make the auto sleep even less.
|
||||
time.Sleep(1100 * time.Millisecond) // wait for it to be picked up
|
||||
|
||||
m := new(dns.Msg)
|
||||
m.SetQuestion("www.example.org.", dns.TypeA)
|
||||
|
||||
if _, err := dns.Exchange(m, udp); err != nil {
|
||||
t.Fatalf("Could not send message: %s", err)
|
||||
}
|
||||
|
||||
metricName := "coredns_dns_request_count_total" //{zone, proto, family}
|
||||
|
||||
data := mtest.Scrape(t, "http://"+metrics.Addr+"/metrics")
|
||||
// Get the value for the metrics where the one of the labels values matches "example.org."
|
||||
got, _ := mtest.MetricValueLabel(metricName, "example.org.", data)
|
||||
|
||||
if got != "1" {
|
||||
t.Errorf("Expected value %s for %s, but got %s", "1", metricName, got)
|
||||
}
|
||||
|
||||
// Remove db.example.org again. And see if the metric stops increasing.
|
||||
os.Remove(path.Join(tmpdir, "db.example.org"))
|
||||
time.Sleep(1100 * time.Millisecond) // wait for it to be picked up
|
||||
if _, err := dns.Exchange(m, udp); err != nil {
|
||||
t.Fatalf("Could not send message: %s", err)
|
||||
}
|
||||
|
||||
data = mtest.Scrape(t, "http://"+metrics.Addr+"/metrics")
|
||||
got, _ = mtest.MetricValueLabel(metricName, "example.org.", data)
|
||||
|
||||
if got != "1" {
|
||||
t.Errorf("Expected value %s for %s, but got %s", "1", metricName, got)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user