Cleanup metrics (#3776)

Cleanup a variety of metric issues.
* Eliminate department of redundancy "count_total" naming.
* Use the plural of the unit when appropriate. (ex, "requests")
* Remove label names from metric names where appropriate. (ex, "rcode")
* Simplify request metrics by consolidating type label in to the base
request counter.
* Re-generate man pages.

Signed-off-by: Ben Kochie <superq@gmail.com>

Co-authored-by: Ben Kochie <superq@gmail.com>
This commit is contained in:
Miek Gieben
2020-03-26 09:17:33 +01:00
committed by GitHub
parent eb23cce1a7
commit 19cfa2960c
16 changed files with 39 additions and 50 deletions

View File

@@ -11,14 +11,13 @@ The default location for the metrics is `localhost:9153`. The metrics path is fi
The following metrics are exported:
* `coredns_build_info{version, revision, goversion}` - info about CoreDNS itself.
* `coredns_panic_count_total{}` - total number of panics.
* `coredns_dns_request_count_total{server, zone, proto, family}` - total query count.
* `coredns_panics_total{}` - total number of panics.
* `coredns_dns_requests_total{server, zone, proto, family, type}` - total query count.
* `coredns_dns_request_duration_seconds{server, zone, type}` - duration to process each query.
* `coredns_dns_request_size_bytes{server, zone, proto}` - size of the request in bytes.
* `coredns_dns_request_do_count_total{server, zone}` - queries that have the DO bit set
* `coredns_dns_request_type_count_total{server, zone, type}` - counter of queries per zone and type.
* `coredns_dns_do_requests_total{server, zone}` - queries that have the DO bit set
* `coredns_dns_response_size_bytes{server, zone, proto}` - response size in bytes.
* `coredns_dns_response_rcode_count_total{server, zone, rcode}` - response per zone and rcode.
* `coredns_dns_responses_total{server, zone, rcode}` - response per zone and rcode.
* `coredns_plugin_enabled{server, zone, name}` - indicates whether a plugin is enabled on per server and zone basis.
Each counter has a label `zone` which is the zonename used for the request/response.
@@ -33,7 +32,6 @@ Extra labels used are:
* `type` which holds the query type. It holds most common types (A, AAAA, MX, SOA, CNAME, PTR, TXT,
NS, SRV, DS, DNSKEY, RRSIG, NSEC, NSEC3, IXFR, AXFR and ANY) and "other" which lumps together all
other types.
* The `response_rcode_count_total` has an extra label `rcode` which holds the rcode of the response.
If monitoring is enabled, queries that do not enter the plugin chain are exported under the fake
name "dropped" (without a closing dot - this is never a valid domain name).

View File

@@ -51,7 +51,6 @@ func New(addr string) *Metrics {
met.MustRegister(vars.RequestDuration)
met.MustRegister(vars.RequestSize)
met.MustRegister(vars.RequestDo)
met.MustRegister(vars.RequestType)
met.MustRegister(vars.ResponseSize)
met.MustRegister(vars.ResponseRcode)
met.MustRegister(vars.PluginEnabled)

View File

@@ -31,25 +31,25 @@ func TestMetrics(t *testing.T) {
{
next: test.NextHandler(dns.RcodeSuccess, nil),
qname: "example.org",
metric: "coredns_dns_request_count_total",
metric: "coredns_dns_requests_total",
expectedValue: "1",
},
{
next: test.NextHandler(dns.RcodeSuccess, nil),
qname: "example.org",
metric: "coredns_dns_request_count_total",
metric: "coredns_dns_requests_total",
expectedValue: "2",
},
{
next: test.NextHandler(dns.RcodeSuccess, nil),
qname: "example.org",
metric: "coredns_dns_request_type_count_total",
metric: "coredns_dns_requests_total",
expectedValue: "3",
},
{
next: test.NextHandler(dns.RcodeSuccess, nil),
qname: "example.org",
metric: "coredns_dns_response_rcode_count_total",
metric: "coredns_dns_responses_total",
expectedValue: "4",
},
}

View File

@@ -20,17 +20,16 @@ func Report(server string, req request.Request, zone, rcode string, size int, st
}
typ := req.QType()
RequestCount.WithLabelValues(server, zone, net, fam).Inc()
if req.Do() {
RequestDo.WithLabelValues(server, zone).Inc()
}
if _, known := monitorType[typ]; known {
RequestType.WithLabelValues(server, zone, dns.Type(typ).String()).Inc()
RequestCount.WithLabelValues(server, zone, net, fam, dns.Type(typ).String()).Inc()
RequestDuration.WithLabelValues(server, zone, dns.Type(typ).String()).Observe(time.Since(start).Seconds())
} else {
RequestType.WithLabelValues(server, zone, other).Inc()
RequestCount.WithLabelValues(server, zone, net, fam, other).Inc()
RequestDuration.WithLabelValues(server, zone, other).Observe(time.Since(start).Seconds())
}

View File

@@ -11,9 +11,9 @@ var (
RequestCount = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: plugin.Namespace,
Subsystem: subsystem,
Name: "request_count_total",
Name: "requests_total",
Help: "Counter of DNS requests made per zone, protocol and family.",
}, []string{"server", "zone", "proto", "family"})
}, []string{"server", "zone", "proto", "family", "type"})
RequestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: plugin.Namespace,
@@ -34,17 +34,10 @@ var (
RequestDo = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: plugin.Namespace,
Subsystem: subsystem,
Name: "request_do_count_total",
Name: "do_requests_total",
Help: "Counter of DNS requests with DO bit set per zone.",
}, []string{"server", "zone"})
RequestType = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: plugin.Namespace,
Subsystem: subsystem,
Name: "request_type_count_total",
Help: "Counter of DNS requests per type, per zone.",
}, []string{"server", "zone", "type"})
ResponseSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: plugin.Namespace,
Subsystem: subsystem,
@@ -56,13 +49,13 @@ var (
ResponseRcode = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: plugin.Namespace,
Subsystem: subsystem,
Name: "response_rcode_count_total",
Name: "responses_total",
Help: "Counter of response status codes.",
}, []string{"server", "zone", "rcode"})
Panic = prometheus.NewCounter(prometheus.CounterOpts{
Namespace: plugin.Namespace,
Name: "panic_count_total",
Name: "panics_total",
Help: "A metrics that counts the number of panics.",
})