mirror of
https://github.com/coredns/coredns.git
synced 2025-10-27 08:14:18 -04:00
Cleanup metrics (#3776)
Clean up a variety of metric issues. * Eliminate the redundant "count_total" naming. * Use the plural of the unit when appropriate (e.g., "requests"). * Remove label names from metric names where appropriate (e.g., "rcode"). * Simplify request metrics by consolidating the type label into the base request counter. * Re-generate man pages. Signed-off-by: Ben Kochie <superq@gmail.com> Co-authored-by: Ben Kochie <superq@gmail.com>
This commit is contained in:
@@ -11,14 +11,13 @@ The default location for the metrics is `localhost:9153`. The metrics path is fi
|
||||
The following metrics are exported:
|
||||
|
||||
* `coredns_build_info{version, revision, goversion}` - info about CoreDNS itself.
|
||||
* `coredns_panic_count_total{}` - total number of panics.
|
||||
* `coredns_dns_request_count_total{server, zone, proto, family}` - total query count.
|
||||
* `coredns_panics_total{}` - total number of panics.
|
||||
* `coredns_dns_requests_total{server, zone, proto, family, type}` - total query count.
|
||||
* `coredns_dns_request_duration_seconds{server, zone, type}` - duration to process each query.
|
||||
* `coredns_dns_request_size_bytes{server, zone, proto}` - size of the request in bytes.
|
||||
* `coredns_dns_request_do_count_total{server, zone}` - queries that have the DO bit set
|
||||
* `coredns_dns_request_type_count_total{server, zone, type}` - counter of queries per zone and type.
|
||||
* `coredns_dns_do_requests_total{server, zone}` - queries that have the DO bit set.
|
||||
* `coredns_dns_response_size_bytes{server, zone, proto}` - response size in bytes.
|
||||
* `coredns_dns_response_rcode_count_total{server, zone, rcode}` - response per zone and rcode.
|
||||
* `coredns_dns_responses_total{server, zone, rcode}` - response per zone and rcode.
|
||||
* `coredns_plugin_enabled{server, zone, name}` - indicates whether a plugin is enabled on a per-server and per-zone basis.
|
||||
|
||||
Each counter has a label `zone` which is the zonename used for the request/response.
|
||||
@@ -33,7 +32,6 @@ Extra labels used are:
|
||||
* `type` which holds the query type. It holds most common types (A, AAAA, MX, SOA, CNAME, PTR, TXT,
|
||||
NS, SRV, DS, DNSKEY, RRSIG, NSEC, NSEC3, IXFR, AXFR and ANY) and "other" which lumps together all
|
||||
other types.
|
||||
* The `response_rcode_count_total` has an extra label `rcode` which holds the rcode of the response.
|
||||
|
||||
If monitoring is enabled, queries that do not enter the plugin chain are exported under the fake
|
||||
name "dropped" (without a closing dot - this is never a valid domain name).
|
||||
|
||||
@@ -51,7 +51,6 @@ func New(addr string) *Metrics {
|
||||
met.MustRegister(vars.RequestDuration)
|
||||
met.MustRegister(vars.RequestSize)
|
||||
met.MustRegister(vars.RequestDo)
|
||||
met.MustRegister(vars.RequestType)
|
||||
met.MustRegister(vars.ResponseSize)
|
||||
met.MustRegister(vars.ResponseRcode)
|
||||
met.MustRegister(vars.PluginEnabled)
|
||||
|
||||
@@ -31,25 +31,25 @@ func TestMetrics(t *testing.T) {
|
||||
{
|
||||
next: test.NextHandler(dns.RcodeSuccess, nil),
|
||||
qname: "example.org",
|
||||
metric: "coredns_dns_request_count_total",
|
||||
metric: "coredns_dns_requests_total",
|
||||
expectedValue: "1",
|
||||
},
|
||||
{
|
||||
next: test.NextHandler(dns.RcodeSuccess, nil),
|
||||
qname: "example.org",
|
||||
metric: "coredns_dns_request_count_total",
|
||||
metric: "coredns_dns_requests_total",
|
||||
expectedValue: "2",
|
||||
},
|
||||
{
|
||||
next: test.NextHandler(dns.RcodeSuccess, nil),
|
||||
qname: "example.org",
|
||||
metric: "coredns_dns_request_type_count_total",
|
||||
metric: "coredns_dns_requests_total",
|
||||
expectedValue: "3",
|
||||
},
|
||||
{
|
||||
next: test.NextHandler(dns.RcodeSuccess, nil),
|
||||
qname: "example.org",
|
||||
metric: "coredns_dns_response_rcode_count_total",
|
||||
metric: "coredns_dns_responses_total",
|
||||
expectedValue: "4",
|
||||
},
|
||||
}
|
||||
|
||||
@@ -20,17 +20,16 @@ func Report(server string, req request.Request, zone, rcode string, size int, st
|
||||
}
|
||||
|
||||
typ := req.QType()
|
||||
RequestCount.WithLabelValues(server, zone, net, fam).Inc()
|
||||
|
||||
if req.Do() {
|
||||
RequestDo.WithLabelValues(server, zone).Inc()
|
||||
}
|
||||
|
||||
if _, known := monitorType[typ]; known {
|
||||
RequestType.WithLabelValues(server, zone, dns.Type(typ).String()).Inc()
|
||||
RequestCount.WithLabelValues(server, zone, net, fam, dns.Type(typ).String()).Inc()
|
||||
RequestDuration.WithLabelValues(server, zone, dns.Type(typ).String()).Observe(time.Since(start).Seconds())
|
||||
} else {
|
||||
RequestType.WithLabelValues(server, zone, other).Inc()
|
||||
RequestCount.WithLabelValues(server, zone, net, fam, other).Inc()
|
||||
RequestDuration.WithLabelValues(server, zone, other).Observe(time.Since(start).Seconds())
|
||||
}
|
||||
|
||||
|
||||
@@ -11,9 +11,9 @@ var (
|
||||
RequestCount = prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Namespace: plugin.Namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "request_count_total",
|
||||
Name: "requests_total",
|
||||
Help: "Counter of DNS requests made per zone, protocol and family.",
|
||||
}, []string{"server", "zone", "proto", "family"})
|
||||
}, []string{"server", "zone", "proto", "family", "type"})
|
||||
|
||||
RequestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
||||
Namespace: plugin.Namespace,
|
||||
@@ -34,17 +34,10 @@ var (
|
||||
RequestDo = prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Namespace: plugin.Namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "request_do_count_total",
|
||||
Name: "do_requests_total",
|
||||
Help: "Counter of DNS requests with DO bit set per zone.",
|
||||
}, []string{"server", "zone"})
|
||||
|
||||
RequestType = prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Namespace: plugin.Namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "request_type_count_total",
|
||||
Help: "Counter of DNS requests per type, per zone.",
|
||||
}, []string{"server", "zone", "type"})
|
||||
|
||||
ResponseSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
||||
Namespace: plugin.Namespace,
|
||||
Subsystem: subsystem,
|
||||
@@ -56,13 +49,13 @@ var (
|
||||
ResponseRcode = prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Namespace: plugin.Namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "response_rcode_count_total",
|
||||
Name: "responses_total",
|
||||
Help: "Counter of response status codes.",
|
||||
}, []string{"server", "zone", "rcode"})
|
||||
|
||||
Panic = prometheus.NewCounter(prometheus.CounterOpts{
|
||||
Namespace: plugin.Namespace,
|
||||
Name: "panic_count_total",
|
||||
Name: "panics_total",
|
||||
Help: "A metrics that counts the number of panics.",
|
||||
})
|
||||
|
||||
|
||||
Reference in New Issue
Block a user