Cleanup metrics (#3776)

Cleanup a variety of metric issues. * Eliminate department of redundancy "count_total" naming. * Use the plural of the unit when appropriate. (ex, "requests") * Remove label names from metric names where appropriate. (ex, "rcode") * Simplify request metrics by consolidating type label into the base request counter. * Re-generate man pages. Signed-off-by: Ben Kochie <superq@gmail.com> Co-authored-by: Ben Kochie <superq@gmail.com>
2026-06-23 01:10:18 -04:00 · 2020-03-26 09:17:33 +01:00
parent eb23cce1a7
commit 19cfa2960c
16 changed files with 39 additions and 50 deletions
--- a/plugin/forward/README.md
+++ b/plugin/forward/README.md
@@ -108,12 +108,12 @@ On each endpoint, the timeouts of the communication are set by default and autom
 If monitoring is enabled (via the *prometheus* plugin) then the following metrics are exported:

 * `coredns_forward_request_duration_seconds{to}` - duration per upstream interaction.
-* `coredns_forward_request_count_total{to}` - query count per upstream.
-* `coredns_forward_response_rcode_count_total{to, rcode}` - count of RCODEs per upstream.
-* `coredns_forward_healthcheck_failure_count_total{to}` - number of failed health checks per upstream.
-* `coredns_forward_healthcheck_broken_count_total{}` - counter of when all upstreams are unhealthy,
+* `coredns_forward_requests_total{to}` - query count per upstream.
+* `coredns_forward_responses_total{to, rcode}` - count of RCODEs per upstream.
+* `coredns_forward_healthcheck_failures_total{to}` - number of failed health checks per upstream.
+* `coredns_forward_healthcheck_broken_total{}` - counter of when all upstreams are unhealthy,
  and we are randomly (this always uses the `random` policy) spraying to an upstream.
-* `max_concurrent_reject_count_total{}` - counter of the number of queries rejected because the
+* `coredns_forward_max_concurrent_rejects_total{}` - counter of the number of queries rejected because the
  number of concurrent queries was at maximum.
 Where `to` is one of the upstream servers (**TO** from the config), `rcode` is the returned RCODE
 from the upstream.
--- a/plugin/forward/metrics.go
+++ b/plugin/forward/metrics.go
@@ -11,13 +11,13 @@ var (
 	RequestCount = prometheus.NewCounterVec(prometheus.CounterOpts{
 		Namespace: plugin.Namespace,
 		Subsystem: "forward",
-		Name:      "request_count_total",
+		Name:      "requests_total",
 		Help:      "Counter of requests made per upstream.",
 	}, []string{"to"})
 	RcodeCount = prometheus.NewCounterVec(prometheus.CounterOpts{
 		Namespace: plugin.Namespace,
 		Subsystem: "forward",
-		Name:      "response_rcode_count_total",
+		Name:      "responses_total",
 		Help:      "Counter of requests made per upstream.",
 	}, []string{"rcode", "to"})
 	RequestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
@@ -30,13 +30,13 @@ var (
 	HealthcheckFailureCount = prometheus.NewCounterVec(prometheus.CounterOpts{
 		Namespace: plugin.Namespace,
 		Subsystem: "forward",
-		Name:      "healthcheck_failure_count_total",
+		Name:      "healthcheck_failures_total",
 		Help:      "Counter of the number of failed healthchecks.",
 	}, []string{"to"})
 	HealthcheckBrokenCount = prometheus.NewCounter(prometheus.CounterOpts{
 		Namespace: plugin.Namespace,
 		Subsystem: "forward",
-		Name:      "healthcheck_broken_count_total",
+		Name:      "healthcheck_broken_total",
 		Help:      "Counter of the number of complete failures of the healthchecks.",
 	})
 	SocketGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{
@@ -48,7 +48,7 @@ var (
 	MaxConcurrentRejectCount = prometheus.NewCounter(prometheus.CounterOpts{
 		Namespace: plugin.Namespace,
 		Subsystem: "forward",
-		Name:      "max_concurrent_reject_count_total",
+		Name:      "max_concurrent_rejects_total",
 		Help:      "Counter of the number of queries rejected because the concurrent queries were at maximum.",
 	})
 )