mirror of
https://github.com/coredns/coredns.git
synced 2025-10-27 08:14:18 -04:00
Cleanup metrics (#3776)
Cleanup a variety of metric issues. * Eliminate department of redundancy "count_total" naming. * Use the plural of the unit when appropriate. (ex, "requests") * Remove label names from metric names where appropriate. (ex, "rcode") * Simplify request metrics by consolidating type label in to the base request counter. * Re-generate man pages. Signed-off-by: Ben Kochie <superq@gmail.com> Co-authored-by: Ben Kochie <superq@gmail.com>
This commit is contained in:
@@ -108,12 +108,12 @@ On each endpoint, the timeouts of the communication are set by default and autom
|
||||
If monitoring is enabled (via the *prometheus* plugin) then the following metrics are exported:
|
||||
|
||||
* `coredns_forward_request_duration_seconds{to}` - duration per upstream interaction.
|
||||
* `coredns_forward_request_count_total{to}` - query count per upstream.
|
||||
* `coredns_forward_response_rcode_count_total{to, rcode}` - count of RCODEs per upstream.
|
||||
* `coredns_forward_healthcheck_failure_count_total{to}` - number of failed health checks per upstream.
|
||||
* `coredns_forward_healthcheck_broken_count_total{}` - counter of when all upstreams are unhealthy,
|
||||
* `coredns_forward_requests_total{to}` - query count per upstream.
|
||||
* `coredns_forward_responses_total{to, rcode}` - count of RCODEs per upstream.
|
||||
* `coredns_forward_healthcheck_failures_total{to}` - number of failed health checks per upstream.
|
||||
* `coredns_forward_healthcheck_broken_total{}` - counter of when all upstreams are unhealthy,
|
||||
and we are randomly (this always uses the `random` policy) spraying to an upstream.
|
||||
* `max_concurrent_reject_count_total{}` - counter of the number of queries rejected because the
|
||||
* `max_concurrent_rejects_total{}` - counter of the number of queries rejected because the
|
||||
number of concurrent queries was at the maximum.
|
||||
Where `to` is one of the upstream servers (**TO** from the config), `rcode` is the returned RCODE
|
||||
from the upstream.
|
||||
|
||||
@@ -11,13 +11,13 @@ var (
|
||||
RequestCount = prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Namespace: plugin.Namespace,
|
||||
Subsystem: "forward",
|
||||
Name: "request_count_total",
|
||||
Name: "requests_total",
|
||||
Help: "Counter of requests made per upstream.",
|
||||
}, []string{"to"})
|
||||
RcodeCount = prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Namespace: plugin.Namespace,
|
||||
Subsystem: "forward",
|
||||
Name: "response_rcode_count_total",
|
||||
Name: "responses_total",
|
||||
Help: "Counter of requests made per upstream.",
|
||||
}, []string{"rcode", "to"})
|
||||
RequestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
||||
@@ -30,13 +30,13 @@ var (
|
||||
HealthcheckFailureCount = prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Namespace: plugin.Namespace,
|
||||
Subsystem: "forward",
|
||||
Name: "healthcheck_failure_count_total",
|
||||
Name: "healthcheck_failures_total",
|
||||
Help: "Counter of the number of failed healthchecks.",
|
||||
}, []string{"to"})
|
||||
HealthcheckBrokenCount = prometheus.NewCounter(prometheus.CounterOpts{
|
||||
Namespace: plugin.Namespace,
|
||||
Subsystem: "forward",
|
||||
Name: "healthcheck_broken_count_total",
|
||||
Name: "healthcheck_broken_total",
|
||||
Help: "Counter of the number of complete failures of the healthchecks.",
|
||||
})
|
||||
SocketGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
@@ -48,7 +48,7 @@ var (
|
||||
MaxConcurrentRejectCount = prometheus.NewCounter(prometheus.CounterOpts{
|
||||
Namespace: plugin.Namespace,
|
||||
Subsystem: "forward",
|
||||
Name: "max_concurrent_reject_count_total",
|
||||
Name: "max_concurrent_rejects_total",
|
||||
Help: "Counter of the number of queries rejected because the concurrent queries were at maximum.",
|
||||
})
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user