Fix forward metrics for backwards compatibility (#6178)

This commit is contained in:
Pat Downey
2023-07-04 15:35:55 +01:00
committed by GitHub
parent 6e1263d3d9
commit ea293da1d6
14 changed files with 87 additions and 78 deletions

View File

@@ -115,20 +115,28 @@ plugin is also enabled:
If monitoring is enabled (via the *prometheus* plugin) then the following metrics are exported:
* `coredns_forward_requests_total{to}` - query count per upstream.
* `coredns_forward_responses_total{to}` - Counter of responses received per upstream.
* `coredns_forward_request_duration_seconds{to, rcode, type}` - duration per upstream, RCODE, type
* `coredns_forward_responses_total{to, rcode}` - count of RCODEs per upstream.
* `coredns_forward_healthcheck_failures_total{to}` - number of failed health checks per upstream.
* `coredns_forward_healthcheck_broken_total{}` - counter of when all upstreams are unhealthy,
* `coredns_forward_healthcheck_broken_total{}` - count of when all upstreams are unhealthy,
and we are randomly (this always uses the `random` policy) spraying to an upstream.
* `coredns_forward_max_concurrent_rejects_total{}` - counter of the number of queries rejected because the
* `coredns_forward_max_concurrent_rejects_total{}` - count of queries rejected because the
number of concurrent queries was at maximum.
* `coredns_forward_conn_cache_hits_total{to, proto}` - counter of connection cache hits per upstream and protocol.
* `coredns_forward_conn_cache_misses_total{to, proto}` - counter of connection cache misses per upstream and protocol.
* `coredns_proxy_request_duration_seconds{proxy_name="forward", to, rcode}` - histogram per upstream, RCODE
* `coredns_proxy_healthcheck_failures_total{proxy_name="forward", to, rcode}` - count of failed health checks per upstream.
* `coredns_proxy_conn_cache_hits_total{proxy_name="forward", to, proto}` - count of connection cache hits per upstream and protocol.
* `coredns_proxy_conn_cache_misses_total{proxy_name="forward", to, proto}` - count of connection cache misses per upstream and protocol.
Where `to` is one of the upstream servers (**TO** from the config), `rcode` is the returned RCODE
from the upstream, `proto` is the transport protocol like `udp`, `tcp`, `tcp-tls`.
The following metrics have recently been deprecated:
* `coredns_forward_healthcheck_failures_total{to, rcode}`
* Can be replaced with `coredns_proxy_healthcheck_failures_total{proxy_name="forward", to, rcode}`
* `coredns_forward_requests_total{to}`
* Can be replaced with `sum(coredns_proxy_request_duration_seconds_count{proxy_name="forward", to})`
* `coredns_forward_responses_total{to, rcode}`
* Can be replaced with `coredns_proxy_request_duration_seconds_count{proxy_name="forward", to, rcode}`
* `coredns_forward_request_duration_seconds{to, rcode}`
* Can be replaced with `coredns_proxy_request_duration_seconds{proxy_name="forward", to, rcode}`
## Examples
Proxy all requests within `example.org.` to a nameserver running on a different port:

View File

@@ -97,7 +97,7 @@ func (f *Forward) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg
count := atomic.AddInt64(&(f.concurrent), 1)
defer atomic.AddInt64(&(f.concurrent), -1)
if count > f.maxConcurrent {
MaxConcurrentRejectCount.Add(1)
maxConcurrentRejectCount.Add(1)
return dns.RcodeRefused, f.ErrLimitExceeded
}
}
@@ -129,7 +129,7 @@ func (f *Forward) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg
r := new(random)
proxy = r.List(f.proxies)[0]
HealthcheckBrokenCount.Add(1)
healthcheckBrokenCount.Add(1)
}
if span != nil {
@@ -150,6 +150,7 @@ func (f *Forward) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg
for {
ret, err = proxy.Connect(ctx, state, opts)
if err == ErrCachedClosed { // Remote side closed conn, can only happen with TCP.
continue
}

View File

@@ -15,17 +15,17 @@ import (
func TestList(t *testing.T) {
f := Forward{
proxies: []*proxy.Proxy{
proxy.NewProxy("1.1.1.1:53", transport.DNS),
proxy.NewProxy("2.2.2.2:53", transport.DNS),
proxy.NewProxy("3.3.3.3:53", transport.DNS),
proxy.NewProxy("TestList", "1.1.1.1:53", transport.DNS),
proxy.NewProxy("TestList", "2.2.2.2:53", transport.DNS),
proxy.NewProxy("TestList", "3.3.3.3:53", transport.DNS),
},
p: &roundRobin{},
}
expect := []*proxy.Proxy{
proxy.NewProxy("2.2.2.2:53", transport.DNS),
proxy.NewProxy("1.1.1.1:53", transport.DNS),
proxy.NewProxy("3.3.3.3:53", transport.DNS),
proxy.NewProxy("TestList", "2.2.2.2:53", transport.DNS),
proxy.NewProxy("TestList", "1.1.1.1:53", transport.DNS),
proxy.NewProxy("TestList", "3.3.3.3:53", transport.DNS),
}
got := f.List()

View File

@@ -33,7 +33,7 @@ func TestHealth(t *testing.T) {
})
defer s.Close()
p := proxy.NewProxy(s.Addr, transport.DNS)
p := proxy.NewProxy("TestHealth", s.Addr, transport.DNS)
p.GetHealthchecker().SetReadTimeout(10 * time.Millisecond)
p.GetHealthchecker().SetWriteTimeout(10 * time.Millisecond)
f := New()
@@ -71,7 +71,7 @@ func TestHealthTCP(t *testing.T) {
})
defer s.Close()
p := proxy.NewProxy(s.Addr, transport.DNS)
p := proxy.NewProxy("TestHealthTCP", s.Addr, transport.DNS)
p.GetHealthchecker().SetReadTimeout(10 * time.Millisecond)
p.GetHealthchecker().SetWriteTimeout(10 * time.Millisecond)
p.GetHealthchecker().SetTCPTransport()
@@ -110,7 +110,7 @@ func TestHealthNoRecursion(t *testing.T) {
})
defer s.Close()
p := proxy.NewProxy(s.Addr, transport.DNS)
p := proxy.NewProxy("TestHealthNoRecursion", s.Addr, transport.DNS)
p.GetHealthchecker().SetReadTimeout(10 * time.Millisecond)
p.GetHealthchecker().SetWriteTimeout(10 * time.Millisecond)
p.GetHealthchecker().SetRecursionDesired(false)
@@ -154,7 +154,7 @@ func TestHealthTimeout(t *testing.T) {
})
defer s.Close()
p := proxy.NewProxy(s.Addr, transport.DNS)
p := proxy.NewProxy("TestHealthTimeout", s.Addr, transport.DNS)
p.GetHealthchecker().SetReadTimeout(10 * time.Millisecond)
p.GetHealthchecker().SetWriteTimeout(10 * time.Millisecond)
f := New()
@@ -182,7 +182,7 @@ func TestHealthMaxFails(t *testing.T) {
})
defer s.Close()
p := proxy.NewProxy(s.Addr, transport.DNS)
p := proxy.NewProxy("TestHealthMaxFails", s.Addr, transport.DNS)
p.SetReadTimeout(10 * time.Millisecond)
p.GetHealthchecker().SetReadTimeout(10 * time.Millisecond)
p.GetHealthchecker().SetWriteTimeout(10 * time.Millisecond)
@@ -219,7 +219,7 @@ func TestHealthNoMaxFails(t *testing.T) {
})
defer s.Close()
p := proxy.NewProxy(s.Addr, transport.DNS)
p := proxy.NewProxy("TestHealthNoMaxFails", s.Addr, transport.DNS)
p.GetHealthchecker().SetReadTimeout(10 * time.Millisecond)
p.GetHealthchecker().SetWriteTimeout(10 * time.Millisecond)
f := New()
@@ -258,7 +258,7 @@ func TestHealthDomain(t *testing.T) {
w.WriteMsg(ret)
})
defer s.Close()
p := proxy.NewProxy(s.Addr, transport.DNS)
p := proxy.NewProxy("TestHealthDomain", s.Addr, transport.DNS)
p.GetHealthchecker().SetReadTimeout(10 * time.Millisecond)
p.GetHealthchecker().SetWriteTimeout(10 * time.Millisecond)
p.GetHealthchecker().SetDomain(hcDomain)

View File

@@ -9,13 +9,14 @@ import (
// Variables declared for monitoring.
var (
HealthcheckBrokenCount = promauto.NewCounter(prometheus.CounterOpts{
healthcheckBrokenCount = promauto.NewCounter(prometheus.CounterOpts{
Namespace: plugin.Namespace,
Subsystem: "forward",
Name: "healthcheck_broken_total",
Help: "Counter of the number of complete failures of the healthchecks.",
})
MaxConcurrentRejectCount = promauto.NewCounter(prometheus.CounterOpts{
maxConcurrentRejectCount = promauto.NewCounter(prometheus.CounterOpts{
Namespace: plugin.Namespace,
Subsystem: "forward",
Name: "max_concurrent_rejects_total",

View File

@@ -19,7 +19,9 @@ import (
"github.com/miekg/dns"
)
func init() { plugin.Register("forward", setup) }
// init registers this plugin with the plugin package under the name
// "forward", passing setup as the function to associate with that name.
func init() {
plugin.Register("forward", setup)
}
func setup(c *caddy.Controller) error {
fs, err := parseForward(c)
@@ -128,7 +130,7 @@ func parseStanza(c *caddy.Controller) (*Forward, error) {
if !allowedTrans[trans] {
return f, fmt.Errorf("'%s' is not supported as a destination protocol in forward: %s", trans, host)
}
p := proxy.NewProxy(h, trans)
p := proxy.NewProxy("forward", h, trans)
f.proxies = append(f.proxies, p)
transports[i] = trans
}