Overloaded (#1364)

* plugin/health: add 'overloaded metrics'

Query our own health endpoint and record (and export as a metric) the
time it takes. The Get has a 5s timeout which, when reached, sets
the metric duration to 5s. The actual decision "am I overloaded?" is left
to an external entity.

* README

* golint and govet

* and the tests
This commit is contained in:
Miek Gieben
2018-01-10 11:41:22 +00:00
committed by GitHub
parent cced1a4c12
commit 48059a6c3e
5 changed files with 90 additions and 7 deletions

View File

@@ -25,6 +25,14 @@ supports health checks has a section "Health" in their README.
Any plugin that implements the Healther interface will be used to report health.
## Metrics
If monitoring is enabled (via the *prometheus* directive) then the following metric is exported:
* `coredns_health_request_duration_seconds{}` - duration to process a /health query. As this should
be a local operation it should be fast. A (large) increase in this duration indicates the
CoreDNS process is having trouble keeping up.
## Examples
Run another health endpoint on http://localhost:8091.

View File

@@ -21,9 +21,11 @@ type health struct {
h []Healther
sync.RWMutex
ok bool // ok is the global boolean indicating an all healthy plugin stack
stop chan bool
}
func (h *health) Startup() error {
func (h *health) OnStartup() error {
if h.Addr == "" {
h.Addr = defAddr
}
@@ -51,14 +53,20 @@ func (h *health) Startup() error {
go func() {
http.Serve(h.ln, h.mux)
}()
go func() {
h.overloaded()
}()
})
return nil
}
func (h *health) Shutdown() error {
// OnShutdown closes the health listener, if one was opened, and then tells the
// overloaded goroutine to stop by sending on h.stop. It returns the error, if any,
// from closing the listener.
//
// The stop signal is sent regardless of whether a listener existed: returning
// directly from the Close call would leave the polling goroutine running against a
// closed listener. The send is unbuffered, so it blocks until that goroutine gets
// back to its select.
func (h *health) OnShutdown() error {
	var err error
	if h.ln != nil {
		err = h.ln.Close()
	}
	// A health value created without a stop channel has a nil h.stop; a send on a
	// nil channel blocks forever, so skip the signal in that case.
	if h.stop != nil {
		h.stop <- true
	}
	return err
}

View File

@@ -13,10 +13,10 @@ func TestHealth(t *testing.T) {
h := health{Addr: ":0"}
h.h = append(h.h, &erratic.Erratic{})
if err := h.Startup(); err != nil {
if err := h.OnStartup(); err != nil {
t.Fatalf("Unable to startup the health server: %v", err)
}
defer h.Shutdown()
defer h.OnShutdown()
// Reconstruct the http address based on the port allocated by operating system.
address := fmt.Sprintf("http://%s%s", h.ln.Addr().String(), path)

View File

@@ -0,0 +1,52 @@
package health
import (
	"io"
	"io/ioutil"
	"net/http"
	"sync"
	"time"

	"github.com/coredns/coredns/plugin"
	"github.com/prometheus/client_golang/prometheus"
)
// overloaded issues an HTTP GET against this plugin's own listener on every tick of
// a one-second ticker and records how long the request took in the HealthDuration
// histogram. A request that fails for any reason, including hitting the 5s client
// timeout, is recorded as the full timeout. It returns once a value is received on
// h.stop.
//
// NOTE(review): the URL is built from h.Addr alone, without a path component, so
// the request targets the root of the listener - confirm that this is intended.
func (h *health) overloaded() {
	timeout := 5 * time.Second
	client := http.Client{
		Timeout: timeout,
	}

	url := "http://" + h.Addr
	tick := time.NewTicker(1 * time.Second)
	defer tick.Stop()

	for {
		select {
		case <-tick.C:
			start := time.Now()
			resp, err := client.Get(url)
			if err != nil {
				HealthDuration.Observe(timeout.Seconds())
				continue
			}
			// Read the body to EOF before closing it: unless the body is both fully
			// read and closed, net/http may not be able to reuse the keep-alive
			// connection for the next tick. A failed drain is ignored on purpose; the
			// request itself succeeded and the duration recorded below already
			// includes the time spent reading.
			_, _ = io.Copy(ioutil.Discard, resp.Body)
			resp.Body.Close()
			HealthDuration.Observe(time.Since(start).Seconds())

		case <-h.stop:
			return
		}
	}
}
// HealthDuration is the histogram exported as a metric that tracks how fast the
// /health endpoint can be retrieved.
var HealthDuration = prometheus.NewHistogram(prometheus.HistogramOpts{
	Namespace: plugin.Namespace,
	Subsystem: "health",
	Name:      "request_duration_seconds",
	Help:      "Histogram of the time (in seconds) each request took.",
	Buckets:   plugin.TimeBuckets,
})

// onceMetric guards the registration of HealthDuration performed in setup so that
// it runs at most once.
var onceMetric sync.Once

View File

@@ -6,6 +6,7 @@ import (
"github.com/coredns/coredns/core/dnsserver"
"github.com/coredns/coredns/plugin"
"github.com/coredns/coredns/plugin/metrics"
"github.com/mholt/caddy"
)
@@ -23,7 +24,7 @@ func setup(c *caddy.Controller) error {
return plugin.Error("health", err)
}
h := &health{Addr: addr}
h := &health{Addr: addr, stop: make(chan bool)}
c.OnStartup(func() error {
plugins := dnsserver.GetConfig(c).Handlers()
@@ -36,6 +37,7 @@ func setup(c *caddy.Controller) error {
})
c.OnStartup(func() error {
// Poll all middleware every second.
h.poll()
go func() {
for {
@@ -46,8 +48,21 @@ func setup(c *caddy.Controller) error {
return nil
})
c.OnStartup(h.Startup)
c.OnFinalShutdown(h.Shutdown)
c.OnStartup(func() error {
onceMetric.Do(func() {
m := dnsserver.GetConfig(c).Handler("prometheus")
if m == nil {
return
}
if x, ok := m.(*metrics.Metrics); ok {
x.MustRegister(HealthDuration)
}
})
return nil
})
c.OnStartup(h.OnStartup)
c.OnFinalShutdown(h.OnShutdown)
// Don't do AddPlugin, as health is not *really* a plugin just a separate webserver running.
return nil