Files
coredns/plugin/metrics/metrics.go

170 lines
4.2 KiB
Go
Raw Normal View History

// Package metrics implements a handler and plugin that provides Prometheus metrics.
2016-03-18 20:57:35 +00:00
package metrics
import (
"context"
"net"
2016-03-18 20:57:35 +00:00
"net/http"
"sync"
"time"
2016-03-18 20:57:35 +00:00
"github.com/coredns/coredns/plugin"
"github.com/coredns/coredns/plugin/metrics/vars"
2016-03-18 20:57:35 +00:00
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
2016-03-18 20:57:35 +00:00
)
// Metrics holds the prometheus configuration. The metrics' path is fixed to be /metrics .
2016-03-18 20:57:35 +00:00
type Metrics struct {
Next plugin.Handler
Addr string
Reg *prometheus.Registry
reload: use OnRestart (#1709) * reload: use OnRestart Close the listener on OnRestart for health and metrics so the default setup function can setup the listener when the plugin is "starting up". Lightly test with some SIGUSR1-ing. Also checked the reload plugin with this, seems fine: .com.:1043 .:1043 2018/04/20 15:01:25 [INFO] CoreDNS-1.1.1 2018/04/20 15:01:25 [INFO] linux/amd64, go1.10, CoreDNS-1.1.1 linux/amd64, go1.10, 2018/04/20 15:01:25 [INFO] Running configuration MD5 = aa8b3f03946fb60546ca1f725d482714 2018/04/20 15:02:01 [INFO] Reloading 2018/04/20 15:02:01 [INFO] Running configuration MD5 = b34a96d99e01db4015a892212560155f 2018/04/20 15:02:01 [INFO] Reloading complete ^C2018/04/20 15:02:06 [INFO] SIGINT: Shutting down With this corefile: .com { proxy . 127.0.0.1:53 prometheus :9054 whoami reload } . { proxy . 127.0.0.1:53 prometheus :9054 whoami reload } The prometheus port was 9053, changed that to 54 so reload would pick it up. From a cursory look it seems this also fixes: Fixes #1604 #1618 #1686 #1492 * At least make it test * Use onfinalshutdown * reload: add reload test This test #1604 adn right now fails. * Address review comments * Add bug section explaining things a bit * compile tests * Fix tests * fixes * slightly less crazy * try to make prometheus setup less confusing * Use ephermal port for test * Don't use the listener * These are shared between goroutines, just use the boolean in the main structure. * Fix text in the reload README, * Set addr to TODO once stopping it * Morph fturb's comment into test, to test reload and scrape health and metric endpoint
2018-04-21 17:43:02 +01:00
ln net.Listener
lnSetup bool
mux *http.ServeMux
srv *http.Server
zoneNames []string
zoneMap map[string]struct{}
zoneMu sync.RWMutex
}
// New builds a Metrics that will listen on addr. It creates a fresh
// prometheus registry, an empty zone set, and registers the Go and process
// collectors followed by the collectors declared by this plugin.
func New(addr string) *Metrics {
	met := &Metrics{
		Addr:    addr,
		Reg:     prometheus.NewRegistry(),
		zoneMap: make(map[string]struct{}),
	}

	// Default runtime and process collectors come first.
	met.MustRegister(prometheus.NewGoCollector())
	met.MustRegister(prometheus.NewProcessCollector(prometheus.ProcessCollectorOpts{}))

	// Then every collector owned by CoreDNS itself, in a fixed order.
	own := []prometheus.Collector{
		buildInfo,
		vars.Panic,
		vars.RequestCount,
		vars.RequestDuration,
		vars.RequestSize,
		vars.RequestDo,
		vars.RequestType,
		vars.ResponseSize,
		vars.ResponseRcode,
		vars.PluginEnabled,
	}
	for _, collector := range own {
		met.MustRegister(collector)
	}

	return met
}
// MustRegister registers c with m.Reg. A collector that is already
// registered is silently accepted; any other registration error is reported
// through log.Fatalf.
func (m *Metrics) MustRegister(c prometheus.Collector) {
	err := m.Reg.Register(c)
	if err == nil {
		return
	}
	// Duplicates are expected and harmless.
	if _, duplicate := err.(prometheus.AlreadyRegisteredError); duplicate {
		return
	}
	log.Fatalf("Cannot register metrics collector: %s", err)
}
// AddZone records zone z in m and refreshes the cached list of zone names.
func (m *Metrics) AddZone(z string) {
	m.zoneMu.Lock()
	defer m.zoneMu.Unlock()

	m.zoneMap[z] = struct{}{}
	m.zoneNames = keys(m.zoneMap)
}
// RemoveZone removes zone z from m and refreshes the cached list of zone names.
func (m *Metrics) RemoveZone(z string) {
	m.zoneMu.Lock()
	defer m.zoneMu.Unlock()

	delete(m.zoneMap, z)
	m.zoneNames = keys(m.zoneMap)
}
// ZoneNames returns the zones of m.
func (m *Metrics) ZoneNames() []string {
m.zoneMu.RLock()
s := m.zoneNames
m.zoneMu.RUnlock()
return s
2016-03-18 20:57:35 +00:00
}
Golint2 (#280) * Fix linter errors * More linting fixes * More docs and making members private that dont need to be public * Fix linter errors * More linting fixes * More docs and making members private that dont need to be public * More lint fixes This leaves: ~~~ middleware/kubernetes/nametemplate/nametemplate.go:64:6: exported type NameTemplate should have comment or be unexported middleware/kubernetes/nametemplate/nametemplate.go:71:1: exported method NameTemplate.SetTemplate should have comment or be unexported middleware/kubernetes/nametemplate/nametemplate.go:108:1: exported method NameTemplate.GetZoneFromSegmentArray should have comment or be unexported middleware/kubernetes/nametemplate/nametemplate.go:116:1: exported method NameTemplate.GetNamespaceFromSegmentArray should have comment or be unexported middleware/kubernetes/nametemplate/nametemplate.go:120:1: exported method NameTemplate.GetServiceFromSegmentArray should have comment or be unexported middleware/kubernetes/nametemplate/nametemplate.go:124:1: exported method NameTemplate.GetTypeFromSegmentArray should have comment or be unexported middleware/kubernetes/nametemplate/nametemplate.go:135:1: exported method NameTemplate.GetSymbolFromSegmentArray should have comment or be unexported middleware/kubernetes/nametemplate/nametemplate.go:167:1: exported method NameTemplate.IsValid should have comment or be unexported middleware/kubernetes/nametemplate/nametemplate.go:182:6: exported type NameValues should have comment or be unexported middleware/kubernetes/util/util.go:1:1: package comment should be of the form "Package util ..." 
middleware/kubernetes/util/util.go:27:2: exported const WildcardStar should have comment (or a comment on this block) or be unexported middleware/proxy/lookup.go:66:1: exported method Proxy.Forward should have comment or be unexported middleware/proxy/proxy.go:24:6: exported type Client should have comment or be unexported middleware/proxy/proxy.go:107:1: exported function Clients should have comment or be unexported middleware/proxy/reverseproxy.go:10:6: exported type ReverseProxy should have comment or be unexported middleware/proxy/reverseproxy.go:16:1: exported method ReverseProxy.ServeDNS should have comment or be unexported middleware/proxy/upstream.go:42:6: exported type Options should have comment or be unexported ~~~ I plan on reworking the proxy anyway, so I'll leave that be.
2016-09-23 09:14:12 +01:00
// OnStartup sets up the metrics on startup.
func (m *Metrics) OnStartup() error {
ln, err := net.Listen("tcp", m.Addr)
if err != nil {
log.Errorf("Failed to start metrics handler: %s", err)
return err
}
2016-06-25 18:12:13 +01:00
m.ln = ln
reload: use OnRestart (#1709) * reload: use OnRestart Close the listener on OnRestart for health and metrics so the default setup function can setup the listener when the plugin is "starting up". Lightly test with some SIGUSR1-ing. Also checked the reload plugin with this, seems fine: .com.:1043 .:1043 2018/04/20 15:01:25 [INFO] CoreDNS-1.1.1 2018/04/20 15:01:25 [INFO] linux/amd64, go1.10, CoreDNS-1.1.1 linux/amd64, go1.10, 2018/04/20 15:01:25 [INFO] Running configuration MD5 = aa8b3f03946fb60546ca1f725d482714 2018/04/20 15:02:01 [INFO] Reloading 2018/04/20 15:02:01 [INFO] Running configuration MD5 = b34a96d99e01db4015a892212560155f 2018/04/20 15:02:01 [INFO] Reloading complete ^C2018/04/20 15:02:06 [INFO] SIGINT: Shutting down With this corefile: .com { proxy . 127.0.0.1:53 prometheus :9054 whoami reload } . { proxy . 127.0.0.1:53 prometheus :9054 whoami reload } The prometheus port was 9053, changed that to 54 so reload would pick it up. From a cursory look it seems this also fixes: Fixes #1604 #1618 #1686 #1492 * At least make it test * Use onfinalshutdown * reload: add reload test This test #1604 adn right now fails. * Address review comments * Add bug section explaining things a bit * compile tests * Fix tests * fixes * slightly less crazy * try to make prometheus setup less confusing * Use ephermal port for test * Don't use the listener * These are shared between goroutines, just use the boolean in the main structure. * Fix text in the reload README, * Set addr to TODO once stopping it * Morph fturb's comment into test, to test reload and scrape health and metric endpoint
2018-04-21 17:43:02 +01:00
m.lnSetup = true
2016-03-18 20:57:35 +00:00
m.mux = http.NewServeMux()
m.mux.Handle("/metrics", promhttp.HandlerFor(m.Reg, promhttp.HandlerOpts{}))
// creating some helper variables to avoid data races on m.srv and m.ln
server := &http.Server{Handler: m.mux}
m.srv = server
go func() {
server.Serve(ln)
}()
ListenAddr = ln.Addr().String() // For tests.
2016-03-18 20:57:35 +00:00
return nil
}
reload: use OnRestart (#1709) * reload: use OnRestart Close the listener on OnRestart for health and metrics so the default setup function can setup the listener when the plugin is "starting up". Lightly test with some SIGUSR1-ing. Also checked the reload plugin with this, seems fine: .com.:1043 .:1043 2018/04/20 15:01:25 [INFO] CoreDNS-1.1.1 2018/04/20 15:01:25 [INFO] linux/amd64, go1.10, CoreDNS-1.1.1 linux/amd64, go1.10, 2018/04/20 15:01:25 [INFO] Running configuration MD5 = aa8b3f03946fb60546ca1f725d482714 2018/04/20 15:02:01 [INFO] Reloading 2018/04/20 15:02:01 [INFO] Running configuration MD5 = b34a96d99e01db4015a892212560155f 2018/04/20 15:02:01 [INFO] Reloading complete ^C2018/04/20 15:02:06 [INFO] SIGINT: Shutting down With this corefile: .com { proxy . 127.0.0.1:53 prometheus :9054 whoami reload } . { proxy . 127.0.0.1:53 prometheus :9054 whoami reload } The prometheus port was 9053, changed that to 54 so reload would pick it up. From a cursory look it seems this also fixes: Fixes #1604 #1618 #1686 #1492 * At least make it test * Use onfinalshutdown * reload: add reload test This test #1604 adn right now fails. * Address review comments * Add bug section explaining things a bit * compile tests * Fix tests * fixes * slightly less crazy * try to make prometheus setup less confusing * Use ephermal port for test * Don't use the listener * These are shared between goroutines, just use the boolean in the main structure. * Fix text in the reload README, * Set addr to TODO once stopping it * Morph fturb's comment into test, to test reload and scrape health and metric endpoint
2018-04-21 17:43:02 +01:00
// OnRestart stops the listener on reload. When no listener has been set up
// it does nothing; otherwise it unsets m.Addr in u and stops the server.
func (m *Metrics) OnRestart() error {
	if m.lnSetup {
		u.Unset(m.Addr)
		return m.stopServer()
	}
	return nil
}
reload: use OnRestart (#1709) * reload: use OnRestart Close the listener on OnRestart for health and metrics so the default setup function can setup the listener when the plugin is "starting up". Lightly test with some SIGUSR1-ing. Also checked the reload plugin with this, seems fine: .com.:1043 .:1043 2018/04/20 15:01:25 [INFO] CoreDNS-1.1.1 2018/04/20 15:01:25 [INFO] linux/amd64, go1.10, CoreDNS-1.1.1 linux/amd64, go1.10, 2018/04/20 15:01:25 [INFO] Running configuration MD5 = aa8b3f03946fb60546ca1f725d482714 2018/04/20 15:02:01 [INFO] Reloading 2018/04/20 15:02:01 [INFO] Running configuration MD5 = b34a96d99e01db4015a892212560155f 2018/04/20 15:02:01 [INFO] Reloading complete ^C2018/04/20 15:02:06 [INFO] SIGINT: Shutting down With this corefile: .com { proxy . 127.0.0.1:53 prometheus :9054 whoami reload } . { proxy . 127.0.0.1:53 prometheus :9054 whoami reload } The prometheus port was 9053, changed that to 54 so reload would pick it up. From a cursory look it seems this also fixes: Fixes #1604 #1618 #1686 #1492 * At least make it test * Use onfinalshutdown * reload: add reload test This test #1604 adn right now fails. * Address review comments * Add bug section explaining things a bit * compile tests * Fix tests * fixes * slightly less crazy * try to make prometheus setup less confusing * Use ephermal port for test * Don't use the listener * These are shared between goroutines, just use the boolean in the main structure. * Fix text in the reload README, * Set addr to TODO once stopping it * Morph fturb's comment into test, to test reload and scrape health and metric endpoint
2018-04-21 17:43:02 +01:00
func (m *Metrics) stopServer() error {
if !m.lnSetup {
return nil
}
ctx, cancel := context.WithTimeout(context.Background(), shutdownTimeout)
defer cancel()
if err := m.srv.Shutdown(ctx); err != nil {
log.Infof("Failed to stop prometheus http server: %s", err)
return err
}
reload: use OnRestart (#1709) * reload: use OnRestart Close the listener on OnRestart for health and metrics so the default setup function can setup the listener when the plugin is "starting up". Lightly test with some SIGUSR1-ing. Also checked the reload plugin with this, seems fine: .com.:1043 .:1043 2018/04/20 15:01:25 [INFO] CoreDNS-1.1.1 2018/04/20 15:01:25 [INFO] linux/amd64, go1.10, CoreDNS-1.1.1 linux/amd64, go1.10, 2018/04/20 15:01:25 [INFO] Running configuration MD5 = aa8b3f03946fb60546ca1f725d482714 2018/04/20 15:02:01 [INFO] Reloading 2018/04/20 15:02:01 [INFO] Running configuration MD5 = b34a96d99e01db4015a892212560155f 2018/04/20 15:02:01 [INFO] Reloading complete ^C2018/04/20 15:02:06 [INFO] SIGINT: Shutting down With this corefile: .com { proxy . 127.0.0.1:53 prometheus :9054 whoami reload } . { proxy . 127.0.0.1:53 prometheus :9054 whoami reload } The prometheus port was 9053, changed that to 54 so reload would pick it up. From a cursory look it seems this also fixes: Fixes #1604 #1618 #1686 #1492 * At least make it test * Use onfinalshutdown * reload: add reload test This test #1604 adn right now fails. * Address review comments * Add bug section explaining things a bit * compile tests * Fix tests * fixes * slightly less crazy * try to make prometheus setup less confusing * Use ephermal port for test * Don't use the listener * These are shared between goroutines, just use the boolean in the main structure. * Fix text in the reload README, * Set addr to TODO once stopping it * Morph fturb's comment into test, to test reload and scrape health and metric endpoint
2018-04-21 17:43:02 +01:00
m.lnSetup = false
m.ln.Close()
reload: use OnRestart (#1709) * reload: use OnRestart Close the listener on OnRestart for health and metrics so the default setup function can setup the listener when the plugin is "starting up". Lightly test with some SIGUSR1-ing. Also checked the reload plugin with this, seems fine: .com.:1043 .:1043 2018/04/20 15:01:25 [INFO] CoreDNS-1.1.1 2018/04/20 15:01:25 [INFO] linux/amd64, go1.10, CoreDNS-1.1.1 linux/amd64, go1.10, 2018/04/20 15:01:25 [INFO] Running configuration MD5 = aa8b3f03946fb60546ca1f725d482714 2018/04/20 15:02:01 [INFO] Reloading 2018/04/20 15:02:01 [INFO] Running configuration MD5 = b34a96d99e01db4015a892212560155f 2018/04/20 15:02:01 [INFO] Reloading complete ^C2018/04/20 15:02:06 [INFO] SIGINT: Shutting down With this corefile: .com { proxy . 127.0.0.1:53 prometheus :9054 whoami reload } . { proxy . 127.0.0.1:53 prometheus :9054 whoami reload } The prometheus port was 9053, changed that to 54 so reload would pick it up. From a cursory look it seems this also fixes: Fixes #1604 #1618 #1686 #1492 * At least make it test * Use onfinalshutdown * reload: add reload test This test #1604 adn right now fails. * Address review comments * Add bug section explaining things a bit * compile tests * Fix tests * fixes * slightly less crazy * try to make prometheus setup less confusing * Use ephermal port for test * Don't use the listener * These are shared between goroutines, just use the boolean in the main structure. * Fix text in the reload README, * Set addr to TODO once stopping it * Morph fturb's comment into test, to test reload and scrape health and metric endpoint
2018-04-21 17:43:02 +01:00
return nil
}
// OnFinalShutdown tears down the metrics listener by delegating to stopServer
// and returning its result.
func (m *Metrics) OnFinalShutdown() error {
	return m.stopServer()
}
// keys returns the keys of m as a newly allocated slice. The order follows
// map iteration and is therefore unspecified. The result is never nil, even
// for an empty or nil map.
func keys(m map[string]struct{}) []string {
	// The final length is known up front, so allocate once.
	sx := make([]string, 0, len(m))
	for k := range m {
		sx = append(sx, k)
	}
	return sx
}
// ListenAddr is assigned the address of the prometheus listener each time
// OnStartup succeeds. Its use is mainly in tests, where we listen on
// "localhost:0" and need to retrieve the actual address.
var ListenAddr string
// shutdownTimeout bounds how long the metrics plugin waits for the metrics
// server to close before giving up with an error.
const shutdownTimeout = 5 * time.Second
// buildInfo is a gauge vector in the plugin namespace, labeled by version,
// revision and goversion.
var buildInfo = prometheus.NewGaugeVec(
	prometheus.GaugeOpts{
		Namespace: plugin.Namespace,
		Name:      "build_info",
		Help:      "A metric with a constant '1' value labeled by version, revision, and goversion from which CoreDNS was built.",
	},
	[]string{"version", "revision", "goversion"},
)