Files
coredns/plugin/kubernetes/metrics.go

74 lines
3.4 KiB
Go
Raw Normal View History

package kubernetes
import (
"time"
"github.com/coredns/coredns/plugin"
"github.com/coredns/coredns/plugin/kubernetes/object"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
api "k8s.io/api/core/v1"
plugin/kubernetes: Watch EndpointSlices (#4209) * initial commit Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * convert endpointslices to object.endpoints Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * add opt hard coded for now Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * check that server supports endpointslice Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * fix import grouping Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * dont use endpoint slice in 1.17 or 1.18 Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * bump kind/k8s in circle ci to latest Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * drop k8s to latest supported by kind Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * use endpointslice name as endoint Name; index by Service name Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * use index key comparison in nsAddrs() Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * add Index to object.Endpoint fixtures; fix direct endpoint name compares Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * add slice dup check and test Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * todo Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * add ep-slice skew dup test for reverse Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * nsaddrs: de-dup ep-slice skew dups; add test Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * remove todo Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * address various feedback Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * consolidate endpoint/slice informer code Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * fix endpoint informer consolidation; use clearer func name Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * log info; use major/minor fields Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * fix nsAddr and unit test Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * add latency tracking for endpointslices Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * endpointslice latency unit test & fix Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * code shuffling Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * rename endpointslices in tests Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * remove de-dup from nsAddrs and test Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * remove de-dup from findServices / test Signed-off-by: Chris O'Haver <cohaver@infoblox.com>
2020-10-30 08:14:30 -04:00
meta "k8s.io/apimachinery/pkg/apis/meta/v1"
)
var (
// DNSProgrammingLatency is defined as the time it took to program a DNS instance - from the time
// a service or pod has changed to the time the change was propagated and was available to be
// served by a DNS server.
// The definition of this SLI can be found at https://github.com/kubernetes/community/blob/master/sig-scalability/slos/dns_programming_latency.md
// Note that the metrics is partially based on the time exported by the endpoints controller on
// the master machine. The measurement may be inaccurate if there is a clock drift between the
// node and master machine.
// The service_kind label can be one of:
// * cluster_ip
// * headless_with_selector
// * headless_without_selector
DNSProgrammingLatency = promauto.NewHistogramVec(prometheus.HistogramOpts{
Namespace: plugin.Namespace,
Subsystem: pluginName,
Name: "dns_programming_duration_seconds",
// From 1 millisecond to ~17 minutes.
Buckets: prometheus.ExponentialBuckets(0.001, 2, 20),
Help: "Histogram of the time (in seconds) it took to program a dns instance.",
}, []string{"service_kind"})
// durationSinceFunc returns the duration elapsed since the given time.
// Added as a global variable to allow injection for testing.
durationSinceFunc = time.Since
)
plugin/kubernetes: Watch EndpointSlices (#4209) * initial commit Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * convert endpointslices to object.endpoints Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * add opt hard coded for now Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * check that server supports endpointslice Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * fix import grouping Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * dont use endpoint slice in 1.17 or 1.18 Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * bump kind/k8s in circle ci to latest Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * drop k8s to latest supported by kind Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * use endpointslice name as endoint Name; index by Service name Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * use index key comparison in nsAddrs() Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * add Index to object.Endpoint fixtures; fix direct endpoint name compares Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * add slice dup check and test Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * todo Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * add ep-slice skew dup test for reverse Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * nsaddrs: de-dup ep-slice skew dups; add test Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * remove todo Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * address various feedback Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * consolidate endpoint/slice informer code Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * fix endpoint informer consolidation; use clearer func name Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * log info; use major/minor fields Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * fix nsAddr and unit test Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * add latency tracking for endpointslices Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * endpointslice latency unit test & fix Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * code shuffling Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * rename endpointslices in tests Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * remove de-dup from nsAddrs and test Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * remove de-dup from findServices / test Signed-off-by: Chris O'Haver <cohaver@infoblox.com>
2020-10-30 08:14:30 -04:00
func recordDNSProgrammingLatency(svcs []*object.Service, endpoints meta.Object) {
// getLastChangeTriggerTime is the time.Time value of the EndpointsLastChangeTriggerTime
// annotation stored in the given endpoints object or the "zero" time if the annotation wasn't set
var lastChangeTriggerTime time.Time
plugin/kubernetes: Watch EndpointSlices (#4209) * initial commit Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * convert endpointslices to object.endpoints Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * add opt hard coded for now Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * check that server supports endpointslice Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * fix import grouping Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * dont use endpoint slice in 1.17 or 1.18 Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * bump kind/k8s in circle ci to latest Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * drop k8s to latest supported by kind Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * use endpointslice name as endoint Name; index by Service name Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * use index key comparison in nsAddrs() Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * add Index to object.Endpoint fixtures; fix direct endpoint name compares Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * add slice dup check and test Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * todo Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * add ep-slice skew dup test for reverse Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * nsaddrs: de-dup ep-slice skew dups; add test Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * remove todo Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * address various feedback Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * consolidate endpoint/slice informer code Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * fix endpoint informer consolidation; use clearer func name Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * log info; use major/minor fields Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * fix nsAddr and unit test Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * add latency tracking for endpointslices Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * endpointslice latency unit test & fix Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * code shuffling Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * rename endpointslices in tests Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * remove de-dup from nsAddrs and test Signed-off-by: Chris O'Haver <cohaver@infoblox.com> * remove de-dup from findServices / test Signed-off-by: Chris O'Haver <cohaver@infoblox.com>
2020-10-30 08:14:30 -04:00
stringVal, ok := endpoints.GetAnnotations()[api.EndpointsLastChangeTriggerTime]
if ok {
ts, err := time.Parse(time.RFC3339Nano, stringVal)
if err != nil {
log.Warningf("DnsProgrammingLatency cannot be calculated for Endpoints '%s/%s'; invalid %q annotation RFC3339 value of %q",
endpoints.GetNamespace(), endpoints.GetName(), api.EndpointsLastChangeTriggerTime, stringVal)
// In case of error val = time.Zero, which is ignored in the upstream code.
}
lastChangeTriggerTime = ts
}
// isHeadless indicates whether the endpoints object belongs to a headless
// service (i.e. clusterIp = None). Note that this can be a false negatives if the service
// informer is lagging, i.e. we may not see a recently created service. Given that the services
// don't change very often (comparing to much more frequent endpoints changes), cases when this method
// will return wrong answer should be relatively rare. Because of that we intentionally accept this
// flaw to keep the solution simple.
isHeadless := len(svcs) == 1 && svcs[0].ClusterIP == api.ClusterIPNone
if endpoints == nil || !isHeadless || lastChangeTriggerTime.IsZero() {
return
}
// If we're here it means that the Endpoints object is for a headless service and that
// the Endpoints object was created by the endpoints-controller (because the
// LastChangeTriggerTime annotation is set). It means that the corresponding service is a
// "headless service with selector".
DNSProgrammingLatency.WithLabelValues("headless_with_selector").
Observe(durationSinceFunc(lastChangeTriggerTime).Seconds())
}