package object import ( "time" "github.com/coredns/coredns/plugin" "github.com/coredns/coredns/plugin/pkg/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" api "k8s.io/api/core/v1" meta "k8s.io/apimachinery/pkg/apis/meta/v1" ) var ( // DNSProgrammingLatency is defined as the time it took to program a DNS instance - from the time // a service or pod has changed to the time the change was propagated and was available to be // served by a DNS server. // The definition of this SLI can be found at https://github.com/kubernetes/community/blob/master/sig-scalability/slos/dns_programming_latency.md // Note that the metrics is partially based on the time exported by the endpoints controller on // the master machine. The measurement may be inaccurate if there is a clock drift between the // node and master machine. // The service_kind label can be one of: // * cluster_ip // * headless_with_selector // * headless_without_selector DNSProgrammingLatency = promauto.NewHistogramVec(prometheus.HistogramOpts{ Namespace: plugin.Namespace, Subsystem: "kubernetes", Name: "dns_programming_duration_seconds", // From 1 millisecond to ~17 minutes. Buckets: prometheus.ExponentialBuckets(0.001, 2, 20), NativeHistogramBucketFactor: plugin.NativeHistogramBucketFactor, Help: "Histogram of the time (in seconds) it took to program a dns instance.", }, []string{"service_kind"}) // DurationSinceFunc returns the duration elapsed since the given time. // Added as a global variable to allow injection for testing. DurationSinceFunc = time.Since ) // EndpointLatencyRecorder records latency metric for endpoint objects type EndpointLatencyRecorder struct { TT time.Time ServiceFunc func(meta.Object) []*Service Services []*Service } func (l *EndpointLatencyRecorder) init(o meta.Object) { l.Services = l.ServiceFunc(o) l.TT = time.Time{} stringVal, ok := o.GetAnnotations()[api.EndpointsLastChangeTriggerTime] if ok { tt, err := time.Parse(time.RFC3339Nano, stringVal) if err != nil { log.Warningf("DnsProgrammingLatency cannot be calculated for Endpoints '%s/%s'; invalid %q annotation RFC3339 value of %q", o.GetNamespace(), o.GetName(), api.EndpointsLastChangeTriggerTime, stringVal) // In case of error val = time.Zero, which is ignored downstream. } l.TT = tt } } func (l *EndpointLatencyRecorder) record() { // Note: len(l.Services) != 1 can be a false negative if the service informer is lagging, // i.e. we may not see a recently created service. Given that services don't change very // often (compared to much more frequent endpoint changes), cases when this method skips // recording should be relatively rare. We intentionally accept this flaw to keep the // solution simple. if l.TT.IsZero() || len(l.Services) != 1 { return } // If we're here it means that the Endpoints object was created by the endpoints-controller // (because the LastChangeTriggerTime annotation is set) and the backing Service is known. // For headless services this means the service has a selector ("headless_with_selector"). // For non-headless services the service kind is "cluster_ip". serviceKind := "cluster_ip" if l.Services[0].Headless() { serviceKind = "headless_with_selector" } DNSProgrammingLatency.WithLabelValues(serviceKind). Observe(DurationSinceFunc(l.TT).Seconds()) }