mirror of
				https://github.com/coredns/coredns.git
				synced 2025-10-31 02:03:20 -04:00 
			
		
		
		
	Add a NativeHistogramBucketFactor parameter to the use of `NewHistogramVec` in order to enable use of Prometheus Native Histograms. This will store automatically computed sparse buckets in CoreDNS. If a compatible Prometheus requests native histograms, this data will be returned instead of the static buckets. The default factor of 1.05 should provide high quality resolution data. Signed-off-by: SuperQ <superq@gmail.com>
		
			
				
	
	
		
			85 lines
		
	
	
		
			2.5 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			85 lines
		
	
	
		
			2.5 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| package health
 | |
| 
 | |
| import (
 | |
| 	"context"
 | |
| 	"net"
 | |
| 	"net/http"
 | |
| 	"time"
 | |
| 
 | |
| 	"github.com/coredns/coredns/plugin"
 | |
| 
 | |
| 	"github.com/prometheus/client_golang/prometheus"
 | |
| 	"github.com/prometheus/client_golang/prometheus/promauto"
 | |
| )
 | |
| 
 | |
| // overloaded queries the health end point and updates a metrics showing how long it took.
 | |
| func (h *health) overloaded(ctx context.Context) {
 | |
| 	bypassProxy := &http.Transport{
 | |
| 		Proxy: nil,
 | |
| 		DialContext: (&net.Dialer{
 | |
| 			Timeout:   30 * time.Second,
 | |
| 			KeepAlive: 30 * time.Second,
 | |
| 		}).DialContext,
 | |
| 		ForceAttemptHTTP2:     true,
 | |
| 		MaxIdleConns:          100,
 | |
| 		IdleConnTimeout:       90 * time.Second,
 | |
| 		TLSHandshakeTimeout:   10 * time.Second,
 | |
| 		ExpectContinueTimeout: 1 * time.Second,
 | |
| 	}
 | |
| 	timeout := 3 * time.Second
 | |
| 	client := http.Client{
 | |
| 		Timeout:   timeout,
 | |
| 		Transport: bypassProxy,
 | |
| 	}
 | |
| 
 | |
| 	req, _ := http.NewRequestWithContext(ctx, http.MethodGet, h.healthURI.String(), nil)
 | |
| 	tick := time.NewTicker(1 * time.Second)
 | |
| 	defer tick.Stop()
 | |
| 
 | |
| 	for {
 | |
| 		select {
 | |
| 		case <-tick.C:
 | |
| 			start := time.Now()
 | |
| 			resp, err := client.Do(req)
 | |
| 			if err != nil && ctx.Err() == context.Canceled {
 | |
| 				// request was cancelled by parent goroutine
 | |
| 				return
 | |
| 			}
 | |
| 			if err != nil {
 | |
| 				HealthDuration.Observe(time.Since(start).Seconds())
 | |
| 				HealthFailures.Inc()
 | |
| 				log.Warningf("Local health request to %q failed: %s", req.URL.String(), err)
 | |
| 				continue
 | |
| 			}
 | |
| 			resp.Body.Close()
 | |
| 			elapsed := time.Since(start)
 | |
| 			HealthDuration.Observe(elapsed.Seconds())
 | |
| 			if elapsed > time.Second { // 1s is pretty random, but a *local* scrape taking that long isn't good
 | |
| 				log.Warningf("Local health request to %q took more than 1s: %s", req.URL.String(), elapsed)
 | |
| 			}
 | |
| 
 | |
| 		case <-ctx.Done():
 | |
| 			return
 | |
| 		}
 | |
| 	}
 | |
| }
 | |
| 
 | |
| var (
 | |
| 	// HealthDuration is the metric used for exporting how fast we can retrieve the /health endpoint.
 | |
| 	HealthDuration = promauto.NewHistogram(prometheus.HistogramOpts{
 | |
| 		Namespace:                   plugin.Namespace,
 | |
| 		Subsystem:                   "health",
 | |
| 		Name:                        "request_duration_seconds",
 | |
| 		Buckets:                     plugin.SlimTimeBuckets,
 | |
| 		NativeHistogramBucketFactor: plugin.NativeHistogramBucketFactor,
 | |
| 		Help:                        "Histogram of the time (in seconds) each request took.",
 | |
| 	})
 | |
| 	// HealthFailures is the metric used to count how many times the health request failed
 | |
| 	HealthFailures = promauto.NewCounter(prometheus.CounterOpts{
 | |
| 		Namespace: plugin.Namespace,
 | |
| 		Subsystem: "health",
 | |
| 		Name:      "request_failures_total",
 | |
| 		Help:      "The number of times the health check failed.",
 | |
| 	})
 | |
| )
 |