| 
									
										
										
										
											2018-01-10 11:41:22 +00:00
										 |  |  | package health
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import (
 | 
					
						
							| 
									
										
										
										
											2022-04-13 19:09:03 +02:00
										 |  |  | 	"context"
 | 
					
						
							| 
									
										
										
										
											2018-01-10 11:41:22 +00:00
										 |  |  | 	"net/http"
 | 
					
						
							|  |  |  | 	"time"
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	"github.com/coredns/coredns/plugin"
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	"github.com/prometheus/client_golang/prometheus"
 | 
					
						
							| 
									
										
										
										
											2020-07-25 23:06:28 +08:00
										 |  |  | 	"github.com/prometheus/client_golang/prometheus/promauto"
 | 
					
						
							| 
									
										
										
										
											2018-01-10 11:41:22 +00:00
										 |  |  | )
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // overloaded queries the health end point and updates a metrics showing how long it took.
 | 
					
						
							| 
									
										
										
										
											2022-04-13 19:09:03 +02:00
										 |  |  | func (h *health) overloaded(ctx context.Context) {
 | 
					
						
							|  |  |  | 	timeout := 3 * time.Second
 | 
					
						
							| 
									
										
										
										
											2018-01-10 11:41:22 +00:00
										 |  |  | 	client := http.Client{
 | 
					
						
							|  |  |  | 		Timeout: timeout,
 | 
					
						
							|  |  |  | 	}
 | 
					
						
							| 
									
										
										
										
											2022-04-13 19:09:03 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-27 10:15:42 +02:00
										 |  |  | 	url := "http://" + h.Addr + "/health"
 | 
					
						
							| 
									
										
										
										
											2022-04-13 19:09:03 +02:00
										 |  |  | 	req, _ := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
 | 
					
						
							| 
									
										
										
										
											2018-01-10 11:41:22 +00:00
										 |  |  | 	tick := time.NewTicker(1 * time.Second)
 | 
					
						
							| 
									
										
										
										
											2019-05-04 21:06:04 +01:00
										 |  |  | 	defer tick.Stop()
 | 
					
						
							| 
									
										
										
										
											2018-01-10 11:41:22 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	for {
 | 
					
						
							|  |  |  | 		select {
 | 
					
						
							|  |  |  | 		case <-tick.C:
 | 
					
						
							|  |  |  | 			start := time.Now()
 | 
					
						
							| 
									
										
										
										
											2022-04-13 19:09:03 +02:00
										 |  |  | 			resp, err := client.Do(req)
 | 
					
						
							|  |  |  | 			if err != nil && ctx.Err() == context.Canceled {
 | 
					
						
							|  |  |  | 				// request was cancelled by parent goroutine
 | 
					
						
							|  |  |  | 				return
 | 
					
						
							|  |  |  | 			}
 | 
					
						
							| 
									
										
										
										
											2018-01-10 11:41:22 +00:00
										 |  |  | 			if err != nil {
 | 
					
						
							| 
									
										
										
										
											2021-05-27 15:16:38 +02:00
										 |  |  | 				HealthDuration.Observe(time.Since(start).Seconds())
 | 
					
						
							|  |  |  | 				HealthFailures.Inc()
 | 
					
						
							| 
									
										
										
										
											2021-03-19 11:40:38 +01:00
										 |  |  | 				log.Warningf("Local health request to %q failed: %s", url, err)
 | 
					
						
							| 
									
										
										
										
											2018-01-10 11:41:22 +00:00
										 |  |  | 				continue
 | 
					
						
							|  |  |  | 			}
 | 
					
						
							|  |  |  | 			resp.Body.Close()
 | 
					
						
							| 
									
										
										
										
											2021-03-19 11:40:38 +01:00
										 |  |  | 			elapsed := time.Since(start)
 | 
					
						
							|  |  |  | 			HealthDuration.Observe(elapsed.Seconds())
 | 
					
						
							|  |  |  | 			if elapsed > time.Second { // 1s is pretty random, but a *local* scrape taking that long isn't good
 | 
					
						
							|  |  |  | 				log.Warningf("Local health request to %q took more than 1s: %s", url, elapsed)
 | 
					
						
							|  |  |  | 			}
 | 
					
						
							| 
									
										
										
										
											2018-01-10 11:41:22 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-04-13 19:09:03 +02:00
										 |  |  | 		case <-ctx.Done():
 | 
					
						
							| 
									
										
										
										
											2018-01-10 11:41:22 +00:00
										 |  |  | 			return
 | 
					
						
							|  |  |  | 		}
 | 
					
						
							|  |  |  | 	}
 | 
					
						
							|  |  |  | }
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | var (
 | 
					
						
							|  |  |  | 	// HealthDuration is the metric used for exporting how fast we can retrieve the /health endpoint.
 | 
					
						
							| 
									
										
										
										
											2020-07-25 23:06:28 +08:00
										 |  |  | 	HealthDuration = promauto.NewHistogram(prometheus.HistogramOpts{
 | 
					
						
							| 
									
										
										
										
											2018-01-10 11:41:22 +00:00
										 |  |  | 		Namespace: plugin.Namespace,
 | 
					
						
							|  |  |  | 		Subsystem: "health",
 | 
					
						
							|  |  |  | 		Name:      "request_duration_seconds",
 | 
					
						
							| 
									
										
										
										
											2021-05-27 15:16:38 +02:00
										 |  |  | 		Buckets:   plugin.SlimTimeBuckets,
 | 
					
						
							| 
									
										
										
										
											2018-01-10 11:41:22 +00:00
										 |  |  | 		Help:      "Histogram of the time (in seconds) each request took.",
 | 
					
						
							|  |  |  | 	})
 | 
					
						
							| 
									
										
										
										
											2021-11-15 20:29:52 +08:00
										 |  |  | 	// HealthFailures is the metric used to count how many times the health request failed
 | 
					
						
							| 
									
										
										
										
											2021-05-27 15:16:38 +02:00
										 |  |  | 	HealthFailures = promauto.NewCounter(prometheus.CounterOpts{
 | 
					
						
							|  |  |  | 		Namespace: plugin.Namespace,
 | 
					
						
							|  |  |  | 		Subsystem: "health",
 | 
					
						
							|  |  |  | 		Name:      "request_failures_total",
 | 
					
						
							|  |  |  | 		Help:      "The number of times the health check failed.",
 | 
					
						
							|  |  |  | 	})
 | 
					
						
							| 
									
										
										
										
											2018-01-10 11:41:22 +00:00
										 |  |  | )
 |