2018-02-05 22:00:47 +00:00
|
|
|
package forward
|
|
|
|
|
|
|
|
|
|
import (
|
2018-07-09 15:14:55 +01:00
|
|
|
"crypto/tls"
|
2018-02-05 22:00:47 +00:00
|
|
|
"sync/atomic"
|
2018-07-09 15:14:55 +01:00
|
|
|
"time"
|
2018-02-05 22:00:47 +00:00
|
|
|
|
2018-09-19 07:29:37 +01:00
|
|
|
"github.com/coredns/coredns/plugin/pkg/transport"
|
|
|
|
|
|
2018-02-05 22:00:47 +00:00
|
|
|
"github.com/miekg/dns"
|
|
|
|
|
)
|
|
|
|
|
|
2018-07-09 15:14:55 +01:00
|
|
|
// HealthChecker checks the upstream health.
|
|
|
|
|
type HealthChecker interface {
|
|
|
|
|
Check(*Proxy) error
|
|
|
|
|
SetTLSConfig(*tls.Config)
|
2020-03-06 11:52:43 +01:00
|
|
|
SetRecursionDesired(bool)
|
|
|
|
|
GetRecursionDesired() bool
|
2018-07-09 15:14:55 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// dnsHc is a health checker for a DNS endpoint (DNS, and DoT).
|
2020-03-06 11:52:43 +01:00
|
|
|
type dnsHc struct {
|
|
|
|
|
c *dns.Client
|
|
|
|
|
recursionDesired bool
|
|
|
|
|
}
|
2018-07-09 15:14:55 +01:00
|
|
|
|
Speed up testing (#4239)
* Speed up testing
* make notification run in the background, this reduces the test_readme
time from 18s to 0.10s
* reduce time for zone reload
* TestServeDNSConcurrent remove entirely. This took a whopping 58s for
... ? A few minutes staring didn't reveal wth it is actually testing.
Making values smaller revealed race conditions in the tests. Remove
entirely.
* Move many interval values to variables so we can reset them to short
values for the tests.
* test_large_axfr: make the zone smaller. The number used 64K has no
rationale, make it 64/10 to speed up.
* TestProxyThreeWay: use client with shorter timeout
A few random tidbits in other tests.
Total time saved: 177s (almost 3m) - which makes it worthwhile again to
run the test locally:
this branch:
~~~
ok github.com/coredns/coredns/test 10.437s
cd plugin; time go t ./...
5,51s user 7,51s system 11,15s elapsed 744%CPU (
~~~
master:
~~~
ok github.com/coredns/coredns/test 35.252s
cd plugin; time go t ./...
157,64s user 15,39s system 50,05s elapsed 345%CPU ()
~~~
tests/ -25s
plugins/ -40s
This brings the total on 20s, and another 10s can be saved by fixing
dnstapio. Moving this to 5s would be even better, but 10s is also nice.
Signed-off-by: Miek Gieben <miek@miek.nl>
* Also 0.01
Signed-off-by: Miek Gieben <miek@miek.nl>
2020-10-30 10:27:04 +01:00
|
|
|
// Timeouts applied to the DNS client used for health checks. They are
// variables rather than constants so they can be overridden (e.g. shortened
// in tests).
var (
	// hcReadTimeout bounds how long a health check waits for a reply.
	hcReadTimeout = time.Second
	// hcWriteTimeout bounds how long a health check may take to send.
	hcWriteTimeout = time.Second
)
|
|
|
|
|
|
2018-09-19 07:29:37 +01:00
|
|
|
// NewHealthChecker returns a new HealthChecker based on transport.
|
2020-03-06 11:52:43 +01:00
|
|
|
func NewHealthChecker(trans string, recursionDesired bool) HealthChecker {
|
2018-09-19 07:29:37 +01:00
|
|
|
switch trans {
|
|
|
|
|
case transport.DNS, transport.TLS:
|
2018-07-09 15:14:55 +01:00
|
|
|
c := new(dns.Client)
|
|
|
|
|
c.Net = "udp"
|
Speed up testing (#4239)
* Speed up testing
* make notification run in the background, this recudes the test_readme
time from 18s to 0.10s
* reduce time for zone reload
* TestServeDNSConcurrent remove entirely. This took a whopping 58s for
... ? A few minutes staring didn't reveal wth it is actually testing.
Making values smaller revealed race conditions in the tests. Remove
entirely.
* Move many interval values to variables so we can reset them to short
values for the tests.
* test_large_axfr: make the zone smaller. The number used 64K has no
rational, make it 64/10 to speed up.
* TestProxyThreeWay: use client with shorter timeout
A few random tidbits in other tests.
Total time saved: 177s (almost 3m) - which makes it worthwhile again to
run the test locally:
this branch:
~~~
ok github.com/coredns/coredns/test 10.437s
cd plugin; time go t ./...
5,51s user 7,51s system 11,15s elapsed 744%CPU (
~~~
master:
~~~
ok github.com/coredns/coredns/test 35.252s
cd plugin; time go t ./...
157,64s user 15,39s system 50,05s elapsed 345%CPU ()
~~~
tests/ -25s
plugins/ -40s
This brings the total on 20s, and another 10s can be saved by fixing
dnstapio. Moving this to 5s would be even better, but 10s is also nice.
Signed-off-by: Miek Gieben <miek@miek.nl>
* Also 0.01
Signed-off-by: Miek Gieben <miek@miek.nl>
2020-10-30 10:27:04 +01:00
|
|
|
c.ReadTimeout = hcReadTimeout
|
|
|
|
|
c.WriteTimeout = hcWriteTimeout
|
2018-07-09 15:14:55 +01:00
|
|
|
|
2020-03-06 11:52:43 +01:00
|
|
|
return &dnsHc{c: c, recursionDesired: recursionDesired}
|
2018-07-09 15:14:55 +01:00
|
|
|
}
|
|
|
|
|
|
2018-10-09 22:50:30 +03:00
|
|
|
log.Warningf("No healthchecker for transport %q", trans)
|
2018-07-09 15:14:55 +01:00
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (h *dnsHc) SetTLSConfig(cfg *tls.Config) {
|
|
|
|
|
h.c.Net = "tcp-tls"
|
|
|
|
|
h.c.TLSConfig = cfg
|
|
|
|
|
}
|
|
|
|
|
|
2020-03-06 11:52:43 +01:00
|
|
|
func (h *dnsHc) SetRecursionDesired(recursionDesired bool) {
|
|
|
|
|
h.recursionDesired = recursionDesired
|
|
|
|
|
}
|
|
|
|
|
func (h *dnsHc) GetRecursionDesired() bool {
|
|
|
|
|
return h.recursionDesired
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// For HC we send to . IN NS +[no]rec message to the upstream. Dial timeouts and empty
|
2018-02-05 22:00:47 +00:00
|
|
|
// replies are considered fails, basically anything else constitutes a healthy upstream.
|
|
|
|
|
|
2018-02-15 10:21:57 +01:00
|
|
|
// Check is used as the up.Func in the up.Probe.
|
2018-07-09 15:14:55 +01:00
|
|
|
func (h *dnsHc) Check(p *Proxy) error {
|
|
|
|
|
err := h.send(p.addr)
|
2018-02-05 22:00:47 +00:00
|
|
|
if err != nil {
|
2018-02-15 10:21:57 +01:00
|
|
|
HealthcheckFailureCount.WithLabelValues(p.addr).Add(1)
|
|
|
|
|
atomic.AddUint32(&p.fails, 1)
|
|
|
|
|
return err
|
2018-02-05 22:00:47 +00:00
|
|
|
}
|
|
|
|
|
|
2018-02-15 10:21:57 +01:00
|
|
|
atomic.StoreUint32(&p.fails, 0)
|
|
|
|
|
return nil
|
2018-02-05 22:00:47 +00:00
|
|
|
}
|
|
|
|
|
|
2018-07-09 15:14:55 +01:00
|
|
|
func (h *dnsHc) send(addr string) error {
|
|
|
|
|
ping := new(dns.Msg)
|
|
|
|
|
ping.SetQuestion(".", dns.TypeNS)
|
2020-03-06 11:52:43 +01:00
|
|
|
ping.MsgHdr.RecursionDesired = h.recursionDesired
|
2018-02-05 22:00:47 +00:00
|
|
|
|
2018-07-09 15:14:55 +01:00
|
|
|
m, _, err := h.c.Exchange(ping, addr)
|
|
|
|
|
// If we got a header, we're alright, basically only care about I/O errors 'n stuff.
|
2018-02-05 22:00:47 +00:00
|
|
|
if err != nil && m != nil {
|
2018-07-09 15:14:55 +01:00
|
|
|
// Silly check, something sane came back.
|
2018-02-05 22:00:47 +00:00
|
|
|
if m.Response || m.Opcode == dns.OpcodeQuery {
|
|
|
|
|
err = nil
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return err
|
|
|
|
|
}
|