mirror of
https://github.com/coredns/coredns.git
synced 2025-11-15 00:12:16 -05:00
Implement debugging with TXT records
Signed-off-by: Miek Gieben <miek@miek.nl>
This commit is contained in:
@@ -27,13 +27,21 @@ the *management cluster* (see `cluster` below in "Syntax"). By default the name
|
||||
When bootstrapping *traffic* tries to retrieve the cluster endpoints for the management cluster,
|
||||
when the cluster is not found *traffic* will return a fatal error.
|
||||
|
||||
The *traffic* plugin handles A, AAAA and SRV queries. Queries for non-existent clusters get a
|
||||
NXDOMAIN, where the minimal TTL is also set to 5s.
|
||||
The *traffic* plugin handles A, AAAA, SRV and TXT queries. TXT queries are purely used for debugging
|
||||
as health status of the endpoints is ignored in that case.
|
||||
Queries for non-existent clusters get a NXDOMAIN, where the minimal TTL is also set to 5s.
|
||||
|
||||
For A and AAAA queries each DNS response contains a single IP address that's considered the best
|
||||
one. The TTL on these answer is set to 5s. It will only return successful responses either with an
|
||||
answer or, otherwise, a NODATA response.
|
||||
|
||||
TXT replies will use the SRV record format augmented with the health status of each backend, as this
|
||||
is useful for debugging.
|
||||
|
||||
~~~
|
||||
web.lb.example.org. 5 IN TXT "100" "100" "18008" "endpoint-0.web.lb.example.org." "HEALTHY"
|
||||
~~~
|
||||
|
||||
For SRV queries *all* healthy backends will be returned - assuming the client doing the query
|
||||
is smart enough to select the best one. When SRV records are returned, the endpoint DNS names
|
||||
are synthesized `endpoint-<N>.<cluster>.<zone>` that carries the IP address. Querying for these
|
||||
@@ -55,8 +63,7 @@ traffic TO...
|
||||
This enabled the *traffic* plugin, with a default node ID of `coredns` and no TLS.
|
||||
|
||||
* **TO...** are the control plane endpoints to bootstrap from. These must start with `grpc://`. The
|
||||
port number defaults to 443, if not specified. These endpoint will be tried in the order given.
|
||||
First successful connection will be used to resolve the management cluster `xds`.
|
||||
port number defaults to 443, if not specified. These endpoints will be tried in the order given.
|
||||
|
||||
The extended syntax is available if you want more control.
|
||||
|
||||
@@ -66,7 +73,6 @@ traffic TO... {
|
||||
id ID
|
||||
tls CERT KEY CA
|
||||
tls_servername NAME
|
||||
ignore_health
|
||||
}
|
||||
~~~
|
||||
|
||||
@@ -93,8 +99,6 @@ traffic TO... {
|
||||
* `tls_servername` **NAME** allows you to set a server name in the TLS configuration. This is
|
||||
needed because *traffic* connects to an IP address, so it can't infer the server name from it.
|
||||
|
||||
* `ignore_health` can be enabled to ignore endpoint health status, this can aid in debugging.
|
||||
|
||||
## Naming Clusters
|
||||
|
||||
When a cluster is named this usually consists out of a single word, i.e. "cluster-v0", or "web".
|
||||
|
||||
@@ -61,8 +61,6 @@ func setup(c *caddy.Controller) error {
|
||||
i++
|
||||
goto redo
|
||||
}
|
||||
// err == nil, we are connected
|
||||
break
|
||||
}
|
||||
}()
|
||||
metrics.MustRegister(c, xds.ClusterGauge)
|
||||
@@ -132,8 +130,6 @@ func parseTraffic(c *caddy.Controller) (*Traffic, error) {
|
||||
return nil, c.ArgErr()
|
||||
}
|
||||
tlsServerName = c.Val()
|
||||
case "ignore_health":
|
||||
t.health = true
|
||||
default:
|
||||
return nil, c.Errf("unknown property '%s'", c.Val())
|
||||
}
|
||||
|
||||
@@ -13,6 +13,7 @@ import (
|
||||
"github.com/coredns/coredns/plugin/traffic/xds"
|
||||
"github.com/coredns/coredns/request"
|
||||
|
||||
corepb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
|
||||
"github.com/miekg/dns"
|
||||
)
|
||||
|
||||
@@ -25,7 +26,6 @@ type Traffic struct {
|
||||
hosts []string
|
||||
|
||||
id string
|
||||
health bool
|
||||
origins []string
|
||||
|
||||
Next plugin.Handler
|
||||
@@ -48,7 +48,8 @@ func (t *Traffic) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg
|
||||
m.SetReply(r)
|
||||
m.Authoritative = true
|
||||
|
||||
sockaddr, ok := t.c.Select(cluster, t.health)
|
||||
healthy := state.QType() == dns.TypeTXT
|
||||
sockaddr, ok := t.c.Select(cluster, healthy)
|
||||
if !ok {
|
||||
// ok this cluster doesn't exist, potentially due to extra labels, which may be garbage or legit queries:
|
||||
// legit is:
|
||||
@@ -69,14 +70,14 @@ func (t *Traffic) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg
|
||||
if strings.HasPrefix(strings.ToLower(labels[0]), "endpoint-") {
|
||||
// recheck if the cluster exist.
|
||||
cluster = labels[1]
|
||||
sockaddr, ok = t.c.Select(cluster, t.health)
|
||||
sockaddr, ok = t.c.Select(cluster, healthy)
|
||||
if !ok {
|
||||
m.Ns = soa(state.Zone)
|
||||
m.Rcode = dns.RcodeNameError
|
||||
w.WriteMsg(m)
|
||||
return 0, nil
|
||||
}
|
||||
return t.serveEndpoint(ctx, state, labels[0], cluster)
|
||||
return t.serveEndpoint(ctx, state, labels[0], cluster, healthy)
|
||||
}
|
||||
case 3:
|
||||
if strings.ToLower(labels[0]) != "_grpclb" || strings.ToLower(labels[1]) != "_tcp" {
|
||||
@@ -88,7 +89,7 @@ func (t *Traffic) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg
|
||||
// OK, _grcplb._tcp query; we need to return the endpoint for the mgmt cluster *NOT* the cluster
|
||||
// we got the query for. This should exist, but we'll check later anyway.
|
||||
cluster = t.mgmt
|
||||
sockaddr, _ = t.c.Select(cluster, t.health)
|
||||
sockaddr, _ = t.c.Select(cluster, healthy)
|
||||
break
|
||||
default:
|
||||
m.Ns = soa(state.Zone)
|
||||
@@ -120,7 +121,7 @@ func (t *Traffic) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg
|
||||
}
|
||||
m.Answer = []dns.RR{&dns.AAAA{Hdr: dns.RR_Header{Name: state.QName(), Rrtype: dns.TypeAAAA, Class: dns.ClassINET, Ttl: 5}, AAAA: sockaddr.Address()}}
|
||||
case dns.TypeSRV:
|
||||
sockaddrs, _ := t.c.All(cluster, t.health)
|
||||
sockaddrs, _ := t.c.All(cluster, true)
|
||||
m.Answer = make([]dns.RR, 0, len(sockaddrs))
|
||||
m.Extra = make([]dns.RR, 0, len(sockaddrs))
|
||||
for i, sa := range sockaddrs {
|
||||
@@ -136,6 +137,18 @@ func (t *Traffic) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg
|
||||
m.Extra = append(m.Extra, &dns.A{Hdr: dns.RR_Header{Name: target, Rrtype: dns.TypeA, Class: dns.ClassINET, Ttl: 5}, A: sa.Address()})
|
||||
}
|
||||
}
|
||||
case dns.TypeTXT:
|
||||
sockaddrs, _ := t.c.All(cluster, false)
|
||||
m.Answer = make([]dns.RR, 0, len(sockaddrs))
|
||||
m.Extra = make([]dns.RR, 0, len(sockaddrs))
|
||||
for i, sa := range sockaddrs {
|
||||
target := fmt.Sprintf("endpoint-%d.%s.%s", i, cluster, state.Zone)
|
||||
|
||||
m.Answer = append(m.Answer, &dns.TXT{
|
||||
Hdr: dns.RR_Header{Name: state.QName(), Rrtype: dns.TypeTXT, Class: dns.ClassINET, Ttl: 5},
|
||||
Txt: []string{"100", "100", strconv.Itoa(int(sa.Port())), target, corepb.HealthStatus_name[int32(sa.Health)]}})
|
||||
m.Extra = append(m.Extra, &dns.TXT{Hdr: dns.RR_Header{Name: target, Rrtype: dns.TypeTXT, Class: dns.ClassINET, Ttl: 5}, Txt: []string{sa.Address().String()}})
|
||||
}
|
||||
default:
|
||||
m.Ns = soa(state.Zone)
|
||||
}
|
||||
@@ -144,7 +157,7 @@ func (t *Traffic) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
func (t *Traffic) serveEndpoint(ctx context.Context, state request.Request, endpoint, cluster string) (int, error) {
|
||||
func (t *Traffic) serveEndpoint(ctx context.Context, state request.Request, endpoint, cluster string, healthy bool) (int, error) {
|
||||
m := new(dns.Msg)
|
||||
m.SetReply(state.Req)
|
||||
m.Authoritative = true
|
||||
@@ -167,7 +180,7 @@ func (t *Traffic) serveEndpoint(ctx context.Context, state request.Request, endp
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
sockaddrs, _ := t.c.All(cluster, t.health)
|
||||
sockaddrs, _ := t.c.All(cluster, healthy)
|
||||
if len(sockaddrs) < nr {
|
||||
m.Ns = soa(state.Zone)
|
||||
m.Rcode = dns.RcodeNameError
|
||||
|
||||
@@ -76,6 +76,16 @@ func TestTraffic(t *testing.T) {
|
||||
},
|
||||
cluster: "web", qtype: dns.TypeA, rcode: dns.RcodeSuccess, answer: "127.0.0.2",
|
||||
},
|
||||
// unknown endpoint and healthy endpoint, TXT query
|
||||
{
|
||||
cla: &endpointpb.ClusterLoadAssignment{
|
||||
ClusterName: "web",
|
||||
Endpoints: endpoints([]EndpointHealth{
|
||||
{"127.0.0.1", 18008, corepb.HealthStatus_UNKNOWN},
|
||||
}),
|
||||
},
|
||||
cluster: "web", qtype: dns.TypeTXT, rcode: dns.RcodeSuccess, answer: "endpoint-0.web.lb.example.org.",
|
||||
},
|
||||
// SRV query healthy endpoint
|
||||
{
|
||||
cla: &endpointpb.ClusterLoadAssignment{
|
||||
@@ -144,6 +154,8 @@ func TestTraffic(t *testing.T) {
|
||||
addr = x.AAAA.String()
|
||||
case *dns.SRV:
|
||||
addr = x.Target
|
||||
case *dns.TXT:
|
||||
addr = x.Txt[3]
|
||||
}
|
||||
if tc.answer != addr {
|
||||
t.Errorf("Test %d: Expected answer %s, but got %s", i, tc.answer, addr)
|
||||
|
||||
@@ -9,9 +9,10 @@ import (
|
||||
endpointpb "github.com/envoyproxy/go-control-plane/envoy/config/endpoint/v3"
|
||||
)
|
||||
|
||||
// SocketAddress holds a corepb.SocketAddress.
|
||||
// SocketAddress holds a corepb.SocketAddress and a health status
|
||||
type SocketAddress struct {
|
||||
*corepb.SocketAddress
|
||||
Health corepb.HealthStatus
|
||||
}
|
||||
|
||||
// Address returns the address from s.
|
||||
@@ -71,38 +72,38 @@ func (a *assignment) clusters() []string {
|
||||
}
|
||||
|
||||
// Select selects a endpoint from cluster load assignments, using weighted random selection. It only selects endpoints that are reporting healthy.
|
||||
func (a *assignment) Select(cluster string, ignore bool) (*SocketAddress, bool) {
|
||||
func (a *assignment) Select(cluster string, healthy bool) (*SocketAddress, bool) {
|
||||
cla := a.ClusterLoadAssignment(cluster)
|
||||
if cla == nil {
|
||||
return nil, false
|
||||
}
|
||||
|
||||
weight := 0
|
||||
healthy := 0
|
||||
health := 0
|
||||
for _, ep := range cla.Endpoints {
|
||||
for _, lb := range ep.GetLbEndpoints() {
|
||||
if !ignore && lb.GetHealthStatus() != corepb.HealthStatus_HEALTHY {
|
||||
if healthy && lb.GetHealthStatus() != corepb.HealthStatus_HEALTHY {
|
||||
continue
|
||||
}
|
||||
weight += int(lb.GetLoadBalancingWeight().GetValue())
|
||||
healthy++
|
||||
health++
|
||||
}
|
||||
}
|
||||
if healthy == 0 {
|
||||
if health == 0 {
|
||||
return nil, true
|
||||
}
|
||||
|
||||
// all weights are 0, randomly select one of the endpoints,
|
||||
if weight == 0 {
|
||||
r := rand.Intn(healthy)
|
||||
r := rand.Intn(health)
|
||||
i := 0
|
||||
for _, ep := range cla.Endpoints {
|
||||
for _, lb := range ep.GetLbEndpoints() {
|
||||
if !ignore && lb.GetHealthStatus() != corepb.HealthStatus_HEALTHY {
|
||||
if healthy && lb.GetHealthStatus() != corepb.HealthStatus_HEALTHY {
|
||||
continue
|
||||
}
|
||||
if r == i {
|
||||
return &SocketAddress{lb.GetEndpoint().GetAddress().GetSocketAddress()}, true
|
||||
return &SocketAddress{lb.GetEndpoint().GetAddress().GetSocketAddress(), lb.GetHealthStatus()}, true
|
||||
}
|
||||
i++
|
||||
}
|
||||
@@ -110,15 +111,15 @@ func (a *assignment) Select(cluster string, ignore bool) (*SocketAddress, bool)
|
||||
return nil, true
|
||||
}
|
||||
|
||||
r := rand.Intn(healthy) + 1
|
||||
r := rand.Intn(health) + 1
|
||||
for _, ep := range cla.Endpoints {
|
||||
for _, lb := range ep.GetLbEndpoints() {
|
||||
if !ignore && lb.GetHealthStatus() != corepb.HealthStatus_HEALTHY {
|
||||
if healthy && lb.GetHealthStatus() != corepb.HealthStatus_HEALTHY {
|
||||
continue
|
||||
}
|
||||
r -= int(lb.GetLoadBalancingWeight().GetValue())
|
||||
if r <= 0 {
|
||||
return &SocketAddress{lb.GetEndpoint().GetAddress().GetSocketAddress()}, true
|
||||
return &SocketAddress{lb.GetEndpoint().GetAddress().GetSocketAddress(), lb.GetHealthStatus()}, true
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -126,7 +127,7 @@ func (a *assignment) Select(cluster string, ignore bool) (*SocketAddress, bool)
|
||||
}
|
||||
|
||||
// All returns all healthy endpoints.
|
||||
func (a *assignment) All(cluster string, ignore bool) ([]*SocketAddress, bool) {
|
||||
func (a *assignment) All(cluster string, healthy bool) ([]*SocketAddress, bool) {
|
||||
cla := a.ClusterLoadAssignment(cluster)
|
||||
if cla == nil {
|
||||
return nil, false
|
||||
@@ -135,10 +136,10 @@ func (a *assignment) All(cluster string, ignore bool) ([]*SocketAddress, bool) {
|
||||
sa := []*SocketAddress{}
|
||||
for _, ep := range cla.Endpoints {
|
||||
for _, lb := range ep.GetLbEndpoints() {
|
||||
if !ignore && lb.GetHealthStatus() != corepb.HealthStatus_HEALTHY {
|
||||
if healthy && lb.GetHealthStatus() != corepb.HealthStatus_HEALTHY {
|
||||
continue
|
||||
}
|
||||
sa = append(sa, &SocketAddress{lb.GetEndpoint().GetAddress().GetSocketAddress()})
|
||||
sa = append(sa, &SocketAddress{lb.GetEndpoint().GetAddress().GetSocketAddress(), lb.GetHealthStatus()})
|
||||
}
|
||||
}
|
||||
return sa, true
|
||||
|
||||
@@ -199,19 +199,19 @@ func (c *Client) receive(stream adsStream) error {
|
||||
|
||||
// Select returns an address that is deemed to be the correct one for this cluster. The returned
|
||||
// boolean indicates if the cluster exists.
|
||||
func (c *Client) Select(cluster string, ignore bool) (*SocketAddress, bool) {
|
||||
func (c *Client) Select(cluster string, healty bool) (*SocketAddress, bool) {
|
||||
if cluster == "" {
|
||||
return nil, false
|
||||
}
|
||||
return c.assignments.Select(cluster, ignore)
|
||||
return c.assignments.Select(cluster, healty)
|
||||
}
|
||||
|
||||
// All returns all endpoints.
|
||||
func (c *Client) All(cluster string, ignore bool) ([]*SocketAddress, bool) {
|
||||
func (c *Client) All(cluster string, healty bool) ([]*SocketAddress, bool) {
|
||||
if cluster == "" {
|
||||
return nil, false
|
||||
}
|
||||
return c.assignments.All(cluster, ignore)
|
||||
return c.assignments.All(cluster, healty)
|
||||
}
|
||||
|
||||
// Locality holds the locality for this server. It contains a Region, Zone and SubZone.
|
||||
|
||||
Reference in New Issue
Block a user