mirror of
				https://github.com/coredns/coredns.git
				synced 2025-10-31 02:03:20 -04:00 
			
		
		
		
	| @@ -44,10 +44,24 @@ The extended syntax is available is you want more control. | ||||
| traffic TO... { | ||||
|     server SERVER [SERVER]... | ||||
|     node ID | ||||
|     tls CERT KEY CA | ||||
|     tls_servername NAME | ||||
|  } | ||||
| ~~~ | ||||
|  | ||||
|  *  node **ID** is how *traffic* identifies itself to the control plane. This defaults to `coredns`. | ||||
| *  node **ID** is how *traffic* identifies itself to the control plane. This defaults to `coredns`. | ||||
| * `tls` **CERT** **KEY** **CA** define the TLS properties for gRPC connection. If this is omitted an | ||||
|   insecure connection is attempted. From 0 to 3 arguments can be provided with the meaning as described below: | ||||
|  | ||||
|   * `tls` - no client authentication is used, and the system CAs are used to verify the server certificate | ||||
|   * `tls` **CA** - no client authentication is used, and the file CA is used to verify the server certificate | ||||
|   * `tls` **CERT** **KEY** - client authentication is used with the specified cert/key pair. | ||||
|     The server certificate is verified with the system CAs. | ||||
|   * `tls` **CERT** **KEY**  **CA** - client authentication is used with the specified cert/key pair. | ||||
|     The server certificate is verified using the specified CA file. | ||||
|  | ||||
| * `tls_servername` **NAME** allows you to set a server name in the TLS configuration. This is needed | ||||
|   because *traffic* connects to an IP address, so it can't infer the server name from it. | ||||
|  | ||||
| ## Naming Clusters | ||||
|  | ||||
| @@ -57,6 +71,15 @@ domain names. For example if the Server Block specifies `lb.example.org` as one | ||||
| and "cluster-v0" is one of the load balanced clusters, *traffic* will respond to queries asking for | ||||
| `cluster-v0.lb.example.org.` and the same goes for `web`; `web.lb.example.org`. | ||||
|  | ||||
| ## Metrics | ||||
|  | ||||
| What metrics should we do? | ||||
|  | ||||
| ## Ready | ||||
|  | ||||
| Should this plugin implement readiness? | ||||
|  | ||||
|  | ||||
| ## Examples | ||||
|  | ||||
| ~~~ | ||||
| @@ -70,7 +93,7 @@ lb.example.org { | ||||
| ~~~ | ||||
|  | ||||
| This will load balance any names under `lb.example.org` using the data from the manager running on | ||||
| localhost on port 18000. The node ID will default to `coredns`. | ||||
| localhost on port 18000. The node ID will be `test-id` and no TLS will be used. | ||||
|  | ||||
| ## Also See | ||||
|  | ||||
| @@ -94,3 +117,12 @@ use this resolver. So reporting a load of +1 on the CoreDNS side can be anything | ||||
| making the load reporting highly inaccurate. | ||||
|  | ||||
| Support for multiple **TO** addresses is not implemented. | ||||
|  | ||||
| ## TODO | ||||
|  | ||||
| * reconnecting the stream | ||||
| * acking responses | ||||
| * correctly tracking versions and pruning old clusters. | ||||
| * metrics? | ||||
| * testing | ||||
| * credentials (other than TLS) | ||||
|   | ||||
| @@ -1,6 +1,7 @@ | ||||
| package traffic | ||||
|  | ||||
| import ( | ||||
| 	"crypto/tls" | ||||
| 	"fmt" | ||||
| 	"math/rand" | ||||
| 	"strings" | ||||
| @@ -10,10 +11,13 @@ import ( | ||||
| 	"github.com/coredns/coredns/plugin" | ||||
| 	clog "github.com/coredns/coredns/plugin/pkg/log" | ||||
| 	"github.com/coredns/coredns/plugin/pkg/parse" | ||||
| 	pkgtls "github.com/coredns/coredns/plugin/pkg/tls" | ||||
| 	"github.com/coredns/coredns/plugin/pkg/transport" | ||||
| 	"github.com/coredns/coredns/plugin/traffic/xds" | ||||
|  | ||||
| 	"github.com/caddyserver/caddy" | ||||
| 	"google.golang.org/grpc" | ||||
| 	"google.golang.org/grpc/credentials" | ||||
| ) | ||||
|  | ||||
| var log = clog.NewWithPlugin("traffic") | ||||
| @@ -32,23 +36,11 @@ func setup(c *caddy.Controller) error { | ||||
| 		return t | ||||
| 	}) | ||||
|  | ||||
| 	stream, err := t.c.Run() | ||||
| 	if err != nil { | ||||
| 		return plugin.Error("traffic", err) | ||||
| 	} | ||||
|  | ||||
| 	if err := t.c.ClusterDiscovery(stream, "", "", []string{}); err != nil { | ||||
| 		log.Error(err) | ||||
| 	} | ||||
|  | ||||
| 	go func() { | ||||
| 		err = t.c.Receive(stream) | ||||
| 		if err != nil { | ||||
| 			// can't do log debug in setup functions | ||||
| 			log.Debug(err) | ||||
| 		} | ||||
| 	}() | ||||
|  | ||||
| 	c.OnStartup(func() error { | ||||
| 		go t.c.Run() | ||||
| 		return nil | ||||
| 	}) | ||||
| 	c.OnShutdown(func() error { return t.c.Stop() }) | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| @@ -56,7 +48,11 @@ func parseTraffic(c *caddy.Controller) (*Traffic, error) { | ||||
| 	node := "coredns" | ||||
| 	toHosts := []string{} | ||||
| 	t := &Traffic{} | ||||
| 	var err error | ||||
| 	var ( | ||||
| 		err           error | ||||
| 		tlsConfig     *tls.Config | ||||
| 		tlsServerName string | ||||
| 	) | ||||
|  | ||||
| 	t.origins = make([]string, len(c.ServerBlockKeys)) | ||||
| 	for i := range c.ServerBlockKeys { | ||||
| @@ -88,14 +84,37 @@ func parseTraffic(c *caddy.Controller) (*Traffic, error) { | ||||
| 					return nil, c.ArgErr() | ||||
| 				} | ||||
| 				node = args[0] | ||||
| 			case "tls": | ||||
| 				args := c.RemainingArgs() | ||||
| 				if len(args) > 3 { | ||||
| 					return nil, c.ArgErr() | ||||
| 				} | ||||
|  | ||||
| 				tlsConfig, err = pkgtls.NewTLSConfigFromArgs(args...) | ||||
| 				if err != nil { | ||||
| 					return nil, err | ||||
| 				} | ||||
| 			case "tls_servername": | ||||
| 				if !c.NextArg() { | ||||
| 					return nil, c.ArgErr() | ||||
| 				} | ||||
| 				tlsServerName = c.Val() | ||||
| 			default: | ||||
| 				return nil, c.Errf("unknown property '%s'", c.Val()) | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	opts := []grpc.DialOption{grpc.WithInsecure()} | ||||
| 	if tlsConfig != nil { | ||||
| 		if tlsServerName != "" { | ||||
| 			tlsConfig.ServerName = tlsServerName | ||||
| 		} | ||||
| 		opts = []grpc.DialOption{grpc.WithTransportCredentials(credentials.NewTLS(tlsConfig))} | ||||
| 	} | ||||
|  | ||||
| 	// TODO: only the first host is used, need to figure out how to reconcile multiple upstream providers. | ||||
| 	if t.c, err = xds.New(toHosts[0], node); err != nil { | ||||
| 	if t.c, err = xds.New(toHosts[0], node, opts...); err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
|  | ||||
|   | ||||
| @@ -18,11 +18,9 @@ type Traffic struct { | ||||
| 	c       *xds.Client | ||||
| 	id      string | ||||
| 	origins []string | ||||
| 	Next    plugin.Handler | ||||
| } | ||||
|  | ||||
| // shutdown closes the connection to the managment endpoints and stops any running goroutines. | ||||
| func (t *Traffic) shutdown() { t.c.Close() } | ||||
| 	Next plugin.Handler | ||||
| } | ||||
|  | ||||
| // ServeDNS implements the plugin.Handler interface. | ||||
| func (t *Traffic) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (int, error) { | ||||
| @@ -38,7 +36,6 @@ func (t *Traffic) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg | ||||
| 		} | ||||
| 	} | ||||
| 	if cluster == "" { | ||||
| 		// TODO(miek): can this actually happen? | ||||
| 		return plugin.NextOrFailure(t.Name(), t.Next, ctx, w, r) | ||||
| 	} | ||||
|  | ||||
|   | ||||
| @@ -14,7 +14,7 @@ type assignment struct { | ||||
| 	version int // not sure what do with and if we should discard all clusters. | ||||
| } | ||||
|  | ||||
| func (a *assignment) SetClusterLoadAssignment(cluster string, cla *xdspb.ClusterLoadAssignment) { | ||||
| func (a *assignment) setClusterLoadAssignment(cluster string, cla *xdspb.ClusterLoadAssignment) { | ||||
| 	// If cla is nil we just found a cluster, check if we already know about it, or if we need to make a new entry. | ||||
| 	a.mu.Lock() | ||||
| 	defer a.mu.Unlock() | ||||
| @@ -30,8 +30,7 @@ func (a *assignment) SetClusterLoadAssignment(cluster string, cla *xdspb.Cluster | ||||
|  | ||||
| } | ||||
|  | ||||
| // ClusterLoadAssignment returns the healthy endpoints and their weight. | ||||
| func (a *assignment) ClusterLoadAssignment(cluster string) *xdspb.ClusterLoadAssignment { | ||||
| func (a *assignment) clusterLoadAssignment(cluster string) *xdspb.ClusterLoadAssignment { | ||||
| 	a.mu.RLock() | ||||
| 	cla, ok := a.cla[cluster] | ||||
| 	a.mu.RUnlock() | ||||
| @@ -41,7 +40,7 @@ func (a *assignment) ClusterLoadAssignment(cluster string) *xdspb.ClusterLoadAss | ||||
| 	return cla | ||||
| } | ||||
|  | ||||
| func (a *assignment) Clusters() []string { | ||||
| func (a *assignment) clusters() []string { | ||||
| 	a.mu.RLock() | ||||
| 	defer a.mu.RUnlock() | ||||
| 	clusters := make([]string, len(a.cla)) | ||||
| @@ -56,7 +55,7 @@ func (a *assignment) Clusters() []string { | ||||
| // Select selects a backend from cla, using weighted random selection. It only selects | ||||
| // backends that are reporting healthy. | ||||
| func (a *assignment) Select(cluster string) net.IP { | ||||
| 	cla := a.ClusterLoadAssignment(cluster) | ||||
| 	cla := a.clusterLoadAssignment(cluster) | ||||
| 	if cla == nil { | ||||
| 		return nil | ||||
| 	} | ||||
|   | ||||
| @@ -22,8 +22,10 @@ package xds | ||||
|  | ||||
| import ( | ||||
| 	"context" | ||||
| 	"fmt" | ||||
| 	"net" | ||||
| 	"os" | ||||
| 	"sync" | ||||
| 	"time" | ||||
|  | ||||
| 	"github.com/coredns/coredns/coremain" | ||||
| @@ -54,12 +56,12 @@ type Client struct { | ||||
| 	node        *corepb.Node | ||||
| 	cancel      context.CancelFunc | ||||
| 	stop        chan struct{} | ||||
| 	mu          sync.RWMutex | ||||
| 	nonce       string | ||||
| } | ||||
|  | ||||
| // New returns a new client that's dialed to addr using node as the local identifier. | ||||
| func New(addr, node string) (*Client, error) { | ||||
| 	// todo credentials! | ||||
| 	opts := []grpc.DialOption{grpc.WithInsecure()} | ||||
| func New(addr, node string, opts ...grpc.DialOption) (*Client, error) { | ||||
| 	cc, err := grpc.Dial(addr, opts...) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| @@ -82,21 +84,54 @@ func New(addr, node string) (*Client, error) { | ||||
| 	return c, nil | ||||
| } | ||||
|  | ||||
| // Close closes a client performs cleanups. | ||||
| func (c *Client) Close() { c.cancel(); c.cc.Close() } | ||||
| // Stop stops all goroutines and closes the connection to the upstream manager. | ||||
| func (c *Client) Stop() error { c.cancel(); return c.cc.Close() } | ||||
|  | ||||
| // Run runs the gRPC stream to the manager. | ||||
| func (c *Client) Run() (adsgrpc.AggregatedDiscoveryService_StreamAggregatedResourcesClient, error) { | ||||
| 	cli := adsgrpc.NewAggregatedDiscoveryServiceClient(c.cc) | ||||
| 	stream, err := cli.StreamAggregatedResources(c.ctx) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| // Run starts all goroutines and gathers the clusters and endpoint information from the upstream manager. | ||||
| func (c *Client) Run() { | ||||
| 	for { | ||||
| 		select { | ||||
| 		case <-c.ctx.Done(): | ||||
| 			return | ||||
| 		default: | ||||
| 		} | ||||
|  | ||||
| 		cli := adsgrpc.NewAggregatedDiscoveryServiceClient(c.cc) | ||||
| 		stream, err := cli.StreamAggregatedResources(c.ctx) | ||||
| 		if err != nil { | ||||
| 			log.Debug(err) | ||||
| 			time.Sleep(2 * time.Second) // grpc's client.go does more spiffy exp. backoff, do we really need that? | ||||
| 			continue | ||||
| 		} | ||||
|  | ||||
| 		done := make(chan struct{}) | ||||
| 		go func() { | ||||
| 			tick := time.NewTicker(10 * time.Second) | ||||
| 			for { | ||||
| 				select { | ||||
| 				case <-tick.C: | ||||
| 					// send empty list for cluster discovery again and again | ||||
| 					log.Debugf("Requesting cluster list, nonce %q:", c.Nonce()) | ||||
| 					if err := c.clusterDiscovery(stream, "", c.Nonce(), []string{}); err != nil { | ||||
| 						log.Debug(err) | ||||
| 					} | ||||
|  | ||||
| 				case <-done: | ||||
| 					tick.Stop() | ||||
| 					return | ||||
| 				} | ||||
| 			} | ||||
| 		}() | ||||
|  | ||||
| 		if err := c.Receive(stream); err != nil { | ||||
| 			log.Debug(err) | ||||
| 		} | ||||
| 		close(done) | ||||
| 	} | ||||
| 	return stream, nil | ||||
| } | ||||
|  | ||||
| // ClusterDiscovery sends a cluster DiscoveryRequest on the stream. | ||||
| func (c *Client) ClusterDiscovery(stream adsStream, version, nonce string, clusters []string) error { | ||||
| // clusterDiscovery sends a cluster DiscoveryRequest on the stream. | ||||
| func (c *Client) clusterDiscovery(stream adsStream, version, nonce string, clusters []string) error { | ||||
| 	req := &xdspb.DiscoveryRequest{ | ||||
| 		Node:          c.node, | ||||
| 		TypeUrl:       cdsURL, | ||||
| @@ -107,8 +142,8 @@ func (c *Client) ClusterDiscovery(stream adsStream, version, nonce string, clust | ||||
| 	return stream.Send(req) | ||||
| } | ||||
|  | ||||
| // EndpointDiscovery sends a endpoint DiscoveryRequest on the stream. | ||||
| func (c *Client) EndpointDiscovery(stream adsStream, version, nonce string, clusters []string) error { | ||||
| // endpointDiscovery sends a endpoint DiscoveryRequest on the stream. | ||||
| func (c *Client) endpointDiscovery(stream adsStream, version, nonce string, clusters []string) error { | ||||
| 	req := &xdspb.DiscoveryRequest{ | ||||
| 		Node:          c.node, | ||||
| 		TypeUrl:       edsURL, | ||||
| @@ -124,8 +159,7 @@ func (c *Client) Receive(stream adsStream) error { | ||||
| 	for { | ||||
| 		resp, err := stream.Recv() | ||||
| 		if err != nil { | ||||
| 			log.Warningf("Trouble receiving from the gRPC connection: %s", err) | ||||
| 			time.Sleep(10 * time.Second) // better. | ||||
| 			return err | ||||
| 		} | ||||
|  | ||||
| 		switch resp.GetTypeUrl() { | ||||
| @@ -133,25 +167,30 @@ func (c *Client) Receive(stream adsStream) error { | ||||
| 			for _, r := range resp.GetResources() { | ||||
| 				var any ptypes.DynamicAny | ||||
| 				if err := ptypes.UnmarshalAny(r, &any); err != nil { | ||||
| 					log.Debugf("Failed to unmarshal cluster discovery: %s", err) | ||||
| 					continue | ||||
| 				} | ||||
| 				cluster, ok := any.Message.(*xdspb.Cluster) | ||||
| 				if !ok { | ||||
| 					continue | ||||
| 				} | ||||
| 				c.assignments.SetClusterLoadAssignment(cluster.GetName(), nil) | ||||
| 				c.assignments.setClusterLoadAssignment(cluster.GetName(), nil) | ||||
| 			} | ||||
| 			log.Debugf("Cluster discovery processed with %d resources", len(resp.GetResources())) | ||||
|  | ||||
| 			// ack the CDS proto, with what we've got. (empty version would be NACK) | ||||
| 			if err := c.ClusterDiscovery(stream, resp.GetVersionInfo(), resp.GetNonce(), c.assignments.Clusters()); err != nil { | ||||
| 				log.Warningf("Failed to acknowledge cluster discovery: %s", err) | ||||
| 			if err := c.clusterDiscovery(stream, resp.GetVersionInfo(), resp.GetNonce(), c.assignments.clusters()); err != nil { | ||||
| 				log.Debug(err) | ||||
| 				continue | ||||
| 			} | ||||
| 			// need to figure out how to handle the versions and nonces exactly. | ||||
|  | ||||
| 			// now kick off discovery for endpoints | ||||
| 			if err := c.EndpointDiscovery(stream, "", "", c.assignments.Clusters()); err != nil { | ||||
| 				log.Warningf("Failed to perform endpoint discovery: %s", err) | ||||
| 			if err := c.endpointDiscovery(stream, "", resp.GetNonce(), c.assignments.clusters()); err != nil { | ||||
| 				log.Debug(err) | ||||
| 				continue | ||||
| 			} | ||||
| 			c.SetNonce(resp.GetNonce()) | ||||
|  | ||||
| 		case edsURL: | ||||
| 			for _, r := range resp.GetResources() { | ||||
| @@ -162,17 +201,15 @@ func (c *Client) Receive(stream adsStream) error { | ||||
| 				} | ||||
| 				cla, ok := any.Message.(*xdspb.ClusterLoadAssignment) | ||||
| 				if !ok { | ||||
| 					log.Debugf("Unexpected resource type: %T in endpoint discovery", any.Message) | ||||
| 					continue | ||||
| 				} | ||||
| 				c.assignments.SetClusterLoadAssignment(cla.GetClusterName(), cla) | ||||
| 				c.assignments.setClusterLoadAssignment(cla.GetClusterName(), cla) | ||||
| 				// ack the bloody thing | ||||
| 			} | ||||
| 			log.Debugf("Endpoint discovery processed with %d resources", len(resp.GetResources())) | ||||
|  | ||||
| 		default: | ||||
| 			log.Warningf("Unknown response URL for discovery: %q", resp.GetTypeUrl()) | ||||
| 			continue | ||||
| 			return fmt.Errorf("unknown response URL for discovery: %q", resp.GetTypeUrl()) | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user