2019-10-05 11:45:45 +01:00
/*
This package contains code copied from github.com/grpc/grpc-go. The license for that code is:
Copyright 2019 gRPC authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package xds implements a bidirectional stream to an envoy ADS management endpoint. It will stream
// updates (CDS and EDS) from there to help load balance responses to DNS clients.
package xds
import (
"context"
"sync"
"github.com/coredns/coredns/coremain"
clog "github.com/coredns/coredns/plugin/pkg/log"
2020-06-04 13:36:27 +02:00
xdspb2 "github.com/envoyproxy/go-control-plane/envoy/api/v2"
corepb2 "github.com/envoyproxy/go-control-plane/envoy/api/v2/core"
adspb2 "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v2"
2019-10-05 11:45:45 +01:00
"github.com/golang/protobuf/ptypes"
"google.golang.org/grpc"
)
// log is the package-level logger; it is created with the plugin name "traffic: xds".
var log = clog . NewWithPlugin ( "traffic: xds" )
// Type URLs of the two resource kinds this client requests and receives on the stream.
const (
	// clusterType is the type URL used for cluster (CDS) requests and responses.
	clusterType = "type.googleapis.com/envoy.api.v2.Cluster"

	// endpointType is the type URL used for endpoint (EDS) requests and responses.
	endpointType = "type.googleapis.com/envoy.api.v2.ClusterLoadAssignment"
)
2020-06-04 13:36:27 +02:00
// adsStream is a local name for the generated StreamAggregatedResources client stream type; it is
// the stream parameter of clusterDiscovery, endpointDiscovery and receive.
type adsStream adspb2 . AggregatedDiscoveryService_StreamAggregatedResourcesClient
2019-10-05 11:45:45 +01:00
// Client talks to the grpc manager's endpoint to get load assignments.
type Client struct {
cc * grpc . ClientConn
ctx context . Context
2020-06-04 13:36:27 +02:00
node * corepb2 . Node
2019-10-05 11:45:45 +01:00
cancel context . CancelFunc
stop chan struct { }
2020-01-18 07:54:32 +01:00
to string // upstream hosts, mostly here for logging purposes
mu sync . RWMutex // protects everything below
assignments * assignment // assignments contains the current clusters and endpoints
version map [ string ] string
nonce map [ string ] string
synced bool // true when we first successfully got a stream
2019-10-05 11:45:45 +01:00
}
// New returns a new client that's dialed to addr using node as the local identifier.
func New ( addr , node string , opts ... grpc . DialOption ) ( * Client , error ) {
cc , err := grpc . Dial ( addr , opts ... )
if err != nil {
return nil , err
}
2020-06-04 13:36:27 +02:00
c := & Client { cc : cc , to : addr , node : & corepb2 . Node { Id : node , UserAgentName : "CoreDNS" , UserAgentVersionType : & corepb2 . Node_UserAgentVersion { UserAgentVersion : coremain . CoreVersion } } }
c . assignments = & assignment { cla : make ( map [ string ] * xdspb2 . ClusterLoadAssignment ) }
2019-10-05 11:45:45 +01:00
c . version , c . nonce = make ( map [ string ] string ) , make ( map [ string ] string )
c . ctx , c . cancel = context . WithCancel ( context . Background ( ) )
return c , nil
}
// Stop cancels the client's context and then closes the connection to the upstream manager. The
// error from closing the connection, if any, is returned.
func (c *Client) Stop() error {
	c.cancel()
	return c.cc.Close()
}
// Run starts all goroutines and gathers the clusters and endpoint information from the upstream manager.
2020-02-05 16:10:58 +01:00
func ( c * Client ) Run ( ) error {
2020-01-18 07:54:32 +01:00
first := true
2019-10-05 11:45:45 +01:00
for {
select {
case <- c . ctx . Done ( ) :
2020-02-05 16:10:58 +01:00
return nil
2019-10-05 11:45:45 +01:00
default :
}
2020-06-04 13:36:27 +02:00
cli := adspb2 . NewAggregatedDiscoveryServiceClient ( c . cc )
2019-10-05 11:45:45 +01:00
stream , err := cli . StreamAggregatedResources ( c . ctx )
if err != nil {
2020-02-05 16:10:58 +01:00
return err
2019-10-05 11:45:45 +01:00
}
2020-01-18 07:54:32 +01:00
if first {
2020-02-05 14:58:14 +01:00
// send first request, to create stream, then wait for ADS to send us updates.
2020-06-04 13:36:27 +02:00
if err := c . clusterDiscovery ( stream , c . Version ( clusterType ) , c . Nonce ( clusterType ) , [ ] string { } ) ; err != nil {
2020-03-05 11:02:54 +01:00
return err
2019-10-05 11:45:45 +01:00
}
2020-02-05 14:58:14 +01:00
log . Infof ( "gRPC stream established to %q" , c . to ) // might fail??
c . setSynced ( )
first = false
}
2019-10-05 11:45:45 +01:00
2020-01-18 07:54:32 +01:00
if err := c . receive ( stream ) ; err != nil {
2020-02-05 16:10:58 +01:00
return err
2019-10-05 11:45:45 +01:00
}
}
}
// clusterDiscovery sends a cluster DiscoveryRequest on the stream.
func ( c * Client ) clusterDiscovery ( stream adsStream , version , nonce string , clusters [ ] string ) error {
2020-06-04 13:36:27 +02:00
req := & xdspb2 . DiscoveryRequest {
2019-10-05 11:45:45 +01:00
Node : c . node ,
2020-06-04 13:36:27 +02:00
TypeUrl : clusterType ,
2019-10-05 11:45:45 +01:00
ResourceNames : clusters , // empty for all
VersionInfo : version ,
ResponseNonce : nonce ,
}
return stream . Send ( req )
}
// endpointDiscovery sends a endpoint DiscoveryRequest on the stream.
func ( c * Client ) endpointDiscovery ( stream adsStream , version , nonce string , clusters [ ] string ) error {
2020-06-04 13:36:27 +02:00
req := & xdspb2 . DiscoveryRequest {
2019-10-05 11:45:45 +01:00
Node : c . node ,
2020-06-04 13:36:27 +02:00
TypeUrl : endpointType ,
2019-10-05 11:45:45 +01:00
ResourceNames : clusters ,
VersionInfo : version ,
ResponseNonce : nonce ,
}
return stream . Send ( req )
}
2020-02-03 19:46:41 +01:00
// receive receives from the stream, it handles both cluster and endpoint DiscoveryResponses.
2020-01-18 07:54:32 +01:00
func ( c * Client ) receive ( stream adsStream ) error {
2019-10-05 11:45:45 +01:00
for {
resp , err := stream . Recv ( )
if err != nil {
return err
}
switch resp . GetTypeUrl ( ) {
2020-06-04 13:36:27 +02:00
case clusterType :
2019-10-05 11:45:45 +01:00
a := NewAssignment ( )
for _ , r := range resp . GetResources ( ) {
var any ptypes . DynamicAny
if err := ptypes . UnmarshalAny ( r , & any ) ; err != nil {
log . Debugf ( "Failed to unmarshal cluster discovery: %s" , err )
continue
}
2020-06-04 13:36:27 +02:00
cluster , ok := any . Message . ( * xdspb2 . Cluster )
2019-10-05 11:45:45 +01:00
if ! ok {
continue
}
a . SetClusterLoadAssignment ( cluster . GetName ( ) , nil )
}
// set our local administration and ack the reply. Empty version would signal NACK.
2020-06-04 13:36:27 +02:00
c . SetNonce ( clusterType , resp . GetNonce ( ) )
c . SetVersion ( clusterType , resp . GetVersionInfo ( ) )
2019-10-05 11:45:45 +01:00
c . SetAssignments ( a )
c . clusterDiscovery ( stream , resp . GetVersionInfo ( ) , resp . GetNonce ( ) , a . clusters ( ) )
2020-06-04 13:36:27 +02:00
log . Debugf ( "Cluster discovery processed with %d resources, version %q and nonce %q" , len ( resp . GetResources ( ) ) , c . Version ( clusterType ) , c . Nonce ( clusterType ) )
2020-02-03 20:45:26 +01:00
ClusterGauge . Set ( float64 ( len ( resp . GetResources ( ) ) ) )
2019-10-05 11:45:45 +01:00
// now kick off discovery for endpoints
2020-06-04 13:36:27 +02:00
if err := c . endpointDiscovery ( stream , c . Version ( endpointType ) , c . Nonce ( endpointType ) , a . clusters ( ) ) ; err != nil {
2019-10-05 11:45:45 +01:00
log . Debug ( err )
}
2020-06-04 13:36:27 +02:00
case endpointType :
2019-10-05 11:45:45 +01:00
for _ , r := range resp . GetResources ( ) {
var any ptypes . DynamicAny
if err := ptypes . UnmarshalAny ( r , & any ) ; err != nil {
log . Debugf ( "Failed to unmarshal endpoint discovery: %s" , err )
continue
}
2020-06-04 13:36:27 +02:00
cla , ok := any . Message . ( * xdspb2 . ClusterLoadAssignment )
2019-10-05 11:45:45 +01:00
if ! ok {
2020-02-05 14:58:14 +01:00
// TODO warn/err here?
2019-10-05 11:45:45 +01:00
continue
}
c . assignments . SetClusterLoadAssignment ( cla . GetClusterName ( ) , cla )
2020-02-05 14:58:14 +01:00
2019-10-05 11:45:45 +01:00
}
// set our local administration and ack the reply. Empty version would signal NACK.
2020-06-04 13:36:27 +02:00
c . SetNonce ( endpointType , resp . GetNonce ( ) )
c . SetVersion ( endpointType , resp . GetVersionInfo ( ) )
2019-10-05 11:45:45 +01:00
2020-06-04 13:36:27 +02:00
log . Debugf ( "Endpoint discovery processed with %d resources, version %q and nonce %q" , len ( resp . GetResources ( ) ) , c . Version ( endpointType ) , c . Nonce ( endpointType ) )
2020-02-03 20:45:26 +01:00
EndpointGauge . Set ( float64 ( len ( resp . GetResources ( ) ) ) )
2019-10-05 11:45:45 +01:00
default :
2020-06-04 13:36:27 +02:00
// ignore anything we don't know how to process. Probably should NACK these properly.
2019-10-05 11:45:45 +01:00
}
}
}
// Select returns an address that is deemed to be the correct one for this cluster. The returned
// boolean indicates if the cluster exists.
2020-03-06 09:13:27 +01:00
func ( c * Client ) Select ( cluster string , healty bool ) ( * SocketAddress , bool ) {
2019-10-05 11:45:45 +01:00
if cluster == "" {
2020-01-19 08:30:13 +01:00
return nil , false
2019-10-05 11:45:45 +01:00
}
2020-03-06 09:13:27 +01:00
return c . assignments . Select ( cluster , healty )
2019-10-05 11:45:45 +01:00
}
2020-01-19 08:30:13 +01:00
// All returns all endpoints.
2020-07-24 10:33:52 +02:00
func ( c * Client ) All ( cluster string , healty bool ) ( [ ] * SocketAddress , [ ] uint32 , bool ) {
2020-01-19 08:30:13 +01:00
if cluster == "" {
2020-07-24 10:33:52 +02:00
return nil , nil , false
2020-01-19 08:30:13 +01:00
}
2020-03-06 09:13:27 +01:00
return c . assignments . All ( cluster , healty )
2020-01-24 13:34:59 +01:00
}
// Locality holds the locality for this server, expressed as a Region, a Zone and a SubZone.
// Currently this is not used.
type Locality struct {
	Region, Zone, SubZone string
}