From 9683de0feb7c2180bab187cb62feda04963b6bb4 Mon Sep 17 00:00:00 2001 From: Fitz_dev Date: Sat, 13 Sep 2025 05:45:01 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20No=20failover=20to=20next=20upstream=20w?= =?UTF-8?q?hen=20receiving=20SERVFAIL=20or=20REFUSED=20response=20codes?= =?UTF-8?q?=EF=BC=88#7457=EF=BC=89=20(#7458)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- plugin/forward/README.md | 13 +++++++ plugin/forward/forward.go | 16 ++++++++ plugin/forward/setup.go | 21 +++++++++++ plugin/forward/setup_test.go | 73 ++++++++++++++++++++++++++++++++++++ plugin/pkg/dnstest/server.go | 43 +++++++++++++++++++++ 5 files changed, 166 insertions(+) diff --git a/plugin/forward/README.md b/plugin/forward/README.md index 33f4e60f6..692ad8f39 100644 --- a/plugin/forward/README.md +++ b/plugin/forward/README.md @@ -52,6 +52,7 @@ forward FROM TO... { max_concurrent MAX next RCODE_1 [RCODE_2] [RCODE_3...] failfast_all_unhealthy_upstreams + failover RCODE_1 [RCODE_2] [RCODE_3...] } ~~~ @@ -99,6 +100,7 @@ forward FROM TO... { As an upper bound for **MAX**, consider that each concurrent query will use about 2kb of memory. * `next` If the `RCODE` (i.e. `NXDOMAIN`) is returned by the remote then execute the next plugin. If no next plugin is defined, or the next plugin is not a `forward` plugin, this setting is ignored * `failfast_all_unhealthy_upstreams` - determines the handling of requests when all upstream servers are unhealthy and unresponsive to health checks. Enabling this option will immediately return SERVFAIL responses for all requests. By default, requests are sent to a random upstream. +* `failover` - By default when a DNS lookup fails to return a DNS response (e.g. timeout), _forward_ will attempt a lookup on the next upstream server. The `failover` option will make _forward_ do the same for any response with a response code matching an `RCODE` ( e.g. `SERVFAIL`、`REFUSED`). If all upstreams have been tried, the response from the last attempt is returned. Also note the TLS config is "global" for the whole forwarding proxy if you need a different `tls_servername` for different upstreams you're out of luck. @@ -287,6 +289,17 @@ The following would try 1.2.3.4 first. If the response is `NXDOMAIN`, try 5.6.7. } ~~~ +In the following example, if the response from `1.2.3.4` is `SERVFAIL` or `REFUSED`, it will try `5.6.7.8`. If the response from `5.6.7.8` is `SERVFAIL ` or `REFUSED`, it will try `9.0.1.2`. + +~~~ corefile +. { + forward . 1.2.3.4 5.6.7.8 9.0.1.2 { + policy sequential + failover SERVFAIL REFUSED + } +} +~~~ + ## See Also [RFC 7858](https://tools.ietf.org/html/rfc7858) for DNS over TLS. diff --git a/plugin/forward/forward.go b/plugin/forward/forward.go index 0c95bafdf..cec1adb9c 100644 --- a/plugin/forward/forward.go +++ b/plugin/forward/forward.go @@ -51,6 +51,7 @@ type Forward struct { expire time.Duration maxConcurrent int64 failfastUnhealthyUpstreams bool + failoverRcodes []int opts proxyPkg.Options // also here for testing @@ -206,6 +207,21 @@ func (f *Forward) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg return 0, nil } + // Check if we have a failover Rcode defined, check if we match on the code + tryNext := false + for _, failoverRcode := range f.failoverRcodes { + // if we match, we continue to the next upstream in the list + if failoverRcode == ret.Rcode { + if fails < len(f.proxies) { + tryNext = true + } + } + } + if tryNext { + fails++ + continue + } + // Check if we have an alternate Rcode defined, check if we match on the code for _, alternateRcode := range f.nextAlternateRcodes { if alternateRcode == ret.Rcode && f.Next != nil { // In case we do not have a Next handler, just continue normally diff --git a/plugin/forward/setup.go b/plugin/forward/setup.go index f0301c343..7ffcef0d1 100644 --- a/plugin/forward/setup.go +++ b/plugin/forward/setup.go @@ -312,6 +312,27 @@ func parseBlock(c *caddy.Controller, f *Forward) error { return c.ArgErr() } f.failfastUnhealthyUpstreams = true + case "failover": + args := c.RemainingArgs() + if len(args) == 0 { + return c.ArgErr() + } + toRcode := dns.StringToRcode + + for _, rcode := range args { + var rc int + var ok bool + + if rc, ok = toRcode[strings.ToUpper(rcode)]; !ok { + if rc == dns.RcodeSuccess { + return fmt.Errorf("NoError cannot be used in failover") + } + + return fmt.Errorf("%s is not a valid rcode", rcode) + } + + f.failoverRcodes = append(f.failoverRcodes, rc) + } default: return c.Errf("unknown property '%s'", c.Val()) } diff --git a/plugin/forward/setup_test.go b/plugin/forward/setup_test.go index c45ccce85..bfa51e161 100644 --- a/plugin/forward/setup_test.go +++ b/plugin/forward/setup_test.go @@ -1,6 +1,8 @@ package forward import ( + "context" + "fmt" "os" "reflect" "strings" @@ -8,7 +10,9 @@ import ( "github.com/coredns/caddy" "github.com/coredns/coredns/core/dnsserver" + "github.com/coredns/coredns/plugin/pkg/dnstest" "github.com/coredns/coredns/plugin/pkg/proxy" + "github.com/coredns/coredns/plugin/test" "github.com/miekg/dns" ) @@ -423,3 +427,72 @@ func TestFailfastAllUnhealthyUpstreams(t *testing.T) { } } } + +func TestFailover(t *testing.T) { + + server_fail_s := dnstest.NewMultipleServer(func(w dns.ResponseWriter, r *dns.Msg) { + ret := new(dns.Msg) + ret.SetRcode(r, dns.RcodeServerFailure) + w.WriteMsg(ret) + }) + defer server_fail_s.Close() + + server_refused_s := dnstest.NewMultipleServer(func(w dns.ResponseWriter, r *dns.Msg) { + ret := new(dns.Msg) + ret.SetRcode(r, dns.RcodeRefused) + w.WriteMsg(ret) + }) + defer server_refused_s.Close() + + s := dnstest.NewMultipleServer(func(w dns.ResponseWriter, r *dns.Msg) { + ret := new(dns.Msg) + ret.SetReply(r) + ret.Answer = append(ret.Answer, test.A("example.org. IN A 127.0.0.1")) + w.WriteMsg(ret) + }) + defer s.Close() + + tests := []struct { + input string + hasRecord bool + failMsg string + }{ + {fmt.Sprintf( + `forward . %s %s %s { + policy sequential + failover ServFail Refused + }`, server_fail_s.Addr, server_refused_s.Addr, s.Addr), true, "If failover is set, records should be returned as long as one of the upstreams is work"}, + {fmt.Sprintf( + `forward . %s %s %s { + policy sequential + }`, server_fail_s.Addr, server_refused_s.Addr, s.Addr), false, "If failover is not set and the first upstream is not work, no records should be returned"}, + {fmt.Sprintf( + `forward . %s %s %s { + policy sequential + }`, s.Addr, server_fail_s.Addr, server_refused_s.Addr), true, "Although failover is not set, as long as the first upstream is work, there should be has a record return"}, + } + + for _, testCase := range tests { + c := caddy.NewTestController("dns", testCase.input) + fs, err := parseForward(c) + + f := fs[0] + if err != nil { + t.Errorf("Failed to create forwarder: %s", err) + } + f.OnStartup() + defer f.OnShutdown() + + m := new(dns.Msg) + m.SetQuestion("example.org.", dns.TypeA) + rec := dnstest.NewRecorder(&test.ResponseWriter{}) + + if _, err := f.ServeDNS(context.TODO(), rec, m); err != nil { + t.Fatal("Expected to receive reply, but didn't") + } + + if (len(rec.Msg.Answer) > 0) != testCase.hasRecord { + t.Errorf(" %s: \n %s", testCase.failMsg, testCase.input) + } + } +} diff --git a/plugin/pkg/dnstest/server.go b/plugin/pkg/dnstest/server.go index 78f2b8135..f247f7466 100644 --- a/plugin/pkg/dnstest/server.go +++ b/plugin/pkg/dnstest/server.go @@ -58,6 +58,49 @@ func NewServer(f dns.HandlerFunc) *Server { return &Server{s1: s1, s2: s2, Addr: s2.Listener.Addr().String()} } +// NewMultipleServer starts and returns a new Server(multiple). The caller should call Close when +// finished, to shut it down. +func NewMultipleServer(f dns.HandlerFunc) *Server { + ch1 := make(chan bool) + ch2 := make(chan bool) + + s1 := &dns.Server{ + Handler: dns.HandlerFunc(f), + } // udp + s2 := &dns.Server{ + Handler: dns.HandlerFunc(f), + } // tcp + + for range 5 { // 5 attempts + s2.Listener, _ = reuseport.Listen("tcp", ":0") + if s2.Listener == nil { + continue + } + + s1.PacketConn, _ = net.ListenPacket("udp", s2.Listener.Addr().String()) + if s1.PacketConn != nil { + break + } + + // perhaps UPD port is in use, try again + s2.Listener.Close() + s2.Listener = nil + } + if s2.Listener == nil { + panic("dnstest.NewServer(): failed to create new server") + } + + s1.NotifyStartedFunc = func() { close(ch1) } + s2.NotifyStartedFunc = func() { close(ch2) } + go s1.ActivateAndServe() + go s2.ActivateAndServe() + + <-ch1 + <-ch2 + + return &Server{s1: s1, s2: s2, Addr: s2.Listener.Addr().String()} +} + // Close shuts down the server. func (s *Server) Close() { s.s1.Shutdown()