mirror of
https://github.com/coredns/coredns.git
synced 2025-12-11 12:55:11 -05:00
Add optional show_first flag to consolidate directive that logs the first error immediately and then consolidates subsequent errors. When show_first is enabled: - The first matching error is logged immediately with full details (rcode, domain, type, error message) using the configured log level - Subsequent matching errors are consolidated during the period - At period end: - If only one error occurred, no summary is printed (already logged) - If multiple errors occurred, summary shows the total count Syntax: consolidate DURATION REGEXP [LEVEL] [show_first] Example with 3 errors: [WARNING] 2 example.org. A: read udp 10.0.0.1:53->8.8.8.8:53: i/o timeout [WARNING] 3 errors like '^read udp .* i/o timeout$' occurred in last 30s Example with 1 error: [WARNING] 2 example.org. A: read udp 10.0.0.1:53->8.8.8.8:53: i/o timeout Implementation details: - Add showFirst bool to pattern struct - Rename inc() to consolidateError(), return false for showFirst case - Use function pointer in ServeDNS to unify log calls with proper level - Simplify logPattern() with single condition (cnt > 1 || !showFirst) - Refactor parseLogLevel() to parseOptionalParams() with map-based dispatch - Validate parameter order: log level must come before show_first - Update README.md with show_first documentation and examples - Add comprehensive test cases for show_first functionality Signed-off-by: cangming <cangming@cangming.app>
This commit is contained in:
@@ -23,7 +23,7 @@ Extra knobs are available with an expanded syntax:
|
||||
~~~
|
||||
errors {
|
||||
stacktrace
|
||||
consolidate DURATION REGEXP [LEVEL]
|
||||
consolidate DURATION REGEXP [LEVEL] [show_first]
|
||||
}
|
||||
~~~
|
||||
|
||||
@@ -35,6 +35,21 @@ log level, which is configurable by optional option **LEVEL**. Supported options
|
||||
2 errors like '^read udp .* i/o timeout$' occurred in last 30s
|
||||
~~~
|
||||
|
||||
If the optional `show_first` flag is specified, the first error will be logged immediately when it occurs, and then subsequent matching errors will be consolidated. When the consolidation period ends:
|
||||
- If only one error occurred, no summary is printed (since it was already logged)
|
||||
- If multiple errors occurred, a summary is printed showing the total count
|
||||
|
||||
Example with 3 errors:
|
||||
~~~
|
||||
[WARNING] 2 example.org. A: read udp 10.0.0.1:53->8.8.8.8:53: i/o timeout
|
||||
[WARNING] 3 errors like '^read udp .* i/o timeout$' occurred in last 30s
|
||||
~~~
|
||||
|
||||
Example with 1 error:
|
||||
~~~
|
||||
[WARNING] 2 example.org. A: read udp 10.0.0.1:53->8.8.8.8:53: i/o timeout
|
||||
~~~
|
||||
|
||||
Multiple `consolidate` options with different **DURATION** and **REGEXP** are allowed. In case if some error message corresponds to several defined regular expressions the message will be associated with the first appropriate **REGEXP**.
|
||||
|
||||
For better performance, it's recommended to use the `^` or `$` metacharacters in regular expression when filtering error messages by prefix or suffix, e.g. `^failed to .*`, or `.* timeout$`.
|
||||
@@ -63,3 +78,16 @@ and errors with prefix "Failed to " as errors.
|
||||
}
|
||||
}
|
||||
~~~
|
||||
|
||||
Use the *forward* plugin and consolidate timeout errors with `show_first` option to see both
|
||||
the summary and the first occurrence of the error:
|
||||
|
||||
~~~ corefile
|
||||
. {
|
||||
forward . 8.8.8.8
|
||||
errors {
|
||||
consolidate 5m ".* i/o timeout$" warning show_first
|
||||
consolidate 30s "^Failed to .+" error show_first
|
||||
}
|
||||
}
|
||||
~~~
|
||||
|
||||
@@ -23,6 +23,7 @@ type pattern struct {
|
||||
period time.Duration
|
||||
pattern *regexp.Regexp
|
||||
logCallback func(format string, v ...any)
|
||||
showFirst bool
|
||||
}
|
||||
|
||||
func (p *pattern) timer() *time.Timer {
|
||||
@@ -46,17 +47,24 @@ func newErrorHandler() *errorHandler {
|
||||
|
||||
func (h *errorHandler) logPattern(i int) {
|
||||
cnt := atomic.SwapUint32(&h.patterns[i].count, 0)
|
||||
if cnt > 0 {
|
||||
if cnt == 0 {
|
||||
return
|
||||
}
|
||||
if cnt > 1 || !h.patterns[i].showFirst {
|
||||
h.patterns[i].logCallback("%d errors like '%s' occurred in last %s",
|
||||
cnt, h.patterns[i].pattern.String(), h.patterns[i].period)
|
||||
}
|
||||
}
|
||||
|
||||
func (h *errorHandler) inc(i int) bool {
|
||||
// consolidateError records an error occurrence for pattern i.
|
||||
// Returns false when cnt == 1 and showFirst is configured, so the error
|
||||
// will be printed by the caller using the pattern's logCallback.
|
||||
func (h *errorHandler) consolidateError(i int) bool {
|
||||
if atomic.LoadUint32(&h.stopFlag) > 0 {
|
||||
return false
|
||||
}
|
||||
if atomic.AddUint32(&h.patterns[i].count, 1) == 1 {
|
||||
cnt := atomic.AddUint32(&h.patterns[i].count, 1)
|
||||
if cnt == 1 {
|
||||
ind := i
|
||||
t := time.AfterFunc(h.patterns[ind].period, func() {
|
||||
h.logPattern(ind)
|
||||
@@ -65,6 +73,9 @@ func (h *errorHandler) inc(i int) bool {
|
||||
if atomic.LoadUint32(&h.stopFlag) > 0 && t.Stop() {
|
||||
h.logPattern(ind)
|
||||
}
|
||||
// If showFirst is enabled, return false so the first error
|
||||
// will be printed by the caller using the pattern's logCallback
|
||||
return !h.patterns[i].showFirst
|
||||
}
|
||||
return true
|
||||
}
|
||||
@@ -85,16 +96,26 @@ func (h *errorHandler) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dn
|
||||
|
||||
if err != nil {
|
||||
strErr := err.Error()
|
||||
state := request.Request{W: w, Req: r}
|
||||
|
||||
// Default to error logging
|
||||
logFunc := log.Errorf
|
||||
|
||||
for i := range h.patterns {
|
||||
if h.patterns[i].pattern.MatchString(strErr) {
|
||||
if h.inc(i) {
|
||||
if h.consolidateError(i) {
|
||||
// Error is consolidated, no need to log
|
||||
return rcode, err
|
||||
}
|
||||
// consolidateError returned false (showFirst case)
|
||||
// Use the pattern's configured log level
|
||||
logFunc = h.patterns[i].logCallback
|
||||
break
|
||||
}
|
||||
}
|
||||
state := request.Request{W: w, Req: r}
|
||||
log.Errorf("%d %s %s: %s", rcode, state.Name(), state.Type(), strErr)
|
||||
|
||||
// Log with the appropriate log level
|
||||
logFunc("%d %s %s: %s", rcode, state.Name(), state.Type(), strErr)
|
||||
}
|
||||
|
||||
return rcode, err
|
||||
|
||||
@@ -134,13 +134,13 @@ func TestInc(t *testing.T) {
|
||||
}},
|
||||
}
|
||||
|
||||
ret := h.inc(0)
|
||||
ret := h.consolidateError(0)
|
||||
if ret {
|
||||
t.Error("Unexpected return value, expected false, actual true")
|
||||
}
|
||||
|
||||
h.stopFlag = 0
|
||||
ret = h.inc(0)
|
||||
ret = h.consolidateError(0)
|
||||
if !ret {
|
||||
t.Error("Unexpected return value, expected true, actual false")
|
||||
}
|
||||
@@ -156,7 +156,7 @@ func TestInc(t *testing.T) {
|
||||
t.Error("Unexpected 'timer', expected not nil")
|
||||
}
|
||||
|
||||
ret = h.inc(0)
|
||||
ret = h.consolidateError(0)
|
||||
if !ret {
|
||||
t.Error("Unexpected return value, expected true, actual false")
|
||||
}
|
||||
@@ -194,13 +194,14 @@ func TestStop(t *testing.T) {
|
||||
}},
|
||||
}
|
||||
|
||||
h.inc(0)
|
||||
h.inc(0)
|
||||
h.inc(0)
|
||||
h.consolidateError(0)
|
||||
h.consolidateError(0)
|
||||
h.consolidateError(0)
|
||||
expCnt := uint32(3)
|
||||
actCnt := atomic.LoadUint32(&h.patterns[0].count)
|
||||
if actCnt != expCnt {
|
||||
t.Fatalf("Unexpected initial 'count', expected %d, actual %d", expCnt, actCnt)
|
||||
t.Errorf("Unexpected initial 'count', expected %d, actual %d", expCnt, actCnt)
|
||||
return
|
||||
}
|
||||
|
||||
h.stop()
|
||||
@@ -230,6 +231,94 @@ func TestStop(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestShowFirst(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
errorCount int
|
||||
expectSummary string
|
||||
shouldHaveSummary bool
|
||||
}{
|
||||
{
|
||||
name: "multiple errors",
|
||||
errorCount: 3,
|
||||
expectSummary: "3 errors like '^error.*!$' occurred in last 2s",
|
||||
shouldHaveSummary: true,
|
||||
},
|
||||
{
|
||||
name: "single error",
|
||||
errorCount: 1,
|
||||
expectSummary: "",
|
||||
shouldHaveSummary: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
buf := bytes.Buffer{}
|
||||
clog.D.Set()
|
||||
golog.SetOutput(&buf)
|
||||
|
||||
h := &errorHandler{
|
||||
patterns: []*pattern{{
|
||||
count: 0,
|
||||
period: 2 * time.Second,
|
||||
pattern: regexp.MustCompile("^error.*!$"),
|
||||
logCallback: log.Errorf,
|
||||
showFirst: true,
|
||||
}},
|
||||
}
|
||||
|
||||
// Add errors and verify return values
|
||||
for i := range tt.errorCount {
|
||||
ret := h.consolidateError(0)
|
||||
if i == 0 {
|
||||
// First call should return false (showFirst enabled)
|
||||
if ret {
|
||||
t.Errorf("First consolidateError call: expected false, got true")
|
||||
}
|
||||
// Simulate ServeDNS logging with pattern's logCallback
|
||||
h.patterns[0].logCallback("2 example.org. A: error %d!", i+1)
|
||||
} else {
|
||||
// Subsequent calls should return true (consolidated)
|
||||
if !ret {
|
||||
t.Errorf("consolidateError call %d: expected true, got false", i+1)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check count
|
||||
expCnt := uint32(tt.errorCount)
|
||||
actCnt := atomic.LoadUint32(&h.patterns[0].count)
|
||||
if actCnt != expCnt {
|
||||
t.Errorf("Unexpected 'count', expected %d, actual %d", expCnt, actCnt)
|
||||
return
|
||||
}
|
||||
|
||||
// Check that first error was logged
|
||||
output1 := buf.String()
|
||||
if !strings.Contains(output1, "2 example.org. A: error 1!") {
|
||||
t.Errorf("Expected first error to be logged, but got: %q", output1)
|
||||
}
|
||||
|
||||
// Clear buffer and trigger log pattern
|
||||
buf.Reset()
|
||||
h.logPattern(0)
|
||||
|
||||
// Verify summary in logPattern output
|
||||
output2 := buf.String()
|
||||
if tt.shouldHaveSummary {
|
||||
if !strings.Contains(output2, tt.expectSummary) {
|
||||
t.Errorf("Expected summary %q not found in logPattern output: %q", tt.expectSummary, output2)
|
||||
}
|
||||
} else {
|
||||
if strings.Contains(output2, "errors like") {
|
||||
t.Errorf("Did not expect summary for single error, but got: %q", output2)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func genErrorHandler(rcode int, err error) plugin.Handler {
|
||||
return plugin.HandlerFunc(func(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (int, error) {
|
||||
return rcode, err
|
||||
|
||||
@@ -71,7 +71,7 @@ func errorsParse(c *caddy.Controller) (*errorHandler, error) {
|
||||
|
||||
func parseConsolidate(c *caddy.Controller) (*pattern, error) {
|
||||
args := c.RemainingArgs()
|
||||
if len(args) < 2 || len(args) > 3 {
|
||||
if len(args) < 2 || len(args) > 4 {
|
||||
return nil, c.ArgErr()
|
||||
}
|
||||
p, err := time.ParseDuration(args[0])
|
||||
@@ -82,28 +82,48 @@ func parseConsolidate(c *caddy.Controller) (*pattern, error) {
|
||||
if err != nil {
|
||||
return nil, c.Err(err.Error())
|
||||
}
|
||||
lc, err := parseLogLevel(c, args)
|
||||
|
||||
lc, showFirst, err := parseOptionalParams(c, args[2:])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &pattern{period: p, pattern: re, logCallback: lc}, nil
|
||||
|
||||
return &pattern{period: p, pattern: re, logCallback: lc, showFirst: showFirst}, nil
|
||||
}
|
||||
|
||||
func parseLogLevel(c *caddy.Controller, args []string) (func(format string, v ...any), error) {
|
||||
if len(args) != 3 {
|
||||
return log.Errorf, nil
|
||||
// parseOptionalParams parses optional parameters (log level and show_first flag).
|
||||
// Order: log level (optional) must come before show_first (optional).
|
||||
func parseOptionalParams(c *caddy.Controller, args []string) (func(format string, v ...any), bool, error) {
|
||||
logLevels := map[string]func(format string, v ...any){
|
||||
"warning": log.Warningf,
|
||||
"error": log.Errorf,
|
||||
"info": log.Infof,
|
||||
"debug": log.Debugf,
|
||||
}
|
||||
|
||||
switch args[2] {
|
||||
case "warning":
|
||||
return log.Warningf, nil
|
||||
case "error":
|
||||
return log.Errorf, nil
|
||||
case "info":
|
||||
return log.Infof, nil
|
||||
case "debug":
|
||||
return log.Debugf, nil
|
||||
default:
|
||||
return nil, c.Errf("unknown log level argument in consolidate: %s", args[2])
|
||||
var logCallback func(format string, v ...any) // nil means not set yet
|
||||
showFirst := false
|
||||
|
||||
for _, arg := range args {
|
||||
if callback, isLogLevel := logLevels[arg]; isLogLevel {
|
||||
if logCallback != nil {
|
||||
return nil, false, c.Errf("multiple log levels specified in consolidate")
|
||||
}
|
||||
if showFirst {
|
||||
return nil, false, c.Errf("log level must come before show_first in consolidate")
|
||||
}
|
||||
logCallback = callback
|
||||
} else if arg == "show_first" {
|
||||
showFirst = true
|
||||
} else {
|
||||
return nil, false, c.Errf("unknown option in consolidate: %s", arg)
|
||||
}
|
||||
}
|
||||
|
||||
// Use default log level if not specified
|
||||
if logCallback == nil {
|
||||
logCallback = log.Errorf
|
||||
}
|
||||
|
||||
return logCallback, showFirst, nil
|
||||
}
|
||||
|
||||
@@ -65,6 +65,27 @@ func TestErrorsParse(t *testing.T) {
|
||||
consolidate 1m error1
|
||||
consolidate 5s error2
|
||||
}`, false, 2, false},
|
||||
{`errors {
|
||||
consolidate 1m error show_first
|
||||
}`, false, 1, false},
|
||||
{`errors {
|
||||
consolidate 1m error warning show_first
|
||||
}`, false, 1, false},
|
||||
{`errors {
|
||||
consolidate 1m error invalid_option
|
||||
}`, true, 0, false},
|
||||
{`errors {
|
||||
consolidate 1m error warning error
|
||||
}`, true, 0, false},
|
||||
{`errors {
|
||||
consolidate 1m error info debug
|
||||
}`, true, 0, false},
|
||||
{`errors {
|
||||
consolidate 1m error show_first warning
|
||||
}`, true, 0, false},
|
||||
{`errors {
|
||||
consolidate 1m error show_first info
|
||||
}`, true, 0, false},
|
||||
}
|
||||
for i, test := range tests {
|
||||
c := caddy.NewTestController("dns", test.inputErrorsRules)
|
||||
@@ -146,3 +167,86 @@ func TestProperLogCallbackIsSet(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestShowFirstOption(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
inputErrorsRules string
|
||||
wantShowFirst bool
|
||||
wantLogLevel string
|
||||
}{
|
||||
{
|
||||
name: "show_first without log level",
|
||||
inputErrorsRules: `errors {
|
||||
consolidate 1m .* show_first
|
||||
}`,
|
||||
wantShowFirst: true,
|
||||
wantLogLevel: "[ERROR]", // default
|
||||
},
|
||||
{
|
||||
name: "show_first with warning log level",
|
||||
inputErrorsRules: `errors {
|
||||
consolidate 1m .* warning show_first
|
||||
}`,
|
||||
wantShowFirst: true,
|
||||
wantLogLevel: "[WARNING]",
|
||||
},
|
||||
{
|
||||
name: "show_first with error log level",
|
||||
inputErrorsRules: `errors {
|
||||
consolidate 1m .* error show_first
|
||||
}`,
|
||||
wantShowFirst: true,
|
||||
wantLogLevel: "[ERROR]",
|
||||
},
|
||||
{
|
||||
name: "no show_first",
|
||||
inputErrorsRules: `errors {
|
||||
consolidate 1m .*
|
||||
}`,
|
||||
wantShowFirst: false,
|
||||
wantLogLevel: "[ERROR]",
|
||||
},
|
||||
{
|
||||
name: "no show_first with log level",
|
||||
inputErrorsRules: `errors {
|
||||
consolidate 1m .* info
|
||||
}`,
|
||||
wantShowFirst: false,
|
||||
wantLogLevel: "[INFO]",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
buf := bytes.Buffer{}
|
||||
golog.SetOutput(&buf)
|
||||
clog.D.Set()
|
||||
|
||||
c := caddy.NewTestController("dns", tt.inputErrorsRules)
|
||||
h, err := errorsParse(c)
|
||||
|
||||
if err != nil {
|
||||
t.Errorf("Failed to parse: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
if len(h.patterns) != 1 {
|
||||
t.Errorf("Expected 1 pattern, got %d", len(h.patterns))
|
||||
return
|
||||
}
|
||||
|
||||
if h.patterns[0].showFirst != tt.wantShowFirst {
|
||||
t.Errorf("Expected showFirst=%v, got %v", tt.wantShowFirst, h.patterns[0].showFirst)
|
||||
}
|
||||
|
||||
// Test log level
|
||||
l := h.patterns[0].logCallback
|
||||
l("test log")
|
||||
|
||||
if log := buf.String(); !strings.Contains(log, tt.wantLogLevel) {
|
||||
t.Errorf("Expected log level %q, but got %q", tt.wantLogLevel, log)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user