plugin/errors: add show_first option to consolidate (#7702) (#7703)

Add optional show_first flag to consolidate directive that logs
the first error immediately and then consolidates subsequent errors.

When show_first is enabled:
- The first matching error is logged immediately with full details
  (rcode, domain, type, error message) using the configured log level
- Subsequent matching errors are consolidated during the period
- At period end:
  - If only one error occurred, no summary is printed (already logged)
  - If multiple errors occurred, summary shows the total count

Syntax:
  consolidate DURATION REGEXP [LEVEL] [show_first]

Example with 3 errors:
  [WARNING] 2 example.org. A: read udp 10.0.0.1:53->8.8.8.8:53: i/o timeout
  [WARNING] 3 errors like '^read udp .* i/o timeout$' occurred in last 30s

Example with 1 error:
  [WARNING] 2 example.org. A: read udp 10.0.0.1:53->8.8.8.8:53: i/o timeout

Implementation details:
- Add showFirst bool to pattern struct
- Rename inc() to consolidateError(), return false for showFirst case
- Use function pointer in ServeDNS to unify log calls with proper level
- Simplify logPattern() with single condition (cnt > 1 || !showFirst)
- Refactor parseLogLevel() to parseOptionalParams() with map-based dispatch
- Validate parameter order: log level must come before show_first
- Update README.md with show_first documentation and examples
- Add comprehensive test cases for show_first functionality

Signed-off-by: cangming <cangming@cangming.app>
This commit is contained in:
cangming
2025-12-10 10:15:49 +08:00
committed by GitHub
parent 0233f3e7c6
commit cad961f75f
5 changed files with 293 additions and 31 deletions

View File

@@ -23,7 +23,7 @@ Extra knobs are available with an expanded syntax:
~~~
errors {
stacktrace
consolidate DURATION REGEXP [LEVEL]
consolidate DURATION REGEXP [LEVEL] [show_first]
}
~~~
@@ -35,6 +35,21 @@ log level, which is configurable by optional option **LEVEL**. Supported options
2 errors like '^read udp .* i/o timeout$' occurred in last 30s
~~~
If the optional `show_first` flag is specified, the first error will be logged immediately when it occurs, and then subsequent matching errors will be consolidated. When the consolidation period ends:
- If only one error occurred, no summary is printed (since it was already logged)
- If multiple errors occurred, a summary is printed showing the total count
Example with 3 errors:
~~~
[WARNING] 2 example.org. A: read udp 10.0.0.1:53->8.8.8.8:53: i/o timeout
[WARNING] 3 errors like '^read udp .* i/o timeout$' occurred in last 30s
~~~
Example with 1 error:
~~~
[WARNING] 2 example.org. A: read udp 10.0.0.1:53->8.8.8.8:53: i/o timeout
~~~
Multiple `consolidate` options with different **DURATION** and **REGEXP** are allowed. In case if some error message corresponds to several defined regular expressions the message will be associated with the first appropriate **REGEXP**.
For better performance, it's recommended to use the `^` or `$` metacharacters in regular expression when filtering error messages by prefix or suffix, e.g. `^failed to .*`, or `.* timeout$`.
@@ -63,3 +78,16 @@ and errors with prefix "Failed to " as errors.
}
}
~~~
Use the *forward* plugin and consolidate timeout errors with `show_first` option to see both
the summary and the first occurrence of the error:
~~~ corefile
. {
forward . 8.8.8.8
errors {
consolidate 5m ".* i/o timeout$" warning show_first
consolidate 30s "^Failed to .+" error show_first
}
}
~~~

View File

@@ -23,6 +23,7 @@ type pattern struct {
period time.Duration
pattern *regexp.Regexp
logCallback func(format string, v ...any)
showFirst bool
}
func (p *pattern) timer() *time.Timer {
@@ -46,17 +47,24 @@ func newErrorHandler() *errorHandler {
func (h *errorHandler) logPattern(i int) {
cnt := atomic.SwapUint32(&h.patterns[i].count, 0)
if cnt > 0 {
if cnt == 0 {
return
}
if cnt > 1 || !h.patterns[i].showFirst {
h.patterns[i].logCallback("%d errors like '%s' occurred in last %s",
cnt, h.patterns[i].pattern.String(), h.patterns[i].period)
}
}
func (h *errorHandler) inc(i int) bool {
// consolidateError records an error occurrence for pattern i.
// Returns false when cnt == 1 and showFirst is configured, so the error
// will be printed by the caller using the pattern's logCallback.
func (h *errorHandler) consolidateError(i int) bool {
if atomic.LoadUint32(&h.stopFlag) > 0 {
return false
}
if atomic.AddUint32(&h.patterns[i].count, 1) == 1 {
cnt := atomic.AddUint32(&h.patterns[i].count, 1)
if cnt == 1 {
ind := i
t := time.AfterFunc(h.patterns[ind].period, func() {
h.logPattern(ind)
@@ -65,6 +73,9 @@ func (h *errorHandler) inc(i int) bool {
if atomic.LoadUint32(&h.stopFlag) > 0 && t.Stop() {
h.logPattern(ind)
}
// If showFirst is enabled, return false so the first error
// will be printed by the caller using the pattern's logCallback
return !h.patterns[i].showFirst
}
return true
}
@@ -85,16 +96,26 @@ func (h *errorHandler) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dn
if err != nil {
strErr := err.Error()
state := request.Request{W: w, Req: r}
// Default to error logging
logFunc := log.Errorf
for i := range h.patterns {
if h.patterns[i].pattern.MatchString(strErr) {
if h.inc(i) {
if h.consolidateError(i) {
// Error is consolidated, no need to log
return rcode, err
}
// consolidateError returned false (showFirst case)
// Use the pattern's configured log level
logFunc = h.patterns[i].logCallback
break
}
}
state := request.Request{W: w, Req: r}
log.Errorf("%d %s %s: %s", rcode, state.Name(), state.Type(), strErr)
// Log with the appropriate log level
logFunc("%d %s %s: %s", rcode, state.Name(), state.Type(), strErr)
}
return rcode, err

View File

@@ -134,13 +134,13 @@ func TestInc(t *testing.T) {
}},
}
ret := h.inc(0)
ret := h.consolidateError(0)
if ret {
t.Error("Unexpected return value, expected false, actual true")
}
h.stopFlag = 0
ret = h.inc(0)
ret = h.consolidateError(0)
if !ret {
t.Error("Unexpected return value, expected true, actual false")
}
@@ -156,7 +156,7 @@ func TestInc(t *testing.T) {
t.Error("Unexpected 'timer', expected not nil")
}
ret = h.inc(0)
ret = h.consolidateError(0)
if !ret {
t.Error("Unexpected return value, expected true, actual false")
}
@@ -194,13 +194,14 @@ func TestStop(t *testing.T) {
}},
}
h.inc(0)
h.inc(0)
h.inc(0)
h.consolidateError(0)
h.consolidateError(0)
h.consolidateError(0)
expCnt := uint32(3)
actCnt := atomic.LoadUint32(&h.patterns[0].count)
if actCnt != expCnt {
t.Fatalf("Unexpected initial 'count', expected %d, actual %d", expCnt, actCnt)
t.Errorf("Unexpected initial 'count', expected %d, actual %d", expCnt, actCnt)
return
}
h.stop()
@@ -230,6 +231,94 @@ func TestStop(t *testing.T) {
}
}
func TestShowFirst(t *testing.T) {
tests := []struct {
name string
errorCount int
expectSummary string
shouldHaveSummary bool
}{
{
name: "multiple errors",
errorCount: 3,
expectSummary: "3 errors like '^error.*!$' occurred in last 2s",
shouldHaveSummary: true,
},
{
name: "single error",
errorCount: 1,
expectSummary: "",
shouldHaveSummary: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
buf := bytes.Buffer{}
clog.D.Set()
golog.SetOutput(&buf)
h := &errorHandler{
patterns: []*pattern{{
count: 0,
period: 2 * time.Second,
pattern: regexp.MustCompile("^error.*!$"),
logCallback: log.Errorf,
showFirst: true,
}},
}
// Add errors and verify return values
for i := range tt.errorCount {
ret := h.consolidateError(0)
if i == 0 {
// First call should return false (showFirst enabled)
if ret {
t.Errorf("First consolidateError call: expected false, got true")
}
// Simulate ServeDNS logging with pattern's logCallback
h.patterns[0].logCallback("2 example.org. A: error %d!", i+1)
} else {
// Subsequent calls should return true (consolidated)
if !ret {
t.Errorf("consolidateError call %d: expected true, got false", i+1)
}
}
}
// Check count
expCnt := uint32(tt.errorCount)
actCnt := atomic.LoadUint32(&h.patterns[0].count)
if actCnt != expCnt {
t.Errorf("Unexpected 'count', expected %d, actual %d", expCnt, actCnt)
return
}
// Check that first error was logged
output1 := buf.String()
if !strings.Contains(output1, "2 example.org. A: error 1!") {
t.Errorf("Expected first error to be logged, but got: %q", output1)
}
// Clear buffer and trigger log pattern
buf.Reset()
h.logPattern(0)
// Verify summary in logPattern output
output2 := buf.String()
if tt.shouldHaveSummary {
if !strings.Contains(output2, tt.expectSummary) {
t.Errorf("Expected summary %q not found in logPattern output: %q", tt.expectSummary, output2)
}
} else {
if strings.Contains(output2, "errors like") {
t.Errorf("Did not expect summary for single error, but got: %q", output2)
}
}
})
}
}
func genErrorHandler(rcode int, err error) plugin.Handler {
return plugin.HandlerFunc(func(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (int, error) {
return rcode, err

View File

@@ -71,7 +71,7 @@ func errorsParse(c *caddy.Controller) (*errorHandler, error) {
func parseConsolidate(c *caddy.Controller) (*pattern, error) {
args := c.RemainingArgs()
if len(args) < 2 || len(args) > 3 {
if len(args) < 2 || len(args) > 4 {
return nil, c.ArgErr()
}
p, err := time.ParseDuration(args[0])
@@ -82,28 +82,48 @@ func parseConsolidate(c *caddy.Controller) (*pattern, error) {
if err != nil {
return nil, c.Err(err.Error())
}
lc, err := parseLogLevel(c, args)
lc, showFirst, err := parseOptionalParams(c, args[2:])
if err != nil {
return nil, err
}
return &pattern{period: p, pattern: re, logCallback: lc}, nil
return &pattern{period: p, pattern: re, logCallback: lc, showFirst: showFirst}, nil
}
func parseLogLevel(c *caddy.Controller, args []string) (func(format string, v ...any), error) {
if len(args) != 3 {
return log.Errorf, nil
// parseOptionalParams parses optional parameters (log level and show_first flag).
// Order: log level (optional) must come before show_first (optional).
func parseOptionalParams(c *caddy.Controller, args []string) (func(format string, v ...any), bool, error) {
logLevels := map[string]func(format string, v ...any){
"warning": log.Warningf,
"error": log.Errorf,
"info": log.Infof,
"debug": log.Debugf,
}
switch args[2] {
case "warning":
return log.Warningf, nil
case "error":
return log.Errorf, nil
case "info":
return log.Infof, nil
case "debug":
return log.Debugf, nil
default:
return nil, c.Errf("unknown log level argument in consolidate: %s", args[2])
var logCallback func(format string, v ...any) // nil means not set yet
showFirst := false
for _, arg := range args {
if callback, isLogLevel := logLevels[arg]; isLogLevel {
if logCallback != nil {
return nil, false, c.Errf("multiple log levels specified in consolidate")
}
if showFirst {
return nil, false, c.Errf("log level must come before show_first in consolidate")
}
logCallback = callback
} else if arg == "show_first" {
showFirst = true
} else {
return nil, false, c.Errf("unknown option in consolidate: %s", arg)
}
}
// Use default log level if not specified
if logCallback == nil {
logCallback = log.Errorf
}
return logCallback, showFirst, nil
}

View File

@@ -65,6 +65,27 @@ func TestErrorsParse(t *testing.T) {
consolidate 1m error1
consolidate 5s error2
}`, false, 2, false},
{`errors {
consolidate 1m error show_first
}`, false, 1, false},
{`errors {
consolidate 1m error warning show_first
}`, false, 1, false},
{`errors {
consolidate 1m error invalid_option
}`, true, 0, false},
{`errors {
consolidate 1m error warning error
}`, true, 0, false},
{`errors {
consolidate 1m error info debug
}`, true, 0, false},
{`errors {
consolidate 1m error show_first warning
}`, true, 0, false},
{`errors {
consolidate 1m error show_first info
}`, true, 0, false},
}
for i, test := range tests {
c := caddy.NewTestController("dns", test.inputErrorsRules)
@@ -146,3 +167,86 @@ func TestProperLogCallbackIsSet(t *testing.T) {
})
}
}
func TestShowFirstOption(t *testing.T) {
tests := []struct {
name string
inputErrorsRules string
wantShowFirst bool
wantLogLevel string
}{
{
name: "show_first without log level",
inputErrorsRules: `errors {
consolidate 1m .* show_first
}`,
wantShowFirst: true,
wantLogLevel: "[ERROR]", // default
},
{
name: "show_first with warning log level",
inputErrorsRules: `errors {
consolidate 1m .* warning show_first
}`,
wantShowFirst: true,
wantLogLevel: "[WARNING]",
},
{
name: "show_first with error log level",
inputErrorsRules: `errors {
consolidate 1m .* error show_first
}`,
wantShowFirst: true,
wantLogLevel: "[ERROR]",
},
{
name: "no show_first",
inputErrorsRules: `errors {
consolidate 1m .*
}`,
wantShowFirst: false,
wantLogLevel: "[ERROR]",
},
{
name: "no show_first with log level",
inputErrorsRules: `errors {
consolidate 1m .* info
}`,
wantShowFirst: false,
wantLogLevel: "[INFO]",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
buf := bytes.Buffer{}
golog.SetOutput(&buf)
clog.D.Set()
c := caddy.NewTestController("dns", tt.inputErrorsRules)
h, err := errorsParse(c)
if err != nil {
t.Errorf("Failed to parse: %v", err)
return
}
if len(h.patterns) != 1 {
t.Errorf("Expected 1 pattern, got %d", len(h.patterns))
return
}
if h.patterns[0].showFirst != tt.wantShowFirst {
t.Errorf("Expected showFirst=%v, got %v", tt.wantShowFirst, h.patterns[0].showFirst)
}
// Test log level
l := h.patterns[0].logCallback
l("test log")
if log := buf.String(); !strings.Contains(log, tt.wantLogLevel) {
t.Errorf("Expected log level %q, but got %q", tt.wantLogLevel, log)
}
})
}
}