Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .golangci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -552,6 +552,14 @@ linters:
path: pkg/acquisition/modules/victorialogs/internal/vlclient/vl_client.go
text: 'confusing-naming: Method ''QueryRange'' differs only by capitalization to method ''queryRange'' in the same source file'

# pkg/dnscache lookups are deliberately detached from the request
# context: their results are shared across requests (singleflight +
# cache), so one caller's cancellation must not abort the lookup for
# the others; a fixed timeout bounds them instead.
- linters:
- contextcheck
text: '(lookupPTR|lookupForward)` should pass the context parameter'

# tolerate complex functions in tests for now
- linters:
- maintidx
Expand Down
28 changes: 28 additions & 0 deletions cmd/crowdsec/crowdsec.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"github.com/crowdsecurity/crowdsec/pkg/apiclient"
"github.com/crowdsecurity/crowdsec/pkg/csconfig"
"github.com/crowdsecurity/crowdsec/pkg/cwhub"
"github.com/crowdsecurity/crowdsec/pkg/dnscache"
"github.com/crowdsecurity/crowdsec/pkg/exprhelpers"
"github.com/crowdsecurity/crowdsec/pkg/leakybucket"
"github.com/crowdsecurity/crowdsec/pkg/metrics"
Expand All @@ -31,6 +32,8 @@ func initCrowdsec(ctx context.Context, cConfig *csconfig.Config, hub *cwhub.Hub,
return nil, nil, fmt.Errorf("while loading context: %w", err)
}

configureDNSCache(cConfig.Crowdsec.DNSCache)

err = exprhelpers.GeoIPInit(hub.GetDataDir())
if err != nil {
// GeoIP databases are not mandatory, do not make crowdsec fail if they are not present
Expand Down Expand Up @@ -70,6 +73,31 @@ func initCrowdsec(ctx context.Context, cConfig *csconfig.Config, hub *cwhub.Hub,
return csParsers, datasources, nil
}

// configureDNSCache hands the optional dns_cache settings over to the
// dnscache package. A nil cfg is a no-op (dnscache.Configure is not called);
// any individual setting left unset in cfg is forwarded as its zero value.
func configureDNSCache(cfg *csconfig.DNSCacheCfg) {
	if cfg == nil {
		return
	}

	// Start from zero values and overwrite only what was explicitly set.
	var cacheTTL, negativeTTL time.Duration

	var cacheSize int

	if v := cfg.TTL; v != nil {
		cacheTTL = *v
	}

	if v := cfg.NegativeTTL; v != nil {
		negativeTTL = *v
	}

	if v := cfg.Size; v != nil {
		cacheSize = *v
	}

	dnscache.Configure(cacheTTL, negativeTTL, cacheSize)
}

func startParserRoutines(ctx context.Context, g *errgroup.Group, cConfig *csconfig.Config, parsers *parser.Parsers, stageCollector *parser.StageParseCollector) {
for idx := range cConfig.Crowdsec.ParserRoutinesCount {
log.WithField("idx", idx).Info("Starting parser routine")
Expand Down
90 changes: 90 additions & 0 deletions pkg/acquisition/modules/appsec/appsec_hooks_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ import (
"io"
"net/http"
"net/url"
"os"
"path/filepath"
"strings"
"testing"

Expand All @@ -14,6 +16,7 @@ import (
"github.com/crowdsecurity/crowdsec/pkg/appsec"
"github.com/crowdsecurity/crowdsec/pkg/appsec/appsec_rule"
"github.com/crowdsecurity/crowdsec/pkg/appsec/challenge"
"github.com/crowdsecurity/crowdsec/pkg/exprhelpers"
"github.com/crowdsecurity/crowdsec/pkg/pipeline"
)

Expand Down Expand Up @@ -1804,3 +1807,90 @@ func mustParseURL(raw string) *url.URL {
}
return u
}

// TestAppsecLegitimateBotHooks drives the IsLegitimateBot/SetLegitimateBot
// helpers through the full runner. A bot definition is written to a
// legit_bots directory and loaded from it (as AppsecConfig.Build does with
// the hub data dir), an in-band rule matches every request, and a pre_eval
// hook downgrades the remediation to "allow" for verified bots only.
func TestAppsecLegitimateBotHooks(t *testing.T) {
	require.NoError(t, exprhelpers.Init(nil))

	// Lay out <tmp>/legit_bots/bots.json, then load definitions from <tmp>.
	dataDir := t.TempDir()
	botsDir := filepath.Join(dataDir, "legit_bots")
	require.NoError(t, os.Mkdir(botsDir, 0o755))
	require.NoError(t, os.WriteFile(
		filepath.Join(botsDir, "bots.json"),
		[]byte(`{"name":"testbot","user_agent":"testbot","ranges":["192.0.2.0/24"]}`),
		0o644,
	))
	require.NoError(t, exprhelpers.LoadBotFilesFromDir(dataDir))

	// User agent used by the requests that claim to be the declared bot.
	const crawlerUA = "Mozilla/5.0 (compatible; TestBot/1.0)"

	// In-band rule matched by every request built below (ARGS foo=toto).
	matchToto := appsec_rule.CustomRule{
		Name:      "rule1",
		Zones:     []string{"ARGS"},
		Variables: []string{"foo"},
		Match:     appsec_rule.Match{Type: "regex", Value: "^toto"},
		Transform: []string{"lowercase"},
	}

	// pre_eval hook: switch the remediation to "allow" when the request
	// passes the legitimate bot check.
	allowVerifiedBots := appsec.Hook{
		Filter: `IsLegitimateBot(req.RemoteAddr, req.UserAgent(), req.URL.Path)`,
		Apply:  []string{`SetRemediation("allow")`},
	}

	// newRequest builds a GET /crawl?foo=toto request coming from
	// remoteAddr with the given User-Agent header.
	newRequest := func(remoteAddr, userAgent string) appsec.ParsedRequest {
		httpReq := &http.Request{
			Host:       "example.com",
			RemoteAddr: remoteAddr,
			URL:        mustParseURL("http://example.com/crawl"),
			Header:     http.Header{"User-Agent": []string{userAgent}},
		}

		return appsec.ParsedRequest{
			RemoteAddr:  remoteAddr,
			Method:      "GET",
			URI:         "/crawl",
			Args:        url.Values{"foo": []string{"toto"}},
			HTTPRequest: httpReq,
		}
	}

	// expectRemediation returns an output_asserts callback checking that
	// exactly one response was produced, that the in-band rule interrupted
	// the request, and that the final action is want. msgAndArgs, if any,
	// is attached to the InBandInterrupt assertion.
	expectRemediation := func(want any, msgAndArgs ...any) func([]pipeline.Event, []appsec.AppsecTempResponse, appsec.BodyResponse, int) {
		return func(_ []pipeline.Event, responses []appsec.AppsecTempResponse, _ appsec.BodyResponse, _ int) {
			require.Len(t, responses, 1)
			require.True(t, responses[0].InBandInterrupt, msgAndArgs...)
			require.Equal(t, want, responses[0].Action)
		}
	}

	cases := []appsecRuleTest{
		{
			name:             "verified bot bypasses the ban",
			expected_load_ok: true,
			inband_rules:     []appsec_rule.CustomRule{matchToto},
			pre_eval:         []appsec.Hook{allowVerifiedBots},
			// the IP is in the declared range; ip:port proves address normalization end to end
			input_request: newRequest("192.0.2.10:34567", crawlerUA),
			output_asserts: expectRemediation(
				appsec.AllowRemediation,
				"the rule still matches, only the remediation changes",
			),
		},
		{
			name:             "spoofed UA from a foreign IP is still banned",
			expected_load_ok: true,
			inband_rules:     []appsec_rule.CustomRule{matchToto},
			pre_eval:         []appsec.Hook{allowVerifiedBots},
			input_request:    newRequest("203.0.113.9:34567", crawlerUA),
			output_asserts:   expectRemediation(appsec.BanRemediation),
		},
		{
			name:             "SetLegitimateBot escape hatch short-circuits the checks",
			expected_load_ok: true,
			inband_rules:     []appsec_rule.CustomRule{matchToto},
			pre_eval: []appsec.Hook{
				{Apply: []string{"SetLegitimateBot()"}},
				allowVerifiedBots,
			},
			// neither the UA nor the IP matches any definition
			input_request:  newRequest("203.0.113.9:34567", "curl/8.0"),
			output_asserts: expectRemediation(appsec.AllowRemediation),
		},
	}

	runTests(t, cases)
}
24 changes: 24 additions & 0 deletions pkg/appsec/appsec.go
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,14 @@ type AppsecRequestState struct {
// the allowlist cookie itself, not by this flag.
ChallengeBypassed bool

// LegitimateBot is set by the SetLegitimateBot expr helper to tag the
// request as coming from a known good bot; once set, every later
// IsLegitimateBot call short-circuits to true without datafile or DNS
// checks. It is a verdict on the request identity, not on the response,
// so like HookVars it persists across in-band/out-of-band phases and is
// NOT cleared by ResetResponse.
LegitimateBot bool

// HooksHalted is flipped by terminal hook actions (currently
// RejectSubmission and the inline GrantChallengeCookie variant
// exposed in on_challenge_submit) to short-circuit later rules in
Expand Down Expand Up @@ -795,6 +803,20 @@ func (wc *AppsecConfig) Load(configName string, hub *cwhub.Hub) error {
return fmt.Errorf("no appsec-config found for %s", configName)
}

// setupLegitBots loads the known-good bot definitions found under
// <datadir>/legit_bots/ so they are available before any hook calling
// IsLegitimateBot runs. A load failure is logged and otherwise ignored.
// (The DNS cache these checks rely on is configured from the main crowdsec
// config, see crowdsec_service.dns_cache.)
func (wc *AppsecConfig) setupLegitBots(hub *cwhub.Hub) {
	// Tests building a standalone config pass a nil hub: nothing to scan.
	if hub == nil {
		return
	}

	// A bare hub reports an empty data dir: nothing to scan either.
	dataDir := hub.GetDataDir()
	if dataDir == "" {
		return
	}

	err := exprhelpers.LoadBotFilesFromDir(dataDir)
	if err != nil {
		wc.Logger.Errorf("unable to load legitimate bot files: %s", err)
	}
}

func (wc *AppsecConfig) Build(ctx context.Context, hub *cwhub.Hub) (*AppsecRuntimeConfig, error) {
ret := &AppsecRuntimeConfig{Logger: wc.Logger.WithField("component", "appsec_runtime_config")}

Expand Down Expand Up @@ -841,6 +863,8 @@ func (wc *AppsecConfig) Build(ctx context.Context, hub *cwhub.Hub) (*AppsecRunti
Action: BodySizeActionDrop,
}

wc.setupLegitBots(hub)

wc.Logger.Tracef("Loading config %+v", wc)
// load rules
for _, rule := range wc.OutOfBandRules {
Expand Down
20 changes: 18 additions & 2 deletions pkg/appsec/waf_helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,19 @@ import (

"github.com/crowdsecurity/crowdsec/pkg/appsec/challenge"
"github.com/crowdsecurity/crowdsec/pkg/appsec/cookie"
"github.com/crowdsecurity/crowdsec/pkg/exprhelpers"
"github.com/crowdsecurity/crowdsec/pkg/pipeline"
)

// isLegitimateBotHelper returns the IsLegitimateBot function exposed to hook
// expressions for the given request state. The returned function answers
// true right away when state.LegitimateBot is set (the per-request escape
// hatch flipped by SetLegitimateBot); otherwise it defers to
// exprhelpers.IsLegitimateBot. The flag is read at call time, so setting it
// after the helper was created still takes effect.
func isLegitimateBotHelper(state *AppsecRequestState) func(string, string, string) bool {
	return func(ip, ua, path string) bool {
		if state.LegitimateBot {
			return true
		}

		return exprhelpers.IsLegitimateBot(ip, ua, path)
	}
}

// parseLogVerbosity maps an optional expr-side verbosity argument
// ("minimal", "info", "verbose") to a FingerprintLogVerbosity. Empty /
// missing returns FingerprintLogInfo (the default tier). Unknown values
Expand Down Expand Up @@ -132,6 +142,8 @@ func GetPreEvalEnv(ctx context.Context, w *AppsecRuntimeConfig, state *AppsecReq
return w.ValidateRequestWithSchema(ctx, state, request, ref)
},
"DisableBodyInspection": func() error { return w.DisableBodyInspection(state) },
"IsLegitimateBot": isLegitimateBotHelper(state),
"SetLegitimateBot": func() error { state.LegitimateBot = true; return nil },
}
}

Expand All @@ -157,8 +169,10 @@ func GetPostEvalEnv(ctx context.Context, w *AppsecRuntimeConfig, state *AppsecRe
"DumpFingerprint": func(label string) string {
return DumpFingerprint(w.FingerprintDumpDir, label, state.Fingerprint, request)
},
"fingerprint": state.Fingerprint,
"hook_vars": state.HookVars,
"fingerprint": state.Fingerprint,
"hook_vars": state.HookVars,
"IsLegitimateBot": isLegitimateBotHelper(state),
"SetLegitimateBot": func() error { state.LegitimateBot = true; return nil },
}
}

Expand Down Expand Up @@ -292,5 +306,7 @@ func GetOnMatchEnv(w *AppsecRuntimeConfig, state *AppsecRequestState, request *P
"SetChallengeBody": func(body string) error { return w.SetChallengeBody(state, body) },
"SetChallengeCookie": func(cookie cookie.AppsecCookie) error { return w.SetChallengeCookie(state, cookie) },
"AppsecCookie": cookie.NewAppsecCookie,
"IsLegitimateBot": isLegitimateBotHelper(state),
"SetLegitimateBot": func() error { state.LegitimateBot = true; return nil },
}
}
33 changes: 33 additions & 0 deletions pkg/appsec/waf_helpers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,39 @@ import (
"github.com/stretchr/testify/require"
)

// TestLegitimateBotHooksCompile protects the expression env maps: a hook
// that filters on IsLegitimateBot and applies SetLegitimateBot has to build
// without error in each phase that exposes those helpers.
func TestLegitimateBotHooksCompile(t *testing.T) {
	stages := []hookStage{hookPreEval, hookPostEval, hookOnMatch}

	for i := range stages {
		// A fresh hook is built for every stage.
		hook := &Hook{
			Filter: `IsLegitimateBot(req.RemoteAddr, req.UserAgent(), req.URL.Path)`,
			Apply:  []string{`SetLegitimateBot()`},
		}

		err := hook.Build(t.Context(), stages[i], nil)
		require.NoError(t, err, "stage %v", stages[i])
	}
}

// TestSetLegitimateBotEscapeHatch checks the per-request escape hatch: after
// SetLegitimateBot has been called, IsLegitimateBot answers true without
// looking at datafiles (none are loaded here) or DNS, whatever its arguments.
func TestSetLegitimateBotEscapeHatch(t *testing.T) {
	// Signature of the IsLegitimateBot entry stored in the env map.
	type botCheck = func(string, string, string) bool

	reqState := &AppsecRequestState{HookVars: map[string]string{}}
	helpers := GetPreEvalEnv(t.Context(), &AppsecRuntimeConfig{}, reqState, &ParsedRequest{})

	check := helpers["IsLegitimateBot"].(botCheck)
	mark := helpers["SetLegitimateBot"].(func() error)

	// Before the flag is set the request is not recognized as a bot.
	assert.False(t, check("1.2.3.4", "googlebot", "/"))

	require.NoError(t, mark())
	assert.True(t, check("1.2.3.4", "googlebot", "/"))
	assert.True(t, check("garbage-ip", "", ""), "escape hatch bypasses all checks")

	// the flag is per-request state: a fresh state starts clean
	cleanState := &AppsecRequestState{HookVars: map[string]string{}}
	cleanHelpers := GetPreEvalEnv(t.Context(), &AppsecRuntimeConfig{}, cleanState, &ParsedRequest{})
	cleanCheck := cleanHelpers["IsLegitimateBot"].(botCheck)
	assert.False(t, cleanCheck("1.2.3.4", "googlebot", "/"))
}

// TestParseChallengeCookieTTLArg covers the GrantChallengeCookie optional TTL
// argument parsing: no args / empty string yield a nil override (use runtime
// default), a parseable duration yields a positive pointer, and malformed /
Expand Down
9 changes: 9 additions & 0 deletions pkg/csconfig/crowdsec_service.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"io/fs"
"os"
"path/filepath"
"time"

log "github.com/sirupsen/logrus"
"gopkg.in/yaml.v3"
Expand All @@ -26,11 +27,19 @@ type CrowdsecServiceCfg struct {
BucketStateFile string `yaml:"state_input_file,omitempty"` // if we need to unserialize buckets at start
BucketStateDumpDir string `yaml:"state_output_dir,omitempty"` // if we need to unserialize buckets on shutdown
BucketsGCEnabled bool `yaml:"-"` // we need to garbage collect buckets when in forensic mode
DNSCache *DNSCacheCfg `yaml:"dns_cache,omitempty"`

SimulationFilePath string `yaml:"-"`
ContextToSend map[string][]string `yaml:"-"`
}

// DNSCacheCfg is the optional `dns_cache` section of the crowdsec service
// configuration: cache settings for DNS lookups (legit bots, rdns PO).
//
// All fields are pointers: a key that is absent from the YAML leaves the
// corresponding field nil, which lets callers tell "not set" apart from an
// explicit zero.
type DNSCacheCfg struct {
// TTL is read from the `ttl` key.
// NOTE(review): presumably how long a successful lookup stays cached — confirm against pkg/dnscache.
TTL *time.Duration `yaml:"ttl,omitempty"`
// NegativeTTL is read from the `negative_ttl` key.
// NOTE(review): presumably how long a failed lookup stays cached — confirm against pkg/dnscache.
NegativeTTL *time.Duration `yaml:"negative_ttl,omitempty"`
// Size is read from the `size` key.
// NOTE(review): presumably the maximum number of cached entries — confirm against pkg/dnscache.
Size *int `yaml:"size,omitempty"`
}

var ErrNoAcquisitionDefined = errors.New("no acquisition_path or acquisition_dir specified")

func (c *CrowdsecServiceCfg) CollectAcquisitionFiles() ([]string, error) {
Expand Down
29 changes: 29 additions & 0 deletions pkg/csconfig/crowdsec_service_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,11 @@ package csconfig
import (
"path/filepath"
"testing"
"time"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"gopkg.in/yaml.v3"

"github.com/crowdsecurity/go-cs-lib/cstest"
)
Expand Down Expand Up @@ -195,3 +198,29 @@ func TestLoadCrowdsec(t *testing.T) {
})
}
}

// TestDNSCacheCfg checks that the optional dns_cache section of the crowdsec
// service configuration is decoded into DNSCacheCfg (durations and size), and
// that DNSCache stays nil when the section is absent.
func TestDNSCacheCfg(t *testing.T) {
	// ttl, negative_ttl and size must be indented under dns_cache: at column
	// 0 they would be top-level keys and dns_cache itself would decode to nil.
	yamlConfig := `
acquisition_path: ./testdata/acquis.yaml
dns_cache:
  ttl: 2h
  negative_ttl: 30s
  size: 4096
`

	cfg := CrowdsecServiceCfg{}
	require.NoError(t, yaml.Unmarshal([]byte(yamlConfig), &cfg))

	// Each pointer is checked for nil before being dereferenced so that a
	// decoding problem fails the test instead of panicking.
	require.NotNil(t, cfg.DNSCache)
	require.NotNil(t, cfg.DNSCache.TTL)
	assert.Equal(t, 2*time.Hour, *cfg.DNSCache.TTL)
	require.NotNil(t, cfg.DNSCache.NegativeTTL)
	assert.Equal(t, 30*time.Second, *cfg.DNSCache.NegativeTTL)
	require.NotNil(t, cfg.DNSCache.Size)
	assert.Equal(t, 4096, *cfg.DNSCache.Size)

	// the section is optional
	bare := CrowdsecServiceCfg{}
	require.NoError(t, yaml.Unmarshal([]byte("acquisition_path: ./testdata/acquis.yaml"), &bare))
	assert.Nil(t, bare.DNSCache)
}
5 changes: 5 additions & 0 deletions pkg/cwhub/hub.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,12 @@ type Hub struct {
}

// GetDataDir reports the directory where data sets are installed.
// It returns the empty string when the hub carries no local configuration
// (for instance a zero-value Hub built in tests).
func (h *Hub) GetDataDir() string {
	if local := h.local; local != nil {
		return local.InstallDataDir
	}

	return ""
}

Expand Down
Loading
Loading