From 487680a9fe37f35c749e3a09f46c01bfd67e8066 Mon Sep 17 00:00:00 2001
From: Isaac To <isaac.chun.to@gmail.com>
Date: Sat, 15 Nov 2025 21:21:15 -0800
Subject: [PATCH 01/13] feat: add click callback to parse sequence of regexes

---
 dandi/cli/base.py            | 60 ++++++++++++++++++++++++++
 dandi/cli/tests/test_base.py | 82 ++++++++++++++++++++++++++++++++++++
 2 files changed, 142 insertions(+)
 create mode 100644 dandi/cli/tests/test_base.py

diff --git a/dandi/cli/base.py b/dandi/cli/base.py
index 9d8bc6ace..a826c75ed 100644
--- a/dandi/cli/base.py
+++ b/dandi/cli/base.py
@@ -1,5 +1,7 @@
 from functools import wraps
 import os
+import re
+from typing import Optional
 
 import click
 
@@ -147,3 +149,61 @@ def wrapper(obj, *args, **kwargs):
 map_to_click_exceptions._do_map = not bool(  # type: ignore[attr-defined]
     os.environ.get("DANDI_DEVEL", None)
 )
+
+
+def _compile_regex(regex: str) -> re.Pattern:
+    """
+    Helper to compile a regex pattern expressed as an `str` into a `re.Pattern`
+
+    Parameters
+    ----------
+    regex : str
+        The regex pattern expressed as a string.
+
+    Returns
+    -------
+    re.Pattern
+        The compiled regex pattern.
+    """
+    try:
+        compiled_regex = re.compile(regex)
+    except re.error as e:
+        raise click.BadParameter(f"Invalid regex pattern {regex!r}: {e}") from e
+
+    return compiled_regex
+
+
+def parse_regexes(
+    _ctx: click.Context, _param: click.Parameter, value: Optional[str]
+) -> Optional[list[re.Pattern]]:
+    """
+    Callback to parse a string of comma-separated regex patterns
+
+    Parameters
+    ----------
+    _ctx : click.Context
+        The Click context (not used).
+
+    _param : click.Parameter
+        The Click parameter (not used).
+
+    value : str | None
+        The input string containing comma-separated regex patterns. It is assumed
+        that none of the patterns contain commas themselves.
+
+    Returns
+    -------
+    list[re.Pattern]
+        A list of compiled regex patterns.
+
+    Notes
+    -----
+        This callback is only suitable to parse patterns that do not contain commas.
+    """
+    if value is None:
+        # Handle the case where no value is provided
+        return None
+
+    regexes = set(value.split(","))
+
+    return [_compile_regex(regex) for regex in regexes]
diff --git a/dandi/cli/tests/test_base.py b/dandi/cli/tests/test_base.py
new file mode 100644
index 000000000..5c5cec7b8
--- /dev/null
+++ b/dandi/cli/tests/test_base.py
@@ -0,0 +1,82 @@
+import re
+
+import click
+import pytest
+
+from dandi.cli.base import _compile_regex, parse_regexes
+
+DUMMY_CTX = click.Context(click.Command("dummy"))
+DUMMY_PARAM = click.Option(["--dummy"])
+
+
+class TestCompileRegex:
+    @pytest.mark.parametrize(
+        "pattern",
+        [
+            "abc",
+            "[a-z]+",
+            "^start$",
+            r"a\.b",
+        ],
+    )
+    def test_valid_patterns_return_pattern(self, pattern):
+        compiled = _compile_regex(pattern)
+        assert isinstance(compiled, re.Pattern)
+        assert compiled.pattern == pattern
+
+    @pytest.mark.parametrize("pattern", ["(", "[a-z", "\\"])
+    def test_invalid_patterns_raise_bad_parameter(self, pattern):
+        with pytest.raises(click.BadParameter) as exc_info:
+            _compile_regex(pattern)
+        msg = str(exc_info.value)
+        assert "Invalid regex pattern" in msg
+        assert repr(pattern) in msg
+
+
+class TestParseRegexes:
+    def test_none_returns_none(self):
+        assert parse_regexes(DUMMY_CTX, DUMMY_PARAM, None) is None
+
+    @pytest.mark.parametrize(
+        "value",
+        [
+            "abc",
+            "[a-z]+",
+            r"a\.b",
+            r"",
+        ],
+    )
+    def test_single_pattern(self, value):
+
+        result = parse_regexes(DUMMY_CTX, DUMMY_PARAM, value)
+        assert isinstance(result, list)
+        assert len(result) == 1
+
+        (compiled,) = result
+        assert isinstance(compiled, re.Pattern)
+        assert compiled.pattern == value
+
+    @pytest.mark.parametrize(
+        "value, expected_patterns_in_strs",
+        [
+            ("foo,,bar", ["foo", "", "bar"]),
+            ("^start$,end$", ["^start$", "end$"]),
+            (r"a\.b,c+d", [r"a\.b", r"c+d"]),
+            # duplicates should be collapsed by the internal set()
+            ("foo,foo,bar", ["foo", "bar"]),
+        ],
+    )
+    def test_multiple_patterns(self, value: str, expected_patterns_in_strs: list[str]):
+        result = parse_regexes(DUMMY_CTX, DUMMY_PARAM, value)
+        assert isinstance(result, list)
+
+        # Order is not guaranteed due to de-duplication via set
+        # So we just check that all expected patterns are present
+        assert {p.pattern for p in result} == set(expected_patterns_in_strs)
+
+    @pytest.mark.parametrize(
+        "value, bad_pattern", [("(", "("), ("foo,(", "("), ("good,[a-z", "[a-z")]
+    )
+    def test_invalid_pattern_raises_bad_parameter(self, value: str, bad_pattern: str):
+        with pytest.raises(click.BadParameter, match=re.escape(bad_pattern)):
+            parse_regexes(DUMMY_CTX, DUMMY_PARAM, value)

From 40670eb5940836c4017646a213ffa5a186d28e5a Mon Sep 17 00:00:00 2001
From: Isaac To <isaac.chun.to@gmail.com>
Date: Mon, 17 Nov 2025 13:48:30 -0800
Subject: [PATCH 02/13] feat: provide util func to filter validation result by
 ID with regex

---
 dandi/tests/test_utils.py | 148 ++++++++++++++++++++++++++++++++++++++
 dandi/utils.py            |  31 +++++++-
 2 files changed, 178 insertions(+), 1 deletion(-)

diff --git a/dandi/tests/test_utils.py b/dandi/tests/test_utils.py
index 9f7eb8dfc..838a75db6 100644
--- a/dandi/tests/test_utils.py
+++ b/dandi/tests/test_utils.py
@@ -5,6 +5,7 @@
 import logging
 import os.path as op
 from pathlib import Path
+import re
 import time
 
 import pytest
@@ -20,6 +21,7 @@
     _get_instance,
     ensure_datetime,
     ensure_strtime,
+    filter_by_id_patterns,
     find_files,
     flatten,
     flattened,
@@ -33,6 +35,14 @@
     post_upload_size_check,
     under_paths,
 )
+from ..validate_types import (
+    Origin,
+    OriginType,
+    Scope,
+    Severity,
+    ValidationResult,
+    Validator,
+)
 
 
 def test_find_files() -> None:
@@ -589,3 +599,141 @@ def test_post_upload_size_check_erroring(
         logging.ERROR,
         f"Size of {p} was 42 at start of upload but is now 19 after upload",
     ) in caplog.record_tuples
+
+
+class TestFilterByIdPatterns:
+    """Test the _filter_by_id_patterns function."""
+
+    @pytest.fixture
+    def sample_validation_results(self) -> list[ValidationResult]:
+        """Create sample validation results for testing."""
+        origin_validation_nwbinspector = Origin(
+            type=OriginType.VALIDATION,
+            validator=Validator.nwbinspector,
+            validator_version="",
+        )
+        origin_validation_dandi = Origin(
+            type=OriginType.VALIDATION,
+            validator=Validator.dandi,
+            validator_version="",
+        )
+
+        return [
+            ValidationResult(
+                id="NWBI.check_data_orientation",
+                origin=origin_validation_nwbinspector,
+                scope=Scope.FILE,
+                message="Data may be in the wrong orientation.",
+                path=Path("sub-mouse001/sub-mouse001.nwb"),
+                severity=Severity.WARNING,
+            ),
+            ValidationResult(
+                id="NWBI.check_missing_unit",
+                origin=origin_validation_nwbinspector,
+                scope=Scope.FILE,
+                message="Missing text for attribute 'unit'.",
+                path=Path("sub-mouse002/sub-mouse002.nwb"),
+                severity=Severity.WARNING,
+            ),
+            ValidationResult(
+                id="DANDI.NO_DANDISET_FOUND",
+                origin=origin_validation_dandi,
+                scope=Scope.FILE,
+                message="No dandiset.yaml found.",
+                path=Path("sub-mouse003/sub-mouse003.nwb"),
+                severity=Severity.ERROR,
+            ),
+            ValidationResult(
+                id="DANDI.METADATA_MISSING",
+                origin=origin_validation_dandi,
+                scope=Scope.FILE,
+                message="Required metadata is missing.",
+                path=Path("sub-mouse004/sub-mouse004.nwb"),
+                severity=Severity.ERROR,
+            ),
+        ]
+
+    @pytest.mark.parametrize(
+        "pattern_strs,expected_ids",
+        [
+            # Single pattern matching one result
+            ([r"NWBI\.check_data_orientation"], ["NWBI.check_data_orientation"]),
+            ([r".NO_DANDISET_FOUND"], ["DANDI.NO_DANDISET_FOUND"]),
+            # Single pattern matching multiple results with prefix
+            (
+                [r"NWBI\.\S+"],
+                ["NWBI.check_data_orientation", "NWBI.check_missing_unit"],
+            ),
+            # Single pattern matching multiple results with different prefix
+            ([r"DANDI"], ["DANDI.NO_DANDISET_FOUND", "DANDI.METADATA_MISSING"]),
+            # Multiple patterns matching different results
+            (
+                [r"NWBI\.check_data_orientation", r"DANDI\.NO_DANDISET_FOUND"],
+                ["NWBI.check_data_orientation", "DANDI.NO_DANDISET_FOUND"],
+            ),
+            # Multiple patterns matching different subsets
+            (
+                [r"NWBI\.\S+", r"DANDI\.\S+"],
+                [
+                    "NWBI.check_data_orientation",
+                    "NWBI.check_missing_unit",
+                    "DANDI.NO_DANDISET_FOUND",
+                    "DANDI.METADATA_MISSING",
+                ],
+            ),
+            # Multiple patterns with some overlap
+            (
+                [r".*check.*", r"NWBI\..*"],
+                [
+                    "NWBI.check_data_orientation",
+                    "NWBI.check_missing_unit",
+                ],
+            ),
+            # Multiple patterns with complete overlap (should not duplicate)
+            (
+                [r"NWBI\.check_data_orientation", r"data_orientation"],
+                [
+                    "NWBI.check_data_orientation",
+                ],
+            ),
+            # Multiple patterns where one is subset of other
+            (
+                [r"\S+", r"NWBI\.\S+"],
+                [
+                    "NWBI.check_data_orientation",
+                    "NWBI.check_missing_unit",
+                    "DANDI.NO_DANDISET_FOUND",
+                    "DANDI.METADATA_MISSING",
+                ],
+            ),
+            # Pattern that matches nothing
+            ([r"NONEXISTENT_PATTERN"], []),
+            # Empty patterns list
+            ([], []),
+        ],
+    )
+    def test_pattern_filtering(
+        self,
+        sample_validation_results: list[ValidationResult],
+        pattern_strs: list[str],
+        expected_ids: list[str],
+    ) -> None:
+        """Test filtering with various pattern combinations and edge cases."""
+        patterns = [re.compile(p) for p in pattern_strs]
+        filtered = filter_by_id_patterns(sample_validation_results, patterns)
+        filtered_ids = [result.id for result in filtered]
+        assert filtered_ids == expected_ids
+
+    def test_empty_validation_results(self) -> None:
+        """Test filtering with an empty validation results list."""
+        patterns = [re.compile(r".*")]
+        filtered = filter_by_id_patterns([], patterns)
+        assert filtered == []
+
+    def test_preserves_order(
+        self, sample_validation_results: list[ValidationResult]
+    ) -> None:
+        """Test that filtering preserves the original order of results."""
+        patterns = [re.compile(r".*")]
+        filtered = filter_by_id_patterns(sample_validation_results, patterns)
+        assert filtered == sample_validation_results
diff --git a/dandi/utils.py b/dandi/utils.py
index 93a0e138c..9d34bec06 100644
--- a/dandi/utils.py
+++ b/dandi/utils.py
@@ -24,7 +24,7 @@
 from time import sleep
 import traceback
 import types
-from typing import IO, Any, List, Optional, Protocol, TypeVar, Union
+from typing import IO, TYPE_CHECKING, Any, List, Optional, Protocol, TypeVar, Union
 
 import dateutil.parser
 from multidict import MultiDict  # dependency of yarl
@@ -38,6 +38,9 @@
 from .consts import DandiInstance, known_instances, known_instances_rev
 from .exceptions import BadCliVersionError, CliVersionTooOldError
 
+if TYPE_CHECKING:
+    from .validate_types import ValidationResult
+
 AnyPath = Union[str, Path]
 
 
@@ -972,3 +975,29 @@ class StrEnum(str, Enum):
     @staticmethod
     def _generate_next_value_(name, _start, _count, _last_values):
         return name
+
+
+def filter_by_id_patterns(
+    validation_results: Iterable[ValidationResult], patterns: list[re.Pattern[str]]
+) -> list[ValidationResult]:
+    """
+    Filter validation results by matching their IDs against provided regex patterns.
+
+    Parameters
+    ----------
+    validation_results : Iterable[ValidationResult]
+        The iterable of validation results to filter.
+    patterns : list[re.Pattern[str]]
+        The list of regex patterns to match validation result IDs against.
+
+    Returns
+    -------
+    list[ValidationResult]
+        The filtered list of validation results whose IDs match any of the provided patterns.
+    """
+
+    filtered_results = []
+    for result in validation_results:
+        if any(re.search(pattern, result.id) for pattern in patterns):
+            filtered_results.append(result)
+    return filtered_results

From 2d9f2c6eeb9a6cbbb0427ab5ce42678195d167a6 Mon Sep 17 00:00:00 2001
From: Isaac To <isaac.chun.to@gmail.com>
Date: Tue, 18 Nov 2025 13:21:23 -0800
Subject: [PATCH 03/13] feat: add --match option to `dandi validate` command

To filter validation results by ID using regex patterns
---
 dandi/cli/cmd_validate.py            |  31 ++++-
 dandi/cli/tests/test_cmd_validate.py | 170 +++++++++++++++++++++++++++
 docs/source/cmdline/validate.rst     |   7 ++
 3 files changed, 204 insertions(+), 4 deletions(-)

diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py
index b0b5012f5..24ad0c25f 100644
--- a/dandi/cli/cmd_validate.py
+++ b/dandi/cli/cmd_validate.py
@@ -4,13 +4,18 @@
 import logging
 import os
 import re
-from typing import cast
+from typing import Optional, cast
 import warnings
 
 import click
 
-from .base import devel_debug_option, devel_option, map_to_click_exceptions
-from ..utils import pluralize
+from .base import (
+    devel_debug_option,
+    devel_option,
+    map_to_click_exceptions,
+    parse_regexes,
+)
+from ..utils import filter_by_id_patterns, pluralize
 from ..validate import validate as validate_
 from ..validate_types import Severity, ValidationResult
 
@@ -80,6 +85,17 @@ def validate_bids(
     default="none",
 )
 @click.option("--ignore", metavar="REGEX", help="Regex matching error IDs to ignore")
+@click.option(
+    "--match",
+    metavar="REGEX,REGEX,...",
+    help=(
+        "Comma-separated regex patterns used to filter issues in validation results "
+        "by their ID. Only issues with an ID matching at least one of the given "
+        "patterns are included in the eventual result. "
+        "(No pattern should contain a comma.)"
+    ),
+    callback=parse_regexes,
+)
 @click.option(
     "--min-severity",
     help="Only display issues with severities above this level.",
@@ -92,6 +108,7 @@ def validate_bids(
 def validate(
     paths: tuple[str, ...],
     ignore: str | None,
+    match: Optional[list[re.Pattern]],
     grouping: str,
     min_severity: str,
     schema: str | None = None,
@@ -134,17 +151,23 @@ def validate(
         if i.severity is not None and i.severity.value >= min_severity_value
     ]
 
-    _process_issues(filtered_results, grouping, ignore)
+    _process_issues(filtered_results, grouping, ignore, match)
 
 
 def _process_issues(
     validator_result: Iterable[ValidationResult],
     grouping: str,
     ignore: str | None = None,
+    match: Optional[list[re.Pattern]] = None,
 ) -> None:
     issues = [i for i in validator_result if i.severity is not None]
     if ignore is not None:
         issues = [i for i in issues if not re.search(ignore, i.id)]
+
+    # Filter issues by ID patterns if provided
+    if match is not None:
+        issues = filter_by_id_patterns(issues, match)
+
     purviews = [i.purview for i in issues]
     if grouping == "none":
         display_errors(
diff --git a/dandi/cli/tests/test_cmd_validate.py b/dandi/cli/tests/test_cmd_validate.py
index 6694ef3ec..035ab17b6 100644
--- a/dandi/cli/tests/test_cmd_validate.py
+++ b/dandi/cli/tests/test_cmd_validate.py
@@ -2,6 +2,7 @@
 
 from click.testing import CliRunner
 import pytest
+import pytest_mock
 
 from ..cmd_validate import _process_issues, validate
 from ...tests.xfail import mark_xfail_windows_python313_posixsubprocess
@@ -149,3 +150,172 @@ def test_validate_bids_error_grouping_notification(
     # Does it notify the user correctly?
     notification_substring = "Invalid value for '--grouping'"
     assert notification_substring in r.output
+
+
+class TestValidateMatchOption:
+    """Test the --match option for filtering validation results."""
+
+    @staticmethod
+    def _mock_validate(*paths, **kwargs):
+        """Mock validation function that returns controlled ValidationResult objects."""
+        origin = Origin(
+            type=OriginType.VALIDATION,
+            validator=Validator.dandi,
+            validator_version="test",
+        )
+
+        # Return a set of validation results with different IDs
+        results = [
+            ValidationResult(
+                id="BIDS.DATATYPE_MISMATCH",
+                origin=origin,
+                severity=Severity.ERROR,
+                scope=Scope.FILE,
+                message="Datatype mismatch error",
+                path=Path(paths[0]) / "file1.nii",
+            ),
+            ValidationResult(
+                id="BIDS.EXTENSION_MISMATCH",
+                origin=origin,
+                severity=Severity.ERROR,
+                scope=Scope.FILE,
+                message="Extension mismatch error",
+                path=Path(paths[0]) / "file2.jpg",
+            ),
+            ValidationResult(
+                id="DANDI.NO_DANDISET_FOUND",
+                origin=origin,
+                severity=Severity.ERROR,
+                scope=Scope.DANDISET,
+                message="No dandiset found",
+                path=Path(paths[0]),
+            ),
+            ValidationResult(
+                id="NWBI.check_data_orientation",
+                origin=origin,
+                severity=Severity.WARNING,
+                scope=Scope.FILE,
+                message="Data orientation warning",
+                path=Path(paths[0]) / "file3.nwb",
+            ),
+        ]
+        return iter(results)
+
+    @pytest.mark.parametrize(
+        "match_patterns,parsed_patterns,should_contain,should_not_contain",
+        [
+            # Single pattern matching specific validation ID
+            (
+                r"BIDS\.DATATYPE_MISMATCH",
+                [r"BIDS\.DATATYPE_MISMATCH"],
+                ["BIDS.DATATYPE_MISMATCH"],
+                [
+                    "BIDS.EXTENSION_MISMATCH",
+                    "DANDI.NO_DANDISET_FOUND",
+                    "NWBI.check_data_orientation",
+                ],
+            ),
+            # Single pattern matching by prefix
+            (
+                r"^BIDS\.",
+                [r"^BIDS\."],
+                ["BIDS.DATATYPE_MISMATCH", "BIDS.EXTENSION_MISMATCH"],
+                ["DANDI.NO_DANDISET_FOUND", "NWBI.check_data_orientation"],
+            ),
+            # Single pattern that matches nothing (should show "No errors found")
+            (
+                r"NONEXISTENT_ID",
+                [r"NONEXISTENT_ID"],
+                ["No errors found"],
+                ["BIDS", "DANDI", "NWBI"],
+            ),
+            # Multiple patterns separated by comma
+            (
+                r"BIDS\.DATATYPE_MISMATCH,BIDS\.EXTENSION_MISMATCH",
+                [r"BIDS\.DATATYPE_MISMATCH", r"BIDS\.EXTENSION_MISMATCH"],
+                ["BIDS.DATATYPE_MISMATCH", "BIDS.EXTENSION_MISMATCH"],
+                ["DANDI.NO_DANDISET_FOUND", "NWBI.check_data_orientation"],
+            ),
+            # Multiple patterns with wildcard
+            (
+                r"BIDS\.\S+,DANDI\.\S+",
+                [r"BIDS\.\S+", r"DANDI\.\S+"],
+                [
+                    "BIDS.DATATYPE_MISMATCH",
+                    "BIDS.EXTENSION_MISMATCH",
+                    "DANDI.NO_DANDISET_FOUND",
+                ],
+                ["NWBI"],
+            ),
+        ],
+    )
+    def test_match_patterns(
+        self,
+        tmp_path: Path,
+        match_patterns: str,
+        parsed_patterns: list[str],
+        should_contain: list[str],
+        should_not_contain: list[str],
+        monkeypatch: pytest.MonkeyPatch,
+        mocker: pytest_mock.MockerFixture,
+    ) -> None:
+        """Test --match option with single or multiple comma-separated patterns."""
+        from .. import cmd_validate
+
+        # Use to monitor what compiled patterns are passed by the CLI
+        process_issues_spy = mocker.spy(cmd_validate, "_process_issues")
+
+        monkeypatch.setattr(cmd_validate, "validate_", self._mock_validate)
+
+        r = CliRunner().invoke(validate, [f"--match={match_patterns}", str(tmp_path)])
+
+        process_issues_spy.assert_called_once()
+        call_args = process_issues_spy.call_args
+
+        # Ensure the patterns are parsed and passed correctly
+        passed_patterns = call_args.kwargs.get(
+            "match", call_args.args[3] if len(call_args.args) > 3 else None
+        )
+        assert (
+            passed_patterns is not None
+        ), "No match patterns were passed to _process_issues"
+        # We don't really care about the order of the patterns
+        assert {p.pattern for p in passed_patterns} == set(parsed_patterns)
+
+        for text in should_contain:
+            assert text in r.output, f"Expected '{text}' in output but not found"
+
+        for text in should_not_contain:
+            assert text not in r.output, f"Expected '{text}' NOT in output but found"
+
+    def test_match_invalid_regex(self, tmp_path: Path) -> None:
+        """Test --match option with invalid regex pattern."""
+        # Invalid regex pattern with unmatched parenthesis
+        r = CliRunner(mix_stderr=False).invoke(
+            validate, [r"--match=(INVALID", str(tmp_path)], catch_exceptions=False
+        )
+
+        # Should fail with an error about invalid regex
+        assert r.exit_code != 0
+        assert "error" in r.stderr.lower() and "--match" in r.stderr
+
+    def test_match_with_ignore_combination(
+        self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+    ) -> None:
+        """Test --match and --ignore options used together."""
+        from .. import cmd_validate
+
+        monkeypatch.setattr(cmd_validate, "validate_", self._mock_validate)
+
+        # Then use both match and ignore
+        r = CliRunner().invoke(
+            validate,
+            [
+                r"--match=BIDS\.DATATYPE_MISMATCH",
+                r"--ignore=DATATYPE_MISMATCH",
+                str(tmp_path),
+            ],
+        )
+
+        assert "BIDS.DATATYPE_MISMATCH" not in r.output
+        assert "No errors found" in r.output
diff --git a/docs/source/cmdline/validate.rst b/docs/source/cmdline/validate.rst
index 111ccd441..1a86775e1 100644
--- a/docs/source/cmdline/validate.rst
+++ b/docs/source/cmdline/validate.rst
@@ -22,6 +22,13 @@ Options
     Ignore any validation errors & warnings whose ID matches the given regular
     expression
 
+.. option:: --match REGEX,REGEX,...
+
+    Comma-separated regex patterns used to filter issues in validation results by their
+    ID. Only issues with an ID matching at least one of the given patterns are included
+    in the eventual result. Note: The separator used to separate the patterns is a
+    comma (`,`), so no pattern should contain a comma.
+
 .. option:: --min-severity [HINT|WARNING|ERROR]
 
     Only display issues with severities above this level (HINT by default)

From 2e5b2bcdb6fa00b7df531ef444c2bdb0aaae1184 Mon Sep 17 00:00:00 2001
From: Isaac To <isaac.chun.to@gmail.com>
Date: Tue, 18 Nov 2025 14:09:30 -0800
Subject: [PATCH 04/13] feat: change return type of `parse_regexes` callback

This allows the return type to reflect
the fact that the order of the elements
in the return doesn't matter.
---
 dandi/cli/base.py                    |  8 ++++----
 dandi/cli/cmd_validate.py            |  4 ++--
 dandi/cli/tests/test_base.py         | 18 ++++++++----------
 dandi/cli/tests/test_cmd_validate.py | 15 +++++++--------
 dandi/tests/test_utils.py            |  6 +++---
 dandi/utils.py                       |  6 +++---
 6 files changed, 27 insertions(+), 30 deletions(-)

diff --git a/dandi/cli/base.py b/dandi/cli/base.py
index a826c75ed..298bfbbb2 100644
--- a/dandi/cli/base.py
+++ b/dandi/cli/base.py
@@ -175,7 +175,7 @@ def _compile_regex(regex: str) -> re.Pattern:
 
 def parse_regexes(
     _ctx: click.Context, _param: click.Parameter, value: Optional[str]
-) -> Optional[list[re.Pattern]]:
+) -> Optional[set[re.Pattern]]:
     """
     Callback to parse a string of comma-separated regex patterns
 
@@ -193,8 +193,8 @@ def parse_regexes(
 
     Returns
     -------
-    list[re.Pattern]
-        A list of compiled regex patterns.
+    set[re.Pattern]
+        A set of compiled regex patterns.
 
     Notes
     -----
@@ -206,4 +206,4 @@ def parse_regexes(
 
     regexes = set(value.split(","))
 
-    return [_compile_regex(regex) for regex in regexes]
+    return {_compile_regex(regex) for regex in regexes}
diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py
index 24ad0c25f..b73af4ed5 100644
--- a/dandi/cli/cmd_validate.py
+++ b/dandi/cli/cmd_validate.py
@@ -108,7 +108,7 @@ def validate_bids(
 def validate(
     paths: tuple[str, ...],
     ignore: str | None,
-    match: Optional[list[re.Pattern]],
+    match: Optional[set[re.Pattern]],
     grouping: str,
     min_severity: str,
     schema: str | None = None,
@@ -158,7 +158,7 @@ def _process_issues(
     validator_result: Iterable[ValidationResult],
     grouping: str,
     ignore: str | None = None,
-    match: Optional[list[re.Pattern]] = None,
+    match: Optional[set[re.Pattern]] = None,
 ) -> None:
     issues = [i for i in validator_result if i.severity is not None]
     if ignore is not None:
diff --git a/dandi/cli/tests/test_base.py b/dandi/cli/tests/test_base.py
index 5c5cec7b8..c436893d9 100644
--- a/dandi/cli/tests/test_base.py
+++ b/dandi/cli/tests/test_base.py
@@ -49,7 +49,7 @@ def test_none_returns_none(self):
     def test_single_pattern(self, value):
 
         result = parse_regexes(DUMMY_CTX, DUMMY_PARAM, value)
-        assert isinstance(result, list)
+        assert isinstance(result, set)
         assert len(result) == 1
 
         (compiled,) = result
@@ -59,20 +59,18 @@ def test_single_pattern(self, value):
     @pytest.mark.parametrize(
         "value, expected_patterns_in_strs",
         [
-            ("foo,,bar", ["foo", "", "bar"]),
-            ("^start$,end$", ["^start$", "end$"]),
-            (r"a\.b,c+d", [r"a\.b", r"c+d"]),
+            ("foo,,bar", {"foo", "", "bar"}),
+            ("^start$,end$", {"^start$", "end$"}),
+            (r"a\.b,c+d", {r"a\.b", r"c+d"}),
             # duplicates should be collapsed by the internal set()
-            ("foo,foo,bar", ["foo", "bar"]),
+            ("foo,foo,bar", {"foo", "bar"}),
         ],
     )
-    def test_multiple_patterns(self, value: str, expected_patterns_in_strs: list[str]):
+    def test_multiple_patterns(self, value: str, expected_patterns_in_strs: set[str]):
         result = parse_regexes(DUMMY_CTX, DUMMY_PARAM, value)
-        assert isinstance(result, list)
+        assert isinstance(result, set)
 
-        # Order is not guaranteed due to de-duplication via set
-        # So we just check that all expected patterns are present
-        assert {p.pattern for p in result} == set(expected_patterns_in_strs)
+        assert {p.pattern for p in result} == expected_patterns_in_strs
 
     @pytest.mark.parametrize(
         "value, bad_pattern", [("(", "("), ("foo,(", "("), ("good,[a-z", "[a-z")]
diff --git a/dandi/cli/tests/test_cmd_validate.py b/dandi/cli/tests/test_cmd_validate.py
index 035ab17b6..684b2751c 100644
--- a/dandi/cli/tests/test_cmd_validate.py
+++ b/dandi/cli/tests/test_cmd_validate.py
@@ -207,7 +207,7 @@ def _mock_validate(*paths, **kwargs):
             # Single pattern matching specific validation ID
             (
                 r"BIDS\.DATATYPE_MISMATCH",
-                [r"BIDS\.DATATYPE_MISMATCH"],
+                {r"BIDS\.DATATYPE_MISMATCH"},
                 ["BIDS.DATATYPE_MISMATCH"],
                 [
                     "BIDS.EXTENSION_MISMATCH",
@@ -218,28 +218,28 @@ def _mock_validate(*paths, **kwargs):
             # Single pattern matching by prefix
             (
                 r"^BIDS\.",
-                [r"^BIDS\."],
+                {r"^BIDS\."},
                 ["BIDS.DATATYPE_MISMATCH", "BIDS.EXTENSION_MISMATCH"],
                 ["DANDI.NO_DANDISET_FOUND", "NWBI.check_data_orientation"],
             ),
             # Single pattern that matches nothing (should show "No errors found")
             (
                 r"NONEXISTENT_ID",
-                [r"NONEXISTENT_ID"],
+                {r"NONEXISTENT_ID"},
                 ["No errors found"],
                 ["BIDS", "DANDI", "NWBI"],
             ),
             # Multiple patterns separated by comma
             (
                 r"BIDS\.DATATYPE_MISMATCH,BIDS\.EXTENSION_MISMATCH",
-                [r"BIDS\.DATATYPE_MISMATCH", r"BIDS\.EXTENSION_MISMATCH"],
+                {r"BIDS\.DATATYPE_MISMATCH", r"BIDS\.EXTENSION_MISMATCH"},
                 ["BIDS.DATATYPE_MISMATCH", "BIDS.EXTENSION_MISMATCH"],
                 ["DANDI.NO_DANDISET_FOUND", "NWBI.check_data_orientation"],
             ),
             # Multiple patterns with wildcard
             (
                 r"BIDS\.\S+,DANDI\.\S+",
-                [r"BIDS\.\S+", r"DANDI\.\S+"],
+                {r"BIDS\.\S+", r"DANDI\.\S+"},
                 [
                     "BIDS.DATATYPE_MISMATCH",
                     "BIDS.EXTENSION_MISMATCH",
@@ -253,7 +253,7 @@ def test_match_patterns(
         self,
         tmp_path: Path,
         match_patterns: str,
-        parsed_patterns: list[str],
+        parsed_patterns: set[str],
         should_contain: list[str],
         should_not_contain: list[str],
         monkeypatch: pytest.MonkeyPatch,
@@ -279,8 +279,7 @@ def test_match_patterns(
         assert (
             passed_patterns is not None
         ), "No match patterns were passed to _process_issues"
-        # We don't really care about the order of the patterns
-        assert {p.pattern for p in passed_patterns} == set(parsed_patterns)
+        assert {p.pattern for p in passed_patterns} == parsed_patterns
 
         for text in should_contain:
             assert text in r.output, f"Expected '{text}' in output but not found"
diff --git a/dandi/tests/test_utils.py b/dandi/tests/test_utils.py
index 838a75db6..b264e3e4c 100644
--- a/dandi/tests/test_utils.py
+++ b/dandi/tests/test_utils.py
@@ -719,14 +719,14 @@ def test_pattern_filtering(
         expected_ids: list[str],
     ) -> None:
         """Test filtering with various pattern combinations and edge cases."""
-        patterns = [re.compile(p) for p in pattern_strs]
+        patterns = {re.compile(p) for p in pattern_strs}
         filtered = filter_by_id_patterns(sample_validation_results, patterns)
         filtered_ids = [result.id for result in filtered]
         assert filtered_ids == expected_ids
 
     def test_empty_validation_results(self) -> None:
         """Test filtering with an empty validation results list."""
-        patterns = [re.compile(r".*")]
+        patterns = {re.compile(r".*")}
         filtered = filter_by_id_patterns([], patterns)
         assert filtered == []
 
@@ -734,6 +734,6 @@ def test_preserves_order(
         self, sample_validation_results: list[ValidationResult]
     ) -> None:
         """Test that filtering preserves the original order of results."""
-        patterns = [re.compile(r".*")]
+        patterns = {re.compile(r".*")}
         filtered = filter_by_id_patterns(sample_validation_results, patterns)
         assert filtered == sample_validation_results
diff --git a/dandi/utils.py b/dandi/utils.py
index 9d34bec06..72156e924 100644
--- a/dandi/utils.py
+++ b/dandi/utils.py
@@ -978,7 +978,7 @@ def _generate_next_value_(name, _start, _count, _last_values):
 
 
 def filter_by_id_patterns(
-    validation_results: Iterable[ValidationResult], patterns: list[re.Pattern[str]]
+    validation_results: Iterable[ValidationResult], patterns: set[re.Pattern]
 ) -> list[ValidationResult]:
     """
     Filter validation results by matching their IDs against provided regex patterns.
@@ -987,8 +987,8 @@ def filter_by_id_patterns(
     ----------
     validation_results : Iterable[ValidationResult]
         The iterable of validation results to filter.
-    patterns : list[re.Pattern[str]]
-        The list of regex patterns to match validation result IDs against.
+    patterns : set[re.Pattern]
+        The set of regex patterns to match validation result IDs against.
 
     Returns
     -------

From a80987a9f94287d802f4d7a11324abb70ca497de Mon Sep 17 00:00:00 2001
From: Isaac To <isaac.chun.to@gmail.com>
Date: Tue, 18 Nov 2025 15:36:19 -0800
Subject: [PATCH 05/13] test: refactor two tests in `TestParseRegexes` into one

---
 dandi/cli/tests/test_base.py | 27 +++++++--------------------
 1 file changed, 7 insertions(+), 20 deletions(-)

diff --git a/dandi/cli/tests/test_base.py b/dandi/cli/tests/test_base.py
index c436893d9..cd25ba737 100644
--- a/dandi/cli/tests/test_base.py
+++ b/dandi/cli/tests/test_base.py
@@ -37,28 +37,15 @@ class TestParseRegexes:
     def test_none_returns_none(self):
         assert parse_regexes(DUMMY_CTX, DUMMY_PARAM, None) is None
 
-    @pytest.mark.parametrize(
-        "value",
-        [
-            "abc",
-            "[a-z]+",
-            r"a\.b",
-            r"",
-        ],
-    )
-    def test_single_pattern(self, value):
-
-        result = parse_regexes(DUMMY_CTX, DUMMY_PARAM, value)
-        assert isinstance(result, set)
-        assert len(result) == 1
-
-        (compiled,) = result
-        assert isinstance(compiled, re.Pattern)
-        assert compiled.pattern == value
-
     @pytest.mark.parametrize(
         "value, expected_patterns_in_strs",
         [
+            # Single patterns
+            ("abc", {"abc"}),
+            ("[a-z]+", {"[a-z]+"}),
+            (r"a\.b", {r"a\.b"}),
+            (r"", {r""}),
+            # Multiple patterns
             ("foo,,bar", {"foo", "", "bar"}),
             ("^start$,end$", {"^start$", "end$"}),
             (r"a\.b,c+d", {r"a\.b", r"c+d"}),
@@ -66,7 +53,7 @@ def test_single_pattern(self, value):
             ("foo,foo,bar", {"foo", "bar"}),
         ],
     )
-    def test_multiple_patterns(self, value: str, expected_patterns_in_strs: set[str]):
+    def test_parse_patterns(self, value: str, expected_patterns_in_strs: set[str]):
         result = parse_regexes(DUMMY_CTX, DUMMY_PARAM, value)
         assert isinstance(result, set)
 

From 09c1489e1042aae2db06ac643265e3b35624b5ed Mon Sep 17 00:00:00 2001
From: Isaac To <isaac.chun.to@gmail.com>
Date: Tue, 18 Nov 2025 15:44:06 -0800
Subject: [PATCH 06/13] fix: provide type annotation to fix mypy errs

---
 dandi/cli/tests/test_base.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/dandi/cli/tests/test_base.py b/dandi/cli/tests/test_base.py
index cd25ba737..9fa4971eb 100644
--- a/dandi/cli/tests/test_base.py
+++ b/dandi/cli/tests/test_base.py
@@ -53,7 +53,9 @@ def test_none_returns_none(self):
             ("foo,foo,bar", {"foo", "bar"}),
         ],
     )
-    def test_parse_patterns(self, value: str, expected_patterns_in_strs: set[str]):
+    def test_parse_patterns(
+        self, value: str, expected_patterns_in_strs: set[str]
+    ) -> None:
         result = parse_regexes(DUMMY_CTX, DUMMY_PARAM, value)
         assert isinstance(result, set)
 
@@ -62,6 +64,8 @@ def test_parse_patterns(self, value: str, expected_patterns_in_strs: set[str]):
     @pytest.mark.parametrize(
         "value, bad_pattern", [("(", "("), ("foo,(", "("), ("good,[a-z", "[a-z")]
     )
-    def test_invalid_pattern_raises_bad_parameter(self, value: str, bad_pattern: str):
+    def test_invalid_pattern_raises_bad_parameter(
+        self, value: str, bad_pattern: str
+    ) -> None:
         with pytest.raises(click.BadParameter, match=re.escape(bad_pattern)):
             parse_regexes(DUMMY_CTX, DUMMY_PARAM, value)

From cdf9dea60e2aad6e8fff214402180d0a3f9da8ee Mon Sep 17 00:00:00 2001
From: Isaac To <isaac.chun.to@gmail.com>
Date: Thu, 20 Nov 2025 13:45:07 -0800
Subject: [PATCH 07/13] feat: add helper func to filter validation results by
 associated paths

---
 dandi/utils.py | 47 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)

diff --git a/dandi/utils.py b/dandi/utils.py
index 72156e924..3b37e93f2 100644
--- a/dandi/utils.py
+++ b/dandi/utils.py
@@ -1001,3 +1001,50 @@ def filter_by_id_patterns(
         if any(re.search(pattern, result.id) for pattern in patterns):
             filtered_results.append(result)
     return filtered_results
+
+
+def filter_by_paths(
+    validation_results: Iterable[ValidationResult], paths: tuple[Path, ...]
+) -> list[ValidationResult]:
+    """
+    Filter validation results by matching their associated paths against provided paths.
+
+    Parameters
+    ----------
+    validation_results : Iterable[ValidationResult]
+        The iterable of validation results to filter.
+    paths : tuple[Path, ...]
+        The tuple of paths to match validation result paths against.
+
+    Returns
+    -------
+    list[ValidationResult]
+        The filtered list of validation results whose associated paths match the
+        provided paths. 'Matching' refers to cases where an associated path of a
+        validation result is the same path as, or falls under,
+        one of the provided paths.
+
+    Raises
+    ------
+    ValueError
+        If any of the provided paths doesn't exist in the filesystem.
+    """
+    for p in paths:
+        if not p.exists():
+            raise ValueError(f"Provided path {p} does not exist")
+
+    # Ensure all provided paths are resolved to their absolute forms
+    paths = tuple(p.resolve(strict=True) for p in paths)
+
+    filtered_results = []
+    for r in validation_results:
+        associated_paths = [
+            p.resolve(strict=True)
+            for p in (r.path, r.dataset_path)
+            if p is not None and p.exists()
+        ]
+        for assoc_path in associated_paths:
+            if any(assoc_path.is_relative_to(p) for p in paths):
+                filtered_results.append(r)
+                break
+    return filtered_results

From 0530591fee16181e90f09327b7b0f7a055ad3eac Mon Sep 17 00:00:00 2001
From: Isaac To <isaac.chun.to@gmail.com>
Date: Thu, 20 Nov 2025 16:18:50 -0800
Subject: [PATCH 08/13] feat: add helper func to filter validation results by
 associated paths

---
 dandi/tests/test_utils.py | 236 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 236 insertions(+)

diff --git a/dandi/tests/test_utils.py b/dandi/tests/test_utils.py
index b264e3e4c..3ba63cfa3 100644
--- a/dandi/tests/test_utils.py
+++ b/dandi/tests/test_utils.py
@@ -22,6 +22,7 @@
     ensure_datetime,
     ensure_strtime,
     filter_by_id_patterns,
+    filter_by_paths,
     find_files,
     flatten,
     flattened,
@@ -737,3 +738,238 @@ def test_preserves_order(
         patterns = {re.compile(r".*")}
         filtered = filter_by_id_patterns(sample_validation_results, patterns)
         assert filtered == sample_validation_results
+
+
+class TestFilterByPaths:
+    """Test the filter_by_paths function."""
+
+    @pytest.fixture
+    def sample_validation_results(self, tmp_path: Path) -> list[ValidationResult]:
+        """Create sample validation results with file paths for testing."""
+        # Create a directory structure for testing
+        (tmp_path / "sub-mouse001").mkdir()
+        (tmp_path / "sub-mouse002").mkdir()
+        (tmp_path / "sub-mouse003").mkdir()
+        (tmp_path / "data" / "nested").mkdir(parents=True)
+        (tmp_path / "dataset").mkdir()
+
+        # Create test files
+        file1 = tmp_path / "sub-mouse001" / "data.nwb"
+        file2 = tmp_path / "sub-mouse002" / "data.nwb"
+        file3 = tmp_path / "sub-mouse003" / "data.nwb"
+        file4 = tmp_path / "data" / "nested" / "experiment.nwb"
+        file5 = tmp_path / "file_with_both.nwb"
+        file1.touch()
+        file2.touch()
+        file3.touch()
+        file4.touch()
+        file5.touch()
+
+        origin_nwbinspector = Origin(
+            type=OriginType.VALIDATION,
+            validator=Validator.nwbinspector,
+            validator_version="",
+        )
+        origin_dandi = Origin(
+            type=OriginType.VALIDATION,
+            validator=Validator.dandi,
+            validator_version="",
+        )
+
+        return [
+            # Results with only 'path'
+            ValidationResult(
+                id="NWBI.check_1",
+                origin=origin_nwbinspector,
+                scope=Scope.FILE,
+                message="Issue 1",
+                path=file1,
+                severity=Severity.WARNING,
+            ),
+            ValidationResult(
+                id="NWBI.check_2",
+                origin=origin_nwbinspector,
+                scope=Scope.FILE,
+                message="Issue 2",
+                path=file2,
+                severity=Severity.WARNING,
+            ),
+            ValidationResult(
+                id="NWBI.check_3",
+                origin=origin_nwbinspector,
+                scope=Scope.FILE,
+                message="Issue 3",
+                path=file3,
+                severity=Severity.ERROR,
+            ),
+            ValidationResult(
+                id="NWBI.check_4",
+                origin=origin_nwbinspector,
+                scope=Scope.FILE,
+                message="Issue 4",
+                path=file4,
+                severity=Severity.WARNING,
+            ),
+            # Result with only 'dataset_path'
+            ValidationResult(
+                id="DANDI.dataset_only",
+                origin=origin_dandi,
+                scope=Scope.DANDISET,
+                message="Dataset issue",
+                dataset_path=tmp_path / "dataset",
+                severity=Severity.ERROR,
+            ),
+            # Result with both 'path' and 'dataset_path'
+            ValidationResult(
+                id="DANDI.both_paths",
+                origin=origin_dandi,
+                scope=Scope.FILE,
+                message="Issue with both paths",
+                path=file5,
+                dataset_path=tmp_path / "dataset",
+                severity=Severity.WARNING,
+            ),
+        ]
+
+    @pytest.mark.parametrize(
+        "path_specs,expected_ids",
+        [
+            # Single file path
+            (["sub-mouse001/data.nwb"], ["NWBI.check_1"]),
+            # Multiple file paths
+            (
+                ["sub-mouse001/data.nwb", "sub-mouse002/data.nwb"],
+                ["NWBI.check_1", "NWBI.check_2"],
+            ),
+            # Directory path matching all files under it
+            (["sub-mouse001"], ["NWBI.check_1"]),
+            # Parent directory path matching multiple subdirectories
+            (["data"], ["NWBI.check_4"]),
+            # Multiple directories
+            (
+                ["sub-mouse001", "sub-mouse003"],
+                ["NWBI.check_1", "NWBI.check_3"],
+            ),
+            # Nested directory
+            (["data/nested"], ["NWBI.check_4"]),
+            # All paths via parent directory (includes all results)
+            (
+                ["."],
+                [
+                    "NWBI.check_1",
+                    "NWBI.check_2",
+                    "NWBI.check_3",
+                    "NWBI.check_4",
+                    "DANDI.dataset_only",
+                    "DANDI.both_paths",
+                ],
+            ),
+            # Empty paths tuple
+            ([], []),
+            # No duplicates when result matches multiple filter paths (file + parent dir)
+            (["sub-mouse001/data.nwb", "sub-mouse001"], ["NWBI.check_1"]),
+            # Result with dataset_path
+            (["dataset"], ["DANDI.dataset_only", "DANDI.both_paths"]),
+            # Result with both path and dataset_path - filter by file path
+            (["file_with_both.nwb"], ["DANDI.both_paths"]),
+        ],
+    )
+    def test_path_filtering(
+        self,
+        sample_validation_results: list[ValidationResult],
+        tmp_path: Path,
+        path_specs: list[str],
+        expected_ids: list[str],
+    ) -> None:
+        """Test filtering with various path combinations and edge cases."""
+        paths = tuple(tmp_path / p for p in path_specs)
+        filtered = filter_by_paths(sample_validation_results, paths)
+        filtered_ids = [result.id for result in filtered]
+        assert filtered_ids == expected_ids
+
+    def test_nonexistent_path_raises_error(
+        self, sample_validation_results: list[ValidationResult], tmp_path: Path
+    ) -> None:
+        """Test that filtering with a non-existent path raises ValueError."""
+        nonexistent_path = tmp_path / "nonexistent"
+        with pytest.raises(ValueError, match="does not exist"):
+            filter_by_paths(sample_validation_results, (nonexistent_path,))
+
+    def test_empty_validation_results(self, tmp_path: Path) -> None:
+        """Test filtering with an empty validation results list."""
+        filtered = filter_by_paths([], (tmp_path,))
+        assert filtered == []
+
+    def test_preserves_order(
+        self, sample_validation_results: list[ValidationResult], tmp_path: Path
+    ) -> None:
+        """Test that filtering preserves the original order of results."""
+        filtered = filter_by_paths(sample_validation_results, (tmp_path,))
+        assert filtered == sample_validation_results
+
+    def test_result_with_nonexistent_path(self, tmp_path: Path) -> None:
+        """Test that results with non-existent paths are ignored during filtering."""
+        existing_file = tmp_path / "existing.nwb"
+        nonexistent_file = tmp_path / "nonexistent.nwb"
+        existing_file.touch()
+
+        origin = Origin(
+            type=OriginType.VALIDATION,
+            validator=Validator.nwbinspector,
+            validator_version="",
+        )
+
+        results = [
+            ValidationResult(
+                id="NWBI.check_1",
+                origin=origin,
+                scope=Scope.FILE,
+                message="Issue 1",
+                path=existing_file,
+                severity=Severity.WARNING,
+            ),
+            ValidationResult(
+                id="NWBI.check_2",
+                origin=origin,
+                scope=Scope.FILE,
+                message="Issue 2",
+                path=nonexistent_file,  # This file doesn't exist
+                severity=Severity.WARNING,
+            ),
+        ]
+
+        # Filter by parent directory - should only get the existing file's result
+        filtered = filter_by_paths(results, (tmp_path,))
+        assert len(filtered) == 1
+        assert filtered[0].id == "NWBI.check_1"
+
+    def test_relative_paths_resolved(
+        self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+    ) -> None:
+        """Test that relative paths are properly resolved to absolute paths."""
+        monkeypatch.chdir(tmp_path)
+
+        subdir = tmp_path / "subdir"
+        subdir.mkdir()
+        file1 = subdir / "data.nwb"
+        file1.touch()
+
+        origin = Origin(
+            type=OriginType.VALIDATION,
+            validator=Validator.nwbinspector,
+            validator_version="",
+        )
+
+        result = ValidationResult(
+            id="NWBI.check_1",
+            origin=origin,
+            scope=Scope.FILE,
+            message="Issue",
+            path=file1,
+            severity=Severity.WARNING,
+        )
+
+        # Use relative path
+        filtered = filter_by_paths([result], (Path("subdir"),))
+        assert len(filtered) == 1
+        assert filtered[0].id == "NWBI.check_1"

From 0f1b92440b34d4a8dad18404bf62440959b14874 Mon Sep 17 00:00:00 2001
From: Isaac To <isaac.chun.to@gmail.com>
Date: Thu, 20 Nov 2025 16:41:40 -0800
Subject: [PATCH 09/13] feat: add `--include-path` option to `dandi validate`
 command

This option allows filtering of issues in the
validation results to only those associated with
the given path(s)
---
 dandi/cli/cmd_validate.py            |  20 +++-
 dandi/cli/tests/test_cmd_validate.py | 165 +++++++++++++++++++--------
 2 files changed, 135 insertions(+), 50 deletions(-)

diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py
index b73af4ed5..dbebe96c9 100644
--- a/dandi/cli/cmd_validate.py
+++ b/dandi/cli/cmd_validate.py
@@ -3,6 +3,7 @@
 from collections.abc import Iterable
 import logging
 import os
+from pathlib import Path
 import re
 from typing import Optional, cast
 import warnings
@@ -15,7 +16,7 @@
     map_to_click_exceptions,
     parse_regexes,
 )
-from ..utils import filter_by_id_patterns, pluralize
+from ..utils import filter_by_id_patterns, filter_by_paths, pluralize
 from ..validate import validate as validate_
 from ..validate_types import Severity, ValidationResult
 
@@ -96,6 +97,15 @@ def validate_bids(
     ),
     callback=parse_regexes,
 )
+@click.option(
+    "--include-path",
+    multiple=True,
+    type=click.Path(exists=True, resolve_path=True, path_type=Path),
+    help=(
+        "Filter issues in the validation results to only those associated with the "
+        "given path(s). This option can be specified multiple times."
+    ),
+)
 @click.option(
     "--min-severity",
     help="Only display issues with severities above this level.",
@@ -109,6 +119,7 @@ def validate(
     paths: tuple[str, ...],
     ignore: str | None,
     match: Optional[set[re.Pattern]],
+    include_path: tuple[Path, ...],
     grouping: str,
     min_severity: str,
     schema: str | None = None,
@@ -151,7 +162,7 @@ def validate(
         if i.severity is not None and i.severity.value >= min_severity_value
     ]
 
-    _process_issues(filtered_results, grouping, ignore, match)
+    _process_issues(filtered_results, grouping, ignore, match, include_path)
 
 
 def _process_issues(
@@ -159,6 +170,7 @@ def _process_issues(
     grouping: str,
     ignore: str | None = None,
     match: Optional[set[re.Pattern]] = None,
+    include_path: tuple[Path, ...] = (),
 ) -> None:
     issues = [i for i in validator_result if i.severity is not None]
     if ignore is not None:
@@ -168,6 +180,10 @@ def _process_issues(
     if match is not None:
         issues = filter_by_id_patterns(issues, match)
 
+    # Filter issues by included paths if provided
+    if include_path:
+        issues = filter_by_paths(issues, include_path)
+
     purviews = [i.purview for i in issues]
     if grouping == "none":
         display_errors(
diff --git a/dandi/cli/tests/test_cmd_validate.py b/dandi/cli/tests/test_cmd_validate.py
index 684b2751c..5c4cd22f6 100644
--- a/dandi/cli/tests/test_cmd_validate.py
+++ b/dandi/cli/tests/test_cmd_validate.py
@@ -152,54 +152,54 @@ def test_validate_bids_error_grouping_notification(
     assert notification_substring in r.output
 
 
-class TestValidateMatchOption:
-    """Test the --match option for filtering validation results."""
+def _mock_validate(*paths, **kwargs):
+    """Mock validation function that returns controlled ValidationResult objects."""
+    origin = Origin(
+        type=OriginType.VALIDATION,
+        validator=Validator.dandi,
+        validator_version="test",
+    )
 
-    @staticmethod
-    def _mock_validate(*paths, **kwargs):
-        """Mock validation function that returns controlled ValidationResult objects."""
-        origin = Origin(
-            type=OriginType.VALIDATION,
-            validator=Validator.dandi,
-            validator_version="test",
-        )
+    # Return a set of validation results with different IDs
+    results = [
+        ValidationResult(
+            id="BIDS.DATATYPE_MISMATCH",
+            origin=origin,
+            severity=Severity.ERROR,
+            scope=Scope.FILE,
+            message="Datatype mismatch error",
+            path=Path(paths[0]) / "file1.nii",
+        ),
+        ValidationResult(
+            id="BIDS.EXTENSION_MISMATCH",
+            origin=origin,
+            severity=Severity.ERROR,
+            scope=Scope.FILE,
+            message="Extension mismatch error",
+            path=Path(paths[0]) / "file2.jpg",
+        ),
+        ValidationResult(
+            id="DANDI.NO_DANDISET_FOUND",
+            origin=origin,
+            severity=Severity.ERROR,
+            scope=Scope.DANDISET,
+            message="No dandiset found",
+            path=Path(paths[0]),
+        ),
+        ValidationResult(
+            id="NWBI.check_data_orientation",
+            origin=origin,
+            severity=Severity.WARNING,
+            scope=Scope.FILE,
+            message="Data orientation warning",
+            path=Path(paths[0]) / "file3.nwb",
+        ),
+    ]
+    return iter(results)
 
-        # Return a set of validation results with different IDs
-        results = [
-            ValidationResult(
-                id="BIDS.DATATYPE_MISMATCH",
-                origin=origin,
-                severity=Severity.ERROR,
-                scope=Scope.FILE,
-                message="Datatype mismatch error",
-                path=Path(paths[0]) / "file1.nii",
-            ),
-            ValidationResult(
-                id="BIDS.EXTENSION_MISMATCH",
-                origin=origin,
-                severity=Severity.ERROR,
-                scope=Scope.FILE,
-                message="Extension mismatch error",
-                path=Path(paths[0]) / "file2.jpg",
-            ),
-            ValidationResult(
-                id="DANDI.NO_DANDISET_FOUND",
-                origin=origin,
-                severity=Severity.ERROR,
-                scope=Scope.DANDISET,
-                message="No dandiset found",
-                path=Path(paths[0]),
-            ),
-            ValidationResult(
-                id="NWBI.check_data_orientation",
-                origin=origin,
-                severity=Severity.WARNING,
-                scope=Scope.FILE,
-                message="Data orientation warning",
-                path=Path(paths[0]) / "file3.nwb",
-            ),
-        ]
-        return iter(results)
+
+class TestValidateMatchOption:
+    """Test the --match option for filtering validation results."""
 
     @pytest.mark.parametrize(
         "match_patterns,parsed_patterns,should_contain,should_not_contain",
@@ -265,7 +265,7 @@ def test_match_patterns(
         # Use to monitor what compiled patterns are passed by the CLI
         process_issues_spy = mocker.spy(cmd_validate, "_process_issues")
 
-        monkeypatch.setattr(cmd_validate, "validate_", self._mock_validate)
+        monkeypatch.setattr(cmd_validate, "validate_", _mock_validate)
 
         r = CliRunner().invoke(validate, [f"--match={match_patterns}", str(tmp_path)])
 
@@ -304,7 +304,7 @@ def test_match_with_ignore_combination(
         """Test --match and --ignore options used together."""
         from .. import cmd_validate
 
-        monkeypatch.setattr(cmd_validate, "validate_", self._mock_validate)
+        monkeypatch.setattr(cmd_validate, "validate_", _mock_validate)
 
         # Then use both match and ignore
         r = CliRunner().invoke(
@@ -318,3 +318,72 @@ def test_match_with_ignore_combination(
 
         assert "BIDS.DATATYPE_MISMATCH" not in r.output
         assert "No errors found" in r.output
+
+
+class TestValidateIncludePathOption:
+    """Test the --include-path option for filtering validation results."""
+
+    def test_nonexistent_include_path(
+        self,
+        tmp_path: Path,
+    ) -> None:
+        """Test --include-path option with a non-existent path."""
+
+        non_existent_path = tmp_path / "nonexistent.nwb"
+
+        r = CliRunner().invoke(
+            validate,
+            [f"--include-path={non_existent_path}", str(tmp_path)],
+        )
+
+        # Should exit with an error about `--include-path`
+        assert r.exit_code != 0
+        assert "--include-path" in r.output
+
+    @pytest.mark.parametrize(
+        "include_paths",
+        [
+            ["path1.nwb"],
+            ["path1.nwb", "path2.nwb"],
+        ],
+    )
+    def test_validated_option_args(
+        self,
+        include_paths: list[str],
+        tmp_path: Path,
+        monkeypatch: pytest.MonkeyPatch,
+        mocker: pytest_mock.MockerFixture,
+    ) -> None:
+        """
+        Test that the validated arguments for --include-path correct
+        """
+        from .. import cmd_validate
+
+        # Use to monitor what compiled patterns are passed by the CLI
+        process_issues_spy = mocker.spy(cmd_validate, "_process_issues")
+
+        # We actually don't care about validation results here.
+        #   We are only mocking to avoid running actual validation logic.
+        monkeypatch.setattr(cmd_validate, "validate_", _mock_validate)
+
+        paths = [tmp_path / p for p in include_paths]
+        for p in paths:
+            p.touch()
+
+        cli_args = [f"--include-path={p}" for p in paths] + [str(tmp_path)]
+        CliRunner().invoke(
+            validate,
+            cli_args,
+        )
+
+        process_issues_spy.assert_called_once()
+        call_args = process_issues_spy.call_args
+
+        # Ensure the paths are parsed and passed correctly
+        passed_paths = call_args.kwargs.get(
+            "include_path",
+            call_args.args[4] if len(call_args.args) > 4 else None,
+        )
+        assert len(passed_paths) == len(include_paths)
+        assert all(isinstance(p, Path) for p in passed_paths)
+        assert all(p.is_absolute() for p in passed_paths)

From 5cbceb704017bd734dac38541be6f1002ca60d79 Mon Sep 17 00:00:00 2001
From: Isaac To <isaac.chun.to@gmail.com>
Date: Thu, 20 Nov 2025 21:56:46 -0800
Subject: [PATCH 10/13] doc: document `--include-path` option in `dandi
 validate` command

---
 docs/source/cmdline/validate.rst | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/docs/source/cmdline/validate.rst b/docs/source/cmdline/validate.rst
index 1a86775e1..bfa68222e 100644
--- a/docs/source/cmdline/validate.rst
+++ b/docs/source/cmdline/validate.rst
@@ -29,6 +29,13 @@ Options
     in the eventual result. Note: The separator used to separate the patterns is a
     comma (`,`), so no pattern should contain a comma.
 
+.. option:: --include-path PATH
+
+    Filter issues in the validation results to only those associated with the
+    given path(s). A validation issue is associated with a path if its associated
+    path(s) are the same as or falls under the provided path. This option can be
+    specified multiple times to include multiple paths.
+
 .. option:: --min-severity [HINT|WARNING|ERROR]
 
     Only display issues with severities above this level (HINT by default)

From 05e0a5b5c88ff68378c3ca75e3423d881f9922cb Mon Sep 17 00:00:00 2001
From: Isaac To <candleindark@users.noreply.github.com>
Date: Thu, 20 Nov 2025 22:27:42 -0800
Subject: [PATCH 11/13] docs: correct typo in docs

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 docs/source/cmdline/validate.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/cmdline/validate.rst b/docs/source/cmdline/validate.rst
index bfa68222e..372153f19 100644
--- a/docs/source/cmdline/validate.rst
+++ b/docs/source/cmdline/validate.rst
@@ -33,7 +33,7 @@ Options
 
     Filter issues in the validation results to only those associated with the
     given path(s). A validation issue is associated with a path if its associated
-    path(s) are the same as or falls under the provided path. This option can be
+    path(s) are the same as or fall under the provided path. This option can be
     specified multiple times to include multiple paths.
 
 .. option:: --min-severity [HINT|WARNING|ERROR]

From f0c6d86b71dd100b075e75c15a6096cfc155a928 Mon Sep 17 00:00:00 2001
From: Isaac To <candleindark@users.noreply.github.com>
Date: Thu, 20 Nov 2025 22:29:39 -0800
Subject: [PATCH 12/13] doc: correct typo in docstring

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 dandi/cli/tests/test_cmd_validate.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dandi/cli/tests/test_cmd_validate.py b/dandi/cli/tests/test_cmd_validate.py
index 5c4cd22f6..a380c37f7 100644
--- a/dandi/cli/tests/test_cmd_validate.py
+++ b/dandi/cli/tests/test_cmd_validate.py
@@ -355,7 +355,7 @@ def test_validated_option_args(
         mocker: pytest_mock.MockerFixture,
     ) -> None:
         """
-        Test that the validated arguments for --include-path correct
+        Test that the validated arguments for --include-path are correct
         """
         from .. import cmd_validate
 

From 9c67a464a6e0043d32cf29d4001b2311fdc9f274 Mon Sep 17 00:00:00 2001
From: Isaac To <candleindark@users.noreply.github.com>
Date: Thu, 20 Nov 2025 22:30:26 -0800
Subject: [PATCH 13/13] doc: correct typo in docstring

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 dandi/tests/test_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dandi/tests/test_utils.py b/dandi/tests/test_utils.py
index 3ba63cfa3..66ca5304b 100644
--- a/dandi/tests/test_utils.py
+++ b/dandi/tests/test_utils.py
@@ -603,7 +603,7 @@ def test_post_upload_size_check_erroring(
 
 
 class TestFilterByIdPatterns:
-    """Test the _filter_by_id_patterns function."""
+    """Test the filter_by_id_patterns function."""
 
     @pytest.fixture
     def sample_validation_results(self) -> list[ValidationResult]: