From 487680a9fe37f35c749e3a09f46c01bfd67e8066 Mon Sep 17 00:00:00 2001 From: Isaac To Date: Sat, 15 Nov 2025 21:21:15 -0800 Subject: [PATCH 01/13] feat: add click callback to parse sequence of regexes --- dandi/cli/base.py | 60 ++++++++++++++++++++++++++ dandi/cli/tests/test_base.py | 82 ++++++++++++++++++++++++++++++++++++ 2 files changed, 142 insertions(+) create mode 100644 dandi/cli/tests/test_base.py diff --git a/dandi/cli/base.py b/dandi/cli/base.py index 9d8bc6ace..a826c75ed 100644 --- a/dandi/cli/base.py +++ b/dandi/cli/base.py @@ -1,5 +1,7 @@ from functools import wraps import os +import re +from typing import Optional import click @@ -147,3 +149,61 @@ def wrapper(obj, *args, **kwargs): map_to_click_exceptions._do_map = not bool( # type: ignore[attr-defined] os.environ.get("DANDI_DEVEL", None) ) + + +def _compile_regex(regex: str) -> re.Pattern: + """ + Helper to compile a regex pattern expressed as an `str` into a `re.Pattern` + + Parameters + ---------- + regex : str + The regex pattern expressed as a string. + + Returns + ------- + re.Pattern + The compiled regex pattern. + """ + try: + compiled_regex = re.compile(regex) + except re.error as e: + raise click.BadParameter(f"Invalid regex pattern {regex!r}: {e}") from e + + return compiled_regex + + +def parse_regexes( + _ctx: click.Context, _param: click.Parameter, value: Optional[str] +) -> Optional[list[re.Pattern]]: + """ + Callback to parse a string of comma-separated regex patterns + + Parameters + ---------- + _ctx : click.Context + The Click context (not used). + + _param : click.Parameter + The Click parameter (not used). + + value : str | None + The input string containing comma-separated regex patterns. It is assumed + that none of the patterns contain commas themselves. + + Returns + ------- + list[re.Pattern] + A list of compiled regex patterns. + + Notes + ----- + This callback is only suitable to parse patterns that do not contain commas. + """ + if value is None: + # Handle the case where no value is provided + return None + + regexes = set(value.split(",")) + + return [_compile_regex(regex) for regex in regexes] diff --git a/dandi/cli/tests/test_base.py b/dandi/cli/tests/test_base.py new file mode 100644 index 000000000..5c5cec7b8 --- /dev/null +++ b/dandi/cli/tests/test_base.py @@ -0,0 +1,82 @@ +import re + +import click +import pytest + +from dandi.cli.base import _compile_regex, parse_regexes + +DUMMY_CTX = click.Context(click.Command("dummy")) +DUMMY_PARAM = click.Option(["--dummy"]) + + +class TestCompileRegex: + @pytest.mark.parametrize( + "pattern", + [ + "abc", + "[a-z]+", + "^start$", + r"a\.b", + ], + ) + def test_valid_patterns_return_pattern(self, pattern): + compiled = _compile_regex(pattern) + assert isinstance(compiled, re.Pattern) + assert compiled.pattern == pattern + + @pytest.mark.parametrize("pattern", ["(", "[a-z", "\\"]) + def test_invalid_patterns_raise_bad_parameter(self, pattern): + with pytest.raises(click.BadParameter) as exc_info: + _compile_regex(pattern) + msg = str(exc_info.value) + assert "Invalid regex pattern" in msg + assert repr(pattern) in msg + + +class TestParseRegexes: + def test_none_returns_none(self): + assert parse_regexes(DUMMY_CTX, DUMMY_PARAM, None) is None + + @pytest.mark.parametrize( + "value", + [ + "abc", + "[a-z]+", + r"a\.b", + r"", + ], + ) + def test_single_pattern(self, value): + + result = parse_regexes(DUMMY_CTX, DUMMY_PARAM, value) + assert isinstance(result, list) + assert len(result) == 1 + + (compiled,) = result + assert isinstance(compiled, re.Pattern) + assert compiled.pattern == value + + @pytest.mark.parametrize( + "value, expected_patterns_in_strs", + [ + ("foo,,bar", ["foo", "", "bar"]), + ("^start$,end$", ["^start$", "end$"]), + (r"a\.b,c+d", [r"a\.b", r"c+d"]), + # duplicates should be collapsed by the internal set() + ("foo,foo,bar", ["foo", "bar"]), + ], + ) + def test_multiple_patterns(self, value: str, expected_patterns_in_strs: list[str]): + result = parse_regexes(DUMMY_CTX, DUMMY_PARAM, value) + assert isinstance(result, list) + + # Order is not guaranteed due to de-duplication via set + # So we just check that all expected patterns are present + assert {p.pattern for p in result} == set(expected_patterns_in_strs) + + @pytest.mark.parametrize( + "value, bad_pattern", [("(", "("), ("foo,(", "("), ("good,[a-z", "[a-z")] + ) + def test_invalid_pattern_raises_bad_parameter(self, value: str, bad_pattern: str): + with pytest.raises(click.BadParameter, match=re.escape(bad_pattern)): + parse_regexes(DUMMY_CTX, DUMMY_PARAM, value) From 40670eb5940836c4017646a213ffa5a186d28e5a Mon Sep 17 00:00:00 2001 From: Isaac To Date: Mon, 17 Nov 2025 13:48:30 -0800 Subject: [PATCH 02/13] feat: provide util func to filter validation result by ID with regex --- dandi/tests/test_utils.py | 148 ++++++++++++++++++++++++++++++++++++++ dandi/utils.py | 31 +++++++- 2 files changed, 178 insertions(+), 1 deletion(-) diff --git a/dandi/tests/test_utils.py b/dandi/tests/test_utils.py index 9f7eb8dfc..838a75db6 100644 --- a/dandi/tests/test_utils.py +++ b/dandi/tests/test_utils.py @@ -5,6 +5,7 @@ import logging import os.path as op from pathlib import Path +import re import time import pytest @@ -20,6 +21,7 @@ _get_instance, ensure_datetime, ensure_strtime, + filter_by_id_patterns, find_files, flatten, flattened, @@ -33,6 +35,14 @@ post_upload_size_check, under_paths, ) +from ..validate_types import ( + Origin, + OriginType, + Scope, + Severity, + ValidationResult, + Validator, +) def test_find_files() -> None: @@ -589,3 +599,141 @@ def test_post_upload_size_check_erroring( logging.ERROR, f"Size of {p} was 42 at start of upload but is now 19 after upload", ) in caplog.record_tuples + + +class TestFilterByIdPatterns: + """Test the _filter_by_id_patterns function.""" + + @pytest.fixture + def sample_validation_results(self) -> list[ValidationResult]: + """Create sample validation results for testing.""" + origin_validation_nwbinspector = Origin( + type=OriginType.VALIDATION, + validator=Validator.nwbinspector, + validator_version="", + ) + origin_validation_dandi = Origin( + type=OriginType.VALIDATION, + validator=Validator.dandi, + validator_version="", + ) + + return [ + ValidationResult( + id="NWBI.check_data_orientation", + origin=origin_validation_nwbinspector, + scope=Scope.FILE, + message="Data may be in the wrong orientation.", + path=Path("sub-mouse001/sub-mouse001.nwb"), + severity=Severity.WARNING, + ), + ValidationResult( + id="NWBI.check_missing_unit", + origin=origin_validation_nwbinspector, + scope=Scope.FILE, + message="Missing text for attribute 'unit'.", + path=Path("sub-mouse002/sub-mouse002.nwb"), + severity=Severity.WARNING, + ), + ValidationResult( + id="DANDI.NO_DANDISET_FOUND", + origin=origin_validation_dandi, + scope=Scope.FILE, + message="No dandiset.yaml found.", + path=Path("sub-mouse003/sub-mouse003.nwb"), + severity=Severity.ERROR, + ), + ValidationResult( + id="DANDI.METADATA_MISSING", + origin=origin_validation_dandi, + scope=Scope.FILE, + message="Required metadata is missing.", + path=Path("sub-mouse004/sub-mouse004.nwb"), + severity=Severity.ERROR, + ), + ] + + @pytest.mark.parametrize( + "pattern_strs,expected_ids", + [ + # Single pattern matching one result + ([r"NWBI\.check_data_orientation"], ["NWBI.check_data_orientation"]), + ([r".NO_DANDISET_FOUND"], ["DANDI.NO_DANDISET_FOUND"]), + # Single pattern matching multiple results with prefix + ( + [r"NWBI\.\S+"], + ["NWBI.check_data_orientation", "NWBI.check_missing_unit"], + ), + # Single pattern matching multiple results with different prefix + ([r"DANDI"], ["DANDI.NO_DANDISET_FOUND", "DANDI.METADATA_MISSING"]), + # Multiple patterns matching different results + ( + [r"NWBI\.check_data_orientation", r"DANDI\.NO_DANDISET_FOUND"], + ["NWBI.check_data_orientation", "DANDI.NO_DANDISET_FOUND"], + ), + # Multiple patterns matching different subsets + ( + [r"NWBI\.\S+", r"DANDI\.\S+"], + [ + "NWBI.check_data_orientation", + "NWBI.check_missing_unit", + "DANDI.NO_DANDISET_FOUND", + "DANDI.METADATA_MISSING", + ], + ), + # Multiple patterns with some overlap + ( + [r".*check.*", r"NWBI\..*"], + [ + "NWBI.check_data_orientation", + "NWBI.check_missing_unit", + ], + ), + # Multiple patterns with complete overlap (should not duplicate) + ( + [r"NWBI\.check_data_orientation", r"data_orientation"], + [ + "NWBI.check_data_orientation", + ], + ), + # Multiple patterns where one is subset of other + ( + [r"\S+", r"NWBI\.\S+"], + [ + "NWBI.check_data_orientation", + "NWBI.check_missing_unit", + "DANDI.NO_DANDISET_FOUND", + "DANDI.METADATA_MISSING", + ], + ), + # Pattern that matches nothing + ([r"NONEXISTENT_PATTERN"], []), + # Empty patterns list + ([], []), + ], + ) + def test_pattern_filtering( + self, + sample_validation_results: list[ValidationResult], + pattern_strs: list[str], + expected_ids: list[str], + ) -> None: + """Test filtering with various pattern combinations and edge cases.""" + patterns = [re.compile(p) for p in pattern_strs] + filtered = filter_by_id_patterns(sample_validation_results, patterns) + filtered_ids = [result.id for result in filtered] + assert filtered_ids == expected_ids + + def test_empty_validation_results(self) -> None: + """Test filtering with an empty validation results list.""" + patterns = [re.compile(r".*")] + filtered = filter_by_id_patterns([], patterns) + assert filtered == [] + + def test_preserves_order( + self, sample_validation_results: list[ValidationResult] + ) -> None: + """Test that filtering preserves the original order of results.""" + patterns = [re.compile(r".*")] + filtered = filter_by_id_patterns(sample_validation_results, patterns) + assert filtered == sample_validation_results diff --git a/dandi/utils.py b/dandi/utils.py index 93a0e138c..9d34bec06 100644 --- a/dandi/utils.py +++ b/dandi/utils.py @@ -24,7 +24,7 @@ from time import sleep import traceback import types -from typing import IO, Any, List, Optional, Protocol, TypeVar, Union +from typing import IO, TYPE_CHECKING, Any, List, Optional, Protocol, TypeVar, Union import dateutil.parser from multidict import MultiDict # dependency of yarl @@ -38,6 +38,9 @@ from .consts import DandiInstance, known_instances, known_instances_rev from .exceptions import BadCliVersionError, CliVersionTooOldError +if TYPE_CHECKING: + from .validate_types import ValidationResult + AnyPath = Union[str, Path] @@ -972,3 +975,29 @@ class StrEnum(str, Enum): @staticmethod def _generate_next_value_(name, _start, _count, _last_values): return name + + +def filter_by_id_patterns( + validation_results: Iterable[ValidationResult], patterns: list[re.Pattern[str]] +) -> list[ValidationResult]: + """ + Filter validation results by matching their IDs against provided regex patterns. + + Parameters + ---------- + validation_results : Iterable[ValidationResult] + The iterable of validation results to filter. + patterns : list[re.Pattern[str]] + The list of regex patterns to match validation result IDs against. + + Returns + ------- + list[ValidationResult] + The filtered list of validation results whose IDs match any of the provided patterns. + """ + + filtered_results = [] + for result in validation_results: + if any(re.search(pattern, result.id) for pattern in patterns): + filtered_results.append(result) + return filtered_results From 2d9f2c6eeb9a6cbbb0427ab5ce42678195d167a6 Mon Sep 17 00:00:00 2001 From: Isaac To Date: Tue, 18 Nov 2025 13:21:23 -0800 Subject: [PATCH 03/13] feat: add --match option to `dandi validate` command To filter validation results by ID using regex patterns --- dandi/cli/cmd_validate.py | 31 ++++- dandi/cli/tests/test_cmd_validate.py | 170 +++++++++++++++++++++++++++ docs/source/cmdline/validate.rst | 7 ++ 3 files changed, 204 insertions(+), 4 deletions(-) diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py index b0b5012f5..24ad0c25f 100644 --- a/dandi/cli/cmd_validate.py +++ b/dandi/cli/cmd_validate.py @@ -4,13 +4,18 @@ import logging import os import re -from typing import cast +from typing import Optional, cast import warnings import click -from .base import devel_debug_option, devel_option, map_to_click_exceptions -from ..utils import pluralize +from .base import ( + devel_debug_option, + devel_option, + map_to_click_exceptions, + parse_regexes, +) +from ..utils import filter_by_id_patterns, pluralize from ..validate import validate as validate_ from ..validate_types import Severity, ValidationResult @@ -80,6 +85,17 @@ def validate_bids( default="none", ) @click.option("--ignore", metavar="REGEX", help="Regex matching error IDs to ignore") +@click.option( + "--match", + metavar="REGEX,REGEX,...", + help=( + "Comma-separated regex patterns used to filter issues in validation results " + "by their ID. Only issues with an ID matching at least one of the given " + "patterns are included in the eventual result. " + "(No pattern should contain a comma.)" + ), + callback=parse_regexes, +) @click.option( "--min-severity", help="Only display issues with severities above this level.", @@ -92,6 +108,7 @@ def validate_bids( def validate( paths: tuple[str, ...], ignore: str | None, + match: Optional[list[re.Pattern]], grouping: str, min_severity: str, schema: str | None = None, @@ -134,17 +151,23 @@ def validate( if i.severity is not None and i.severity.value >= min_severity_value ] - _process_issues(filtered_results, grouping, ignore) + _process_issues(filtered_results, grouping, ignore, match) def _process_issues( validator_result: Iterable[ValidationResult], grouping: str, ignore: str | None = None, + match: Optional[list[re.Pattern]] = None, ) -> None: issues = [i for i in validator_result if i.severity is not None] if ignore is not None: issues = [i for i in issues if not re.search(ignore, i.id)] + + # Filter issues by ID patterns if provided + if match is not None: + issues = filter_by_id_patterns(issues, match) + purviews = [i.purview for i in issues] if grouping == "none": display_errors( diff --git a/dandi/cli/tests/test_cmd_validate.py b/dandi/cli/tests/test_cmd_validate.py index 6694ef3ec..035ab17b6 100644 --- a/dandi/cli/tests/test_cmd_validate.py +++ b/dandi/cli/tests/test_cmd_validate.py @@ -2,6 +2,7 @@ from click.testing import CliRunner import pytest +import pytest_mock from ..cmd_validate import _process_issues, validate from ...tests.xfail import mark_xfail_windows_python313_posixsubprocess @@ -149,3 +150,172 @@ def test_validate_bids_error_grouping_notification( # Does it notify the user correctly? notification_substring = "Invalid value for '--grouping'" assert notification_substring in r.output + + +class TestValidateMatchOption: + """Test the --match option for filtering validation results.""" + + @staticmethod + def _mock_validate(*paths, **kwargs): + """Mock validation function that returns controlled ValidationResult objects.""" + origin = Origin( + type=OriginType.VALIDATION, + validator=Validator.dandi, + validator_version="test", + ) + + # Return a set of validation results with different IDs + results = [ + ValidationResult( + id="BIDS.DATATYPE_MISMATCH", + origin=origin, + severity=Severity.ERROR, + scope=Scope.FILE, + message="Datatype mismatch error", + path=Path(paths[0]) / "file1.nii", + ), + ValidationResult( + id="BIDS.EXTENSION_MISMATCH", + origin=origin, + severity=Severity.ERROR, + scope=Scope.FILE, + message="Extension mismatch error", + path=Path(paths[0]) / "file2.jpg", + ), + ValidationResult( + id="DANDI.NO_DANDISET_FOUND", + origin=origin, + severity=Severity.ERROR, + scope=Scope.DANDISET, + message="No dandiset found", + path=Path(paths[0]), + ), + ValidationResult( + id="NWBI.check_data_orientation", + origin=origin, + severity=Severity.WARNING, + scope=Scope.FILE, + message="Data orientation warning", + path=Path(paths[0]) / "file3.nwb", + ), + ] + return iter(results) + + @pytest.mark.parametrize( + "match_patterns,parsed_patterns,should_contain,should_not_contain", + [ + # Single pattern matching specific validation ID + ( + r"BIDS\.DATATYPE_MISMATCH", + [r"BIDS\.DATATYPE_MISMATCH"], + ["BIDS.DATATYPE_MISMATCH"], + [ + "BIDS.EXTENSION_MISMATCH", + "DANDI.NO_DANDISET_FOUND", + "NWBI.check_data_orientation", + ], + ), + # Single pattern matching by prefix + ( + r"^BIDS\.", + [r"^BIDS\."], + ["BIDS.DATATYPE_MISMATCH", "BIDS.EXTENSION_MISMATCH"], + ["DANDI.NO_DANDISET_FOUND", "NWBI.check_data_orientation"], + ), + # Single pattern that matches nothing (should show "No errors found") + ( + r"NONEXISTENT_ID", + [r"NONEXISTENT_ID"], + ["No errors found"], + ["BIDS", "DANDI", "NWBI"], + ), + # Multiple patterns separated by comma + ( + r"BIDS\.DATATYPE_MISMATCH,BIDS\.EXTENSION_MISMATCH", + [r"BIDS\.DATATYPE_MISMATCH", r"BIDS\.EXTENSION_MISMATCH"], + ["BIDS.DATATYPE_MISMATCH", "BIDS.EXTENSION_MISMATCH"], + ["DANDI.NO_DANDISET_FOUND", "NWBI.check_data_orientation"], + ), + # Multiple patterns with wildcard + ( + r"BIDS\.\S+,DANDI\.\S+", + [r"BIDS\.\S+", r"DANDI\.\S+"], + [ + "BIDS.DATATYPE_MISMATCH", + "BIDS.EXTENSION_MISMATCH", + "DANDI.NO_DANDISET_FOUND", + ], + ["NWBI"], + ), + ], + ) + def test_match_patterns( + self, + tmp_path: Path, + match_patterns: str, + parsed_patterns: list[str], + should_contain: list[str], + should_not_contain: list[str], + monkeypatch: pytest.MonkeyPatch, + mocker: pytest_mock.MockerFixture, + ) -> None: + """Test --match option with single or multiple comma-separated patterns.""" + from .. import cmd_validate + + # Use to monitor what compiled patterns are passed by the CLI + process_issues_spy = mocker.spy(cmd_validate, "_process_issues") + + monkeypatch.setattr(cmd_validate, "validate_", self._mock_validate) + + r = CliRunner().invoke(validate, [f"--match={match_patterns}", str(tmp_path)]) + + process_issues_spy.assert_called_once() + call_args = process_issues_spy.call_args + + # Ensure the patterns are parsed and passed correctly + passed_patterns = call_args.kwargs.get( + "match", call_args.args[3] if len(call_args.args) > 3 else None + ) + assert ( + passed_patterns is not None + ), "No match patterns were passed to _process_issues" + # We don't really care about the order of the patterns + assert {p.pattern for p in passed_patterns} == set(parsed_patterns) + + for text in should_contain: + assert text in r.output, f"Expected '{text}' in output but not found" + + for text in should_not_contain: + assert text not in r.output, f"Expected '{text}' NOT in output but found" + + def test_match_invalid_regex(self, tmp_path: Path) -> None: + """Test --match option with invalid regex pattern.""" + # Invalid regex pattern with unmatched parenthesis + r = CliRunner(mix_stderr=False).invoke( + validate, [r"--match=(INVALID", str(tmp_path)], catch_exceptions=False + ) + + # Should fail with an error about invalid regex + assert r.exit_code != 0 + assert "error" in r.stderr.lower() and "--match" in r.stderr + + def test_match_with_ignore_combination( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + """Test --match and --ignore options used together.""" + from .. import cmd_validate + + monkeypatch.setattr(cmd_validate, "validate_", self._mock_validate) + + # Then use both match and ignore + r = CliRunner().invoke( + validate, + [ + r"--match=BIDS\.DATATYPE_MISMATCH", + r"--ignore=DATATYPE_MISMATCH", + str(tmp_path), + ], + ) + + assert "BIDS.DATATYPE_MISMATCH" not in r.output + assert "No errors found" in r.output diff --git a/docs/source/cmdline/validate.rst b/docs/source/cmdline/validate.rst index 111ccd441..1a86775e1 100644 --- a/docs/source/cmdline/validate.rst +++ b/docs/source/cmdline/validate.rst @@ -22,6 +22,13 @@ Options Ignore any validation errors & warnings whose ID matches the given regular expression +.. option:: --match REGEX,REGEX,... + + Comma-separated regex patterns used to filter issues in validation results by their + ID. Only issues with an ID matching at least one of the given patterns are included + in the eventual result. Note: The separator used to separate the patterns is a + comma (`,`), so no pattern should contain a comma. + .. option:: --min-severity [HINT|WARNING|ERROR] Only display issues with severities above this level (HINT by default) From 2e5b2bcdb6fa00b7df531ef444c2bdb0aaae1184 Mon Sep 17 00:00:00 2001 From: Isaac To Date: Tue, 18 Nov 2025 14:09:30 -0800 Subject: [PATCH 04/13] feat: change return type of `parse_regexes` callback This allows the return type to reflect the fact that the order of the elements in the return doesn't matter. --- dandi/cli/base.py | 8 ++++---- dandi/cli/cmd_validate.py | 4 ++-- dandi/cli/tests/test_base.py | 18 ++++++++---------- dandi/cli/tests/test_cmd_validate.py | 15 +++++++-------- dandi/tests/test_utils.py | 6 +++--- dandi/utils.py | 6 +++--- 6 files changed, 27 insertions(+), 30 deletions(-) diff --git a/dandi/cli/base.py b/dandi/cli/base.py index a826c75ed..298bfbbb2 100644 --- a/dandi/cli/base.py +++ b/dandi/cli/base.py @@ -175,7 +175,7 @@ def _compile_regex(regex: str) -> re.Pattern: def parse_regexes( _ctx: click.Context, _param: click.Parameter, value: Optional[str] -) -> Optional[list[re.Pattern]]: +) -> Optional[set[re.Pattern]]: """ Callback to parse a string of comma-separated regex patterns @@ -193,8 +193,8 @@ def parse_regexes( Returns ------- - list[re.Pattern] - A list of compiled regex patterns. + set[re.Pattern] + A set of compiled regex patterns. Notes ----- @@ -206,4 +206,4 @@ def parse_regexes( regexes = set(value.split(",")) - return [_compile_regex(regex) for regex in regexes] + return {_compile_regex(regex) for regex in regexes} diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py index 24ad0c25f..b73af4ed5 100644 --- a/dandi/cli/cmd_validate.py +++ b/dandi/cli/cmd_validate.py @@ -108,7 +108,7 @@ def validate_bids( def validate( paths: tuple[str, ...], ignore: str | None, - match: Optional[list[re.Pattern]], + match: Optional[set[re.Pattern]], grouping: str, min_severity: str, schema: str | None = None, @@ -158,7 +158,7 @@ def _process_issues( validator_result: Iterable[ValidationResult], grouping: str, ignore: str | None = None, - match: Optional[list[re.Pattern]] = None, + match: Optional[set[re.Pattern]] = None, ) -> None: issues = [i for i in validator_result if i.severity is not None] if ignore is not None: diff --git a/dandi/cli/tests/test_base.py b/dandi/cli/tests/test_base.py index 5c5cec7b8..c436893d9 100644 --- a/dandi/cli/tests/test_base.py +++ b/dandi/cli/tests/test_base.py @@ -49,7 +49,7 @@ def test_none_returns_none(self): def test_single_pattern(self, value): result = parse_regexes(DUMMY_CTX, DUMMY_PARAM, value) - assert isinstance(result, list) + assert isinstance(result, set) assert len(result) == 1 (compiled,) = result @@ -59,20 +59,18 @@ def test_single_pattern(self, value): @pytest.mark.parametrize( "value, expected_patterns_in_strs", [ - ("foo,,bar", ["foo", "", "bar"]), - ("^start$,end$", ["^start$", "end$"]), - (r"a\.b,c+d", [r"a\.b", r"c+d"]), + ("foo,,bar", {"foo", "", "bar"}), + ("^start$,end$", {"^start$", "end$"}), + (r"a\.b,c+d", {r"a\.b", r"c+d"}), # duplicates should be collapsed by the internal set() - ("foo,foo,bar", ["foo", "bar"]), + ("foo,foo,bar", {"foo", "bar"}), ], ) - def test_multiple_patterns(self, value: str, expected_patterns_in_strs: list[str]): + def test_multiple_patterns(self, value: str, expected_patterns_in_strs: set[str]): result = parse_regexes(DUMMY_CTX, DUMMY_PARAM, value) - assert isinstance(result, list) + assert isinstance(result, set) - # Order is not guaranteed due to de-duplication via set - # So we just check that all expected patterns are present - assert {p.pattern for p in result} == set(expected_patterns_in_strs) + assert {p.pattern for p in result} == expected_patterns_in_strs @pytest.mark.parametrize( "value, bad_pattern", [("(", "("), ("foo,(", "("), ("good,[a-z", "[a-z")] diff --git a/dandi/cli/tests/test_cmd_validate.py b/dandi/cli/tests/test_cmd_validate.py index 035ab17b6..684b2751c 100644 --- a/dandi/cli/tests/test_cmd_validate.py +++ b/dandi/cli/tests/test_cmd_validate.py @@ -207,7 +207,7 @@ def _mock_validate(*paths, **kwargs): # Single pattern matching specific validation ID ( r"BIDS\.DATATYPE_MISMATCH", - [r"BIDS\.DATATYPE_MISMATCH"], + {r"BIDS\.DATATYPE_MISMATCH"}, ["BIDS.DATATYPE_MISMATCH"], [ "BIDS.EXTENSION_MISMATCH", @@ -218,28 +218,28 @@ def _mock_validate(*paths, **kwargs): # Single pattern matching by prefix ( r"^BIDS\.", - [r"^BIDS\."], + {r"^BIDS\."}, ["BIDS.DATATYPE_MISMATCH", "BIDS.EXTENSION_MISMATCH"], ["DANDI.NO_DANDISET_FOUND", "NWBI.check_data_orientation"], ), # Single pattern that matches nothing (should show "No errors found") ( r"NONEXISTENT_ID", - [r"NONEXISTENT_ID"], + {r"NONEXISTENT_ID"}, ["No errors found"], ["BIDS", "DANDI", "NWBI"], ), # Multiple patterns separated by comma ( r"BIDS\.DATATYPE_MISMATCH,BIDS\.EXTENSION_MISMATCH", - [r"BIDS\.DATATYPE_MISMATCH", r"BIDS\.EXTENSION_MISMATCH"], + {r"BIDS\.DATATYPE_MISMATCH", r"BIDS\.EXTENSION_MISMATCH"}, ["BIDS.DATATYPE_MISMATCH", "BIDS.EXTENSION_MISMATCH"], ["DANDI.NO_DANDISET_FOUND", "NWBI.check_data_orientation"], ), # Multiple patterns with wildcard ( r"BIDS\.\S+,DANDI\.\S+", - [r"BIDS\.\S+", r"DANDI\.\S+"], + {r"BIDS\.\S+", r"DANDI\.\S+"}, [ "BIDS.DATATYPE_MISMATCH", "BIDS.EXTENSION_MISMATCH", @@ -253,7 +253,7 @@ def test_match_patterns( self, tmp_path: Path, match_patterns: str, - parsed_patterns: list[str], + parsed_patterns: set[str], should_contain: list[str], should_not_contain: list[str], monkeypatch: pytest.MonkeyPatch, @@ -279,8 +279,7 @@ def test_match_patterns( assert ( passed_patterns is not None ), "No match patterns were passed to _process_issues" - # We don't really care about the order of the patterns - assert {p.pattern for p in passed_patterns} == set(parsed_patterns) + assert {p.pattern for p in passed_patterns} == parsed_patterns for text in should_contain: assert text in r.output, f"Expected '{text}' in output but not found" diff --git a/dandi/tests/test_utils.py b/dandi/tests/test_utils.py index 838a75db6..b264e3e4c 100644 --- a/dandi/tests/test_utils.py +++ b/dandi/tests/test_utils.py @@ -719,14 +719,14 @@ def test_pattern_filtering( expected_ids: list[str], ) -> None: """Test filtering with various pattern combinations and edge cases.""" - patterns = [re.compile(p) for p in pattern_strs] + patterns = {re.compile(p) for p in pattern_strs} filtered = filter_by_id_patterns(sample_validation_results, patterns) filtered_ids = [result.id for result in filtered] assert filtered_ids == expected_ids def test_empty_validation_results(self) -> None: """Test filtering with an empty validation results list.""" - patterns = [re.compile(r".*")] + patterns = {re.compile(r".*")} filtered = filter_by_id_patterns([], patterns) assert filtered == [] @@ -734,6 +734,6 @@ def test_preserves_order( self, sample_validation_results: list[ValidationResult] ) -> None: """Test that filtering preserves the original order of results.""" - patterns = [re.compile(r".*")] + patterns = {re.compile(r".*")} filtered = filter_by_id_patterns(sample_validation_results, patterns) assert filtered == sample_validation_results diff --git a/dandi/utils.py b/dandi/utils.py index 9d34bec06..72156e924 100644 --- a/dandi/utils.py +++ b/dandi/utils.py @@ -978,7 +978,7 @@ def _generate_next_value_(name, _start, _count, _last_values): def filter_by_id_patterns( - validation_results: Iterable[ValidationResult], patterns: list[re.Pattern[str]] + validation_results: Iterable[ValidationResult], patterns: set[re.Pattern] ) -> list[ValidationResult]: """ Filter validation results by matching their IDs against provided regex patterns. @@ -987,8 +987,8 @@ def filter_by_id_patterns( ---------- validation_results : Iterable[ValidationResult] The iterable of validation results to filter. - patterns : list[re.Pattern[str]] - The list of regex patterns to match validation result IDs against. + patterns : set[re.Pattern] + The set of regex patterns to match validation result IDs against. Returns ------- From a80987a9f94287d802f4d7a11324abb70ca497de Mon Sep 17 00:00:00 2001 From: Isaac To Date: Tue, 18 Nov 2025 15:36:19 -0800 Subject: [PATCH 05/13] test: refactor two tests in `TestParseRegexes` into one --- dandi/cli/tests/test_base.py | 27 +++++++-------------------- 1 file changed, 7 insertions(+), 20 deletions(-) diff --git a/dandi/cli/tests/test_base.py b/dandi/cli/tests/test_base.py index c436893d9..cd25ba737 100644 --- a/dandi/cli/tests/test_base.py +++ b/dandi/cli/tests/test_base.py @@ -37,28 +37,15 @@ class TestParseRegexes: def test_none_returns_none(self): assert parse_regexes(DUMMY_CTX, DUMMY_PARAM, None) is None - @pytest.mark.parametrize( - "value", - [ - "abc", - "[a-z]+", - r"a\.b", - r"", - ], - ) - def test_single_pattern(self, value): - - result = parse_regexes(DUMMY_CTX, DUMMY_PARAM, value) - assert isinstance(result, set) - assert len(result) == 1 - - (compiled,) = result - assert isinstance(compiled, re.Pattern) - assert compiled.pattern == value - @pytest.mark.parametrize( "value, expected_patterns_in_strs", [ + # Single patterns + ("abc", {"abc"}), + ("[a-z]+", {"[a-z]+"}), + (r"a\.b", {r"a\.b"}), + (r"", {r""}), + # Multiple patterns ("foo,,bar", {"foo", "", "bar"}), ("^start$,end$", {"^start$", "end$"}), (r"a\.b,c+d", {r"a\.b", r"c+d"}), @@ -66,7 +53,7 @@ def test_single_pattern(self, value): ("foo,foo,bar", {"foo", "bar"}), ], ) - def test_multiple_patterns(self, value: str, expected_patterns_in_strs: set[str]): + def test_parse_patterns(self, value: str, expected_patterns_in_strs: set[str]): result = parse_regexes(DUMMY_CTX, DUMMY_PARAM, value) assert isinstance(result, set) From 09c1489e1042aae2db06ac643265e3b35624b5ed Mon Sep 17 00:00:00 2001 From: Isaac To Date: Tue, 18 Nov 2025 15:44:06 -0800 Subject: [PATCH 06/13] fix: provide type annotation to fix mypy errs --- dandi/cli/tests/test_base.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/dandi/cli/tests/test_base.py b/dandi/cli/tests/test_base.py index cd25ba737..9fa4971eb 100644 --- a/dandi/cli/tests/test_base.py +++ b/dandi/cli/tests/test_base.py @@ -53,7 +53,9 @@ def test_none_returns_none(self): ("foo,foo,bar", {"foo", "bar"}), ], ) - def test_parse_patterns(self, value: str, expected_patterns_in_strs: set[str]): + def test_parse_patterns( + self, value: str, expected_patterns_in_strs: set[str] + ) -> None: result = parse_regexes(DUMMY_CTX, DUMMY_PARAM, value) assert isinstance(result, set) @@ -62,6 +64,8 @@ def test_parse_patterns(self, value: str, expected_patterns_in_strs: set[str]): @pytest.mark.parametrize( "value, bad_pattern", [("(", "("), ("foo,(", "("), ("good,[a-z", "[a-z")] ) - def test_invalid_pattern_raises_bad_parameter(self, value: str, bad_pattern: str): + def test_invalid_pattern_raises_bad_parameter( + self, value: str, bad_pattern: str + ) -> None: with pytest.raises(click.BadParameter, match=re.escape(bad_pattern)): parse_regexes(DUMMY_CTX, DUMMY_PARAM, value) From cdf9dea60e2aad6e8fff214402180d0a3f9da8ee Mon Sep 17 00:00:00 2001 From: Isaac To Date: Thu, 20 Nov 2025 13:45:07 -0800 Subject: [PATCH 07/13] feat: add helper func to filter validation results by associated paths --- dandi/utils.py | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/dandi/utils.py b/dandi/utils.py index 72156e924..3b37e93f2 100644 --- a/dandi/utils.py +++ b/dandi/utils.py @@ -1001,3 +1001,50 @@ def filter_by_id_patterns( if any(re.search(pattern, result.id) for pattern in patterns): filtered_results.append(result) return filtered_results + + +def filter_by_paths( + validation_results: Iterable[ValidationResult], paths: tuple[Path, ...] +) -> list[ValidationResult]: + """ + Filter validation results by matching their associated paths against provided paths. + + Parameters + ---------- + validation_results : Iterable[ValidationResult] + The iterable of validation results to filter. + paths : tuple[Path, ...] + The tuple of paths to match validation result paths against. + + Returns + ------- + list[ValidationResult] + The filtered list of validation results whose associated paths match the + provided paths. 'Matching' refers to cases where an associated path of a + validation result is the same path as, or falls under, + one of the provided paths. + + Raises + ------ + ValueError + If any of the provided paths doesn't exist in the filesystem. + """ + for p in paths: + if not p.exists(): + raise ValueError(f"Provided path {p} does not exist") + + # Ensure all provided paths are resolved to their absolute forms + paths = tuple(p.resolve(strict=True) for p in paths) + + filtered_results = [] + for r in validation_results: + associated_paths = [ + p.resolve(strict=True) + for p in (r.path, r.dataset_path) + if p is not None and p.exists() + ] + for assoc_path in associated_paths: + if any(assoc_path.is_relative_to(p) for p in paths): + filtered_results.append(r) + break + return filtered_results From 0530591fee16181e90f09327b7b0f7a055ad3eac Mon Sep 17 00:00:00 2001 From: Isaac To Date: Thu, 20 Nov 2025 16:18:50 -0800 Subject: [PATCH 08/13] feat: add helper func to filter validation results by associated paths --- dandi/tests/test_utils.py | 236 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 236 insertions(+) diff --git a/dandi/tests/test_utils.py b/dandi/tests/test_utils.py index b264e3e4c..3ba63cfa3 100644 --- a/dandi/tests/test_utils.py +++ b/dandi/tests/test_utils.py @@ -22,6 +22,7 @@ ensure_datetime, ensure_strtime, filter_by_id_patterns, + filter_by_paths, find_files, flatten, flattened, @@ -737,3 +738,238 @@ def test_preserves_order( patterns = {re.compile(r".*")} filtered = filter_by_id_patterns(sample_validation_results, patterns) assert filtered == sample_validation_results + + +class TestFilterByPaths: + """Test the filter_by_paths function.""" + + @pytest.fixture + def sample_validation_results(self, tmp_path: Path) -> list[ValidationResult]: + """Create sample validation results with file paths for testing.""" + # Create a directory structure for testing + (tmp_path / "sub-mouse001").mkdir() + (tmp_path / "sub-mouse002").mkdir() + (tmp_path / "sub-mouse003").mkdir() + (tmp_path / "data" / "nested").mkdir(parents=True) + (tmp_path / "dataset").mkdir() + + # Create test files + file1 = tmp_path / "sub-mouse001" / "data.nwb" + file2 = tmp_path / "sub-mouse002" / "data.nwb" + file3 = tmp_path / "sub-mouse003" / "data.nwb" + file4 = tmp_path / "data" / "nested" / "experiment.nwb" + file5 = tmp_path / "file_with_both.nwb" + file1.touch() + file2.touch() + file3.touch() + file4.touch() + file5.touch() + + origin_nwbinspector = Origin( + type=OriginType.VALIDATION, + validator=Validator.nwbinspector, + validator_version="", + ) + origin_dandi = Origin( + type=OriginType.VALIDATION, + validator=Validator.dandi, + validator_version="", + ) + + return [ + # Results with only 'path' + ValidationResult( + id="NWBI.check_1", + origin=origin_nwbinspector, + scope=Scope.FILE, + message="Issue 1", + path=file1, + severity=Severity.WARNING, + ), + ValidationResult( + id="NWBI.check_2", + origin=origin_nwbinspector, + scope=Scope.FILE, + message="Issue 2", + path=file2, + severity=Severity.WARNING, + ), + ValidationResult( + id="NWBI.check_3", + origin=origin_nwbinspector, + scope=Scope.FILE, + message="Issue 3", + path=file3, + severity=Severity.ERROR, + ), + ValidationResult( + id="NWBI.check_4", + origin=origin_nwbinspector, + scope=Scope.FILE, + message="Issue 4", + path=file4, + severity=Severity.WARNING, + ), + # Result with only 'dataset_path' + ValidationResult( + id="DANDI.dataset_only", + origin=origin_dandi, + scope=Scope.DANDISET, + message="Dataset issue", + dataset_path=tmp_path / "dataset", + severity=Severity.ERROR, + ), + # Result with both 'path' and 'dataset_path' + ValidationResult( + id="DANDI.both_paths", + origin=origin_dandi, + scope=Scope.FILE, + message="Issue with both paths", + path=file5, + dataset_path=tmp_path / "dataset", + severity=Severity.WARNING, + ), + ] + + @pytest.mark.parametrize( + "path_specs,expected_ids", + [ + # Single file path + (["sub-mouse001/data.nwb"], ["NWBI.check_1"]), + # Multiple file paths + ( + ["sub-mouse001/data.nwb", "sub-mouse002/data.nwb"], + ["NWBI.check_1", "NWBI.check_2"], + ), + # Directory path matching all files under it + (["sub-mouse001"], ["NWBI.check_1"]), + # Parent directory path matching multiple subdirectories + (["data"], ["NWBI.check_4"]), + # Multiple directories + ( + ["sub-mouse001", "sub-mouse003"], + ["NWBI.check_1", "NWBI.check_3"], + ), + # Nested directory + (["data/nested"], ["NWBI.check_4"]), + # All paths via parent directory (includes all results) + ( + ["."], + [ + "NWBI.check_1", + "NWBI.check_2", + "NWBI.check_3", + "NWBI.check_4", + "DANDI.dataset_only", + "DANDI.both_paths", + ], + ), + # Empty paths tuple + ([], []), + # No duplicates when result matches multiple filter paths (file + parent dir) + (["sub-mouse001/data.nwb", "sub-mouse001"], ["NWBI.check_1"]), + # Result with dataset_path + (["dataset"], ["DANDI.dataset_only", "DANDI.both_paths"]), + # Result with both path and dataset_path - filter by file path + (["file_with_both.nwb"], ["DANDI.both_paths"]), + ], + ) + def test_path_filtering( + self, + sample_validation_results: list[ValidationResult], + tmp_path: Path, + path_specs: list[str], + expected_ids: list[str], + ) -> None: + """Test filtering with various path combinations and edge cases.""" + paths = tuple(tmp_path / p for p in path_specs) + filtered = filter_by_paths(sample_validation_results, paths) + filtered_ids = [result.id for result in filtered] + assert filtered_ids == expected_ids + + def test_nonexistent_path_raises_error( + self, sample_validation_results: list[ValidationResult], tmp_path: Path + ) -> None: + """Test that filtering with a non-existent path raises ValueError.""" + nonexistent_path = tmp_path / "nonexistent" + with pytest.raises(ValueError, match="does not exist"): + filter_by_paths(sample_validation_results, (nonexistent_path,)) + + def test_empty_validation_results(self, tmp_path: Path) -> None: + """Test filtering with an empty validation results list.""" + filtered = filter_by_paths([], (tmp_path,)) + assert filtered == [] + + def test_preserves_order( + self, sample_validation_results: list[ValidationResult], tmp_path: Path + ) -> None: + """Test that filtering preserves the original order of results.""" + filtered = filter_by_paths(sample_validation_results, (tmp_path,)) + assert filtered == sample_validation_results + + def test_result_with_nonexistent_path(self, tmp_path: Path) -> None: + """Test that results with non-existent paths are ignored during filtering.""" + existing_file = tmp_path / "existing.nwb" + nonexistent_file = tmp_path / "nonexistent.nwb" + existing_file.touch() + + origin = Origin( + type=OriginType.VALIDATION, + validator=Validator.nwbinspector, + validator_version="", + ) + + results = [ + ValidationResult( + id="NWBI.check_1", + origin=origin, + scope=Scope.FILE, + message="Issue 1", + path=existing_file, + severity=Severity.WARNING, + ), + ValidationResult( + id="NWBI.check_2", + origin=origin, + scope=Scope.FILE, + message="Issue 2", + path=nonexistent_file, # This file doesn't exist + severity=Severity.WARNING, + ), + ] + + # Filter by parent directory - should only get the existing file's result + filtered = filter_by_paths(results, (tmp_path,)) + assert len(filtered) == 1 + assert filtered[0].id == "NWBI.check_1" + + def test_relative_paths_resolved( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + """Test that relative paths are properly resolved to absolute paths.""" + monkeypatch.chdir(tmp_path) + + subdir = tmp_path / "subdir" + subdir.mkdir() + file1 = subdir / "data.nwb" + file1.touch() + + origin = Origin( + type=OriginType.VALIDATION, + validator=Validator.nwbinspector, + validator_version="", + ) + + result = ValidationResult( + id="NWBI.check_1", + origin=origin, + scope=Scope.FILE, + message="Issue", + path=file1, + severity=Severity.WARNING, + ) + + # Use relative path + filtered = filter_by_paths([result], (Path("subdir"),)) + assert len(filtered) == 1 + assert filtered[0].id == "NWBI.check_1" From 0f1b92440b34d4a8dad18404bf62440959b14874 Mon Sep 17 00:00:00 2001 From: Isaac To Date: Thu, 20 Nov 2025 16:41:40 -0800 Subject: [PATCH 09/13] feat: add `--include-path` option to `dandi validate` command This option allows filtering of issues in the validation results to only those associated with the given path(s) --- dandi/cli/cmd_validate.py | 20 +++- dandi/cli/tests/test_cmd_validate.py | 165 +++++++++++++++++++-------- 2 files changed, 135 insertions(+), 50 deletions(-) diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py index b73af4ed5..dbebe96c9 100644 --- a/dandi/cli/cmd_validate.py +++ b/dandi/cli/cmd_validate.py @@ -3,6 +3,7 @@ from collections.abc import Iterable import logging import os +from pathlib import Path import re from typing import Optional, cast import warnings @@ -15,7 +16,7 @@ map_to_click_exceptions, parse_regexes, ) -from ..utils import filter_by_id_patterns, pluralize +from ..utils import filter_by_id_patterns, filter_by_paths, pluralize from ..validate import validate as validate_ from ..validate_types import Severity, ValidationResult @@ -96,6 +97,15 @@ def validate_bids( ), callback=parse_regexes, ) +@click.option( + "--include-path", + multiple=True, + type=click.Path(exists=True, resolve_path=True, path_type=Path), + help=( + "Filter issues in the validation results to only those associated with the " + "given path(s). This option can be specified multiple times." + ), +) @click.option( "--min-severity", help="Only display issues with severities above this level.", @@ -109,6 +119,7 @@ def validate( paths: tuple[str, ...], ignore: str | None, match: Optional[set[re.Pattern]], + include_path: tuple[Path, ...], grouping: str, min_severity: str, schema: str | None = None, @@ -151,7 +162,7 @@ def validate( if i.severity is not None and i.severity.value >= min_severity_value ] - _process_issues(filtered_results, grouping, ignore, match) + _process_issues(filtered_results, grouping, ignore, match, include_path) def _process_issues( @@ -159,6 +170,7 @@ def _process_issues( grouping: str, ignore: str | None = None, match: Optional[set[re.Pattern]] = None, + include_path: tuple[Path, ...] = (), ) -> None: issues = [i for i in validator_result if i.severity is not None] if ignore is not None: @@ -168,6 +180,10 @@ def _process_issues( if match is not None: issues = filter_by_id_patterns(issues, match) + # Filter issues by included paths if provided + if include_path: + issues = filter_by_paths(issues, include_path) + purviews = [i.purview for i in issues] if grouping == "none": display_errors( diff --git a/dandi/cli/tests/test_cmd_validate.py b/dandi/cli/tests/test_cmd_validate.py index 684b2751c..5c4cd22f6 100644 --- a/dandi/cli/tests/test_cmd_validate.py +++ b/dandi/cli/tests/test_cmd_validate.py @@ -152,54 +152,54 @@ def test_validate_bids_error_grouping_notification( assert notification_substring in r.output -class TestValidateMatchOption: - """Test the --match option for filtering validation results.""" +def _mock_validate(*paths, **kwargs): + """Mock validation function that returns controlled ValidationResult objects.""" + origin = Origin( + type=OriginType.VALIDATION, + validator=Validator.dandi, + validator_version="test", + ) - @staticmethod - def _mock_validate(*paths, **kwargs): - """Mock validation function that returns controlled ValidationResult objects.""" - origin = Origin( - type=OriginType.VALIDATION, - validator=Validator.dandi, - validator_version="test", - ) + # Return a set of validation results with different IDs + results = [ + ValidationResult( + id="BIDS.DATATYPE_MISMATCH", + origin=origin, + severity=Severity.ERROR, + scope=Scope.FILE, + message="Datatype mismatch error", + path=Path(paths[0]) / "file1.nii", + ), + ValidationResult( + id="BIDS.EXTENSION_MISMATCH", + origin=origin, + severity=Severity.ERROR, + scope=Scope.FILE, + message="Extension mismatch error", + path=Path(paths[0]) / "file2.jpg", + ), + ValidationResult( + id="DANDI.NO_DANDISET_FOUND", + origin=origin, + severity=Severity.ERROR, + scope=Scope.DANDISET, + message="No dandiset found", + path=Path(paths[0]), + ), + ValidationResult( + id="NWBI.check_data_orientation", + origin=origin, + severity=Severity.WARNING, + scope=Scope.FILE, + message="Data orientation warning", + path=Path(paths[0]) / "file3.nwb", + ), + ] + return iter(results) - # Return a set of validation results with different IDs - results = [ - ValidationResult( - id="BIDS.DATATYPE_MISMATCH", - origin=origin, - severity=Severity.ERROR, - scope=Scope.FILE, - message="Datatype mismatch error", - path=Path(paths[0]) / "file1.nii", - ), - ValidationResult( - id="BIDS.EXTENSION_MISMATCH", - origin=origin, - severity=Severity.ERROR, - scope=Scope.FILE, - message="Extension mismatch error", - path=Path(paths[0]) / "file2.jpg", - ), - ValidationResult( - id="DANDI.NO_DANDISET_FOUND", - origin=origin, - severity=Severity.ERROR, - scope=Scope.DANDISET, - message="No dandiset found", - path=Path(paths[0]), - ), - ValidationResult( - id="NWBI.check_data_orientation", - origin=origin, - severity=Severity.WARNING, - scope=Scope.FILE, - message="Data orientation warning", - path=Path(paths[0]) / "file3.nwb", - ), - ] - return iter(results) + +class TestValidateMatchOption: + """Test the --match option for filtering validation results.""" @pytest.mark.parametrize( "match_patterns,parsed_patterns,should_contain,should_not_contain", @@ -265,7 +265,7 @@ def test_match_patterns( # Use to monitor what compiled patterns are passed by the CLI process_issues_spy = mocker.spy(cmd_validate, "_process_issues") - monkeypatch.setattr(cmd_validate, "validate_", self._mock_validate) + monkeypatch.setattr(cmd_validate, "validate_", _mock_validate) r = CliRunner().invoke(validate, [f"--match={match_patterns}", str(tmp_path)]) @@ -304,7 +304,7 @@ def test_match_with_ignore_combination( """Test --match and --ignore options used together.""" from .. import cmd_validate - monkeypatch.setattr(cmd_validate, "validate_", self._mock_validate) + monkeypatch.setattr(cmd_validate, "validate_", _mock_validate) # Then use both match and ignore r = CliRunner().invoke( @@ -318,3 +318,72 @@ def test_match_with_ignore_combination( assert "BIDS.DATATYPE_MISMATCH" not in r.output assert "No errors found" in r.output + + +class TestValidateIncludePathOption: + """Test the --include-path option for filtering validation results.""" + + def test_nonexistent_include_path( + self, + tmp_path: Path, + ) -> None: + """Test --include-path option with a non-existent path.""" + + non_existent_path = tmp_path / "nonexistent.nwb" + + r = CliRunner().invoke( + validate, + [f"--include-path={non_existent_path}", str(tmp_path)], + ) + + # Should exit with an error about `--include-path` + assert r.exit_code != 0 + assert "--include-path" in r.output + + @pytest.mark.parametrize( + "include_paths", + [ + ["path1.nwb"], + ["path1.nwb", "path2.nwb"], + ], + ) + def test_validated_option_args( + self, + include_paths: list[str], + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + mocker: pytest_mock.MockerFixture, + ) -> None: + """ + Test that the validated arguments for --include-path correct + """ + from .. import cmd_validate + + # Use to monitor what compiled patterns are passed by the CLI + process_issues_spy = mocker.spy(cmd_validate, "_process_issues") + + # We actually don't care about validation results here. + # We are only mocking to avoid running actual validation logic. + monkeypatch.setattr(cmd_validate, "validate_", _mock_validate) + + paths = [tmp_path / p for p in include_paths] + for p in paths: + p.touch() + + cli_args = [f"--include-path={p}" for p in paths] + [str(tmp_path)] + CliRunner().invoke( + validate, + cli_args, + ) + + process_issues_spy.assert_called_once() + call_args = process_issues_spy.call_args + + # Ensure the paths are parsed and passed correctly + passed_paths = call_args.kwargs.get( + "include_path", + call_args.args[4] if len(call_args.args) > 4 else None, + ) + assert len(passed_paths) == len(include_paths) + assert all(isinstance(p, Path) for p in passed_paths) + assert all(p.is_absolute() for p in passed_paths) From 5cbceb704017bd734dac38541be6f1002ca60d79 Mon Sep 17 00:00:00 2001 From: Isaac To Date: Thu, 20 Nov 2025 21:56:46 -0800 Subject: [PATCH 10/13] doc: document `--include-path` option in `dandi validate` command --- docs/source/cmdline/validate.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docs/source/cmdline/validate.rst b/docs/source/cmdline/validate.rst index 1a86775e1..bfa68222e 100644 --- a/docs/source/cmdline/validate.rst +++ b/docs/source/cmdline/validate.rst @@ -29,6 +29,13 @@ Options in the eventual result. Note: The separator used to separate the patterns is a comma (`,`), so no pattern should contain a comma. +.. option:: --include-path PATH + + Filter issues in the validation results to only those associated with the + given path(s). A validation issue is associated with a path if its associated + path(s) are the same as or falls under the provided path. This option can be + specified multiple times to include multiple paths. + .. option:: --min-severity [HINT|WARNING|ERROR] Only display issues with severities above this level (HINT by default) From 05e0a5b5c88ff68378c3ca75e3423d881f9922cb Mon Sep 17 00:00:00 2001 From: Isaac To Date: Thu, 20 Nov 2025 22:27:42 -0800 Subject: [PATCH 11/13] docs: correct typo in docs Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- docs/source/cmdline/validate.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/cmdline/validate.rst b/docs/source/cmdline/validate.rst index bfa68222e..372153f19 100644 --- a/docs/source/cmdline/validate.rst +++ b/docs/source/cmdline/validate.rst @@ -33,7 +33,7 @@ Options Filter issues in the validation results to only those associated with the given path(s). A validation issue is associated with a path if its associated - path(s) are the same as or falls under the provided path. This option can be + path(s) are the same as or fall under the provided path. This option can be specified multiple times to include multiple paths. .. option:: --min-severity [HINT|WARNING|ERROR] From f0c6d86b71dd100b075e75c15a6096cfc155a928 Mon Sep 17 00:00:00 2001 From: Isaac To Date: Thu, 20 Nov 2025 22:29:39 -0800 Subject: [PATCH 12/13] doc: correct typo in docstring Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- dandi/cli/tests/test_cmd_validate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dandi/cli/tests/test_cmd_validate.py b/dandi/cli/tests/test_cmd_validate.py index 5c4cd22f6..a380c37f7 100644 --- a/dandi/cli/tests/test_cmd_validate.py +++ b/dandi/cli/tests/test_cmd_validate.py @@ -355,7 +355,7 @@ def test_validated_option_args( mocker: pytest_mock.MockerFixture, ) -> None: """ - Test that the validated arguments for --include-path correct + Test that the validated arguments for --include-path are correct """ from .. import cmd_validate From 9c67a464a6e0043d32cf29d4001b2311fdc9f274 Mon Sep 17 00:00:00 2001 From: Isaac To Date: Thu, 20 Nov 2025 22:30:26 -0800 Subject: [PATCH 13/13] doc: correct typo in docstring Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- dandi/tests/test_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dandi/tests/test_utils.py b/dandi/tests/test_utils.py index 3ba63cfa3..66ca5304b 100644 --- a/dandi/tests/test_utils.py +++ b/dandi/tests/test_utils.py @@ -603,7 +603,7 @@ def test_post_upload_size_check_erroring( class TestFilterByIdPatterns: - """Test the _filter_by_id_patterns function.""" + """Test the filter_by_id_patterns function.""" @pytest.fixture def sample_validation_results(self) -> list[ValidationResult]: