WARNING: THIS SITE IS A MIRROR OF GITHUB.COM / YOU CANNOT LOG IN OR REGISTER ACCOUNTS HERE / THE CONTENTS ARE PROVIDED AS-IS / THIS SITE ASSUMES NO RESPONSIBILITY FOR ANY DISPLAYED CONTENT OR LINKS / IF YOU FIND SOMETHING THAT MAY NOT BE SUITABLE FOR EVERYONE, CONTACT THE ADMIN AT ilovescratch@foxmail.com
Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 60 additions & 0 deletions dandi/cli/base.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from functools import wraps
import os
import re
from typing import Optional

import click

Expand Down Expand Up @@ -147,3 +149,61 @@ def wrapper(obj, *args, **kwargs):
# Set the `_do_map` flag on `map_to_click_exceptions`: it is True unless the
# DANDI_DEVEL environment variable is set to a non-empty value.
# NOTE(review): presumably the decorator's wrapper consults this flag to decide
# whether exceptions get mapped to Click exceptions -- confirm against the
# decorator body.
map_to_click_exceptions._do_map = not bool( # type: ignore[attr-defined]
os.environ.get("DANDI_DEVEL", None)
)


def _compile_regex(regex: str) -> re.Pattern:
    """
    Compile a regex given as an `str`, reporting failures as Click errors

    Parameters
    ----------
    regex : str
        The source text of the regular expression.

    Returns
    -------
    re.Pattern
        The pattern object produced by `re.compile`.

    Raises
    ------
    click.BadParameter
        If `regex` is not a valid regular expression. The original
        `re.error` is chained as the cause.
    """
    try:
        return re.compile(regex)
    except re.error as e:
        # Re-raise as a Click error so the message names the offending pattern
        raise click.BadParameter(f"Invalid regex pattern {regex!r}: {e}") from e


def parse_regexes(
    _ctx: click.Context, _param: click.Parameter, value: Optional[str]
) -> Optional[set[re.Pattern]]:
    """
    Callback to parse a string of comma-separated regex patterns

    Parameters
    ----------
    _ctx : click.Context
        The Click context (not used).

    _param : click.Parameter
        The Click parameter (not used).

    value : str | None
        The input string containing comma-separated regex patterns. It is assumed
        that none of the patterns contain commas themselves.

    Returns
    -------
    set[re.Pattern] | None
        A set of compiled regex patterns, or `None` if `value` is `None`.
        Duplicate patterns are collapsed. Empty segments (e.g. the middle one
        in ``"foo,,bar"``) are kept and compiled as the empty pattern.

    Raises
    ------
    click.BadParameter
        If any of the patterns is not a valid regular expression. When several
        patterns are invalid, the first one in input order is reported.

    Notes
    -----
    This callback is only suitable to parse patterns that do not contain commas.
    """
    if value is None:
        # Handle the case where no value is provided
        return None

    # Compile in input order rather than iterating over an intermediate set of
    # strings: set iteration order depends on string hashing, which would make
    # the reported error vary between runs when several patterns are invalid.
    # The resulting set of compiled patterns is deduplicated all the same.
    return {_compile_regex(regex) for regex in value.split(",")}
47 changes: 43 additions & 4 deletions dandi/cli/cmd_validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,20 @@
from collections.abc import Iterable
import logging
import os
from pathlib import Path
import re
from typing import cast
from typing import Optional, cast
import warnings

import click

from .base import devel_debug_option, devel_option, map_to_click_exceptions
from ..utils import pluralize
from .base import (
devel_debug_option,
devel_option,
map_to_click_exceptions,
parse_regexes,
)
from ..utils import filter_by_id_patterns, filter_by_paths, pluralize
from ..validate import validate as validate_
from ..validate_types import Severity, ValidationResult

Expand Down Expand Up @@ -80,6 +86,26 @@ def validate_bids(
default="none",
)
@click.option("--ignore", metavar="REGEX", help="Regex matching error IDs to ignore")
@click.option(
"--match",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`--match` suggests to me that the matching limits the work being done, whereas here we are only talking about filtering the report. I also wonder whether we could combine the filters here to support the various elements of an issue in one option rather than keeping them separate, so it could be something like

--report-matches=id:REGEX,path:.*_events.*

which should act as a logical AND, i.e. an issue should match all of the above.

metavar="REGEX,REGEX,...",
help=(
"Comma-separated regex patterns used to filter issues in validation results "
"by their ID. Only issues with an ID matching at least one of the given "
"patterns are included in the eventual result. "
"(No pattern should contain a comma.)"
),
callback=parse_regexes,
)
@click.option(
"--include-path",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • The name was a little confusing: I first read it as "include name in the result".
  • We might want to simply (ab)use the existing `paths` positional argument as-is to define which paths we care about. Even within a BIDS dataset, bids-validator-deno currently runs on the entire dataset, so we would then need to filter the results for those paths.
    • We should also warn about that, i.e. that we run bids-validator on the full dandiset even though we are interested in only some of the paths.

multiple=True,
type=click.Path(exists=True, resolve_path=True, path_type=Path),
help=(
"Filter issues in the validation results to only those associated with the "
"given path(s). This option can be specified multiple times."
),
)
@click.option(
"--min-severity",
help="Only display issues with severities above this level.",
Expand All @@ -92,6 +118,8 @@ def validate_bids(
def validate(
paths: tuple[str, ...],
ignore: str | None,
match: Optional[set[re.Pattern]],
include_path: tuple[Path, ...],
grouping: str,
min_severity: str,
schema: str | None = None,
Expand Down Expand Up @@ -134,17 +162,28 @@ def validate(
if i.severity is not None and i.severity.value >= min_severity_value
]

_process_issues(filtered_results, grouping, ignore)
_process_issues(filtered_results, grouping, ignore, match, include_path)


def _process_issues(
validator_result: Iterable[ValidationResult],
grouping: str,
ignore: str | None = None,
match: Optional[set[re.Pattern]] = None,
include_path: tuple[Path, ...] = (),
) -> None:
issues = [i for i in validator_result if i.severity is not None]
if ignore is not None:
issues = [i for i in issues if not re.search(ignore, i.id)]

# Filter issues by ID patterns if provided
if match is not None:
issues = filter_by_id_patterns(issues, match)

# Filter issues by included paths if provided
if include_path:
issues = filter_by_paths(issues, include_path)

purviews = [i.purview for i in issues]
if grouping == "none":
display_errors(
Expand Down
71 changes: 71 additions & 0 deletions dandi/cli/tests/test_base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import re

import click
import pytest

from dandi.cli.base import _compile_regex, parse_regexes

# Placeholder Click context and option handed to `parse_regexes` in the tests
# in this module; they only fill the callback's first two positional arguments.
DUMMY_CTX = click.Context(click.Command("dummy"))
DUMMY_PARAM = click.Option(["--dummy"])


class TestCompileRegex:
    """Tests for the `_compile_regex` helper"""

    @pytest.mark.parametrize("pattern", ["abc", "[a-z]+", "^start$", r"a\.b"])
    def test_valid_patterns_return_pattern(self, pattern):
        """A valid pattern compiles to a `re.Pattern` with the same source text"""
        result = _compile_regex(pattern)

        assert isinstance(result, re.Pattern)
        assert result.pattern == pattern

    @pytest.mark.parametrize("pattern", ["(", "[a-z", "\\"])
    def test_invalid_patterns_raise_bad_parameter(self, pattern):
        """An invalid pattern raises `click.BadParameter` naming the pattern"""
        with pytest.raises(click.BadParameter) as exc_info:
            _compile_regex(pattern)

        message = str(exc_info.value)
        assert "Invalid regex pattern" in message
        assert repr(pattern) in message


class TestParseRegexes:
    """Tests for the `parse_regexes` Click callback"""

    def test_none_returns_none(self):
        """A missing option value (`None`) is passed through as `None`"""
        outcome = parse_regexes(DUMMY_CTX, DUMMY_PARAM, None)
        assert outcome is None

    @pytest.mark.parametrize(
        "value, expected_patterns_in_strs",
        [
            # Single patterns
            ("abc", {"abc"}),
            ("[a-z]+", {"[a-z]+"}),
            (r"a\.b", {r"a\.b"}),
            (r"", {r""}),
            # Multiple patterns
            ("foo,,bar", {"foo", "", "bar"}),
            ("^start$,end$", {"^start$", "end$"}),
            (r"a\.b,c+d", {r"a\.b", r"c+d"}),
            # duplicates should be collapsed by the internal set()
            ("foo,foo,bar", {"foo", "bar"}),
        ],
    )
    def test_parse_patterns(
        self, value: str, expected_patterns_in_strs: set[str]
    ) -> None:
        """Each comma-separated segment is compiled; duplicates collapse"""
        compiled = parse_regexes(DUMMY_CTX, DUMMY_PARAM, value)
        assert isinstance(compiled, set)

        sources = {pattern.pattern for pattern in compiled}
        assert sources == expected_patterns_in_strs

    @pytest.mark.parametrize(
        "value, bad_pattern",
        [
            ("(", "("),
            ("foo,(", "("),
            ("good,[a-z", "[a-z"),
        ],
    )
    def test_invalid_pattern_raises_bad_parameter(
        self, value: str, bad_pattern: str
    ) -> None:
        """An invalid segment raises `click.BadParameter` mentioning it"""
        # The pattern is escaped because `match` is itself applied as a regex
        expected_message = re.escape(bad_pattern)
        with pytest.raises(click.BadParameter, match=expected_message):
            parse_regexes(DUMMY_CTX, DUMMY_PARAM, value)
Loading
Loading