diff --git a/README.md b/README.md index 6e27299..d64b2fb 100644 --- a/README.md +++ b/README.md @@ -293,6 +293,83 @@ article = crate.dereference("paper.pdf") ## Advanced features +### Subcrates + +An RO-Crate can contain one or more nested RO-Crates. For instance, consider the following layout: + +``` +crate_with_subcrates/ +|-- file.txt +|-- ro-crate-metadata.json +|-- subcrate +| |-- ro-crate-metadata.json +| |-- subfile.txt +| `-- subsubcrate +| |-- deepfile.txt +| `-- ro-crate-metadata.json +`-- subcrate2 + |-- ro-crate-metadata.json + `-- subfile.txt +``` + +In the JSON-LD metadata, the presence of a nested crate rooted at a given directory is indicated by a `conformsTo` pointing to the generic RO-Crate profile `https://w3id.org/ro/crate` (see [Referencing other RO-Crates](https://www.researchobject.org/ro-crate/specification/1.2/data-entities.html#referencing-other-ro-crates)): + +```json +{ + "@id": "subcrate/", + "@type": "Dataset", + "conformsTo": "https://w3id.org/ro/crate" +} +``` + +Since nested crates can potentially contain many and/or large files, they are not loaded by default: to enable their loading, pass `load_subcrates=True` to the `ROCrate` initializer: + +```pycon +>>> from rocrate.rocrate import ROCrate +>>> crate = ROCrate("test/test-data/crate_with_subcrates", load_subcrates=True) +>>> crate.subcrate_entities +[, ] +``` + +At this point, the nested crates have not been loaded yet. You can load a nested crate explicitly: + +```pycon +>>> nested_crate = subcrate.get_crate() +>>> nested_crate.data_entities +[, ] +>>> nested_crate.subcrate_entities +[] +``` + +Alternatively, you can dereference an item from the higher level crate: + +```pycon +>>> crate.dereference("subcrate2/subfile.txt") + +``` + +Up to this point, we have seen how to consume an existing RO-Crate.
The following example shows how to create a new one: + +```pycon +>>> crate = ROCrate() +>>> crate.add_file("test/test-data/test_file_galaxy.txt") + +>>> subcrate = crate.add_subcrate(dest_path="subcrate/") +>>> subcrate + +>>> assert subcrate.get("conformsTo") == "https://w3id.org/ro/crate" +>>> assert crate.subcrate_entities == [subcrate] +>>> subcrate_crate = subcrate.get_crate() +>>> subcrate_crate + +>>> subsubcrate = subcrate_crate.add_subcrate(dest_path="subsubcrate/") +>>> assert subcrate_crate.subcrate_entities == [subsubcrate] +>>> subsubcrate_crate = subsubcrate.get_crate() +>>> subsubf = subsubcrate_crate.add_file("setup.cfg") +>>> assert crate.dereference("subcrate/subsubcrate/setup.cfg") is subsubf +>>> crate.write("/tmp/crate_with_subcrates") +``` + ### Modifying the crate from JSON-LD dictionaries The `add_jsonld` method allows to add a contextual entity directly from a diff --git a/rocrate/model/entity.py b/rocrate/model/entity.py index 473a7eb..b9acc16 100644 --- a/rocrate/model/entity.py +++ b/rocrate/model/entity.py @@ -43,6 +43,7 @@ def __init__(self, crate, identifier=None, properties=None): if name.startswith("@"): self._jsonld[name] = value else: + # this will call the __setitem__ method defined below self[name] = value @property diff --git a/rocrate/rocrate.py b/rocrate/rocrate.py index d36750e..2fdf795 100644 --- a/rocrate/rocrate.py +++ b/rocrate/rocrate.py @@ -21,6 +21,7 @@ # limitations under the License. 
import errno +from typing import cast import uuid import zipfile import atexit @@ -74,16 +75,37 @@ def is_data_entity(entity): return DATA_ENTITY_TYPES.intersection(as_list(entity.get("@type", []))) -def pick_type(json_entity, type_map, fallback=None): +def pick_type(json_entity, type_map, fallback=None, load_subcrates=False): try: t = json_entity["@type"] except KeyError: raise ValueError(f'entity {json_entity["@id"]!r} has no @type') types = {_.strip() for _ in set(t if isinstance(t, list) else [t])} + + entity_class = None for name, c in type_map.items(): if name in types: - return c - return fallback + entity_class = c + break + + if not entity_class: + return fallback + + if entity_class is Dataset: + + # Check if the dataset is a Subcrate + # i.e it has a conformsTo entry matching a RO-Crate profile + # TODO find a better way to check the profiles? + if load_subcrates and (list_profiles := get_norm_value(json_entity, "conformsTo")): + + for profile_ref in list_profiles: + if profile_ref.startswith("https://w3id.org/ro/crate"): + return Subcrate + + return Dataset + + else: + return entity_class def get_version(metadata_properties): @@ -96,10 +118,16 @@ def get_version(metadata_properties): class ROCrate(): - def __init__(self, source=None, gen_preview=False, init=False, exclude=None, version=DEFAULT_VERSION): + def __init__(self, + source=None, + gen_preview=False, + init=False, exclude=None, + version=DEFAULT_VERSION, + load_subcrates=False): self.mode = None self.source = source self.exclude = exclude + self.load_subcrates = load_subcrates self.__entity_map = {} # TODO: add this as @base in the context? At least when loading # from zip @@ -182,6 +210,14 @@ def __read_data_entities(self, entities, source, gen_preview): self.__add_parts(parts, entities, source) def __add_parts(self, parts, entities, source): + """ + Add entities to the crate from a list of entities id and Entity object. 
+ + :param self: Description + :param parts: a list of dicts (one dict per entity) in the form {@id : "entity_id"} + :param entities: a dict with the full list of entities information as in the hasPart of the root dataset of the crate. + :param source: Description + """ type_map = OrderedDict((_.__name__, _) for _ in subclasses(FileOrDir)) for ref in parts: id_ = ref['@id'] @@ -192,16 +228,28 @@ def __add_parts(self, parts, entities, source): continue entity = entities.pop(id_) assert id_ == entity.pop('@id') - cls = pick_type(entity, type_map, fallback=DataEntity) - if cls is DataEntity: + cls = pick_type(entity, type_map, fallback=DataEntity, load_subcrates=self.load_subcrates) + + if cls is Subcrate: + + if is_url(id_): + instance = Subcrate(self, source=id_, properties=entity) + else: + instance = Subcrate(self, source=source / unquote(id_), properties=entity) + + elif cls is DataEntity: instance = DataEntity(self, identifier=id_, properties=entity) + else: + # cls is either a File or a Dataset (Directory) if is_url(id_): instance = cls(self, id_, properties=entity) else: instance = cls(self, source / unquote(id_), id_, properties=entity) self.add(instance) if instance.type == "Dataset": + # for Subcrate, type is currently Dataset too, + # but the hasPart is not populated yet only once accessing a subcrate element (lazy loading) self.__add_parts(as_list(entity.get("hasPart", [])), entities, source) def __read_contextual_entities(self, entities): @@ -234,6 +282,11 @@ def contextual_entities(self): if not isinstance(e, (RootDataset, Metadata, Preview)) and not hasattr(e, "write")] + @property + def subcrate_entities(self): + return [e for e in self.__entity_map.values() + if isinstance(e, Subcrate)] + @property def name(self): return self.root_dataset.get('name') @@ -364,9 +417,31 @@ def get_entities(self): def _get_root_jsonld(self): self.root_dataset.properties() + def __contains__(self, entity_id): + canonical_id = self.resolve_id(entity_id) + return 
canonical_id in self.__entity_map + def dereference(self, entity_id, default=None): canonical_id = self.resolve_id(entity_id) - return self.__entity_map.get(canonical_id, default) + + if canonical_id in self.__entity_map: + return self.__entity_map[canonical_id] + + for subcrate_entity in self.subcrate_entities: + + # check if the entity_id might be within a subcrate + # i.e entity_id would start with a subcrate id e.g subcrate/subfile.txt + if entity_id.startswith(subcrate_entity.id): + + # replace id of subcrate to use get in the subcrate + # subcrate/subfile.txt --> subfile.txt + # dont use replace, as it could replace in the middle of the id + entity_id_in_subcrate = entity_id[len(subcrate_entity.id):] + + return subcrate_entity.get_crate().get(entity_id_in_subcrate, default=default) + + # fallback + return default get = dereference @@ -413,6 +488,23 @@ def add_dataset( properties=properties )) + def add_subcrate( + self, + source=None, + dest_path=None, + fetch_remote=False, + validate_url=False, + properties=None + ): + return self.add(Subcrate( + self, + source=source, + dest_path=dest_path, + fetch_remote=fetch_remote, + validate_url=validate_url, + properties=properties + )) + add_directory = add_dataset def add_tree(self, source, dest_path=None, properties=None): @@ -492,7 +584,7 @@ def _copy_unlisted(self, top, base_path): for name in files: source = root / name rel = source.relative_to(top) - if not self.dereference(str(rel)): + if str(rel) not in self: dest = base_path / rel if not dest.exists() or not dest.samefile(source): shutil.copyfile(source, dest) @@ -550,7 +642,7 @@ def _stream_zip(self, chunk_size=8192, out_path=None): continue rel = source.relative_to(self.source) - if not self.dereference(str(rel)) and not str(rel) in listed_files: + if str(rel) not in self and not str(rel) in listed_files: with archive.open(str(rel), mode='w') as out_file, open(source, 'rb') as in_file: while chunk := in_file.read(chunk_size): out_file.write(chunk) @@ 
-560,6 +652,10 @@ def _stream_zip(self, chunk_size=8192, out_path=None): while chunk := buffer.read(chunk_size): yield chunk + def _all_streams(self, chunk_size=8192): + for writeable_entity in self.data_entities + self.default_entities: + yield from writeable_entity.stream(chunk_size=chunk_size) + def add_workflow( self, source=None, dest_path=None, fetch_remote=False, validate_url=False, properties=None, main=False, lang="cwl", lang_version=None, gen_cwl=False, cls=ComputationalWorkflow, @@ -782,6 +878,63 @@ def __validate_suite(self, suite): return suite +class Subcrate(Dataset): + + def __init__(self, crate, source=None, dest_path=None, fetch_remote=False, + validate_url=False, properties=None, record_size=False): + """ + Data-entity representing a subcrate inside another RO-Crate. + + :param crate: The parent crate + :param source: The relative path to the subcrate, or its URL + """ + super().__init__(crate, source, dest_path, fetch_remote, + validate_url, properties=properties, record_size=record_size) + + self._crate = None + """ + A ROCrate instance allowing access to the nested RO-Crate. + The nested RO-Crate is loaded on first access to any of its attribute. + This attribute should not be confused with the crate attribute, which is a reference to the parent crate. + Caller should rather use the get_crate() method to access the nested RO-Crate. + """ + + def _empty(self): + return { + "@id": self.id, + "@type": "Dataset", + "conformsTo": "https://w3id.org/ro/crate", + } + + def get_crate(self) -> ROCrate: + """ + Return the RO-Crate object referenced by this subcrate. + """ + if self._crate is None: + self._load_subcrate() + + return cast(ROCrate, self._crate) + + def _load_subcrate(self): + """ + Load the nested RO-Crate from the source path or URL. 
+ """ + if self._crate is None: + # load_subcrates=True to load further nested RO-Crate (on-demand / lazily too) + self._crate = ROCrate(self.source, load_subcrates=True) + + def write(self, base_path): + super().write(base_path) + if self.crate.mode == Mode.CREATE: + self.get_crate().write(base_path / unquote(self.id)) + + def stream(self, chunk_size=8192): + yield from super().stream(chunk_size=chunk_size) + if self.crate.mode == Mode.CREATE: + for path, chunk in self.get_crate()._all_streams(chunk_size=chunk_size): + yield os.path.join(unquote(self.id), path), chunk + + def make_workflow_rocrate(workflow_path, wf_type, include_files=[], fetch_remote=False, cwl=None, diagram=None): wf_crate = ROCrate() diff --git a/test/test-data/crate_with_subcrates/file.txt b/test/test-data/crate_with_subcrates/file.txt new file mode 100644 index 0000000..c6cac69 --- /dev/null +++ b/test/test-data/crate_with_subcrates/file.txt @@ -0,0 +1 @@ +empty diff --git a/test/test-data/crate_with_subcrates/ro-crate-metadata.json b/test/test-data/crate_with_subcrates/ro-crate-metadata.json new file mode 100644 index 0000000..0cd761f --- /dev/null +++ b/test/test-data/crate_with_subcrates/ro-crate-metadata.json @@ -0,0 +1,52 @@ +{ + "@context": "https://w3id.org/ro/crate/1.1/context", + "@graph": [ + { + "@id": "./", + "@type": "Dataset", + "name": "Top-level crate with subcrates", + "description": "An RO-Crate containing subcrates", + "license": "https://spdx.org/licenses/MIT.html", + "datePublished": "2025-12-02T08:39:54+00:00", + "hasPart": [ + { + "@id": "file.txt" + }, + { + "@id": "subcrate/" + }, + { + "@id": "subcrate2/" + } + ] + }, + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.1" + } + }, + { + "@id": "file.txt", + "@type": "File" + }, + { + "@id": "subcrate/", + "@type": "Dataset", + "conformsTo": { + "@id": "https://w3id.org/ro/crate" + } + }, + { + "@id": "subcrate2/", + 
"@type": "Dataset", + "conformsTo": { + "@id": "https://w3id.org/ro/crate" + } + } + ] +} diff --git a/test/test-data/crate_with_subcrates/subcrate/ro-crate-metadata.json b/test/test-data/crate_with_subcrates/subcrate/ro-crate-metadata.json new file mode 100644 index 0000000..eca9026 --- /dev/null +++ b/test/test-data/crate_with_subcrates/subcrate/ro-crate-metadata.json @@ -0,0 +1,39 @@ +{ + "@context": "https://w3id.org/ro/crate/1.1/context", + "@graph": [ + { + "@id": "./", + "@type": "Dataset", + "datePublished": "2025-12-02T08:39:54+00:00", + "hasPart": [ + { + "@id": "subfile.txt" + }, + { + "@id": "subsubcrate/" + } + ] + }, + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.1" + } + }, + { + "@id": "subfile.txt", + "@type": "File" + }, + { + "@id": "subsubcrate/", + "@type": "Dataset", + "conformsTo": { + "@id": "https://w3id.org/ro/crate" + } + } + ] +} diff --git a/test/test-data/crate_with_subcrates/subcrate/subfile.txt b/test/test-data/crate_with_subcrates/subcrate/subfile.txt new file mode 100644 index 0000000..c6cac69 --- /dev/null +++ b/test/test-data/crate_with_subcrates/subcrate/subfile.txt @@ -0,0 +1 @@ +empty diff --git a/test/test-data/crate_with_subcrates/subcrate/subsubcrate/deepfile.txt b/test/test-data/crate_with_subcrates/subcrate/subsubcrate/deepfile.txt new file mode 100644 index 0000000..c6cac69 --- /dev/null +++ b/test/test-data/crate_with_subcrates/subcrate/subsubcrate/deepfile.txt @@ -0,0 +1 @@ +empty diff --git a/test/test-data/crate_with_subcrates/subcrate/subsubcrate/ro-crate-metadata.json b/test/test-data/crate_with_subcrates/subcrate/subsubcrate/ro-crate-metadata.json new file mode 100644 index 0000000..b552c79 --- /dev/null +++ b/test/test-data/crate_with_subcrates/subcrate/subsubcrate/ro-crate-metadata.json @@ -0,0 +1,29 @@ +{ + "@context": "https://w3id.org/ro/crate/1.1/context", + "@graph": [ + { + "@id": "./", + "@type": 
"Dataset", + "datePublished": "2025-12-02T08:39:54+00:00", + "hasPart": [ + { + "@id": "deepfile.txt" + } + ] + }, + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.1" + } + }, + { + "@id": "deepfile.txt", + "@type": "File" + } + ] +} diff --git a/test/test-data/crate_with_subcrates/subcrate2/ro-crate-metadata.json b/test/test-data/crate_with_subcrates/subcrate2/ro-crate-metadata.json new file mode 100644 index 0000000..817ffe5 --- /dev/null +++ b/test/test-data/crate_with_subcrates/subcrate2/ro-crate-metadata.json @@ -0,0 +1,29 @@ +{ + "@context": "https://w3id.org/ro/crate/1.1/context", + "@graph": [ + { + "@id": "./", + "@type": "Dataset", + "datePublished": "2025-12-02T08:39:54+00:00", + "hasPart": [ + { + "@id": "subfile.txt" + } + ] + }, + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": { + "@id": "https://w3id.org/ro/crate/1.1" + } + }, + { + "@id": "subfile.txt", + "@type": "File" + } + ] +} diff --git a/test/test-data/crate_with_subcrates/subcrate2/subfile.txt b/test/test-data/crate_with_subcrates/subcrate2/subfile.txt new file mode 100644 index 0000000..c6cac69 --- /dev/null +++ b/test/test-data/crate_with_subcrates/subcrate2/subfile.txt @@ -0,0 +1 @@ +empty diff --git a/test/test_model.py b/test/test_model.py index a86c25f..26b13d2 100644 --- a/test/test_model.py +++ b/test/test_model.py @@ -29,7 +29,7 @@ from pathlib import Path import pytest -from rocrate.rocrate import ROCrate +from rocrate.rocrate import ROCrate, Subcrate from rocrate.model import ( DataEntity, File, @@ -103,10 +103,11 @@ def test_data_entities(test_data_dir): crate = ROCrate() file_ = crate.add(File(crate, test_data_dir / 'sample_file.txt')) dataset = crate.add(Dataset(crate, test_data_dir / 'test_add_dir')) + subcrate = crate.add(Subcrate(crate, test_data_dir / 'crate-1.0')) data_entity = crate.add(DataEntity(crate, 
'#mysterious')) - assert set(crate.data_entities) == {file_, dataset, data_entity} + assert set(crate.data_entities) == {file_, dataset, subcrate, data_entity} part_ids = set(_["@id"] for _ in crate.root_dataset._jsonld["hasPart"]) - assert set(_.id for _ in (file_, dataset, data_entity)) <= part_ids + assert set(_.id for _ in (file_, dataset, subcrate, data_entity)) <= part_ids @pytest.mark.skipif(sys.platform == "darwin", reason="CI sometimes fails on macOS") diff --git a/test/test_read.py b/test/test_read.py index 359da13..07308c3 100644 --- a/test/test_read.py +++ b/test/test_read.py @@ -27,7 +27,7 @@ import zipfile from pathlib import Path -from rocrate.rocrate import ROCrate +from rocrate.rocrate import ROCrate, Subcrate from rocrate.model import DataEntity, ContextEntity, File, Dataset _URL = ('https://raw.githubusercontent.com/ResearchObject/ro-crate-py/master/' @@ -192,6 +192,53 @@ def test_bad_crate(test_data_dir, tmpdir): ROCrate(crate_dir) +def load_crate_with_subcrates(test_data_dir): + return ROCrate(test_data_dir / "crate_with_subcrates", load_subcrates=True) + + +def test_crate_with_subcrates(test_data_dir): + + main_crate = load_crate_with_subcrates(test_data_dir) + + subcrate = main_crate.get("subcrate") + subcrate2 = main_crate.get("subcrate2") + for sc in subcrate, subcrate2: + assert isinstance(sc, Subcrate) + assert set(main_crate.subcrate_entities) == {subcrate, subcrate2} + + # Check the subcrate kept the conformsTo attribute from the original Dataset entity + assert subcrate.get("conformsTo") == "https://w3id.org/ro/crate" + + # check that at this point, we have not yet loaded the subcrate + assert subcrate._crate is None + + # check access from the top-level crate + subfile = main_crate.get("subcrate/subfile.txt") + assert isinstance(subfile, File) + subfile2 = main_crate.get("subcrate2/subfile.txt") + assert isinstance(subfile2, File) + assert subfile2 is not subfile + + # check that the above dereferencing triggered lazy loading + assert 
isinstance(subcrate._crate, ROCrate) + assert subfile.id == "subfile.txt" + assert subfile.crate is not main_crate + assert subfile.crate is subcrate._crate + + # check with another nested rocrate + assert isinstance(main_crate.get("subcrate/subsubcrate/deepfile.txt"), File) + + # reload the crate to "reset" the state to unloaded + main_crate = load_crate_with_subcrates(test_data_dir) + subcrate = main_crate.get("subcrate") + assert subcrate._crate is None + + # get_crate() should trigger loading of the subcrate + nested_crate = subcrate.get_crate() + assert isinstance(nested_crate, ROCrate) + assert subcrate._crate is nested_crate + + @pytest.mark.parametrize("override", [False, True]) def test_init(test_data_dir, tmpdir, helpers, override): crate_dir = test_data_dir / "ro-crate-galaxy-sortchangecase" diff --git a/test/test_write.py b/test/test_write.py index 3cf0422..bd55d9f 100644 --- a/test/test_write.py +++ b/test/test_write.py @@ -468,7 +468,7 @@ def test_add_tree(test_data_dir, tmpdir): def test_http_header(tmpdir): crate = ROCrate() - url = "https://zenodo.org/records/10782431/files/lysozyme_datasets.zip" + url = "https://ftp.mozilla.org/pub/js/js-1.60.tar.gz" file_ = crate.add_file(url, validate_url=True) assert file_.id == url out_path = tmpdir / 'ro_crate_out' @@ -476,7 +476,7 @@ def test_http_header(tmpdir): out_crate = ROCrate(out_path) out_file = out_crate.dereference(url) props = out_file.properties() - assert props.get("encodingFormat") == "application/octet-stream" + assert props.get("encodingFormat") == "application/x-tar" assert "sdDatePublished" in props with requests.head(url) as response: assert props["sdDatePublished"] == response.headers.get("last-modified") @@ -627,6 +627,81 @@ def test_write_zip_nested_dest(tmpdir, helpers): assert (unpack_path / "subdir" / "a b" / "j k" / "l m.txt").is_file() +@pytest.mark.parametrize("to_zip", [False, True]) +def test_write_subcrate(test_data_dir, tmpdir, to_zip): + """Read the test crate with subcrate 
and write it to a new location. + Check that the subcrate contents are correctly written.""" + crate = ROCrate(test_data_dir / "crate_with_subcrates", load_subcrates=True) + out_path = tmpdir / "ro_crate_out" + if to_zip: + zip_path = tmpdir / 'ro_crate_out.zip' + crate.write_zip(zip_path) + with zipfile.ZipFile(zip_path, "r") as zf: + zf.extractall(out_path) + else: + crate.write(out_path) + + assert (out_path / "file.txt").is_file() + assert (out_path / "ro-crate-metadata.json").is_file() + + assert (out_path / "subcrate" / "ro-crate-metadata.json").is_file() + assert (out_path / "subcrate" / "subfile.txt").is_file() + + assert (out_path / "subcrate" / "subsubcrate" / "deepfile.txt").is_file() + assert (out_path / "subcrate" / "subsubcrate" / "ro-crate-metadata.json").is_file() + + +@pytest.mark.parametrize("to_zip", [False, True]) +def test_subcrates_creation(test_data_dir, tmpdir, to_zip): + crate = ROCrate() + crate.add_file(test_data_dir / "read_crate" / "with space.txt") + subcrate = crate.add_subcrate(dest_path="subcrate/") + assert subcrate.get("conformsTo") == "https://w3id.org/ro/crate" + assert crate.subcrate_entities == [subcrate] + assert not subcrate._crate + subcrate_crate = subcrate.get_crate() + assert subcrate._crate is subcrate_crate + assert subcrate_crate.source is None + test_file_galaxy_path = (test_data_dir / "test_file_galaxy.txt").rename( + test_data_dir / "test file galaxy.txt" + ) + subf = subcrate_crate.add_file(test_file_galaxy_path) + subsubcrate = subcrate_crate.add_subcrate(dest_path="subsubcrate/") + assert subcrate_crate.subcrate_entities == [subsubcrate] + subsubcrate_crate = subsubcrate.get_crate() + subsubf = subsubcrate_crate.add_file("setup.cfg") + assert crate.get("subcrate/test%20file%20galaxy.txt") is subf + assert crate.get("subcrate/subsubcrate/setup.cfg") is subsubf + assert subcrate_crate.get("subsubcrate/setup.cfg") is subsubf + + out_path = tmpdir / "ro_crate_out" + if to_zip: + zip_path = tmpdir / 
'ro_crate_out.zip' + crate.write_zip(zip_path) + with zipfile.ZipFile(zip_path, "r") as zf: + zf.extractall(out_path) + else: + crate.write(out_path) + + assert (out_path / "ro-crate-metadata.json").is_file() + assert (out_path / "with space.txt").is_file() + assert (out_path / "subcrate" / "ro-crate-metadata.json").is_file() + assert (out_path / "subcrate" / "test file galaxy.txt").is_file() + assert (out_path / "subcrate" / "subsubcrate" / "ro-crate-metadata.json").is_file() + assert (out_path / "subcrate" / "subsubcrate" / "setup.cfg").is_file() + out_crate = ROCrate(out_path, load_subcrates=True) + assert out_crate.get("with%20space.txt") + out_subcrate = out_crate.get("subcrate/") + assert out_subcrate.get("conformsTo") == "https://w3id.org/ro/crate" + assert out_crate.subcrate_entities == [out_subcrate] + out_subf = out_crate.get("subcrate/test%20file%20galaxy.txt") + assert out_subf + out_subsubf = out_crate.get("subcrate/subsubcrate/setup.cfg") + assert out_subsubf + out_subcrate_crate = out_subcrate.get_crate() + assert out_subcrate_crate.get("subsubcrate/setup.cfg") is out_subsubf + + @pytest.mark.parametrize("version", ["1.0", "1.1", "1.2"]) def test_write_version(tmpdir, helpers, version): basename = helpers.LEGACY_METADATA_FILE_NAME if version == "1.0" else helpers.METADATA_FILE_NAME