Skip to content

utils

Various helper functions that packers can use.

check_metadata_file

check_metadata_file(path: Path, **kwargs)

Check a metadata file, return error object.

If required is set, will add an error if file is missing.

If schema is passed and file exists, will validate the file and log errors.

Combine both to check that a file does exist and is valid according to a schema.

Source code in src/metador_core/packer/utils.py
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
def check_metadata_file(path: Path, **kwargs):
    """Validate a metadata file and collect any problems in an error object.

    Recognized keyword arguments:

    * `required`: if truthy, a missing file is recorded as an error.
    * `schema`: if given and the file exists, the file is parsed with the
      schema and any parsing or validation failure is recorded as an error.

    Pass both to assert that the file exists *and* conforms to the schema.

    Returns:
        The collected errors (empty if nothing was found to be wrong).
    """
    must_exist: bool = kwargs.get("required", False)
    schema = kwargs.get("schema")
    report = DirValidationErrors()
    key = str(path)

    # A missing file is only a problem if the caller insisted on it,
    # and there is nothing to validate in that case either way.
    if not path.is_file():
        if must_exist:
            report.add(key, f"Required metadata file not found: '{path}'")
        return report

    # File is present, but without a schema there is nothing to check.
    if schema is None:
        return report

    try:
        schema.parse_file(path)
    except (JSONDecodeError, ValidationError, FileNotFoundError) as exc:
        report.add(key, str(exc))
    return report

pack_file

pack_file(
    node: Union[MetadorContainer, MetadorGroup],
    file_path: Union[Path, str],
    *,
    target: Optional[str] = None,
    metadata: Optional[MetadataSchema] = None
) -> MetadorDataset

Embed a file, adding minimal generic metadata to it.

Will also ensure that the attached metadata has RO-Crate compatible @id set correctly.

Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| `node` | `Union[MetadorContainer, MetadorGroup]` | Container where to embed the file contents | required |
| `file_path` | `Union[Path, str]` | Path of an existing file to be embedded | required |
| `target` | `Optional[str]` | Fresh path in container where to place the file | `None` |
| `metadata` | `Optional[MetadataSchema]` | If provided, will attach this instead of harvesting defaults. | `None` |

Returns:

| Type | Description |
|------|-------------|
| `MetadorDataset` | Dataset of new embedded file. |

Source code in src/metador_core/packer/utils.py
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
def pack_file(
    node: Union[MetadorContainer, MetadorGroup],
    file_path: Union[Path, str],
    *,
    target: Optional[str] = None,
    metadata: Optional[MetadataSchema] = None,
) -> MetadorDataset:
    """Embed a file, adding minimal generic metadata to it.

    Will also ensure that the attached metadata has RO-Crate compatible @id set correctly.

    Args:
        node: Container where to embed the file contents
        file_path: Path of an existing file to be embedded
        target: Fresh path in container where to place the file
            (defaults to the file name of `file_path`)
        metadata: If provided, will attach this instead of harvesting defaults.

    Returns:
        Dataset of new embedded file.

    Raises:
        ValueError: If `target` already exists in `node`, if `file_path` is
            not an existing file, or if the metadata is not a `FileMeta`
            instance whose type is a registered schema plugin.
    """
    file_path = Path(file_path)

    # if no target path given, use <current node path / file name>
    if not target:
        target = file_path.name

    # check container and file
    if target in node:
        # report the colliding path, not the container object itself
        raise ValueError(f"Path '{target}' already exists in given container or group!")
    if not file_path.is_file():
        raise ValueError(f"Path '{file_path}' does not look like an existing file!")

    if not metadata:
        # no metadata given -> harvest minimal information about a file
        hv_file = harvesters["core.file.generic"]
        metadata = harvest(FileMeta, [hv_file(filepath=file_path)])
    else:
        # defensive copying: the @id is overwritten below and the caller's
        # object must not be modified
        metadata = metadata.copy()

    # check metadata (before anything is written into the container)
    if not isinstance(metadata, FileMeta):
        msg = f"Provided metadata is not compatible with '{FileMeta.Plugin.name}'!"
        raise ValueError(msg)
    if not schemas.is_plugin(type(metadata)):
        msg = f"Given metadata is a {type(metadata)}, which is not a schema plugin!"
        raise ValueError(msg)

    data = _h5_wrap_bytes(file_path.read_bytes())
    ret = node.create_dataset(target, data=data)

    # set file metadata @id to be relative to dataset root just like RO Crate wants
    metadata.id_ = urllib.parse.quote(f".{ret.name}")

    # embed file and metadata in container:
    ret.meta[metadata.Plugin.name] = metadata
    return ret