Skip to content

example

This is an example packer plugin for simple general data types.

A packer plugin implements use-case specific container-related functionality for Metador containers.

To develop your own packer plugin, implement a class deriving from Packer and register the class as an entry point of your package (see the pyproject.toml of this package, where GenericPacker is registered as a packer plugin called example).

GenericPacker

Bases: Packer

The generic packer demonstrates how a packer can be implemented.

It will pack CSV tables with metadata into corresponding HDF5 containers, and it will pack all other kinds of files as embedded opaque blobs.

Both kinds of nodes will have corresponding metadata attributes attached.

The directory is expected to have a _meta.yaml file in the container root and each CSV file file.csv needs a companion metadata file file.csv_meta.yaml.

All symlinks inside the directory are completely ignored.

This packer does very verbose logging for didactic purposes. Other packers may log their actions as they deem appropriate.

Source code in src/metador_core/packer/example.py
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
class GenericPacker(Packer):
    """The generic packer is demonstrating how a packer can be implemented.

    It will pack CSV tables with metadata into corresponding HDF5 containers,
    and it will pack all other kinds of files as embedded opaque blobs.

    Both kinds of nodes will have corresponding metadata attributes attached.

    The directory is expected to have a _meta.yaml file in the container root
    and each CSV file file.csv needs a companion metadata file file.csv_meta.yaml.

    All symlinks inside the directory are completely ignored.

    This packer does very verbose logging for didactic purposes.
    Other packers may log their actions as they deem appropriate.
    """

    class Plugin:
        # plugin identification used when registering this packer
        name = "core.generic"
        version = (0, 1, 0)

    # suffix that marks metadata files; the root metadata file is named exactly this
    META_SUFFIX: str = "_meta.yaml"

    @classmethod
    def sidecar_for(cls, path: Union[Path, str]) -> str:
        """Sidecar file name for given path.

        The result is the string form of `path` with `META_SUFFIX` appended.
        """
        return f"{path}{cls.META_SUFFIX}"

    @classmethod
    @overrides
    def check_dir(cls, data_dir: Path) -> DirValidationErrors:
        """Validate the data directory before packing.

        Only the root metadata file (`data_dir / META_SUFFIX`) is checked here:
        it is required and validated against the `BibMeta` schema.

        Args:
            data_dir: Directory that is going to be packed.

        Returns:
            The collected validation errors.
        """
        print("--------")
        print("called check_dir")
        errs = DirValidationErrors()
        errs.update(
            check_metadata_file(
                data_dir / cls.META_SUFFIX, required=True, schema=BibMeta
            )
        )
        return errs

    @classmethod
    @overrides
    def update(cls, mc: MetadorContainer, data_dir: Path, diff: DirDiff):
        """Apply the changes described by `diff` to the container `mc`.

        Each path in the data directory maps 1-to-1 to a container path.
        Removed entities are deleted, modified entities are deleted and
        re-created, and new entities are created according to their kind
        (directory, root metadata, CSV table, image or opaque file).

        Args:
            mc: Container to be updated.
            data_dir: Directory with the current state of the data.
            diff: Difference between the previously packed and current state.
        """
        print("--------")
        print("called update")

        # Derive the CSV sidecar suffix from META_SUFFIX instead of repeating
        # the literal, so it cannot drift from `sidecar_for`.
        csv_sidecar_suffix = cls.sidecar_for(".csv").lower()

        for path, dnode in diff.annotate(data_dir).items():
            # the status indicates whether the file was added, removed or modified
            status = diff.status(dnode)
            print(status.value, path)

            if dnode is None:  # unchanged paths in the directory have no diff node
                print("IGNORE:", path, "(unchanged)")
                continue  # nothing to do

            if path.is_symlink():  # we ignore symlinks in the data directory
                print("IGNORE:", path, "(symlink)")
                continue

            if path.name.lower().endswith(csv_sidecar_suffix):
                # will be taken care of when the CSV file is processed
                print("IGNORE:", path, "(sidecar file)")
                continue

            # for this simple packer, each file maps 1-to-1 to a container path
            key = f"{dnode.path}"  # path inside the container

            # NOTE(review): for files ending with META_SUFFIX no container node
            # is created further below, so the `del mc[key]` statements in the
            # next two branches target a key this packer never added --
            # confirm that the container tolerates this.

            if status == DiffNode.Status.removed:  # entity was removed ->
                # also remove in container, if it was not a symlink (which we ignored)
                if dnode.prev_type != DiffNode.ObjType.symlink:
                    print("DELETE:", key)
                    del mc[key]
                continue

            if status == DiffNode.Status.modified:  # changed
                if dnode.prev_type == dnode.curr_type and path.is_dir():
                    continue  # a changed dir should already exist + remain in container

                # otherwise it was replaced either file -> dir or dir -> file, so
                # remove entity, proceeding with loop body to add new entity version
                print("DELETE:", key)
                del mc[key]

            # now we (re-)add new or modified entities:
            if path.is_dir():
                print("CREATE:", path, "->", key, "(dir)")

                mc.create_group(key)

            elif path.is_file():
                if path.name.endswith(cls.META_SUFFIX):
                    if key == cls.META_SUFFIX:
                        # update root meta
                        print("CREATE:", path, "->", key, "(biblio metadata)")
                        mc.meta["common_biblio"] = BibMeta.parse_file(path)
                    else:
                        # any other metadata file is not used by this packer;
                        # log it like every other skipped path
                        print("IGNORE:", path, "(unused metadata file)")
                else:
                    if path.name.lower().endswith(".csv"):
                        # embed CSV as numpy array with table metadata
                        print("CREATE:", path, "->", key, "(table)")

                        mc[key] = pandas.read_csv(path).to_numpy()  # type: ignore
                        mc[key].meta["common_table"] = TableMeta.for_file(
                            cls.sidecar_for(path)
                        )

                    elif path.name.lower().endswith((".jpg", ".jpeg", ".png")):
                        # embed image file (image-specific metadata is not attached yet)
                        print("CREATE:", path, "->", key, "(image)")
                        pack_file(mc, path, target=key)
                        # mc[key].meta["common_image"] = image_meta_for(path)

                    else:
                        # treat as opaque blob and add file metadata
                        print("CREATE:", path, "->", key, "(file)")
                        pack_file(mc, path, target=key)

sidecar_for classmethod

sidecar_for(path: Union[Path, str]) -> str

Sidecar file name for given path.

Source code in src/metador_core/packer/example.py
49
50
51
52
@classmethod
def sidecar_for(cls, path: Union[Path, str]) -> str:
    """Return the name of the metadata sidecar file belonging to `path`.

    The name is the string form of `path` followed by `cls.META_SUFFIX`.
    """
    base_name = str(path)
    return base_name + cls.META_SUFFIX