Coverage for src/metador_core/packer/example.py: 31%
70 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-11-02 09:33 +0000
"""This is an example packer plugin for simple general data types.

A packer plugin implements use-case specific container-related
functionality for Metador containers.

To develop your own packer plugin, implement a class deriving from
`Packer` and register the class as an entrypoint of your package
(see the `pyproject.toml` of this package, where `GenericPacker`
is registered as a packer plugin called `example`.)
"""

from pathlib import Path
from typing import Any, Union

import pandas
from overrides import overrides

from ..util.diff import DiffNode, DirDiff
from . import MetadorContainer, Packer
from .utils import DirValidationErrors, check_metadata_file, pack_file

# Placeholder aliases for the metadata schemas used by the packer below.
# NOTE(review): both are plain `typing.Any` here, which has no `parse_file` /
# `for_file` attributes -- presumably these are meant to be replaced by real
# schema classes; confirm before relying on the code paths that call them.
BibMeta = Any
TableMeta = Any
class GenericPacker(Packer):
    """The generic packer is demonstrating how a packer can be implemented.

    It will pack CSV tables with metadata into corresponding HDF5 containers,
    and it will pack all other kinds of files as embedded opaque blobs.

    Both kinds of nodes will have corresponding metadata attributes attached.

    The directory is expected to have a _meta.yaml file in the container root
    and each CSV file file.csv needs a companion metadata file file.csv_meta.yaml.

    All symlinks inside the directory are completely ignored.

    This packer does very verbose logging for didactic purposes.
    Other packers may log their actions as they deem appropriate.
    """

    class Plugin:
        # NOTE(review): the module docstring says this packer is registered
        # under the name `example` -- confirm which name is canonical.
        name = "core.generic"
        version = (0, 1, 0)

    # Suffix shared by the root metadata file and all sidecar metadata files.
    META_SUFFIX: str = "_meta.yaml"

    @classmethod
    def sidecar_for(cls, path: Union[Path, str]) -> str:
        """Return the sidecar metadata file name for the given path.

        The result is simply `path` with `META_SUFFIX` appended.
        """
        return f"{path}{cls.META_SUFFIX}"

    @classmethod
    @overrides
    def check_dir(cls, data_dir: Path) -> DirValidationErrors:
        """Validate the root metadata file of `data_dir`.

        Only `data_dir / META_SUFFIX` is checked here (it is required);
        CSV sidecar files are not validated by this method.

        Returns:
            The collected validation errors.
        """
        print("--------")
        print("called check_dir")
        errs = DirValidationErrors()
        errs.update(
            check_metadata_file(
                data_dir / cls.META_SUFFIX, required=True, schema=BibMeta
            )
        )
        return errs

    @classmethod
    @overrides
    def update(cls, mc: MetadorContainer, data_dir: Path, diff: DirDiff):
        """Apply the changes described by `diff` to the container `mc`.

        Each changed path below `data_dir` maps 1-to-1 to a container path.
        Unchanged paths, symlinks and CSV sidecar files are skipped; removed
        entities are deleted from the container; modified entities are deleted
        and re-created; added entities are created.
        """
        print("--------")
        print("called update")

        # Derived from META_SUFFIX so it cannot drift from `sidecar_for`.
        csv_sidecar_suffix = f".csv{cls.META_SUFFIX}".lower()

        for path, dnode in diff.annotate(data_dir).items():
            # the status indicates whether the file was added, removed or modified
            status = diff.status(dnode)
            print(status.value, path)

            if dnode is None:  # unchanged paths in the directory have no diff node
                print("IGNORE:", path, "(unchanged)")
                continue  # nothing to do

            if path.is_symlink():  # we ignore symlinks in the data directory
                print("IGNORE:", path, "(symlink)")
                continue

            if path.name.lower().endswith(csv_sidecar_suffix):
                # will be taken care of when the CSV file is processed
                print("IGNORE:", path, "(sidecar file)")
                continue

            # for this simple packer, each file maps 1-to-1 to a container path
            key = f"{dnode.path}"  # path inside the container

            # NOTE(review): files ending in META_SUFFIX are never stored under
            # their own key (see `_create_entity`), yet the deletions below are
            # still attempted for them -- confirm how `del mc[key]` behaves
            # for a key that was never created.

            if status == DiffNode.Status.removed:  # entity was removed ->
                # also remove in container, if it was not a symlink (which we ignored)
                if dnode.prev_type != DiffNode.ObjType.symlink:
                    print("DELETE:", key)
                    del mc[key]
                continue

            if status == DiffNode.Status.modified:  # changed
                if dnode.prev_type == dnode.curr_type and path.is_dir():
                    continue  # a changed dir should already exist + remain in container

                # otherwise the entity was replaced, so remove the old version
                # and proceed to add the new one. A former symlink was never
                # packed, so there is nothing to delete in that case.
                if dnode.prev_type != DiffNode.ObjType.symlink:
                    print("DELETE:", key)
                    del mc[key]

            # now we (re-)add new or modified entities:
            cls._create_entity(mc, path, key)

    @classmethod
    def _create_entity(cls, mc: MetadorContainer, path: Path, key: str) -> None:
        """Create the container entity at `key` for the directory or file `path`."""
        if path.is_dir():
            print("CREATE:", path, "->", key, "(dir)")
            mc.create_group(key)
            return

        if not path.is_file():
            return  # neither a directory nor a regular file: nothing to pack

        if path.name.endswith(cls.META_SUFFIX):
            # Only the root metadata file is packed; any other file with the
            # metadata suffix that reaches this point is skipped.
            if key == cls.META_SUFFIX:
                # update root meta
                print("CREATE:", path, "->", key, "(biblio metadata)")
                # NOTE(review): BibMeta is aliased to typing.Any in this
                # module, which has no `parse_file` -- confirm intended schema.
                mc.meta["common_biblio"] = BibMeta.parse_file(path)
            return

        lowered_name = path.name.lower()
        if lowered_name.endswith(".csv"):
            # embed CSV as numpy array with table metadata
            print("CREATE:", path, "->", key, "(table)")
            mc[key] = pandas.read_csv(path).to_numpy()  # type: ignore
            # NOTE(review): TableMeta is aliased to typing.Any in this
            # module, which has no `for_file` -- confirm intended schema.
            mc[key].meta["common_table"] = TableMeta.for_file(
                cls.sidecar_for(path)
            )
        elif lowered_name.endswith((".jpg", ".jpeg", ".png")):
            # embed image file with image-specific metadata
            print("CREATE:", path, "->", key, "(image)")
            pack_file(mc, path, target=key)
            # TODO: mc[key].meta["common_image"] = image_meta_for(path)
        else:
            # treat as opaque blob and add file metadata
            print("CREATE:", path, "->", key, "(file)")
            pack_file(mc, path, target=key)
            # TODO: mc[key].meta["common_file"] = file_meta_for(path)