Coverage for src/metador_core/packer/example.py: 31%

70 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-11-02 09:33 +0000

1"""This is an example packer plugin for simple general data types. 

2 

3A packer plugin implements use-case specific container-related 

4functionality for Metador containers. 

5 

6To develop your own packer plugin, implement a class deriving from 

7`Packer` and register the class as an entrypoint of your package 

8(see the `pyproject.toml` of this package, where `GenericPacker` 

9is registered as a packer plugin called `example`.) 

10""" 

11 

12from pathlib import Path 

13from typing import Any, Union 

14 

15import pandas 

16from overrides import overrides 

17 

18from ..util.diff import DiffNode, DirDiff 

19from . import MetadorContainer, Packer 

20from .utils import DirValidationErrors, check_metadata_file, pack_file 

21 

22BibMeta = Any 

23TableMeta = Any 

24 

25 

26class GenericPacker(Packer): 

27 """The generic packer is demonstrating how a packer can be implemented. 

28 

29 It will pack CSV tables with metadata into corresponding HDF5 containers, 

30 and it will pack all other kinds of files as embedded opaque blobs. 

31 

32 Both kinds of nodes will have corresponding metadata attributes attached. 

33 

34 The directory is expected to have a _meta.yaml file in the container root 

35 and each CSV file file.csv needs a companion metadata file file.csv_meta.yaml. 

36 

37 All symlinks inside the directory are completely ignored. 

38 

39 This packer does very verbose logging for didactic purposes. 

40 Other packers may log their actions as they deem appropriate. 

41 """ 

42 

43 class Plugin: 

44 name = "core.generic" 

45 version = (0, 1, 0) 

46 

47 META_SUFFIX: str = "_meta.yaml" 

48 

49 @classmethod 

50 def sidecar_for(cls, path: Union[Path, str]) -> str: 

51 """Sidecar file name for given path.""" 

52 return f"{path}{cls.META_SUFFIX}" 

53 

54 @classmethod 

55 @overrides 

56 def check_dir(cls, data_dir: Path) -> DirValidationErrors: 

57 print("--------") 

58 print("called check_dir") 

59 errs = DirValidationErrors() 

60 errs.update( 

61 check_metadata_file( 

62 data_dir / cls.META_SUFFIX, required=True, schema=BibMeta 

63 ) 

64 ) 

65 return errs 

66 

67 @classmethod 

68 @overrides 

69 def update(cls, mc: MetadorContainer, data_dir: Path, diff: DirDiff): 

70 print("--------") 

71 print("called update") 

72 

73 for path, dnode in diff.annotate(data_dir).items(): 

74 # the status indicates whether the file was added, removed or modified 

75 status = diff.status(dnode) 

76 print(status.value, path) 

77 

78 if dnode is None: # unchanged paths in the directory have no diff node 

79 print("IGNORE:", path, "(unchanged)") 

80 continue # nothing to do 

81 

82 if path.is_symlink(): # we ignore symlinks in the data directory 

83 print("IGNORE:", path, "(symlink)") 

84 continue 

85 

86 if path.name.lower().endswith(".csv_meta.yaml"): 

87 # will be taken care of when the CSV file is processed 

88 print("IGNORE:", path, "(sidecar file)") 

89 continue 

90 

91 # for this simple packer, each file maps 1-to-1 to a container path 

92 key = f"{dnode.path}" # path inside the container 

93 

94 if status == DiffNode.Status.removed: # entity was removed -> 

95 # also remove in container, if it was not a symlink (which we ignored) 

96 if dnode.prev_type != DiffNode.ObjType.symlink: 

97 print("DELETE:", key) 

98 del mc[key] 

99 continue 

100 

101 if status == DiffNode.Status.modified: # changed 

102 if dnode.prev_type == dnode.curr_type and path.is_dir(): 

103 continue # a changed dir should already exist + remain in container 

104 

105 # otherwise it was replaced either file -> dir or dir -> file, so 

106 # remove entity, proceeding with loop body to add new entity version 

107 print("DELETE:", key) 

108 del mc[key] 

109 

110 # now we (re-)add new or modified entities: 

111 if path.is_dir(): 

112 print("CREATE:", path, "->", key, "(dir)") 

113 

114 mc.create_group(key) 

115 

116 elif path.is_file(): 

117 if path.name.endswith(cls.META_SUFFIX): 

118 if key == cls.META_SUFFIX: 

119 # update root meta 

120 print("CREATE:", path, "->", key, "(biblio metadata)") 

121 mc.meta["common_biblio"] = BibMeta.parse_file(path) 

122 else: 

123 if path.name.lower().endswith(".csv"): 

124 # embed CSV as numpy array with table metadata 

125 print("CREATE:", path, "->", key, "(table)") 

126 

127 mc[key] = pandas.read_csv(path).to_numpy() # type: ignore 

128 mc[key].meta["common_table"] = TableMeta.for_file( 

129 cls.sidecar_for(path) 

130 ) 

131 

132 elif path.name.lower().endswith((".jpg", ".jpeg", ".png")): 

133 # embed image file with image-specific metadata 

134 print("CREATE:", path, "->", key, "(image)") 

135 pack_file(mc, path, target=key) 

136 # mc[key].meta["common_image"] = image_meta_for(path) 

137 

138 else: 

139 # treat as opaque blob and add file metadata 

140 print("CREATE:", path, "->", key, "(file)") 

141 pack_file(mc, path, target=key) 

142 

143 # mc[key].meta["common_file"] = file_meta_for(path)