Coverage for src/metador_core/ih5/skeleton.py: 98%

55 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-11-02 09:33 +0000

1"""IH5 skeletons and stubs (low-level structures used by IH5MFRecord). 

2 

3A skeleton documents the tree structure of an HDF5-like container, 

4ignoring the actual data content (attribute values and datasets). 

5 

6This can be used to implement manifest files and to support "patching in thin air", 

7i.e. without having the actual container. 

8""" 

9from typing import Dict, Literal, Union 

10 

11import h5py 

12from pydantic import BaseModel 

13 

14from .overlay import H5Type, IH5Dataset, IH5Group 

15from .record import IH5Record, IH5UserBlock 

16 

17 

class SkeletonNodeInfo(BaseModel):
    # Structural description of a single group or dataset node: it stores the
    # node kind and patch indices only, no attribute values or dataset content.

    node_type: Literal[H5Type.group, H5Type.dataset]
    # dataset: patch where to get the data from; group: patch that created it
    patch_index: int
    # attribute name -> index of the patch providing that attribute
    attrs: Dict[str, int]

    def with_patch_index(self, idx: int):
        """Return a copy whose node and attribute patch indices all equal `idx`."""
        clone = self.copy()
        clone.patch_index = idx
        # fresh dict with the same keys, so the original's mapping stays untouched
        clone.attrs = dict.fromkeys(self.attrs, idx)
        return clone

    @classmethod
    def for_node(cls, node: Union[IH5Group, IH5Dataset]):
        """Build the skeleton entry describing the given group or dataset node."""

        def patch_index_of(container_idx):
            # resolve the index via the node's record (`_ublock`) and read the
            # `patch_index` stored there
            return node._record._ublock(container_idx).patch_index

        kind = H5Type.dataset if isinstance(node, IH5Dataset) else H5Type.group
        node_patch = patch_index_of(node._cidx)

        attr_patches = {}
        for name in node.attrs.keys():
            # `_find` yields the index that is then translated to a patch index
            attr_patches[name] = patch_index_of(node.attrs._find(name))

        return cls(node_type=kind, patch_index=node_patch, attrs=attr_patches)

47 

48 

class IH5Skeleton(BaseModel):
    # Custom-root model: maps node paths to the SkeletonNodeInfo of that node.
    __root__: Dict[str, SkeletonNodeInfo]

    def with_patch_index(self, idx: int) -> "IH5Skeleton":
        """Return a copy of the skeleton with the patch index of all nodes and attributes set to `idx`."""
        ret = self.copy()
        # build a new mapping so the entries of the original are not replaced
        ret.__root__ = {
            path: info.with_patch_index(idx) for path, info in self.__root__.items()
        }
        return ret

    @classmethod
    def for_record(cls, rec: IH5Record) -> "IH5Skeleton":
        """Build the skeleton of an IH5 record.

        The result maps the root path `/` and the `name` of every node passed
        to the callback by `rec.visititems` to a `SkeletonNodeInfo`.

        Attributes do not get separate path entries; they are recorded in the
        `attrs` mapping of the `SkeletonNodeInfo` of the node they belong to.
        """
        # the root entry is added explicitly, before visiting the other nodes
        skel = {"/": SkeletonNodeInfo.for_node(rec["/"])}

        def add_paths(_, node):
            # returns None, so the traversal is never stopped early
            skel[node.name] = SkeletonNodeInfo.for_node(node)

        rec.visititems(add_paths)
        return cls(__root__=skel)

77 

78 

79# NOTE: we pass in the empty container as first argument in the following 

80# in order to make this generic over subtypes (IH5Record, IH5MFRecord)! 

81 

82 

def init_stub_skeleton(ds: IH5Record, skel: IH5Skeleton):
    """Populate a fresh container with placeholder nodes following a skeleton.

    Groups are created unless the path is already present; datasets and all
    attributes listed for a node are set to `h5py.Empty(None)`.

    Raises:
        ValueError: if `ds` already contains members or attributes.
    """
    is_pristine = len(ds) == 0 and len(ds.attrs) == 0
    if not is_pristine:
        raise ValueError("Container not empty, cannot initialize stub structure here!")

    for path, info in skel.__root__.items():
        kind = info.node_type
        if kind == H5Type.group and path not in ds:
            ds.create_group(path)
        elif kind == H5Type.dataset:
            ds[path] = h5py.Empty(None)

        # attributes are stubbed the same way for groups and datasets
        for attr_name in info.attrs:
            ds[path].attrs[attr_name] = h5py.Empty(None)

97 

98 

def init_stub_base(target: IH5Record, src_ub: IH5UserBlock, src_skel: IH5Skeleton):
    """Turn an empty target into a stub base container.

    The target receives the stub structure described by `src_skel` and a copy
    of the source user block `src_ub` with `prev_patch` reset to `None`.

    Patches on top of this container will work with the original container.
    """
    init_stub_skeleton(target, src_skel)

    # mark as base container: same user block, but without a previous patch
    base_ublock = src_ub.copy(update={"prev_patch": None})
    # NOTE(review): index -1 presumably addresses the last container file of
    # the target - confirm against IH5Record._set_ublock
    target._set_ublock(-1, base_ublock)