
interface

NodeAcl

Bases: Enum

Metador node soft access control flags.

Soft means they can be bypassed; they exist to help prevent accidental errors.

Group nodes pass their ACL flags on to their child nodes.

Source code in src/metador_core/container/interface.py
class NodeAcl(Enum):
    """Metador node soft access control flags.

    Soft means - they can be bypassed, it is about trying to prevent errors.

    Group nodes inherit their ACL flags to child nodes.
    """

    # NOTE: maybe refactor this to IntFlag? Then e.g. restrict() interface can be like:
    # node.restrict(acl=NodeAcl.read_only | NodeAcl.local_only)

    read_only = auto()
    """Forbid calling methods mutating contents of (meta)data."""

    local_only = auto()
    """Forbid access to parents beyond the initial local node."""

    skel_only = auto()
    """Forbid reading datasets and metadata, only existence can be checked."""

read_only class-attribute instance-attribute

read_only = auto()

Forbid calling methods mutating contents of (meta)data.

local_only class-attribute instance-attribute

local_only = auto()

Forbid access to parents beyond the initial local node.

skel_only class-attribute instance-attribute

skel_only = auto()

Forbid reading datasets and metadata, only existence can be checked.
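
For orientation, a minimal usage sketch: it assumes `mc` is an already opened MetadorContainer (or a MetadorNode) whose effective flags are exposed as a boolean mapping `acl`, the same way MetadorContainerTOC below reads `self._container.acl[NodeAcl.read_only]`.

from metador_core.container.interface import NodeAcl

# Assumption: `mc` is an opened MetadorContainer (or MetadorNode); its effective
# soft ACL flags are readable as a boolean mapping keyed by NodeAcl members.
if mc.acl[NodeAcl.read_only]:
    print("read-only: methods mutating (meta)data will be refused")
if mc.acl[NodeAcl.skel_only]:
    print("skeleton-only: values of datasets/metadata cannot be read, only existence")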

StoredMetadata dataclass

Information about a metadata schema instance stored at a node.

Source code in src/metador_core/container/interface.py
@dataclass
class StoredMetadata:
    """Information about a metadata schema instance stored at a node."""

    uuid: UUID
    """UUID identifying the metadata object in the container.

    Used for bookkeeping, i.e. keeping the container TOC in sync.
    """

    schema: PluginRef
    """Schema the object is an instance of."""

    node: H5DatasetLike
    """Node with serialized metadata object."""

    def to_path(self):
        """Return path of metadata object.

        (E.g. to return canonical path for copying TOC link nodes)
        """
        prefix = self.node.parent.name
        ep_name = to_ep_name(self.schema.name, self.schema.version)
        return f"{prefix}/{ep_name}={self.uuid}"

    @staticmethod
    def from_node(obj: H5DatasetLike) -> StoredMetadata:
        """Instantiate info about a stored metadata node."""
        path = obj.name
        segs = path.lstrip("/").split("/")
        ep_name, uuid_str = segs.pop().split("=")
        s_name, s_vers = from_ep_name(EPName(ep_name))
        uuid = UUID(uuid_str)
        s_ref = schemas.PluginRef(name=s_name, version=s_vers)
        return StoredMetadata(uuid=uuid, schema=s_ref, node=obj)

uuid instance-attribute

uuid: UUID

UUID identifying the metadata object in the container.

Used for bookkeeping, i.e. keeping the container TOC in sync.

schema instance-attribute

schema: PluginRef

Schema the object is an instance of.

node instance-attribute

node: H5DatasetLike

Node with serialized metadata object.

to_path

to_path()

Return path of metadata object.

(E.g. to return canonical path for copying TOC link nodes)

Source code in src/metador_core/container/interface.py
89
90
91
92
93
94
95
96
def to_path(self):
    """Return path of metadata object.

    (E.g. to return canonical path for copying TOC link nodes)
    """
    prefix = self.node.parent.name
    ep_name = to_ep_name(self.schema.name, self.schema.version)
    return f"{prefix}/{ep_name}={self.uuid}"

from_node staticmethod

from_node(obj: H5DatasetLike) -> StoredMetadata

Instantiate info about a stored metadata node.

Source code in src/metador_core/container/interface.py
 98
 99
100
101
102
103
104
105
106
107
@staticmethod
def from_node(obj: H5DatasetLike) -> StoredMetadata:
    """Instantiate info about a stored metadata node."""
    path = obj.name
    segs = path.lstrip("/").split("/")
    ep_name, uuid_str = segs.pop().split("=")
    s_name, s_vers = from_ep_name(EPName(ep_name))
    uuid = UUID(uuid_str)
    s_ref = schemas.PluginRef(name=s_name, version=s_vers)
    return StoredMetadata(uuid=uuid, schema=s_ref, node=obj)
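
A minimal usage sketch, assuming `meta_node` is an H5DatasetLike object inside a Metador metadata group, so its path already ends in the `<entry-point-name>=<uuid>` pattern parsed here:

# Assumption: `meta_node` was obtained from an opened container and follows
# the "<ep_name>=<uuid>" naming convention used for stored metadata objects.
info = StoredMetadata.from_node(meta_node)
print(info.schema.name, info.schema.version)  # schema the stored object instantiates
print(info.uuid)                              # UUID used to keep the TOC in sync
assert info.to_path() == meta_node.name       # to_path() reconstructs the canonical path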

MetadorMeta

Interface to Metador metadata objects stored at a single HDF5 node.

Source code in src/metador_core/container/interface.py
class MetadorMeta:
    """Interface to Metador metadata objects stored at a single HDF5 node."""

    # helpers for __getitem__ and __setitem__

    @staticmethod
    def _require_schema(
        schema_name: str, schema_ver: Optional[SemVerTuple]
    ) -> Type[MetadataSchema]:
        """Return compatible installed schema class, if possible.

        Raises KeyError if no suitable schema was found.

        Raises TypeError if an auxiliary schema is requested.
        """
        schema_class = schemas._get_unsafe(
            schema_name, schema_ver
        )  # can raise KeyError
        if schema_class.Plugin.auxiliary:  # reject auxiliary schemas in container
            msg = f"Cannot attach instances of auxiliary schema '{schema_name}' to a node!"
            raise TypeError(msg)
        return schema_class

    @staticmethod
    def _parse_obj(
        schema: Type[S], obj: Union[str, bytes, Dict[str, Any], MetadataSchema]
    ) -> S:
        """Return original object if it is an instance of passed schema, or else parse it.

        Raises ValidationError if parsing fails.
        """
        if isinstance(obj, schema):
            return obj  # skip validation, already correct model!
        # try to convert/parse it:
        if isinstance(obj, (str, bytes)):
            return schema.parse_raw(obj)
        if isinstance(obj, MetadataSchema):
            return schema.parse_obj(obj.dict())
        else:  # dict
            return schema.parse_obj(obj)

    # raw getters and setters don't care about the environment,
    # they work only based on what objects are available and compatible
    # and do not perform validation etc.

    def _get_raw(
        self, schema_name: str, version: Optional[SemVerTuple] = None
    ) -> Optional[StoredMetadata]:
        """Return stored metadata for given schema at this node (or None).

        If a version is passed, the stored version must also be compatible.
        """
        # retrieve stored instance (if suitable)
        ret: Optional[StoredMetadata] = self._objs.get(schema_name)
        if not version:
            return ret  # no specified version -> anything goes
        # otherwise: only return if it is compatible
        req_ref: Optional[PluginRef] = None
        req_ref = schemas.PluginRef(name=schema_name, version=version)
        return ret if ret and req_ref.supports(ret.schema) else None

    def _set_raw(self, schema_ref: PluginRef, obj: MetadataSchema) -> None:
        """Store metadata object as instance of passed schema at this node."""
        # reserve UUID, construct dataset path and store metadata object
        obj_uuid = self._mc.metador._links.fresh_uuid()
        obj_path = f"{self._base_dir}/{_ep_name_for(schema_ref)}={str(obj_uuid)}"
        # store object
        self._mc.__wrapped__[obj_path] = bytes(obj)
        obj_node = self._mc.__wrapped__[obj_path]
        assert isinstance(obj_node, H5DatasetLike)
        stored_obj = StoredMetadata(uuid=obj_uuid, schema=schema_ref, node=obj_node)
        self._objs[schema_ref] = stored_obj
        # update TOC
        self._mc.metador._links.register(stored_obj)
        return

    def _del_raw(self, schema_name: str, *, _unlink: bool = True) -> None:
        """Delete stored metadata for given schema at this node."""
        # NOTE: _unlink is only for the destroy method
        stored_obj = self._objs[schema_name]
        # unregister in TOC (will also trigger clean up there)
        if _unlink:
            self._mc.metador._links.unregister(stored_obj.uuid)
        # remove metadata object
        del self._objs[stored_obj.schema.name]
        del self._mc.__wrapped__[stored_obj.node.name]
        # no metadata objects left -> remove metadata dir
        if not self._objs:
            del self._mc.__wrapped__[self._base_dir]
        return

    # helpers for container-level operations (move, copy, delete etc)

    def _destroy(self, *, _unlink: bool = True):
        """Unregister and delete all metadata objects attached to this node."""
        # NOTE: _unlink is only set to false for node copy without metadata
        for schema_name in list(self.keys()):
            self._del_raw(schema_name, _unlink=_unlink)

    # ----

    def __init__(self, node: MetadorNode):
        self._mc: MetadorContainer = node._self_container
        """Underlying container (for convenience)."""

        self._node: MetadorNode = node
        """Underlying actual user node."""

        is_dataset = isinstance(node, H5DatasetLike)
        self._base_dir: str = M.to_meta_base_path(node.name, is_dataset)
        """Path of this metador metadata group node.

        Actual node exists iff any metadata is stored for the node.
        """

        self._objs: Dict[str, StoredMetadata] = {}
        """Information about available metadata objects."""

        # load available object metadata encoded in the node names
        meta_grp = cast(H5GroupLike, self._mc.__wrapped__.get(self._base_dir, {}))
        for obj_node in meta_grp.values():
            assert isinstance(obj_node, H5DatasetLike)
            obj = StoredMetadata.from_node(obj_node)
            self._objs[obj.schema.name] = obj

    # ----

    def keys(self) -> KeysView[str]:
        """Return names of explicitly attached metadata objects.

        Transitive parent schemas are not included.
        """
        return self._objs.keys()

    def values(self) -> ValuesView[StoredMetadata]:
        self._node._guard_acl(NodeAcl.skel_only)
        return self._objs.values()

    def items(self) -> ItemsView[str, StoredMetadata]:
        self._node._guard_acl(NodeAcl.skel_only)
        return self._objs.items()

    # ----

    def __len__(self) -> int:
        """Return number of explicitly attached metadata objects.

        Transitive parent schemas are not counted.
        """
        return len(self.keys())

    def __iter__(self) -> Iterator[str]:
        """Iterate listing schema names of all actually attached metadata objects.

        Transitive parent schemas are not included.
        """
        return iter(self.keys())

    # ----

    def query(
        self,
        schema: Union[
            str, Tuple[str, Optional[SemVerTuple]], PluginRef, Type[MetadataSchema]
        ] = "",
        version: Optional[SemVerTuple] = None,
    ) -> Iterator[PluginRef]:
        """Return schema names for which objects at this node are compatible with passed schema.

        Will also consider compatible child schema instances.

        Returned iterator will yield passed schema first, if an object is available.
        Apart from this, the order is not specified.
        """
        schema_name, schema_ver = plugin_args(schema, version)
        # no schema selected -> list everything
        if not schema_name:
            for obj in self.values():
                yield obj.schema
            return

        # try exact schema (in any compatible version, if version specified)
        if obj := self._get_raw(schema_name, schema_ver):
            yield obj.schema

        # next, try compatible child schemas of compatible versions of requested schema
        compat = set().union(
            *(
                self._mc.metador.schemas.children(ref)
                for ref in self._mc.metador.schemas.versions(schema_name, schema_ver)
            )
        )
        avail = {self._get_raw(s).schema for s in self.keys()}
        for s_ref in avail.intersection(compat):
            yield s_ref

    def __contains__(
        self,
        schema: Union[
            str, Tuple[str, Optional[SemVerTuple]], PluginRef, Type[MetadataSchema]
        ],
    ) -> bool:
        """Check whether a compatible metadata object for given schema exists.

        Will also consider compatible child schema instances.
        """
        if schema == "" or isinstance(schema, tuple) and schema[0] == "":
            return False  # empty query lists everything, here the logic is inverted!
        return next(self.query(schema), None) is not None

    @overload
    def __getitem__(self, schema: str) -> MetadataSchema:
        ...

    @overload
    def __getitem__(self, schema: Type[S]) -> S:
        ...

    def __getitem__(self, schema: Union[str, Type[S]]) -> Union[S, MetadataSchema]:
        """Like get, but will raise KeyError on failure."""
        if ret := self.get(schema):
            return ret
        raise KeyError(schema)

    @overload
    def get(
        self, schema: str, version: Optional[SemVerTuple] = None
    ) -> Optional[MetadataSchema]:
        ...

    @overload
    def get(
        self, schema: Type[S], version: Optional[SemVerTuple] = None
    ) -> Optional[S]:
        ...

    def get(
        self, schema: Union[str, Type[S]], version: Optional[SemVerTuple] = None
    ) -> Optional[Union[MetadataSchema, S]]:
        """Get a parsed metadata object matching the given schema (if it exists).

        Will also consider compatible child schema instances.
        """
        self._node._guard_acl(NodeAcl.skel_only)

        # normalize arguments
        schema_name, schema_ver = plugin_args(schema, version)

        # get a compatible schema instance that is available at this node
        compat_schema = next(self.query(schema_name, schema_ver), None)
        if not compat_schema:
            return None  # not found

        # get class of schema and parse object
        schema_class = self._require_schema(schema_name, schema_ver)
        if obj := self._get_raw(compat_schema.name, compat_schema.version):
            return cast(S, self._parse_obj(schema_class, obj.node[()]))
        return None

    def __setitem__(
        self, schema: Union[str, Type[S]], value: Union[Dict[str, Any], MetadataSchema]
    ) -> None:
        """Store metadata object as instance of given schema.

        Raises KeyError if passed schema is not installed in environment.

        Raises TypeError if passed schema is marked auxiliary.

        Raises ValueError if an object for the schema already exists.

        Raises ValidationError if passed object is not valid for the schema.
        """
        self._node._guard_acl(NodeAcl.read_only)
        schema_name, schema_ver = plugin_args(schema)

        # if self.get(schema_name, schema_ver):  # <- also subclass schemas
        # NOTE: for practical reasons let's be more lenient here and allow redundancy
        # hence only check if exact schema (modulo version) is already there
        if self._get_raw(schema_name):  # <- only same schema
            msg = f"Metadata object for schema {schema_name} already exists!"
            raise ValueError(msg)

        schema_class = self._require_schema(schema_name, schema_ver)
        checked_obj = self._parse_obj(schema_class, value)
        self._set_raw(schema_class.Plugin.ref(), checked_obj)

    def __delitem__(self, schema: Union[str, Type[MetadataSchema]]) -> None:
        """Delete metadata object explicitly stored for the passed schema.

        If a schema class is passed, its version is ignored,
        as each node may contain at most one explicit instance per schema.

        Raises KeyError if no metadata object for that schema exists.
        """
        self._node._guard_acl(NodeAcl.read_only)
        schema_name, _ = plugin_args(schema)

        if self._get_raw(schema_name) is None:
            raise KeyError(schema_name)  # no (explicit) metadata object

        self._del_raw(schema_name)

keys

keys() -> KeysView[str]

Return names of explicitly attached metadata objects.

Transitive parent schemas are not included.

Source code in src/metador_core/container/interface.py
def keys(self) -> KeysView[str]:
    """Return names of explicitly attached metadata objects.

    Transitive parent schemas are not included.
    """
    return self._objs.keys()

__len__

__len__() -> int

Return number of explicitly attached metadata objects.

Transitive parent schemas are not counted.

Source code in src/metador_core/container/interface.py
def __len__(self) -> int:
    """Return number of explicitly attached metadata objects.

    Transitive parent schemas are not counted.
    """
    return len(self.keys())

__iter__

__iter__() -> Iterator[str]

Iterate over the schema names of all actually attached metadata objects.

Transitive parent schemas are not included.

Source code in src/metador_core/container/interface.py
def __iter__(self) -> Iterator[str]:
    """Iterate listing schema names of all actually attached metadata objects.

    Transitive parent schemas are not included.
    """
    return iter(self.keys())
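
Taken together, these methods make a node's metadata behave like a small mapping from schema names to stored objects. A sketch, assuming `node` is a MetadorNode of an opened container and that this interface is reachable as `node.meta` (as used by MetadorContainerTOC.query below):

meta = node.meta                     # MetadorMeta interface of this node
print(len(meta))                     # number of explicitly attached metadata objects
for schema_name in meta:             # iterate over attached schema names
    print(schema_name)
print(list(meta.keys()))             # the same names, as a keys view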

query

query(
    schema: Union[
        str,
        Tuple[str, Optional[SemVerTuple]],
        PluginRef,
        Type[MetadataSchema],
    ] = "",
    version: Optional[SemVerTuple] = None,
) -> Iterator[PluginRef]

Return schema references for which objects at this node are compatible with the passed schema.

Will also consider compatible child schema instances.

The returned iterator yields the passed schema first, if an object is available. Apart from this, the order is not specified.

Source code in src/metador_core/container/interface.py
def query(
    self,
    schema: Union[
        str, Tuple[str, Optional[SemVerTuple]], PluginRef, Type[MetadataSchema]
    ] = "",
    version: Optional[SemVerTuple] = None,
) -> Iterator[PluginRef]:
    """Return schema names for which objects at this node are compatible with passed schema.

    Will also consider compatible child schema instances.

    Returned iterator will yield passed schema first, if an object is available.
    Apart from this, the order is not specified.
    """
    schema_name, schema_ver = plugin_args(schema, version)
    # no schema selected -> list everything
    if not schema_name:
        for obj in self.values():
            yield obj.schema
        return

    # try exact schema (in any compatible version, if version specified)
    if obj := self._get_raw(schema_name, schema_ver):
        yield obj.schema

    # next, try compatible child schemas of compatible versions of requested schema
    compat = set().union(
        *(
            self._mc.metador.schemas.children(ref)
            for ref in self._mc.metador.schemas.versions(schema_name, schema_ver)
        )
    )
    avail = {self._get_raw(s).schema for s in self.keys()}
    for s_ref in avail.intersection(compat):
        yield s_ref
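
A usage sketch, assuming `node` is a MetadorNode and `"example.schema"` is a hypothetical name of an installed schema plugin:

# References of all schemas with an object attached at this node:
for s_ref in node.meta.query():
    print(s_ref.name, s_ref.version)

# Only those compatible with a requested schema (optionally pinning a version):
for s_ref in node.meta.query("example.schema", (0, 1, 0)):
    print("compatible object stored as instance of:", s_ref)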

__contains__

__contains__(
    schema: Union[
        str,
        Tuple[str, Optional[SemVerTuple]],
        PluginRef,
        Type[MetadataSchema],
    ]
) -> bool

Check whether a compatible metadata object for given schema exists.

Will also consider compatible child schema instances.

Source code in src/metador_core/container/interface.py
def __contains__(
    self,
    schema: Union[
        str, Tuple[str, Optional[SemVerTuple]], PluginRef, Type[MetadataSchema]
    ],
) -> bool:
    """Check whether a compatible metadata object for given schema exists.

    Will also consider compatible child schema instances.
    """
    if schema == "" or isinstance(schema, tuple) and schema[0] == "":
        return False  # empty query lists everything, here the logic is inverted!
    return next(self.query(schema), None) is not None

__getitem__

__getitem__(
    schema: Union[str, Type[S]]
) -> Union[S, MetadataSchema]

Like get, but will raise KeyError on failure.

Source code in src/metador_core/container/interface.py
def __getitem__(self, schema: Union[str, Type[S]]) -> Union[S, MetadataSchema]:
    """Like get, but will raise KeyError on failure."""
    if ret := self.get(schema):
        return ret
    raise KeyError(schema)

get

get(
    schema: Union[str, Type[S]],
    version: Optional[SemVerTuple] = None,
) -> Optional[Union[MetadataSchema, S]]

Get a parsed metadata object matching the given schema (if it exists).

Will also consider compatible child schema instances.

Source code in src/metador_core/container/interface.py
def get(
    self, schema: Union[str, Type[S]], version: Optional[SemVerTuple] = None
) -> Optional[Union[MetadataSchema, S]]:
    """Get a parsed metadata object matching the given schema (if it exists).

    Will also consider compatible child schema instances.
    """
    self._node._guard_acl(NodeAcl.skel_only)

    # normalize arguments
    schema_name, schema_ver = plugin_args(schema, version)

    # get a compatible schema instance that is available at this node
    compat_schema = next(self.query(schema_name, schema_ver), None)
    if not compat_schema:
        return None  # not found

    # get class of schema and parse object
    schema_class = self._require_schema(schema_name, schema_ver)
    if obj := self._get_raw(compat_schema.name, compat_schema.version):
        return cast(S, self._parse_obj(schema_class, obj.node[()]))
    return None
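
A usage sketch, assuming `node` is a MetadorNode and `"example.schema"` is a hypothetical installed schema name:

obj = node.meta.get("example.schema")   # parsed MetadataSchema instance, or None
if obj is not None:
    print(obj)

# __getitem__ is equivalent, but raises KeyError if nothing compatible is stored:
# obj = node.meta["example.schema"]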

__setitem__

__setitem__(
    schema: Union[str, Type[S]],
    value: Union[Dict[str, Any], MetadataSchema],
) -> None

Store metadata object as instance of given schema.

Raises KeyError if passed schema is not installed in environment.

Raises TypeError if passed schema is marked auxiliary.

Raises ValueError if an object for the schema already exists.

Raises ValidationError if passed object is not valid for the schema.

Source code in src/metador_core/container/interface.py
def __setitem__(
    self, schema: Union[str, Type[S]], value: Union[Dict[str, Any], MetadataSchema]
) -> None:
    """Store metadata object as instance of given schema.

    Raises KeyError if passed schema is not installed in environment.

    Raises TypeError if passed schema is marked auxiliary.

    Raises ValueError if an object for the schema already exists.

    Raises ValidationError if passed object is not valid for the schema.
    """
    self._node._guard_acl(NodeAcl.read_only)
    schema_name, schema_ver = plugin_args(schema)

    # if self.get(schema_name, schema_ver):  # <- also subclass schemas
    # NOTE: for practical reasons let's be more lenient here and allow redundancy
    # hence only check if exact schema (modulo version) is already there
    if self._get_raw(schema_name):  # <- only same schema
        msg = f"Metadata object for schema {schema_name} already exists!"
        raise ValueError(msg)

    schema_class = self._require_schema(schema_name, schema_ver)
    checked_obj = self._parse_obj(schema_class, value)
    self._set_raw(schema_class.Plugin.ref(), checked_obj)
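
A usage sketch, assuming the container was opened writable, `node` is a MetadorNode, and `ExampleSchema` is a hypothetical installed (non-auxiliary) schema class with a `title` field:

node.meta[ExampleSchema] = ExampleSchema(title="hello")   # validate and store an instance
# equivalently, pass a plain dict to be parsed against the named schema:
# node.meta["example.schema"] = {"title": "hello"}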

__delitem__

__delitem__(
    schema: Union[str, Type[MetadataSchema]]
) -> None

Delete metadata object explicitly stored for the passed schema.

If a schema class is passed, its version is ignored, as each node may contain at most one explicit instance per schema.

Raises KeyError if no metadata object for that schema exists.

Source code in src/metador_core/container/interface.py
def __delitem__(self, schema: Union[str, Type[MetadataSchema]]) -> None:
    """Delete metadata object explicitly stored for the passed schema.

    If a schema class is passed, its version is ignored,
    as each node may contain at most one explicit instance per schema.

    Raises KeyError if no metadata object for that schema exists.
    """
    self._node._guard_acl(NodeAcl.read_only)
    schema_name, _ = plugin_args(schema)

    if self._get_raw(schema_name) is None:
        raise KeyError(schema_name)  # no (explicit) metadata object

    self._del_raw(schema_name)
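
Removing the object again (same hypothetical setup as in the sketch above):

del node.meta["example.schema"]   # deletes the stored object and its TOC link
# deleting again would raise KeyError, since no object for that schema remains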

TOCLinks

Link management for synchronizing metadata objects and container TOC.

Source code in src/metador_core/container/interface.py
class TOCLinks:
    """Link management for synchronizing metadata objects and container TOC."""

    # NOTE: This is not exposed to the end-user

    @staticmethod
    def _link_path_for(schema_ref: PluginRef) -> str:
        return f"{M.METADOR_LINKS_PATH}/{_ep_name_for(schema_ref)}"

    def __init__(self, raw_cont: H5FileLike, toc_schemas: TOCSchemas):
        self._raw: H5FileLike = raw_cont
        """Raw underlying container (for quick access)."""

        self._toc_schemas = toc_schemas
        """Schemas used in container (to (un)register)."""

        self._toc_path: Dict[UUID, str] = {}
        """Maps metadata object UUIDs to paths of respective pseudo-symlink in TOC."""

        # load links into memory
        if M.METADOR_LINKS_PATH in self._raw:
            link_grp = self._raw.require_group(M.METADOR_LINKS_PATH)
            assert isinstance(link_grp, H5GroupLike)
            for schema_link_grp in link_grp.values():
                assert isinstance(schema_link_grp, H5GroupLike)
                for uuid, link_node in schema_link_grp.items():
                    assert isinstance(link_node, H5DatasetLike)
                    self._toc_path[UUID(uuid)] = link_node.name

    def fresh_uuid(self) -> UUID:
        """Return a UUID string not used for a metadata object in the container yet."""
        fresh = False
        ret: UUID
        # NOTE: here a very unlikely race condition is present if parallelized
        while not fresh:
            ret = uuid1()
            fresh = ret not in self._toc_path
        self._toc_path[ret] = None  # not assigned yet, but "reserved"
        # ----
        return ret

    def resolve(self, uuid: UUID) -> str:
        """Get the path a UUID in the TOC points to."""
        link_path = self._toc_path[uuid]
        link_node = cast(H5DatasetLike, self._raw[link_path])
        return link_node[()].decode("utf-8")

    def update(self, uuid: UUID, new_target: str):
        """Update target of an existing link to point to a new location."""
        link_path = self._toc_path[uuid]
        del self._raw[link_path]
        self._raw[link_path] = new_target

    def register(self, obj: StoredMetadata) -> None:
        """Create a link for a metadata object in container TOC.

        The link points to the metadata object.
        """
        self._toc_schemas._register(obj.schema)

        toc_path = f"{self._link_path_for(obj.schema)}/{obj.uuid}"
        self._toc_path[obj.uuid] = toc_path
        self._raw[toc_path] = str(obj.node.name)

    def unregister(self, uuid: UUID) -> None:
        """Unregister metadata object in TOC given its UUID.

        Will remove the object and clean up empty directories in the TOC.
        """
        # delete the link itself and free the UUID
        toc_path = self._toc_path[uuid]

        schema_group = self._raw[toc_path].parent
        assert isinstance(schema_group, H5GroupLike)
        link_group = schema_group.parent
        assert link_group.name == M.METADOR_LINKS_PATH

        del self._raw[toc_path]
        del self._toc_path[uuid]
        if len(schema_group):
            return  # schema still has instances

        s_name_vers: str = schema_group.name.split("/")[-1]
        # delete empty group for schema
        del self._raw[schema_group.name]
        # notify schema manager (cleans up schema + package info)
        self._toc_schemas._unregister(_schema_ref_for(s_name_vers))

        if len(link_group.keys()):
            return  # container still has metadata
        else:
            # remove the link dir itself (no known metadata in container left)
            del self._raw[link_group.name]

    # ----

    def find_broken(self, repair: bool = False) -> List[UUID]:
        """Return list of UUIDs in TOC not pointing to an existing metadata object.

        Will use loaded cache of UUIDs and check them, without scanning the container.

        If repair is set, will remove those broken links.
        """
        broken = []
        for uuid in self._toc_path.keys():
            target = self.resolve(uuid)
            if target not in self._raw:
                broken.append(uuid)
        if repair:
            for uuid in broken:
                self.unregister(uuid)
        return broken

    def find_missing(self, path: H5GroupLike) -> List[H5DatasetLike]:
        """Return list of metadata objects not listed in TOC."""
        missing = []

        def collect_missing(_, node):
            if not M.is_internal_path(node.name, M.METADOR_META_PREF):
                return  # not a metador metadata path
            if M.is_meta_base_path(node.name):
                # top dir, not a "link dataset",
                # e.g. /.../foo/metador_meta_ or /.../metador_meta_foo
                return

            # now we assume we have a path to a metadata link object in the group
            obj = StoredMetadata.from_node(node)
            known = obj.uuid in self._toc_path
            # check UUID collision: i.e., used in TOC, but points elsewhere
            # (requires fixing up the name of this object / new UUID)
            # implies that THIS object IS missing in the TOC
            collision = known and self.resolve(obj.uuid) != node.name
            if not known or collision:
                missing.append(node)

        # ensure it's a group and collect
        self._raw.require_group(path.name).visititems(collect_missing)
        return missing

    def repair_missing(
        self, missing: List[H5DatasetLike], update: bool = False
    ) -> None:
        """Repair links (objects get new UUIDs, unless update is true)."""
        # NOTE: needed for correct copy and move of nodes with their metadata
        for node in missing:
            obj = StoredMetadata.from_node(node)
            if update and obj.uuid in self._toc_path:
                # update target of existing link (e.g. for move)
                self.update(obj.uuid, node.name)
            else:
                # assign new UUID (e.g. for copy)
                # (copied metadata node refers to some other uuid in the name)
                obj.uuid = self.fresh_uuid()
                new_path = obj.to_path()
                # rename the metadata node to point to the new UUID
                self._raw.move(node.name, new_path)
                obj.node = cast(H5DatasetLike, self._raw[new_path])
                # register the object with the new UUID in the TOC
                self.register(obj)

fresh_uuid

fresh_uuid() -> UUID

Return a UUID not yet used for a metadata object in the container.

Source code in src/metador_core/container/interface.py
def fresh_uuid(self) -> UUID:
    """Return a UUID string not used for a metadata object in the container yet."""
    fresh = False
    ret: UUID
    # NOTE: here a very unlikely race condition is present if parallelized
    while not fresh:
        ret = uuid1()
        fresh = ret not in self._toc_path
    self._toc_path[ret] = None  # not assigned yet, but "reserved"
    # ----
    return ret

resolve

resolve(uuid: UUID) -> str

Get the path a UUID in the TOC points to.

Source code in src/metador_core/container/interface.py
def resolve(self, uuid: UUID) -> str:
    """Get the path a UUID in the TOC points to."""
    link_path = self._toc_path[uuid]
    link_node = cast(H5DatasetLike, self._raw[link_path])
    return link_node[()].decode("utf-8")

update

update(uuid: UUID, new_target: str)

Update target of an existing link to point to a new location.

Source code in src/metador_core/container/interface.py
def update(self, uuid: UUID, new_target: str):
    """Update target of an existing link to point to a new location."""
    link_path = self._toc_path[uuid]
    del self._raw[link_path]
    self._raw[link_path] = new_target

register

register(obj: StoredMetadata) -> None

Create a link for a metadata object in container TOC.

The link points to the metadata object.

Source code in src/metador_core/container/interface.py
def register(self, obj: StoredMetadata) -> None:
    """Create a link for a metadata object in container TOC.

    The link points to the metadata object.
    """
    self._toc_schemas._register(obj.schema)

    toc_path = f"{self._link_path_for(obj.schema)}/{obj.uuid}"
    self._toc_path[obj.uuid] = toc_path
    self._raw[toc_path] = str(obj.node.name)

unregister

unregister(uuid: UUID) -> None

Unregister metadata object in TOC given its UUID.

Will remove the object and clean up empty directories in the TOC.

Source code in src/metador_core/container/interface.py
def unregister(self, uuid: UUID) -> None:
    """Unregister metadata object in TOC given its UUID.

    Will remove the object and clean up empty directories in the TOC.
    """
    # delete the link itself and free the UUID
    toc_path = self._toc_path[uuid]

    schema_group = self._raw[toc_path].parent
    assert isinstance(schema_group, H5GroupLike)
    link_group = schema_group.parent
    assert link_group.name == M.METADOR_LINKS_PATH

    del self._raw[toc_path]
    del self._toc_path[uuid]
    if len(schema_group):
        return  # schema still has instances

    s_name_vers: str = schema_group.name.split("/")[-1]
    # delete empty group for schema
    del self._raw[schema_group.name]
    # notify schema manager (cleans up schema + package info)
    self._toc_schemas._unregister(_schema_ref_for(s_name_vers))

    if len(link_group.keys()):
        return  # container still has metadata
    else:
        # remove the link dir itself (no known metadata in container left)
        del self._raw[link_group.name]

find_broken

find_broken(repair: bool = False) -> List[UUID]

Return list of UUIDs in TOC not pointing to an existing metadata object.

Will use loaded cache of UUIDs and check them, without scanning the container.

If repair is set, will remove those broken links.

Source code in src/metador_core/container/interface.py
def find_broken(self, repair: bool = False) -> List[UUID]:
    """Return list of UUIDs in TOC not pointing to an existing metadata object.

    Will use loaded cache of UUIDs and check them, without scanning the container.

    If repair is set, will remove those broken links.
    """
    broken = []
    for uuid in self._toc_path.keys():
        target = self.resolve(uuid)
        if target not in self._raw:
            broken.append(uuid)
    if repair:
        for uuid in broken:
            self.unregister(uuid)
    return broken
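
TOCLinks is not exposed to end users, but for illustration, a consistency-check sketch, assuming `mc` is an opened, writable MetadorContainer and that `mc.metador._links` is the private TOCLinks instance used throughout this module:

links = mc.metador._links          # private; normally managed by MetadorMeta itself
broken = links.find_broken()       # UUIDs listed in the TOC without an existing target
print("dangling TOC links:", broken)
links.find_broken(repair=True)     # same scan, but also unregisters the broken links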

find_missing

find_missing(path: H5GroupLike) -> List[H5DatasetLike]

Return list of metadata objects not listed in TOC.

Source code in src/metador_core/container/interface.py
def find_missing(self, path: H5GroupLike) -> List[H5DatasetLike]:
    """Return list of metadata objects not listed in TOC."""
    missing = []

    def collect_missing(_, node):
        if not M.is_internal_path(node.name, M.METADOR_META_PREF):
            return  # not a metador metadata path
        if M.is_meta_base_path(node.name):
            # top dir, not a "link dataset",
            # e.g. /.../foo/metador_meta_ or /.../metador_meta_foo
            return

        # now we assume we have a path to a metadata link object in the group
        obj = StoredMetadata.from_node(node)
        known = obj.uuid in self._toc_path
        # check UUID collision: i.e., used in TOC, but points elsewhere
        # (requires fixing up the name of this object / new UUID)
        # implies that THIS object IS missing in the TOC
        collision = known and self.resolve(obj.uuid) != node.name
        if not known or collision:
            missing.append(node)

    # ensure it's a group and collect
    self._raw.require_group(path.name).visititems(collect_missing)
    return missing

repair_missing

repair_missing(
    missing: List[H5DatasetLike], update: bool = False
) -> None

Repair links (objects get new UUIDs, unless update is true).

Source code in src/metador_core/container/interface.py
def repair_missing(
    self, missing: List[H5DatasetLike], update: bool = False
) -> None:
    """Repair links (objects get new UUIDs, unless update is true)."""
    # NOTE: needed for correct copy and move of nodes with their metadata
    for node in missing:
        obj = StoredMetadata.from_node(node)
        if update and obj.uuid in self._toc_path:
            # update target of existing link (e.g. for move)
            self.update(obj.uuid, node.name)
        else:
            # assign new UUID (e.g. for copy)
            # (copied metadata node refers to some other uuid in the name)
            obj.uuid = self.fresh_uuid()
            new_path = obj.to_path()
            # rename the metadata node to point to the new UUID
            self._raw.move(node.name, new_path)
            obj.node = cast(H5DatasetLike, self._raw[new_path])
            # register the object with the new UUID in the TOC
            self.register(obj)

TOCSchemas

Schema management for schemas used in the container.

The interface is designed to mimic PGSchema wherever it makes sense.

Source code in src/metador_core/container/interface.py
class TOCSchemas:
    """Schema management for schemas used in the container.

    Interface is made to mimic PGSchema wherever it makes sense.
    """

    @classmethod
    def _schema_path_for(cls, s_ref: PluginRef) -> str:
        return f"{M.METADOR_SCHEMAS_PATH}/{to_ep_name(s_ref.name, s_ref.version)}"

    @classmethod
    def _jsonschema_path_for(cls, s_ref: PluginRef) -> str:
        return f"{cls._schema_path_for(s_ref)}/jsonschema.json"

    @staticmethod
    def _load_json(node: H5DatasetLike):
        return json.loads(node[()].decode("utf-8"))

    def _update_parents_children(
        self, schema_ref: PluginRef, parents: Optional[List[PluginRef]]
    ):
        if parents is None:  # remove schema
            for parent in self._parents[schema_ref]:
                if parent in self._schemas:
                    self._children[parent].remove(schema_ref)
                elif all(
                    (child not in self._schemas for child in self._children[parent])
                ):
                    del self._parents[parent]
                    del self._children[parent]
        else:  # add schema
            for i, parent in enumerate(parents):
                if parent not in self._parents:
                    self._parents[parent] = parents[: i + 1]
                if parent not in self._children:
                    self._children[parent] = set()
                if parent != schema_ref:
                    self._children[parent].add(schema_ref)

    def _register(self, schema_ref: PluginRef):
        """Notify that a schema is used in the container (metadata object is created/updated).

        If the schema has not been used before in the container, will store metadata about it.
        """
        if schema_ref in self._schemas:
            return  # nothing to do

        # store json schema
        schema_cls = schemas.get(schema_ref.name, schema_ref.version)
        jsonschema_dat = schema_cls.schema_json().encode("utf-8")
        jsonschema_path = self._jsonschema_path_for(schema_ref)
        self._raw[jsonschema_path] = jsonschema_dat

        # store parent schema refs
        compat_path = f"{self._schema_path_for(schema_ref)}/compat"
        parents = schemas.parent_path(schema_ref.name, schema_ref.version)
        parents_dat: bytes = json.dumps(list(map(lambda x: x.dict(), parents))).encode(
            "utf-8"
        )

        self._raw[compat_path] = parents_dat
        self._schemas.add(schema_ref)
        self._update_parents_children(schema_ref, parents)

        # add providing package (if no stored package provides it)
        if not self._pkgs._providers.get(schema_ref, []):
            env_pkg_info: PluginPkgMeta = schemas.provider(schema_cls.Plugin.ref())
            pkg_name_ver = (str(env_pkg_info.name), env_pkg_info.version)
            self._pkgs._register(pkg_name_ver, env_pkg_info)
            self._used[pkg_name_ver] = set()

        # update used schemas tracker for all packages providing this schema
        for pkg in self._pkgs._providers[schema_ref]:
            self._used[pkg].add(schema_ref)

    def _unregister(self, schema_ref: PluginRef):
        """Notify that a schema is not used at any container node anymore.

        If after that no schema of a listed dep package is used,
        this dependency will be removed from the container.
        """
        del self._raw[self._schema_path_for(schema_ref)]
        self._schemas.remove(schema_ref)
        self._update_parents_children(schema_ref, None)

        providers = set(self._pkgs._providers[schema_ref])
        for pkg in providers:
            pkg_used = self._used[pkg]
            if schema_ref in pkg_used:
                # remove schema from list of used schemas of pkg
                pkg_used.remove(schema_ref)
            if not len(pkg_used):
                # package not used anymore in container -> clean up
                self._pkgs._unregister(pkg)

        # remove schemas group if it is empty (no schemas used in container)
        if not self._raw.require_group(M.METADOR_SCHEMAS_PATH).keys():
            del self._raw[M.METADOR_SCHEMAS_PATH]

    def __init__(self, raw_cont: H5FileLike, toc_packages: TOCPackages):
        self._raw: H5FileLike = raw_cont
        """Raw underlying container (for quick access)."""

        self._pkgs = toc_packages
        """TOC package metadata manager object."""

        self._schemas: Set[PluginRef] = set()
        """Stored JSON Schemas of actually used schemas."""

        self._parents: Dict[PluginRef, List[PluginRef]] = {}
        """Parents of a used json schema (i.e. other partially compatible schemas)."""

        self._children: Dict[PluginRef, Set[PluginRef]] = {}
        """Children of a used json schema (i.e. other fully compatible schemas)."""

        self._used: Dict[PythonDep, Set[PluginRef]] = {}
        """package name + version -> name of schemas used in container"""

        for pkg in self._pkgs.keys():
            self._used[pkg] = set()

        if M.METADOR_SCHEMAS_PATH in self._raw:
            schema_grp = self._raw.require_group(M.METADOR_SCHEMAS_PATH)
            for name, node in schema_grp.items():
                s_ref: PluginRef = _schema_ref_for(name)
                assert isinstance(node, H5GroupLike)
                compat = node["compat"]
                assert isinstance(compat, H5DatasetLike)

                reflist = json.loads(compat[()].decode("utf-8"))
                parents = list(map(PluginRef.parse_obj, reflist))

                self._schemas.add(s_ref)
                self._update_parents_children(s_ref, parents)
                for pkg in self._pkgs._providers[s_ref]:
                    self._used[pkg].add(s_ref)

    @property
    def packages(self) -> TOCPackages:
        """Like PluginGroup.packages, but with respect to schemas used in container."""
        return self._pkgs

    def provider(self, schema_ref: PluginRef) -> PluginPkgMeta:
        """Like PluginGroup.provider, but with respect to container deps."""
        pkg_name_ver = next(iter(self._pkgs._providers.get(schema_ref, [])), None)
        if pkg_name_ver is None:
            msg = f"Did not find metadata of a package providing schema: '{schema_ref}'"
            raise KeyError(msg)
        return self._pkgs[pkg_name_ver]

    def parent_path(
        self, schema, version: Optional[SemVerTuple] = None
    ) -> List[PluginRef]:
        """Like PGSchema.parent_path, but with respect to container deps."""
        name, vers = plugin_args(schema, version, require_version=True)
        s_ref = schemas.PluginRef(name=name, version=vers)
        return self._parents[s_ref]

    def versions(
        self, p_name: str, version: Optional[SemVerTuple] = None
    ) -> List[PluginRef]:
        """Like PGSchema.versions, but with respect to container deps."""
        # NOTE: using _children instead of _schemas because some are only listed
        # due to their appearance in the parent_path of some actually used schema
        # but we need them here for "parent compatibility" to work right.
        refs = list(filter(lambda s: s.name == p_name, self._children))

        if version is None:
            return refs
        # filter plugins for compatible version
        requested = schemas.PluginRef(name=p_name, version=version)
        # NOTE: here "supports" arguments are reversed (compared to "plugin versions")!
        # because it's about instances (that must be "below" the requested schema version)
        return [ref for ref in refs if requested.supports(ref)]

    def children(self, schema, version: Optional[SemVerTuple] = None) -> Set[PluginRef]:
        """Like PGSchema.children, but with respect to container deps."""
        name, vers = plugin_args(schema, version)
        if vers is not None:
            s_refs = [schemas.PluginRef(name=name, version=vers)]
        else:
            # if no version is given, collect all possibilities
            s_refs = [ref for ref in self._children.keys() if ref.name == name]
        # return all that can be actually retrieved
        return set().union(
            *filter(lambda x: x is not None, map(self._children.get, s_refs))
        )

    # ----

    def __len__(self):
        return len(self._schemas)

    def __iter__(self):
        return iter(self.keys())

    def __contains__(self, schema_ref: PluginRef):
        return schema_ref in self._schemas

    def __getitem__(self, schema_ref: PluginRef):
        node_path = self._jsonschema_path_for(schema_ref)
        assert node_path in self._raw
        return self._load_json(cast(H5DatasetLike, self._raw[node_path]))

    def get(self, schema_ref: PluginRef):
        try:
            self[schema_ref]
        except KeyError:
            return None

    def keys(self):
        return set(self._schemas)

    def values(self):
        return [self[k] for k in self.keys()]

    def items(self):
        return [(k, self[k]) for k in self.keys()]

packages property

packages: TOCPackages

Like PluginGroup.packages, but with respect to schemas used in container.

provider

provider(schema_ref: PluginRef) -> PluginPkgMeta

Like PluginGroup.provider, but with respect to container deps.

Source code in src/metador_core/container/interface.py
def provider(self, schema_ref: PluginRef) -> PluginPkgMeta:
    """Like PluginGroup.provider, but with respect to container deps."""
    pkg_name_ver = next(iter(self._pkgs._providers.get(schema_ref, [])), None)
    if pkg_name_ver is None:
        msg = f"Did not find metadata of a package providing schema: '{schema_ref}'"
        raise KeyError(msg)
    return self._pkgs[pkg_name_ver]

parent_path

parent_path(
    schema, version: Optional[SemVerTuple] = None
) -> List[PluginRef]

Like PGSchema.parent_path, but with respect to container deps.

Source code in src/metador_core/container/interface.py
def parent_path(
    self, schema, version: Optional[SemVerTuple] = None
) -> List[PluginRef]:
    """Like PGSchema.parent_path, but with respect to container deps."""
    name, vers = plugin_args(schema, version, require_version=True)
    s_ref = schemas.PluginRef(name=name, version=vers)
    return self._parents[s_ref]

versions

versions(
    p_name: str, version: Optional[SemVerTuple] = None
) -> List[PluginRef]

Like PGSchema.versions, but with respect to container deps.

Source code in src/metador_core/container/interface.py
def versions(
    self, p_name: str, version: Optional[SemVerTuple] = None
) -> List[PluginRef]:
    """Like PGSchema.versions, but with respect to container deps."""
    # NOTE: using _children instead of _schemas because some are only listed
    # due to their appearance in the parent_path of some actually used schema
    # but we need them here for "parent compatibility" to work right.
    refs = list(filter(lambda s: s.name == p_name, self._children))

    if version is None:
        return refs
    # filter plugins for compatible version
    requested = schemas.PluginRef(name=p_name, version=version)
    # NOTE: here "supports" arguments are reversed (compared to "plugin versions")!
    # because it's about instances (that must be "below" the requested schema version)
    return [ref for ref in refs if requested.supports(ref)]

children

children(
    schema, version: Optional[SemVerTuple] = None
) -> Set[PluginRef]

Like PGSchema.children, but with respect to container deps.

Source code in src/metador_core/container/interface.py
def children(self, schema, version: Optional[SemVerTuple] = None) -> Set[PluginRef]:
    """Like PGSchema.children, but with respect to container deps."""
    name, vers = plugin_args(schema, version)
    if vers is not None:
        s_refs = [schemas.PluginRef(name=name, version=vers)]
    else:
        # if no version is given, collect all possibilities
        s_refs = [ref for ref in self._children.keys() if ref.name == name]
    # return all that can be actually retrieved
    return set().union(
        *filter(lambda x: x is not None, map(self._children.get, s_refs))
    )
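
A usage sketch, assuming `mc` is an opened MetadorContainer (its TOCSchemas instance is reachable as `mc.metador.schemas`) and `"example.schema"` is a hypothetical schema name used in it:

cschemas = mc.metador.schemas                 # TOCSchemas of this container
for s_ref in cschemas:                        # references of schemas used in the container
    print(s_ref, "provided by", cschemas.provider(s_ref).name)
    print(sorted(cschemas[s_ref]))            # keys of the stored JSON Schema document

# compatibility helpers, restricted to what the container actually stores:
print(cschemas.versions("example.schema"))    # known versions of that schema
print(cschemas.children("example.schema"))    # fully compatible child schemas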

TOCPackages

Package metadata management for schemas used in the container.

For each schema used in the container, the container always stores information about one package providing that schema.

If there are multiple providers of the same schema, the first/existing one is preferred.

Source code in src/metador_core/container/interface.py
class TOCPackages:
    """Package metadata management for schemas used in the container.

    The container will always store for each schema used in the
    information about one package providing that schema.

    If there are multiple providers of the same schema,
    the first/existing one is preferred.
    """

    @staticmethod
    def _pkginfo_path_for(pkg_name: str, pkg_version: SemVerTuple) -> str:
        return f"{M.METADOR_PACKAGES_PATH}/{to_ep_name(pkg_name, pkg_version)}"

    def _add_providers(self, pkg: PythonDep, pkginfo: PluginPkgMeta):
        # fill schema -> package lookup table for provided package
        for schema_ref in pkginfo.plugins[schemas.name]:
            if schema_ref not in self._providers:
                self._providers[schema_ref] = set()
            self._providers[schema_ref].add(pkg)

    def _register(self, pkg: PythonDep, info: PluginPkgMeta):
        pkg_path = self._pkginfo_path_for(*pkg)
        self._raw[pkg_path] = bytes(info)
        self._pkginfos[pkg] = info
        self._add_providers(pkg, info)

    def _unregister(self, pkg: PythonDep):
        pkg_path = self._pkginfo_path_for(*pkg)
        del self._raw[pkg_path]
        info = self._pkginfos.pop(pkg)
        # unregister providers
        for schema_ref in info.plugins[schemas.name]:
            providers = self._providers[schema_ref]
            providers.remove(pkg)
            if not providers:  # schema not provided by any package
                del self._providers[schema_ref]

        # remove schemas group if it is empty (no schemas used in container)
        if not self._raw.require_group(M.METADOR_PACKAGES_PATH).keys():
            del self._raw[M.METADOR_PACKAGES_PATH]

    def __init__(self, raw_container: H5FileLike):
        self._raw: H5FileLike = raw_container
        """Raw underlying container (for quick access)."""

        self._pkginfos: Dict[PythonDep, PluginPkgMeta] = {}
        """Package name + version -> package info"""

        self._providers: Dict[PluginRef, Set[PythonDep]] = {}
        """schema reference -> package name + version"""

        # parse package infos if they exist
        if M.METADOR_PACKAGES_PATH in self._raw:
            deps_grp = self._raw.require_group(M.METADOR_PACKAGES_PATH)
            for name, node in deps_grp.items():
                pkg: PythonDep = from_ep_name(EPName(name))
                info = PluginPkgMeta.parse_raw(cast(H5DatasetLike, node)[()])
                self._pkginfos[pkg] = info
                self._add_providers(pkg, info)

    # ----

    def __len__(self):
        return len(self._pkginfos)

    def __iter__(self):
        return iter(self._pkginfos)

    def __contains__(self, pkg: PythonDep):
        return pkg in self._pkginfos

    def __getitem__(self, pkg: PythonDep):
        return self._pkginfos[pkg]

    def keys(self):
        return self._pkginfos.keys()

    def values(self):
        return self._pkginfos.values()

    def items(self):
        return self._pkginfos.items()
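
A usage sketch, assuming `mc` is an opened MetadorContainer; the TOCPackages instance is reached through the `packages` property of the schema manager:

pkgs = mc.metador.schemas.packages            # TOCPackages of this container
for (pkg_name, pkg_version), info in pkgs.items():
    print(pkg_name, pkg_version)              # keys are (package name, version) tuples
    print(info)                               # PluginPkgMeta describing that package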

MetadorContainerTOC

Interface to the Metador metadata index (table of contents) of a container.

Source code in src/metador_core/container/interface.py
class MetadorContainerTOC:
    """Interface to the Metador metadata index (table of contents) of a container."""

    def __init__(self, container: MetadorContainer):
        self._container = container
        self._raw = self._container.__wrapped__

        ver = self.spec_version if M.METADOR_VERSION_PATH in self._raw else None
        if ver:
            if ver >= [2]:
                msg = f"Unsupported Metador container version: {ver}"
                raise ValueError(msg)
        else:
            if self._container.acl[NodeAcl.read_only]:
                msg = "Container is read-only and does not look like a Metador container! "
                msg += "Please open in writable mode to initialize Metador structures!"
                raise ValueError(msg)

            # writable + no version = fresh (for metador), initialize it
            self._raw[M.METADOR_VERSION_PATH] = M.METADOR_SPEC_VERSION
            self._raw[M.METADOR_UUID_PATH] = str(uuid1())

        # if we're here, we have a prepared container TOC structure

        # proceed to initialize TOC
        self._driver_type: MetadorDriverEnum = get_driver_type(self._raw)

        self._packages = TOCPackages(self._raw)
        self._schemas = TOCSchemas(self._raw, self._packages)
        self._links = TOCLinks(self._raw, self._schemas)

    # ----

    @property
    def driver_type(self) -> MetadorDriverEnum:
        """Return the type of the container driver."""
        return self._driver_type

    @property
    def driver(self) -> Type[MetadorDriver]:
        """Return the container driver class used by the container."""
        return METADOR_DRIVERS[self.driver_type]

    @property
    def source(self) -> Any:
        """Return data underlying thes container (file, set of files, etc. used with the driver)."""
        return get_source(self._raw, self.driver_type)

    # ----

    @property
    def container_uuid(self) -> UUID:
        """Return UUID of the container."""
        uuid = self._raw[M.METADOR_UUID_PATH]
        uuid_ds = cast(H5DatasetLike, uuid)
        return UUID(uuid_ds[()].decode("utf-8"))

    @property
    def spec_version(self) -> List[int]:
        """Return Metador container specification version of the container."""
        ver = cast(H5DatasetLike, self._raw[M.METADOR_VERSION_PATH])
        return list(map(int, ver[()].decode("utf-8").split(".")))

    @property
    def schemas(self):
        """Information about all schemas used for metadata objects in this container."""
        return self._schemas

    def query(
        self,
        schema: Union[str, Type[S]],
        version: Optional[SemVerTuple] = None,
        *,
        node: Optional[MetadorNode] = None,
    ) -> Iterator[MetadorNode]:
        """Return nodes that contain a metadata object compatible with the given schema."""
        schema_name, schema_ver = plugin_args(schema, version)
        if not schema_name:  # could be e.g. empty string
            msg = "A schema name, plugin reference or class must be provided!"
            raise ValueError(msg)

        start_node: MetadorNode = node or self._container["/"]

        # check start node metadata explicitly
        if (schema_name, schema_ver) in start_node.meta:
            yield start_node

        if not isinstance(start_node, H5GroupLike):
            return  # the node is not group-like, cannot be traversed down

        # collect nodes below start node recursively
        # NOTE: yielding from the collect_nodes does not work :'(
        # so we have to actually materialize the list >.<
        # but we expose only the generator interface anyway (better design)
        # (maybe consider replacing visititems with a custom traversal here)
        ret: List[MetadorNode] = []

        def collect_nodes(_, node: MetadorNode):
            if (schema_name, schema_ver) in node.meta:
                ret.append(node)

        start_node.visititems(collect_nodes)
        yield from iter(ret)

driver_type property

driver_type: MetadorDriverEnum

Return the type of the container driver.

driver property

driver: Type[MetadorDriver]

Return the container driver class used by the container.

source property

source: Any

Return the data underlying the container (file, set of files, etc. used with the driver).

container_uuid property

container_uuid: UUID

Return UUID of the container.

spec_version property

spec_version: List[int]

Return Metador container specification version of the container.
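
A usage sketch of these properties, assuming `mc` is an opened MetadorContainer, so `mc.metador` is its MetadorContainerTOC:

toc = mc.metador
print(toc.container_uuid)   # UUID assigned when the container was initialized
print(toc.spec_version)     # list of ints parsed from the stored spec version string
print(toc.driver_type)      # MetadorDriverEnum member describing the backend
print(toc.source)           # underlying data (file, set of files, ...) for toc.driver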

schemas property

schemas

Information about all schemas used for metadata objects in this container.

query

query(
    schema: Union[str, Type[S]],
    version: Optional[SemVerTuple] = None,
    *,
    node: Optional[MetadorNode] = None
) -> Iterator[MetadorNode]

Return nodes that contain a metadata object compatible with the given schema.

Source code in src/metador_core/container/interface.py
def query(
    self,
    schema: Union[str, Type[S]],
    version: Optional[SemVerTuple] = None,
    *,
    node: Optional[MetadorNode] = None,
) -> Iterator[MetadorNode]:
    """Return nodes that contain a metadata object compatible with the given schema."""
    schema_name, schema_ver = plugin_args(schema, version)
    if not schema_name:  # could be e.g. empty string
        msg = "A schema name, plugin reference or class must be provided!"
        raise ValueError(msg)

    start_node: MetadorNode = node or self._container["/"]

    # check start node metadata explicitly
    if (schema_name, schema_ver) in start_node.meta:
        yield start_node

    if not isinstance(start_node, H5GroupLike):
        return  # the node is not group-like, cannot be traversed down

    # collect nodes below start node recursively
    # NOTE: yielding from the collect_nodes does not work :'(
    # so we have to actually materialize the list >.<
    # but we expose only the generator interface anyway (better design)
    # (maybe consider replacing visititems with a custom traversal here)
    ret: List[MetadorNode] = []

    def collect_nodes(_, node: MetadorNode):
        if (schema_name, schema_ver) in node.meta:
            ret.append(node)

    start_node.visititems(collect_nodes)
    yield from iter(ret)
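
Finally, a container-wide search sketch, assuming `mc` is an opened MetadorContainer, `"example.schema"` is a hypothetical installed schema name, and `"/some/group"` is a hypothetical group path inside the container:

# all nodes (starting at the root) that carry a compatible metadata object:
for node in mc.metador.query("example.schema"):
    print(node.name, "->", node.meta["example.schema"])

# restrict the search to a subtree:
for node in mc.metador.query("example.schema", node=mc["/some/group"]):
    print(node.name)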