Coverage for src/metador_core/schema/pg.py: 100%
67 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-11-02 09:33 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-11-02 09:33 +0000
1"""Schema plugin group."""
3from __future__ import annotations
5from typing import Dict, List, Optional, Set, Type
7from ..plugin import interface as pg
8from ..plugins import plugingroups
9from .core import MetadataSchema, check_types, infer_parent
10from .partial import PartialModel
11from .plugins import PluginBase
12from .plugins import PluginRef as AnyPluginRef
13from .types import SemVerTuple
# Used below as the `name` of the schema plugin group (see `PGSchema.Plugin`).
SCHEMA_GROUP_NAME = "schema"  # name of schema plugin group
class SchemaPlugin(PluginBase):
    """Schema-specific Plugin section."""

    auxiliary: bool = False
    """If set to True, the schema is considered auxiliary.

    The consequence is that metadata objects based on this schema cannot be
    attached to containers.

    This is intended for schema plugins that are too general or not
    self-contained, but could be useful in a larger context, e.g. as a parent
    schema or nested schema.
    """
class PGSchema(pg.PluginGroup[MetadataSchema]):
    """Interface to access installed schema plugins.

    All registered schema plugins can be used anywhere in a Metador container to
    annotate any group or dataset with metadata objects following that schema.

    If you don't want that, do not register the schema as a plugin, but just use
    the schema class as a normal Python dependency. Schemas that are not
    registered as plugins still must inherit from MetadataSchema, to ensure that
    all required methods are available and work as expected by the system.

    Unregistered schemas can be used as "abstract" parent schemas that cannot be
    instantiated in containers because they are too general to be useful, or for
    schemas that are not intended to be used on their own in the container, but
    model a meaningful metadata object that can be part of a larger schema.


    Guidelines for field definition:

    * Stick to the following types to construct your field annotation:
      - basic types: (`bool, int, float, str`)
      - basic hints from `typing`: `Optional, Literal, Union, Set, List, Tuple`
      - default pydantic types (such as `AnyHttpUrl`)
      - default classes supported by pydantic (e.g. `enum.Enum`, `datetime`, etc.)
      - constrained types defined using the `phantom` package
      - valid schema classes (subclasses of `MetadataSchema`)

    * `Optional` is for values that are semantically *missing*.
      You must not assume that a `None` value represents anything else than that.

    * Prefer `Set` over `List` when order is irrelevant and duplicates are not needed

    * Avoid using plain `Dict`, always define a schema instead if you know the keys,
      unless you really need to "pass through" whatever is given, which is usually
      not necessary for schemas that you design from scratch.

    * Prefer types from `phantom` over using pydantic `Field` settings for expressing
      simple value constraints (e.g. minimal/maximal value or collection length, etc.),
      because `phantom` types can be subclassed to narrow them down.

    * In general, avoid using `Field` at all, except for defining an `alias` for
      attributes that are not valid as Python variables (e.g. `@id` or `$schema`).

    * When using `Field`, make sure to annotate it with `typing_extensions.Annotated`,
      instead of assigning the `Field` object to the field name.


    Rules for schema versioning:

    All schemas must be a direct or indirect subclass of `MetadataSchema`.

    Semantic versioning (MAJOR, MINOR, PATCH) is to be followed.
    Bumping a version component means incrementing it and resetting the
    later ones to 0. When updating a schema, you must bump:

    * PATCH, if you do not modify the set of parsable instances,

    * MINOR, if your changes strictly increase parsable instances,

    * MAJOR otherwise, i.e. some older metadata might not be valid anymore.

    If you update a nested or inherited schema to a version
    with higher X (MAJOR, MINOR or PATCH), the version
    of your schema must be bumped in X as well.


    Rules for schema subclassing:

    A child schema that only extends a parent with new fields is safe.
    For schemas that redefine parent fields, additional rules apply:

    EACH instance of a schema MUST also be parsable by the parent schema.

    This means that a child schema may only override parent fields
    with more specific types, i.e., only RESTRICT the set of acceptable
    values compared to the parent field (safe examples include
    adding new or narrowing existing bounds and constraints,
    or excluding some values that are allowed by the parent schema).

    As automatically verifying this in full generality is not feasible, but the
    ability to "restrict" fields is very much needed in practical use, the
    schema developer MUST create suitable representative test cases that check
    whether this property is satisfied.

    Try expressing field value restrictions by:

    * removing alternatives from a `Union`
    * using a subclass of a schema or `phantom` type that was used in the parent

    These can be automatically checked most of the time.
    """

    class Plugin:
        name = SCHEMA_GROUP_NAME
        version = (0, 1, 0)
        requires = [plugingroups.Plugin.ref()]
        plugin_class = MetadataSchema
        plugin_info_class = SchemaPlugin

    def __post_init__(self):
        """Set up the (initially empty) lookup tables of this plugin group."""
        # schema class -> result of infer_parent (filled in by plugin_deps)
        self._parent_schema: Dict[
            Type[MetadataSchema], Optional[Type[MetadataSchema]]
        ] = {}
        # plugin ref -> refs of its parent plugins, ending with the plugin itself
        self._parents: Dict[AnyPluginRef, List[AnyPluginRef]] = {}  # base plugins
        # plugin ref -> refs of all plugins having it on their parent path
        self._children: Dict[AnyPluginRef, Set[AnyPluginRef]] = {}  # subclass plugins

        # used schemas inside schemas
        self._field_types: Dict[
            Type[MetadataSchema], Dict[str, Set[Type[MetadataSchema]]]
        ] = {}
        self._subschemas: Dict[MetadataSchema, Set[MetadataSchema]] = {}

        # partial schema classes
        self._partials: Dict[MetadataSchema, PartialModel] = {}
        self._forwardrefs: Dict[str, MetadataSchema] = {}

    def plugin_deps(self, plugin) -> Set[AnyPluginRef]:
        """Return the plugin refs that `plugin` depends on.

        The parent schema inferred for `plugin` is remembered as a side effect,
        because it is needed later to compute the parent path of the plugin.

        Returns:
            A set with the ref of the parent schema plugin, or an empty set
            if no parent schema was inferred.
        """
        parent = infer_parent(plugin)
        self._parent_schema[plugin] = parent
        if not parent:
            return set()

        # make sure a parent schema plugin is initialized before the child
        info = parent.Plugin
        return {self.PluginRef(name=info.name, version=info.version)}

    def check_plugin(self, name: str, plugin: Type[MetadataSchema]):
        """Run the schema-specific checks on a plugin class."""
        check_types(plugin)  # ensure that (overrides of) fields are valid

    def _compute_parent_path(self, plugin: Type[MetadataSchema]) -> List[AnyPluginRef]:
        """Return the refs of all parent plugins, followed by the ref of `plugin`.

        The chain of recorded parent schemas is followed upwards until a schema
        without a parent is reached, so the returned list starts with the
        top-most parent and ends with `plugin` itself.
        """
        path = [plugin.Plugin.ref()]
        parent = self._parent_schema[plugin]
        while parent is not None:
            p_ref = parent.Plugin.ref()
            path.append(p_ref)
            # the parent table is looked up with the class returned by the group
            # for that ref, not with the inferred parent class object itself
            curr = self._get_unsafe(p_ref.name, p_ref.version)
            parent = self._parent_schema[curr]

        # refs were collected child-first, but the path is stored parent-first
        path.reverse()
        return path

    def init_plugin(self, plugin):
        """Register `plugin` in the parent path and children lookup tables."""
        # pre-compute parent schema path
        ref = plugin.Plugin.ref()
        path = self._compute_parent_path(plugin)
        self._parents[ref] = path
        self._children.setdefault(ref, set())

        # collect children schema set for all parents
        # (the last path entry is the plugin itself and is skipped)
        for parent in path[:-1]:
            self._children[parent].add(ref)

    # ----

    def _loaded_schema_ref(
        self, schema, version: Optional[SemVerTuple] = None
    ) -> AnyPluginRef:
        """Build the plugin ref for the given schema and ensure it is loaded."""
        name, vers = pg.plugin_args(schema, version, require_version=True)
        ref = self.PluginRef(name=name, version=vers)
        self._ensure_is_loaded(ref)
        return ref

    def parent_path(
        self, schema, version: Optional[SemVerTuple] = None
    ) -> List[AnyPluginRef]:
        """Get sequence of registered parent schema plugins leading to the given schema.

        This sequence can be a subset of the parent sequences in the actual class
        hierarchy (not every subclass must be registered as a plugin).

        The returned list is a copy and can be modified freely by the caller.
        """
        ref = self._loaded_schema_ref(schema, version)
        return list(self._parents[ref])

    def children(
        self, schema, version: Optional[SemVerTuple] = None
    ) -> Set[AnyPluginRef]:
        """Get set of plugin refs of registered (strict) child schemas.

        The returned set is a copy and can be modified freely by the caller.
        """
        ref = self._loaded_schema_ref(schema, version)
        return set(self._children[ref])
# Annotations in this module are postponed (`from __future__ import annotations`),
# so they are resolved explicitly here, after all names are defined.
# NOTE(review): presumably pydantic's `update_forward_refs` inherited via
# `PluginBase` -- confirm.
SchemaPlugin.update_forward_refs()