Coverage for src/metador_core/schema/pg.py: 100%

67 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-11-02 09:33 +0000

1"""Schema plugin group.""" 

2 

3from __future__ import annotations 

4 

5from typing import Dict, List, Optional, Set, Type 

6 

7from ..plugin import interface as pg 

8from ..plugins import plugingroups 

9from .core import MetadataSchema, check_types, infer_parent 

10from .partial import PartialModel 

11from .plugins import PluginBase 

12from .plugins import PluginRef as AnyPluginRef 

13from .types import SemVerTuple 

14 

SCHEMA_GROUP_NAME = "schema"  # name of the schema plugin group (used as PGSchema.Plugin.name)

16 

17 

class SchemaPlugin(PluginBase):
    """Plugin info section used by schema plugins.

    Extends the generic plugin section with the schema-specific
    `auxiliary` flag.
    """

    auxiliary: bool = False
    """Whether the schema is auxiliary (default: False).

    Metadata objects based on an auxiliary schema cannot be attached to
    containers.

    This is intended for schema plugins that are too general or not
    self-contained, but could be useful in a larger context, e.g. as a parent
    schema or nested schema.
    """

31 

32 

class PGSchema(pg.PluginGroup[MetadataSchema]):
    """Interface to access installed schema plugins.

    All registered schema plugins can be used anywhere in a Metador container to
    annotate any group or dataset with metadata objects following that schema.

    If you don't want that, do not register the schema as a plugin, but just use
    the schema class as a normal Python dependency. Schemas that are not
    registered as plugins still must inherit from MetadataSchema, to ensure that
    all required methods are available and work as expected by the system.

    Unregistered schemas can be used as "abstract" parent schemas that cannot be
    instantiated in containers because they are too general to be useful, or for
    schemas that are not intended to be used on their own in the container, but
    model a meaningful metadata object that can be part of a larger schema.


    Guidelines for field definition:

    * Stick to the following types to construct your field annotation:
      - basic types: (`bool, int, float, str`)
      - basic hints from `typing`: `Optional, Literal, Union, Set, List, Tuple`
      - default pydantic types (such as `AnyHttpUrl`)
      - default classes supported by pydantic (e.g. `enum.Enum`, `datetime`, etc.)
      - constrained types defined using the `phantom` package
      - valid schema classes (subclasses of `MetadataSchema`)

    * `Optional` is for values that are semantically *missing*.
      You must not assume that a `None` value represents anything else than that.

    * Prefer `Set` over `List` when order is irrelevant and duplicates are not needed

    * Avoid using plain `Dict`, always define a schema instead if you know the keys,
      unless you really need to "pass through" whatever is given, which is usually
      not necessary for schemas that you design from scratch.

    * Prefer types from `phantom` over using pydantic `Field` settings for expressing
      simple value constraints (e.g. minimal/maximal value or collection length, etc.),
      because `phantom` types can be subclassed to narrow them down.

    * In general, avoid using `Field` at all, except for defining an `alias` for
      attributes that are not valid as Python variables (e.g. `@id` or `$schema`).

    * When using `Field`, make sure to annotate it with `typing_extensions.Annotated`,
      instead of assigning the `Field` object to the field name.


    Rules for schema versioning:

    All schemas must be direct or indirect subclasses of `MetadataSchema`.

    Semantic versioning (MAJOR, MINOR, PATCH) is to be followed.
    Bumping a version component means incrementing it and resetting the
    later ones to 0. When updating a schema, you must bump:

    * PATCH, if you do not modify the set of parsable instances,

    * MINOR, if your changes strictly increase parsable instances,

    * MAJOR otherwise, i.e. some older metadata might not be valid anymore.

    If you update a nested or inherited schema to a version
    with higher X (MAJOR, MINOR or PATCH), the version
    of your schema must be bumped in X as well.


    Rules for schema subclassing:

    A child schema that only extends a parent with new fields is safe.
    To schemas that redefine parent fields additional rules apply:

    EACH instance of a schema MUST also be parsable by the parent schema

    This means that a child schema may only override parent fields
    with more specific types, i.e., only RESTRICT the set of acceptable
    values compared to the parent field (safe examples include
    adding new or narrowing existing bounds and constraints,
    or excluding some values that are allowed by the parent schema).

    As automatically verifying this in full generality is not feasible, but the
    ability to "restrict" fields is very much needed in practical use, the
    schema developer MUST create suitable representative test cases that check
    whether this property is satisfied.

    Try expressing field value restrictions by:

    * removing alternatives from a `Union`
    * using a subclass of a schema or `phantom` type that was used in the parent

    These can be automatically checked most of the time.
    """

    class Plugin:
        name = SCHEMA_GROUP_NAME
        version = (0, 1, 0)
        requires = [plugingroups.Plugin.ref()]
        plugin_class = MetadataSchema
        plugin_info_class = SchemaPlugin

    def __post_init__(self):
        """Create the (initially empty) lookup tables kept by this plugin group."""
        # schema class -> result of infer_parent for it (None if there is no
        # parent schema); filled in by plugin_deps
        self._parent_schema: Dict[
            Type[MetadataSchema], Optional[Type[MetadataSchema]]
        ] = {}
        # plugin ref -> refs from the most distant ancestor down to the plugin itself
        self._parents: Dict[AnyPluginRef, List[AnyPluginRef]] = {}
        # plugin ref -> refs of all plugins that have it as an ancestor
        self._children: Dict[AnyPluginRef, Set[AnyPluginRef]] = {}

        # used schemas inside schemas
        self._field_types: Dict[
            Type[MetadataSchema], Dict[str, Set[Type[MetadataSchema]]]
        ] = {}
        self._subschemas: Dict[MetadataSchema, Set[MetadataSchema]] = {}

        # partial schema classes
        self._partials: Dict[MetadataSchema, PartialModel] = {}
        self._forwardrefs: Dict[str, MetadataSchema] = {}

    def plugin_deps(self, plugin) -> Set[AnyPluginRef]:
        """Return the refs of plugins that `plugin` depends on.

        As a side effect, the parent schema inferred for `plugin` is stored
        in `self._parent_schema`.

        Returns:
            A one-element set with the ref of the parent schema plugin, or an
            empty set if no parent schema was inferred.
        """
        parent_cls = infer_parent(plugin)
        self._parent_schema[plugin] = parent_cls
        if not parent_cls:
            return set()

        # make sure a parent schema plugin is initialized before the child
        parent_info = parent_cls.Plugin
        return {self.PluginRef(name=parent_info.name, version=parent_info.version)}

    def check_plugin(self, name: str, plugin: Type[MetadataSchema]):
        """Run `check_types` on the schema to validate its fields and field overrides."""
        check_types(plugin)

    def _compute_parent_path(self, plugin: Type[MetadataSchema]) -> List[AnyPluginRef]:
        """Return the plugin refs from the most distant ancestor down to `plugin`.

        The ref of `plugin` itself is always the last element of the result.
        """
        lineage = [plugin.Plugin.ref()]
        ancestor = self._parent_schema[plugin]
        while ancestor is not None:
            ancestor_ref = ancestor.Plugin.ref()
            lineage.append(ancestor_ref)
            # continue the walk from the class stored in this group under that ref
            loaded_cls = self._get_unsafe(ancestor_ref.name, ancestor_ref.version)
            ancestor = self._parent_schema[loaded_cls]

        # collected child-first, but the path is reported ancestor-first
        lineage.reverse()
        return lineage

    def init_plugin(self, plugin):
        """Record the parent path of `plugin` and add it as child of its ancestors."""
        own_ref = plugin.Plugin.ref()

        # pre-compute parent schema path
        lineage = self._compute_parent_path(plugin)
        self._parents[own_ref] = lineage
        self._children.setdefault(own_ref, set())

        # collect children schema set for all parents (last entry is the plugin itself)
        for ancestor_ref in lineage[:-1]:
            self._children[ancestor_ref].add(own_ref)

    # ----

    def _loaded_ref(self, schema, version: Optional[SemVerTuple]) -> AnyPluginRef:
        """Build the plugin ref for the given schema/version and ensure it is loaded."""
        name, vers = pg.plugin_args(schema, version, require_version=True)
        ref = self.PluginRef(name=name, version=vers)
        self._ensure_is_loaded(ref)
        return ref

    def parent_path(
        self, schema, version: Optional[SemVerTuple] = None
    ) -> List[AnyPluginRef]:
        """Get sequence of registered parent schema plugins leading to the given schema.

        This sequence can be a subset of the parent sequences in the actual class
        hierarchy (not every subclass must be registered as a plugin).

        The returned list is a copy, so modifying it does not affect the group.
        """
        return list(self._parents[self._loaded_ref(schema, version)])

    def children(
        self, schema, version: Optional[SemVerTuple] = None
    ) -> Set[AnyPluginRef]:
        """Get set of plugin refs of registered (strict) child schemas.

        This contains every registered schema that has the given schema in its
        parent path, not only the direct children. The returned set is a copy.
        """
        return set(self._children[self._loaded_ref(schema, version)])

210 

211 

# Resolve the postponed (string) annotations of SchemaPlugin at import time.
# NOTE(review): presumably the pydantic model method inherited via PluginBase — confirm.
SchemaPlugin.update_forward_refs()