Skip to content

jsonschema

Hacks to improve pydantic JSON Schema generation.

KEY_SCHEMA_DEFS module-attribute

# JSON Schema key under which subschema definitions are stored.
KEY_SCHEMA_DEFS = '$defs'

JSON schema key to store subschema definitions.

KEY_SCHEMA_HASH module-attribute

# Custom (non-standard) schema key under which the schema hashsum is stored.
KEY_SCHEMA_HASH = '$jsonschema_hash'

Custom key to store schema hashsum.

JSONSCHEMA_STRIP module-attribute

# Keys that are removed from a schema before its hashsum is computed.
JSONSCHEMA_STRIP = {
    "title",
    "description",
    "examples",
    "$comment",
    "readOnly",
    "writeOnly",
    "deprecated",
    "$id",
    # same string as the value of KEY_PYD_DEFS (pydantic's definitions key)
    "definitions",
    KEY_SCHEMA_DEFS,
    KEY_SCHEMA_HASH,
}

Fields to be removed for JSON Schema hashsum computation.

KEY_PYD_DEFS module-attribute

# Key name under which pydantic stores subschema definitions.
KEY_PYD_DEFS = 'definitions'

Key name under which pydantic stores subschema definitions.

REF_PREFIX module-attribute

# Prefix of `$ref` strings that point into pydantic's definitions section
# (evaluates to '#/definitions/').
REF_PREFIX = f'#/{KEY_PYD_DEFS}/'

Default `$ref` prefix used by pydantic.

jsonschema_id

jsonschema_id(schema: JSONType)

Compute robust semantic schema identifier.

A schema identifier is based on the schema plugin name + version and its JSON Schema representation, which includes all parent and nested schemas.

Source code in src/metador_core/schema/jsonschema.py
78
79
80
81
82
83
84
def jsonschema_id(schema: JSONType):
    """Return the SHA-256 based identifier of a JSON Schema.

    The schema is passed through `clean_jsonschema`, the result is serialized
    with `normalized_json`, and that serialization is handed to `hashsum`
    with the `"sha256"` algorithm.
    """
    cleaned = clean_jsonschema(schema)
    canonical = normalized_json(cleaned)
    return hashsum(canonical, "sha256")

lift_nested_defs

lift_nested_defs(schema: JSONObject)

Flatten nested $defs ($defs -> key -> $defs) in-place.

Source code in src/metador_core/schema/jsonschema.py
90
91
92
93
94
95
96
97
98
99
def lift_nested_defs(schema: JSONObject):
    """Flatten nested $defs ($defs -> key -> $defs) in-place.

    Every `$defs` section found inside an entry of the top-level `$defs` is
    removed from that entry and merged into the top-level `$defs`. Entries are
    processed recursively first, so arbitrarily deep nesting is flattened.

    On a name clash the lifted (nested) definition replaces the outer one.
    Subschemas that are not JSON objects (e.g. boolean schemas) cannot carry
    definitions and are left untouched.

    Arguments:
        schema: The JSON object containing the schema (modified in-place).
    """
    mydefs = schema.get(KEY_SCHEMA_DEFS)
    if not mydefs:
        return

    lifted = []
    for subschema in mydefs.values():
        if not isinstance(subschema, dict):
            continue  # nothing to lift out of a non-object schema
        lift_nested_defs(subschema)
        if nested := subschema.pop(KEY_SCHEMA_DEFS, None):
            lifted.append(nested)
    # merge only after the loop: mydefs must not change size while iterating
    for nested in lifted:
        mydefs.update(nested)

merge_nested_defs

merge_nested_defs(schema: JSONObject)

Merge definitions in-place.

Source code in src/metador_core/schema/jsonschema.py
109
110
111
112
113
114
115
116
117
118
def merge_nested_defs(schema: JSONObject):
    """Move the pydantic definitions section into `$defs`, in-place.

    The section stored under `KEY_PYD_DEFS` is removed from `schema` and its
    entries are added to the section under `KEY_SCHEMA_DEFS` (created if it is
    missing or empty). Entries already present in `$defs` are kept on a name
    clash.
    """
    pyd_defs = schema.pop(KEY_PYD_DEFS, None)
    if not pyd_defs:
        return

    target = schema.get(KEY_SCHEMA_DEFS)
    if not target:
        target = schema[KEY_SCHEMA_DEFS] = {}

    # two-step merge: first let the existing entries override the incoming
    # ones, then copy the combined result back into the `$defs` section
    pyd_defs.update(target)
    target.update(pyd_defs)

collect_defmap

collect_defmap(defs: JSONObject)

Compute dict mapping current name in $defs to new name based on metador_hash.

Source code in src/metador_core/schema/jsonschema.py
124
125
126
127
128
129
130
131
132
133
134
def collect_defmap(defs: JSONObject):
    """Build the rename table (current name -> new name) for entries of `$defs`.

    A subschema that carries the `KEY_SCHEMA_HASH` key is renamed to that
    value with leading and trailing slashes stripped; every other entry keeps
    its current name.
    """
    return {
        name: (
            subschema[KEY_SCHEMA_HASH].strip("/")
            if KEY_SCHEMA_HASH in subschema
            else name
        )
        for name, subschema in defs.items()
    }

map_ref

map_ref(defmap, refstr: str)

Update the $ref string based on defmap.

Will replace #/definitions/orig with #/$defs/mapped.

Source code in src/metador_core/schema/jsonschema.py
137
138
139
140
141
142
143
144
145
146
147
148
def map_ref(defmap, refstr: str):
    """Translate a single `$ref` string according to `defmap`.

    A reference of the form `#/definitions/orig` becomes `#/$defs/mapped`
    when `defmap` has a non-empty entry for `orig`. Any other reference is
    returned unchanged.
    """
    if not refstr.startswith(REF_PREFIX):
        return refstr

    old_name = refstr[len(REF_PREFIX):]
    new_name = defmap.get(old_name)
    if not new_name:
        return refstr
    return f"#/{KEY_SCHEMA_DEFS}/{new_name}"

update_refs

update_refs(defmap, obj)

Recursively update $ref in obj based on defmap.

Source code in src/metador_core/schema/jsonschema.py
151
152
153
154
155
156
157
158
159
160
161
162
163
def update_refs(defmap, obj):
    """Recursively update `$ref` in `obj` based on defmap.

    Returns a new structure; the lists and dicts of the input are rebuilt, not
    modified. Scalars are returned as-is. The value of every `"$ref"` key is
    passed through `map_ref`; all other values are processed recursively.

    Arguments:
        defmap: Mapping from current definition names to new names.
        obj: A JSON-compatible value (None, bool, int, float, str, list, dict).

    Raises:
        ValueError: If `obj` (or a nested value) is not of a JSON type.
    """
    if obj is None or isinstance(obj, (bool, int, float, str)):
        return obj
    if isinstance(obj, list):
        return [update_refs(defmap, item) for item in obj]
    if isinstance(obj, dict):
        return {
            key: (map_ref(defmap, val) if key == "$ref" else update_refs(defmap, val))
            for key, val in obj.items()
        }
    raise ValueError(f"Object {obj} not of a JSON type: {type(obj)}")

remap_refs

remap_refs(schema)

Remap the $refs to use metador_hash-based keys.

Input must be a completed schema with a global $defs section that all nested entities use for local references.

Source code in src/metador_core/schema/jsonschema.py
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
def remap_refs(schema):
    """Remap the $refs to use metador_hash-based keys.

    Input must be a completed schema with a global `$defs` section
    that all nested entities use for local references.

    The schema is modified in-place: references are rewritten via
    `update_refs` and the entries of `$defs` are renamed according to
    `collect_defmap`. A missing or empty `$defs` section leaves the rest of
    the schema untouched (an empty `$defs` key is dropped).

    Returns:
        The same `schema` object, on every code path.
    """
    defs = schema.pop(KEY_SCHEMA_DEFS, None)
    if not defs:  # nothing to do
        return schema

    # get name map, old -> new
    defmap = collect_defmap(defs)
    # update refs inside the definitions and in the remaining schema
    defs.update(update_refs(defmap, defs))
    schema.update(update_refs(defmap, schema))
    # re-attach the definitions under their new names
    schema[KEY_SCHEMA_DEFS] = {defmap[k]: v for k, v in defs.items()}
    return schema

schema_of

schema_of(model: Type[BaseModel], *args, **kwargs)

Return JSON Schema for a model.

Improved version of pydantic.schema_of, returns result in $defs normal form, with $ref pointing to the model.

Source code in src/metador_core/schema/jsonschema.py
194
195
196
197
198
199
200
201
202
203
204
def schema_of(model: Type[BaseModel], *args, **kwargs):
    """Return JSON Schema for a model.

    Improved version of `pydantic.schema_of`, returns result
    in $defs normal form, with $ref pointing to the model.

    Arguments:
        model: The pydantic model to generate the schema for.
        *args: Passed through to `pyd_schema_of`.
        **kwargs: Passed through to `pyd_schema_of`.

    Returns:
        The generated schema, with the top-level `title` removed and
        `fixup_jsonschema` applied.
    """
    schema = pyd_schema_of(model, *args, **kwargs)
    # drop the top-level title of the generated wrapper schema
    schema.pop("title", None)
    fixup_jsonschema(schema)
    return schema

schemas

schemas(models: Iterable[Type[BaseModel]], *args, **kwargs)

Return JSON Schema for multiple models.

Improved version of pydantic.schema.schema, returns result in $defs normal form.

Source code in src/metador_core/schema/jsonschema.py
207
208
209
210
211
212
213
214
215
def schemas(models: Iterable[Type[BaseModel]], *args, **kwargs):
    """Generate a combined JSON Schema for several models.

    Wraps `pydantic.schema.schema`: the models are handed over as a tuple,
    remaining arguments are passed through, and the result is post-processed
    with `fixup_jsonschema` (yielding $defs normal form) before it is returned.
    """
    model_tuple = tuple(models)
    combined = pyd_schemas(model_tuple, *args, **kwargs)
    fixup_jsonschema(combined)
    return combined

split_model_inheritance

split_model_inheritance(
    schema: JSONObject, model: Type[BaseModel]
)

Decompose a model into an allOf combination with a parent model.

This is ugly because pydantic does in-place wrangling and caching, and we need to hack around it.

Source code in src/metador_core/schema/jsonschema.py
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
def split_model_inheritance(schema: JSONObject, model: Type[BaseModel]):
    """Decompose a model into an allOf combination with a parent model.

    This is ugly because pydantic does in-place wrangling and caching,
    and we need to hack around it.

    The `properties` and `required` sections of `schema` are reduced to the
    fields reported by `updated_fields(model)` and placed, together with a
    `$ref` to the parent model's schema, into an `allOf` list. Definitions of
    the parent schema are merged into the `$defs` section. `schema` is
    rewritten in-place.

    Arguments:
        schema: The JSON object containing the schema of `model`.
        model: The pydantic model whose direct parent is split off.
    """
    # NOTE: important - we assume to get the $defs standard form,
    # i.e. the parent schema must have a top-level "$ref" to the parent model
    base_schema = model.__base__.schema()  # type: ignore

    # compute filtered properties / required section
    # (a schema without these sections is treated like one with empty sections)
    schema_new = dict(schema)
    props = schema_new.pop("properties", None) or {}
    required = schema_new.pop("required", None) or []

    own_fields = updated_fields(model)
    props_new = {k: v for k, v in props.items() if k in own_fields}
    required_new = [k for k in required if k in props_new]
    # only non-empty sections end up in the remainder schema
    schema_this = {
        k: v
        for k, v in (("properties", props_new), ("required", required_new))
        if v
    }

    # construct new schema as combination of base schema and remainder schema
    schema_new["allOf"] = [{"$ref": base_schema["$ref"]}, schema_this]

    # we need to add the definitions to/from the base schema as well
    schema_new.setdefault(KEY_SCHEMA_DEFS, {}).update(
        base_schema.get(KEY_SCHEMA_DEFS, {})
    )

    # replace the contents of the caller's dict, keeping its identity
    schema.clear()
    schema.update(schema_new)

finalize_schema_extra

finalize_schema_extra(
    schema: JSONObject,
    model: Type[BaseModel],
    *,
    base_model: Type[BaseModel] = None
) -> None

Perform custom JSON Schema postprocessing.

To be called as the last action in the custom schema_extra method of the base model in use.

Parameters:

Name Type Description Default
schema JSONObject

The JSON object containing the schema

required
model Type[BaseModel]

The underlying pydantic model

required
base_model Type[BaseModel]

The custom base model that this function is called for.

None
Source code in src/metador_core/schema/jsonschema.py
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
def finalize_schema_extra(
    schema: JSONObject,
    model: Type[BaseModel],
    *,
    base_model: Type[BaseModel] = None,
) -> None:
    """Apply the custom JSON Schema postprocessing steps to `schema`.

    Intended to be the final call inside the custom `schema_extra` method of
    the base model in use.

    Arguments:
        schema: The JSON object containing the schema (modified in-place).
        model: The underlying pydantic model.
        base_model: The custom base model that this function is called for;
            falls back to `BaseModel` when not given.
    """
    root_model = base_model or BaseModel
    assert issubclass(model, root_model)

    # declare the JSON Schema standard the schema adheres to
    schema["$schema"] = "https://json-schema.org/draft/2020-12/schema"

    # a model that does not derive directly from the root model repeats the
    # fields of its parent class; split those off into an allOf combination
    has_intermediate_parent = model.__base__ is not root_model
    if has_intermediate_parent:
        split_model_inheritance(schema, model)

    # the hashsum must come after all steps above, as it depends on their result
    schema_hash = jsonschema_id(schema)
    schema[KEY_SCHEMA_HASH] = f"{schema_hash}"
    fixup_jsonschema(schema)