Coverage for src/somesy/codemeta/utils.py: 100%
50 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-08-10 14:33 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-08-10 14:33 +0000
1"""Helpers to work around issue with non-deterministic serialization."""
import importlib.resources
import json
import logging
from pathlib import Path
from tempfile import NamedTemporaryFile
from typing import Dict, Optional

import rdflib
import rdflib.compare

from .exec import cff_to_codemeta
log = logging.getLogger("somesy")

# Name of the bundled context file: the individual JSON-LD contexts were
# downloaded manually and combined into a single JSON array.
_CM_CONTEXT_FILE = "codemeta_context_2023-04-19.json"

# Parse the bundled context once at import time from this package's resources.
with importlib.resources.open_text(__package__, _CM_CONTEXT_FILE) as c:
    _CM_CONTEXT = json.loads(c.read())
24# expected URLs
25_codemeta_context = set(
26 [
27 "https://doi.org/10.5063/schema/codemeta-2.0",
28 "https://w3id.org/software-iodata",
29 "https://raw.githubusercontent.com/jantman/repostatus.org/"
30 "master/badges/latest/ontology.jsonld",
31 "https://schema.org",
32 "https://w3id.org/software-types",
33 ]
34)
37def _localize_codemetapy_context(json):
38 """Prevent rdflib external context resolution by embedding it from a file.
40 The context is required to parse the JSON-LD correctly, fields with no
41 context are ignored (not considered LD).
42 """
43 ctx = set(json.get("@context") or [])
44 if not ctx:
45 # probably empty or not codemeta, nothing to do
46 return json
48 if ctx != _codemeta_context:
49 msg = f"Unexpected codemeta context: {json['@context']}. Is this really from codemetapy?"
50 raise RuntimeError(msg)
52 ret = dict(json)
53 ret.update({"@context": _CM_CONTEXT})
55 return ret
def serialize_codemeta(cm: Dict) -> str:
    """Serialize a codemeta dict to a JSON string.

    The formatting options mirror those used by codemetapy: 4-space indent,
    sorted keys, and non-ASCII characters written as-is (not escaped).
    """
    serialized = json.dumps(cm, sort_keys=True, ensure_ascii=False, indent=4)
    return serialized
def _graph_from_cm_dict(graph_dict):
    """Build an rdflib graph from a codemeta dict produced by codemetapy.

    The dict's context is localized first, then the result is dumped to JSON
    and parsed as JSON-LD into a fresh graph.
    """
    localized = _localize_codemetapy_context(graph_dict)
    graph = rdflib.Graph()
    graph.parse(data=json.dumps(localized), format="json-ld")
    return graph
def _graph_from_cm_file(file: Path) -> Optional[rdflib.Graph]:
    """Return the codemeta graph loaded from a file, with localized context.

    Args:
        file: Path of the codemeta JSON file to load.

    Returns:
        The parsed graph, or `None` if `file` is not an existing file
        (to distinguish a missing file from an existing but empty one).
    """
    if not file.is_file():
        return None

    # Read explicitly as UTF-8 (same as the writer in update_codemeta_file)
    # instead of relying on the platform's locale encoding.
    with open(file, "r", encoding="utf-8") as f:
        graph_dict = json.load(f)
    return _graph_from_cm_dict(graph_dict)


# ----


def update_codemeta_file(cm_file: Path, cm_dict: Dict) -> bool:
    """Update the codemeta file with the graph in the dict, if it changed.

    The file is only rewritten when the graph currently stored in it is not
    isomorphic to the graph described by `cm_dict`; a missing file is treated
    as an empty graph.

    Args:
        cm_file: Path of the codemeta file to update.
        cm_dict: New codemeta content as a dict.

    Returns:
        True if the file was written, False if it was left untouched.
    """
    old = _graph_from_cm_file(cm_file)
    if old is None:
        # no file yet: compare against an empty graph
        old = rdflib.Graph()
    new = _graph_from_cm_dict(cm_dict)

    if rdflib.compare.isomorphic(old, new):
        return False

    # serialize_codemeta emits non-ASCII characters unescaped, so the encoding
    # must be pinned to UTF-8 rather than left to the locale default.
    with open(cm_file, "w", encoding="utf-8") as f:
        f.write(serialize_codemeta(cm_dict))
    return True
def cff_codemeta_tempfile(cff_file: Path):
    """Return a named temporary file holding the codemeta export of a citation file.

    The citation file is converted with `cff_to_codemeta`, and the result is
    written as UTF-8 encoded JSON. The still-open temporary file object is
    returned to the caller.
    """
    codemeta = cff_to_codemeta(cff_file)
    tmp = NamedTemporaryFile(prefix="cff_cm_", suffix=".json")
    payload = json.dumps(codemeta).encode("utf-8")
    tmp.write(payload)
    # flush is needed, or the content won't be readable yet
    tmp.flush()
    return tmp