Coverage for src/somesy/codemeta/utils.py: 100%

50 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-08-10 14:33 +0000

1"""Helpers to work around issue with non-deterministic serialization.""" 

2 

3import importlib.resources 

4import json 

5import logging 

6from pathlib import Path 

7from tempfile import NamedTemporaryFile 

8from typing import Dict 

9 

10import rdflib 

11import rdflib.compare 

12 

13from .exec import cff_to_codemeta 

14 

# logger registered under the name "somesy"
log = logging.getLogger("somesy")

# assembled context (manually downloaded and combined in a JSON array);
# the file name is resolved relative to this package when it is opened below
_CM_CONTEXT_FILE = "codemeta_context_2023-04-19.json"

# load codemeta context once, at import time, from the resource file that
# lives inside this package; the parsed JSON is kept in _CM_CONTEXT
with importlib.resources.open_text(__package__, _CM_CONTEXT_FILE) as c:
    _CM_CONTEXT = json.load(c)

23 

# expected URLs: the exact set of context entries a document must declare in
# its "@context" before the bundled context is substituted for them
_codemeta_context = {
    "https://doi.org/10.5063/schema/codemeta-2.0",
    "https://w3id.org/software-iodata",
    # one URL, split over two adjacent literals only to keep the line short
    "https://raw.githubusercontent.com/jantman/repostatus.org/"
    "master/badges/latest/ontology.jsonld",
    "https://schema.org",
    "https://w3id.org/software-types",
}

35 

36 

def _localize_codemetapy_context(json):
    """Prevent rdflib external context resolution by embedding it from a file.

    The context is required to parse the JSON-LD correctly, fields with no
    context are ignored (not considered LD).

    Args:
        json: Codemeta JSON-LD document as a dict. Note that the parameter
            shadows the `json` module inside this function (the module is
            not needed here).

    Returns:
        The very same object if it has no (or an empty) `@context`, otherwise
        a shallow copy whose `@context` is replaced by `_CM_CONTEXT`.

    Raises:
        RuntimeError: If a non-empty `@context` is present that is not exactly
            the set of URLs in `_codemeta_context`. This includes contexts
            that cannot be turned into a set at all (inline context objects,
            non-iterable values).
    """
    declared = json.get("@context") or []
    if not declared:
        # probably empty or not codemeta, nothing to do
        return json

    if isinstance(declared, str):
        # a lone context URL: treat it as one entry instead of letting set()
        # split the string into single characters
        declared = [declared]

    try:
        ctx = set(declared)
    except TypeError:
        # unhashable entries (e.g. inline context dicts) or a non-iterable
        # value: definitely not the expected list of URLs
        ctx = None

    if ctx is None or ctx != _codemeta_context:
        msg = f"Unexpected codemeta context: {json['@context']}. Is this really from codemetapy?"
        raise RuntimeError(msg)

    # shallow copy, so the caller's dict keeps its original context
    ret = dict(json)
    ret["@context"] = _CM_CONTEXT
    return ret

56 

57 

def serialize_codemeta(cm: Dict) -> str:
    """Render a codemeta dict as a JSON string, formatted the way codemetapy does.

    Output is indented by four spaces, keys are sorted and non-ASCII
    characters are emitted as-is instead of being escaped.
    """
    # using settings like in codemetapy
    codemetapy_style = {"indent": 4, "ensure_ascii": False, "sort_keys": True}
    return json.dumps(cm, **codemetapy_style)

62 

63 

def _graph_from_cm_dict(graph_dict):
    """Build an rdflib graph from a codemeta dict produced by codemetapy.

    The dict is first passed through `_localize_codemetapy_context`, then
    dumped to a JSON string and parsed by rdflib as JSON-LD.
    """
    localized_doc = _localize_codemetapy_context(graph_dict)
    graph = rdflib.Graph()
    graph.parse(data=json.dumps(localized_doc), format="json-ld")
    return graph

70 

71 

def _graph_from_cm_file(file: Path) -> "rdflib.Graph | None":
    """Returns loaded codemeta with localized context.

    If file does not exist, returns `None` (to distinguish from existing but empty).

    Args:
        file: Path of the codemeta JSON file to load.

    Returns:
        The graph built by `_graph_from_cm_dict` from the file's JSON content,
        or `None` if `file` is not an existing regular file.
    """
    if not file.is_file():
        return None

    # JSON text is UTF-8; do not fall back to the platform's locale encoding
    with open(file, "r", encoding="utf-8") as f:
        graph_dict = json.load(f)
    return _graph_from_cm_dict(graph_dict)


# ----


def update_codemeta_file(cm_file: Path, cm_dict: Dict) -> bool:
    """Update codemeta file with graph in dict if it changed.

    The graph stored in the file and the graph of `cm_dict` are compared with
    `rdflib.compare.isomorphic`; the file is only rewritten if they differ.
    A missing file is compared as an empty graph.

    Args:
        cm_file: Path of the codemeta file to update.
        cm_dict: New codemeta content as a dict.

    Returns True if the file update happened.
    """
    old = _graph_from_cm_file(cm_file) or rdflib.Graph()
    new = _graph_from_cm_dict(cm_dict)

    if rdflib.compare.isomorphic(old, new):
        return False

    # serialize_codemeta() leaves non-ASCII characters unescaped, so the
    # encoding must be fixed explicitly (and match the one used for reading)
    with open(cm_file, "w", encoding="utf-8") as f:
        f.write(serialize_codemeta(cm_dict))
    return True

99 

100 

def cff_codemeta_tempfile(cff_file: Path):
    """Export a citation file to codemeta and hand it back as a named temp file.

    The result of `cff_to_codemeta` is dumped as UTF-8 encoded JSON into a
    `NamedTemporaryFile` (prefix ``cff_cm_``, suffix ``.json``). The still
    open file object is returned; it is created with the default settings of
    `NamedTemporaryFile`, so the file goes away once that object is closed.
    """
    codemeta = cff_to_codemeta(cff_file)
    tmp = NamedTemporaryFile(prefix="cff_cm_", suffix=".json")
    payload = json.dumps(codemeta).encode("utf-8")
    tmp.write(payload)
    # needed, or it won't be readable yet
    tmp.flush()
    return tmp