Coverage for src/dev_utils/update_codemeta.py: 98%

54 statements  

« prev     ^ index     » next       coverage.py v7.2.3, created at 2023-04-26 10:13 +0000

1"""Shallow wrapper around codemetapy to use it with pre-commit.""" 

2import importlib.resources 

3import json 

4from pathlib import Path 

5from typing import List 

6 

7import rdflib 

8import rdflib.compare 

9import typer 

10from codemeta.codemeta import build 

11from codemeta.serializers.jsonld import serialize_to_jsonld 

12 

13# ---- 

14# Replicate basic codemetapy behavior (based on codemeta.codemeta.main module) 

15 

16 

def _gen_codemeta(sources, *, with_entrypoints: bool = False):
    """Run codemeta file generation using the codemetapy Python API.

    Only ``codemeta.json`` and ``pyproject.toml`` are considered as inputs.
    Any other entry in ``sources`` is ignored, and so is a supported entry
    that is not an existing file relative to the current working directory.

    Args:
        sources: Iterable of input file names requested by the caller.
        with_entrypoints: Passed through unchanged to ``build``.

    Returns:
        The result of ``serialize_to_jsonld`` for the built graph
        (a JSON-LD dict).
    """
    supported = ("codemeta.json", "pyproject.toml")
    requested = set(sources)
    # Walk the fixed tuple instead of iterating a set of strings, so that the
    # order of inputs passed to ``build`` is the same on every run and does
    # not depend on string hash randomization.
    eff_inputs = [
        name for name in supported if name in requested and Path(name).is_file()
    ]

    g, res, args, _ = build(
        inputsources=eff_inputs,
        output="json",
        with_entrypoints=with_entrypoints,
    )
    return serialize_to_jsonld(g, res, args)

37 

38 

39def _serialize_codemeta(cm) -> str: 

40 """Convert JSON Dict to str (using settings like codemetapy).""" 

41 # using settings like in codemetapy 

42 return json.dumps(cm, indent=4, ensure_ascii=False, sort_keys=True) 

43 

44 

45# ---- 

46# Helpers to work around issue with non-deterministic serialization 

47 

# expected URLs: the exact set of entries a codemeta "@context" must consist of
_codemeta_context = {
    "https://doi.org/10.5063/schema/codemeta-2.0",
    "https://w3id.org/software-iodata",
    "https://raw.githubusercontent.com/jantman/repostatus.org/"
    "master/badges/latest/ontology.jsonld",
    "https://schema.org",
    "https://w3id.org/software-types",
}

# assembled context (manually downloaded and combined in a JSON array)
_context_file = "codemeta_context_2023-04-19.json"

# Loaded once at import time from a resource file shipped inside this package.
with importlib.resources.open_text(__package__, _context_file) as c:
    cached_context = json.load(c)

65 

66 

67def _localize_codemeta_context(json): 

68 """Prevent rdflib external context resolution by adding it from a file.""" 

69 ctx = set(json.get("@context") or []) 

70 if not ctx: 

71 return json # probably empty or not codemeta, nothing to do 

72 if ctx != _codemeta_context: 

73 raise RuntimeError(f"Unexpected codemeta context: {json['@context']}") 

74 ret = dict(json) 

75 ret.update({"@context": cached_context}) 

76 return ret 

77 

78 

79# ---- 

80# Wrapper CLI app 

81 

app = typer.Typer()

# Target file argument: required, may be a file but must not be a directory.
# No existence check is requested here.
trg_arg = typer.Argument(
    ...,
    file_okay=True,
    dir_okay=False,
)

# Source file arguments: required, each must exist, be readable and be a
# file rather than a directory.
src_arg = typer.Argument(
    ...,
    exists=True,
    file_okay=True,
    dir_okay=False,
    readable=True,
)

93 

94 

@app.command(
    help="""
Create or update the target codemeta file (first argument)
by running codemetapy with all the other passed arguments.
If the output is the same as before, will keep file unchanged.
"""
)
def update_codemeta(
    target: Path = trg_arg,
    sources: List[str] = src_arg,
):
    """Entry point of CLI application.

    Runs codemetapy on the passed sources,
    compares resulting graph with target file (if it exists).

    Only writes to the output if the metadata is not equivalent.
    The equivalence is checked on graph level using `rdflib`.

    Args:
        target: Output file (usually `codemeta.json`)
        sources: Metadata input files (such as `pyproject.toml`)
    """
    # load old codemeta graph (if any); stays empty when there is no target yet
    old_metadata = rdflib.Graph()
    if target.is_file():
        # The file is written below as UTF-8 with unescaped non-ASCII
        # characters, so it must be read back as UTF-8 regardless of locale.
        with open(target, "r", encoding="utf-8") as f:
            dat = json.dumps(_localize_codemeta_context(json.load(f)))
        old_metadata.parse(data=dat, format="json-ld")

    # generate new codemeta graph
    cm = _gen_codemeta(sources)
    # text that is written to disk (keeps the original, non-localized context)
    original = _serialize_codemeta(cm)

    # only write result to file if the graph changed;
    # the comparison uses the variant with the locally cached context
    expanded = _serialize_codemeta(_localize_codemeta_context(cm))
    new_metadata = rdflib.Graph()
    new_metadata.parse(data=expanded, format="json-ld")
    if not rdflib.compare.isomorphic(old_metadata, new_metadata):
        typer.echo(f"Project metadata changed, writing {target} ...")
        with open(target, "w", encoding="utf-8") as f:
            f.write(original)

136 f.write(original)