Coverage for src/somesy/codemeta/utils.py: 85%

47 statements  

« prev     ^ index     » next       coverage.py v7.6.0, created at 2025-03-10 14:56 +0000

1"""Utility functions for codemeta.json.""" 

2 

3import copy 

4import json 

5import logging 

6from pathlib import Path 

7 

8from pyld import jsonld 

9 

10logger = logging.getLogger(__name__) 

11 

12V2_DOI = "https://doi.org/10.5063/schema/codemeta-2.0" 

13 

14 

def validate_codemeta(codemeta: dict) -> list:
    """Validate the codemeta.json file against the codemeta.jsonld schema.

    Three checks are performed:

    1. The required keys ``@context``, ``@type`` and ``name`` are present.
    2. The ``@context`` value references the Codemeta v2 DOI (``V2_DOI``).
    3. The document is expanded and compacted with the context of the
       ``schema-2.jsonld`` file located next to this module. Compacted keys
       containing ``:`` and original keys that are missing from the
       compacted document are reported as invalid.

    Args:
        codemeta (dict): codemeta.json file as a dictionary.

    Returns:
        invalid_fields (list): Sorted list of invalid fields without
            duplicates, or ``["Validation error"]`` if loading the schema or
            the JSON-LD processing raised an exception.

    """
    schema_path = Path(__file__).parent / "schema-2.jsonld"

    # Check for required fields (a set removes duplicates as we go)
    required_fields = ("@context", "@type", "name")
    invalid_fields = {field for field in required_fields if field not in codemeta}

    # Validate @context
    codemeta_context = codemeta.get("@context", [])
    if isinstance(codemeta_context, str):
        codemeta_context = [codemeta_context]
    try:
        has_v2_doi = V2_DOI in codemeta_context
    except TypeError:
        # An explicit JSON null (None) or a scalar such as a number does not
        # support membership tests; report it as invalid instead of crashing.
        has_v2_doi = False
    if not has_v2_doi:
        invalid_fields.add("@context")
        logger.warning(
            "The @context field in codemeta.json does not contain the Codemeta v2 DOI."
        )

    try:
        # Load Codemeta JSON-LD Schema
        with open(schema_path, "r", encoding="utf-8") as f:
            schema = json.load(f)

        # Use schema's context to avoid network fetch issues; work on a copy
        # so the caller's dictionary is never modified.
        schema_context = schema["@context"]
        codemeta_copy = copy.deepcopy(codemeta)
        codemeta_copy["@context"] = schema_context

        # Expand and compact to validate terms
        expanded = jsonld.expand(codemeta_copy)
        compacted = jsonld.compact(expanded, schema_context)

        # Check for unmapped fields (fields with ':' indicating schema prefix)
        for key in compacted:
            if ":" in key:
                logger.error("Invalid schema reference found: %s", key)
                invalid_fields.add(key)

        # Check for unsupported terms by comparing original and compacted keys.
        # @type is excluded as it might be handled differently in compaction.
        unsupported_terms = set(codemeta) - set(compacted) - {"@type"}
        if unsupported_terms:
            logger.warning("Unsupported terms found: %s", sorted(unsupported_terms))
            invalid_fields.update(unsupported_terms)

    except Exception as e:
        logger.error("Codemeta validation failed: %s", e)
        return ["Validation error"]

    # Sort for a deterministic result; key=str keeps this safe even if the
    # dictionary unexpectedly contains non-string keys.
    return sorted(invalid_fields, key=str)