Coverage for src/somesy/codemeta/utils.py: 85%
47 statements
« prev ^ index » next coverage.py v7.6.0, created at 2025-03-10 14:56 +0000
« prev ^ index » next coverage.py v7.6.0, created at 2025-03-10 14:56 +0000
1"""Utility functions for codemeta.json."""
3import copy
4import json
5import logging
6from pathlib import Path
8from pyld import jsonld
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Identifier that the "@context" of a codemeta.json must contain to pass the
# context check in validate_codemeta().
V2_DOI = "https://doi.org/10.5063/schema/codemeta-2.0"


def validate_codemeta(codemeta: dict) -> list:
    """Validate a codemeta.json document against the codemeta.jsonld schema.

    The check has three parts:

    1. The required top-level fields ``@context``, ``@type`` and ``name``
       must be present.
    2. ``@context`` must contain the Codemeta v2 DOI (``V2_DOI``).
    3. The document is expanded and compacted with the context of the
       ``schema-2.jsonld`` file located next to this module; keys that end
       up with a ``prefix:term`` form, or that disappear during the round
       trip, are reported.

    Args:
        codemeta (dict): codemeta.json file as a dictionary.

    Returns:
        invalid_fields (list): Sorted list of invalid field names without
            duplicates (empty when the document is valid). If loading the
            schema or the JSON-LD processing fails for any reason, the
            single-element list ``["Validation error"]`` is returned
            instead.

    """
    schema_path = Path(__file__).parent / "schema-2.jsonld"

    # Check for required fields
    invalid_fields = [
        field for field in ("@context", "@type", "name") if field not in codemeta
    ]

    # Validate @context
    codemeta_context = codemeta.get("@context", [])
    if isinstance(codemeta_context, str):
        codemeta_context = [codemeta_context]
    elif not isinstance(codemeta_context, (list, tuple, dict)):
        # A value such as null or a number cannot contain the DOI; treat it
        # as an empty context so it is reported as invalid instead of making
        # the membership test below raise TypeError.
        codemeta_context = []
    if V2_DOI not in codemeta_context:
        invalid_fields.append("@context")
        logger.warning(
            "The @context field in codemeta.json does not contain the Codemeta v2 DOI."
        )

    try:
        # Load Codemeta JSON-LD Schema
        with open(schema_path, "r", encoding="utf-8") as f:
            schema = json.load(f)

        # Use schema's context to avoid network fetch issues
        schema_context = schema["@context"]
        codemeta_copy = copy.deepcopy(codemeta)
        codemeta_copy["@context"] = schema_context

        # Expand and compact to validate terms
        expanded = jsonld.expand(codemeta_copy)
        compacted = jsonld.compact(expanded, schema_context)

        # Check for unmapped fields (fields with ':' indicating schema prefix)
        for key in compacted:
            if ":" in key:
                logger.error("Invalid schema reference found: %s", key)
                invalid_fields.append(key)

        # Check for unsupported terms by comparing original and compacted keys.
        # @type is left out of the comparison as it might be handled
        # differently in compaction.
        unsupported_terms = set(codemeta) - set(compacted) - {"@type"}
        if unsupported_terms:
            logger.warning("Unsupported terms found: %s", sorted(unsupported_terms))
            invalid_fields.extend(unsupported_terms)

    except Exception as e:
        # Any failure here (schema file, JSON parsing, JSON-LD processing) is
        # reported through the return value rather than propagated.
        logger.error("Codemeta validation failed: %s", e)
        return ["Validation error"]

    # Remove duplicates; sort so the result does not depend on set ordering.
    return sorted(set(invalid_fields), key=str)