Coverage for src/metador_core/rdf/skos.py: 0%
82 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-11-02 09:33 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-11-02 09:33 +0000
1"""Parse a SKOS ConceptScheme, explore it and generate enums for schemas.
3Assumptions:
4* ConceptScheme collects Concepts via hasTopConcept
5* Concepts have 0-1 broader Concept and 0-n narrower Concept
6* Concepts define a prefLabel
7* Concepts have a unique IRI (ideally resolving to the concept sub-graph)
9All of this applies to e.g. https://data.nist.gov/od/dm/nmrr/vocab
10"""
11from __future__ import annotations
13from enum import Enum
14from typing import List, Optional, Tuple, Type, cast
16import rdflib
17from rdflib.namespace import SKOS
19from ..util import pythonize_name
20from .lib import GraphNode, RDFParser
class SemanticEnum(Enum):
    """Enum subclass for Enums generated from a semantic taxonomy.

    Concrete enums are created through the functional Enum API; the code that
    builds them attaches the originating concept's term to the new class as
    ``__self_term__``.
    """

    # (ID, name) term of the concept the enum was generated from.
    # The name must match the attribute that is assigned on generated enums
    # (``__self_term__``); being a dunder name, it never becomes an enum member.
    __self_term__: Tuple[str, str]
class Concept(RDFParser):
    """A concept is a node in a taxonomy defining a term.

    The annotated attributes below each have a matching ``parse_<name>``
    method; presumably ``RDFParser`` uses those to fill the attributes from
    the wrapped graph node (confirm in ``.lib``).
    """

    _depth: int = 0
    """Depth of the concept in the taxonomy forest."""

    id: str
    """ID of the concept (should be IRI resolving to concept definition)."""

    prefLabel: str
    """Canonical name of the concept."""

    broader: Optional[Concept]
    """Unique more general concept in the taxonomy (unless it is a root)."""

    narrower: List[Concept]
    """A list of more specific sub-concepts."""

    # NOTE(review): defining __eq__ without __hash__ normally makes instances
    # unhashable in Python -- confirm this is intended for Concept objects.
    def __eq__(self, other):
        """Return whether two concept objects are equal.

        For our purposes, concepts are the same if they come from the same graph
        and refer to the same IRI.
        """
        return (
            isinstance(other, Concept)
            and self.__wrapped__.graph == other.__wrapped__.graph
            and self.id == other.id
        )

    def new_subconcept(self, node: GraphNode):
        """Wrap `node` as a Concept located one level below this concept."""
        ret = Concept(node)
        ret._depth = self._depth + 1
        return ret

    def new_superconcept(self, node: GraphNode):
        """Wrap `node` as a Concept located one level above this concept."""
        ret = Concept(node)
        ret._depth = self._depth - 1
        return ret

    # ----

    def parse_id(self, node) -> str:
        """Return the IRI of the concept node as a Python string."""
        assert isinstance(node, rdflib.URIRef)
        return node.toPython()

    def parse_prefLabel(self, node: GraphNode) -> str:
        """Return the value of the ``skos:prefLabel`` literal of the node."""
        val = node.object(SKOS.prefLabel)
        assert val.is_literal()
        return val.value

    def parse_broader(self, node: GraphNode) -> Optional[Concept]:
        """Return the ``skos:broader`` concept, or None if there is none."""
        if v := node.object(SKOS.broader):
            return self.new_superconcept(v)
        return None

    def parse_narrower(self, node: GraphNode) -> List[Concept]:
        """Return all ``skos:narrower`` concepts of the node as a list."""
        return [self.new_subconcept(sub) for sub in node.objects(SKOS.narrower)]

    # ----

    def pretty_print(
        self, *, max_depth=None, indent: Optional[int] = None, indent_unit: str = "\t"
    ):
        """Return the taxonomy below (and including) this concept as text.

        Each concept is rendered on its own line as ``<prefLabel> -> <id>``,
        prefixed by `indent_unit` repeated once per nesting level.

        Args:
            max_depth: Number of levels of sub-concepts to include
                (None means no limit, 0 means only this concept).
            indent: Nesting level of this concept (None is treated as 0).
            indent_unit: String used for one level of indentation.

        Returns:
            The rendered lines joined by newlines.
        """
        indent = indent or 0
        lines = [f"{indent * indent_unit}{self.prefLabel} -> {self.id}"]

        if max_depth is None or max_depth > 0:
            # inside this branch max_depth is either None or positive
            max_depth_next = None if max_depth is None else max_depth - 1
            lines.extend(
                child.pretty_print(
                    max_depth=max_depth_next,
                    indent=indent + 1,
                    # propagate the unit, so nested levels do not silently
                    # fall back to the default indentation string
                    indent_unit=indent_unit,
                )
                for child in self.narrower
            )

        return "\n".join(lines)

    def __str__(self):
        """Return the pretty-printed taxonomy using default settings."""
        return self.pretty_print()

    # ----

    @property
    def term(self):
        """Return (ID, string) pair for this concept."""
        return (self.id, pythonize_name(self.prefLabel))

    def sub_terms(self, *, deep: bool = False):
        """Return dict of subconcepts (recursively, if desired).

        The dict maps the ID of each sub-concept to its pythonized name.
        With `deep` set, the terms of all descendants are merged in as well.
        """
        ret = dict(child.term for child in self.narrower)
        if deep:
            for child in self.narrower:
                ret.update(child.sub_terms(deep=deep))
        return ret

    def to_enum(self, deep: bool = False) -> Type[SemanticEnum]:
        """Return Enum with immediate child concepts as possible values.

        The enum member names are the pythonized names of the sub-concepts and
        the member values are the sub-concept IDs. The term of this concept is
        attached to the returned enum class as ``__self_term__``.

        Raises:
            NotImplementedError: If `deep` is set (not supported yet).
        """
        if deep:  # pragma: no cover
            # TODO: think how to combine the enums in the best way
            raise NotImplementedError

        ts = self.sub_terms(deep=deep)
        # expect that human values are also unique (otherwise inverting the
        # dict below would silently drop sub-concepts)
        assert len(ts) == len(
            set(ts.values())
        ), "names of sub-concepts are not unique"
        ret = cast(
            Type[SemanticEnum],
            SemanticEnum(  # type: ignore
                f"{self.term[1].capitalize()}_Enum", {v: k for k, v in ts.items()}
            ),
        )
        # useful information for "deep" mode:
        ret.__self_term__ = self.term  # type: ignore
        return ret
class ConceptScheme(RDFParser):
    """A concept scheme points to the roots of a taxonomy forest.

    The top level concepts are assumed to be unrelated to each other
    (if they were related, a broader super-concept should unite them).

    For this reason, you cannot generate an enum on this level.
    """

    # IRI of the concept scheme
    id: str
    # root concepts of the scheme (objects of skos:hasTopConcept)
    hasTopConcept: List[Concept]

    def parse_id(self, node):
        """Return the IRI of the scheme node as a Python value."""
        assert isinstance(node, rdflib.URIRef)
        iri = node.toPython()
        return iri

    def parse_hasTopConcept(self, node: GraphNode):
        """Return all ``skos:hasTopConcept`` objects wrapped as Concepts."""
        return [Concept(top) for top in node.objects(SKOS.hasTopConcept)]

    # ----

    def pretty_print(self, **kwargs):
        """Return the pretty-printed trees of all top concepts.

        All keyword arguments are forwarded to ``Concept.pretty_print``;
        the resulting texts are joined by newlines.
        """
        rendered = (top.pretty_print(**kwargs) for top in self.hasTopConcept)
        return "\n".join(rendered)

    def __str__(self):
        """Return the pretty-printed scheme using default settings."""
        return self.pretty_print()