Coverage for src/metador_core/rdf/skos.py: 0%

82 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-11-02 09:33 +0000

1"""Parse a SKOS ConceptScheme, explore it and generate enums for schemas. 

2 

3Assumptions: 

4* ConceptScheme collects Concepts via hasTopConcept 

5* Concepts have 0-1 broader Concept and 0-n narrower Concept 

6* Concepts define a prefLabel 

7* Concepts have a unique IRI (ideally resolving to the concept sub-graph) 

8 

9All of this applies to e.g. https://data.nist.gov/od/dm/nmrr/vocab 

10""" 

11from __future__ import annotations 

12 

13from enum import Enum 

14from typing import List, Optional, Tuple, Type, cast 

15 

16import rdflib 

17from rdflib.namespace import SKOS 

18 

19from ..util import pythonize_name 

20from .lib import GraphNode, RDFParser 

21 

22 

class SemanticEnum(Enum):
    """Enum subclass for Enums generated from a semantic taxonomy."""

    # (ID, pythonized name) pair of the concept an enum was generated from.
    # Spelled exactly like the attribute that ``Concept.to_enum`` assigns on
    # the generated enum class. Being a bare dunder annotation, it does not
    # create an enum member.
    __self_term__: Tuple[str, str]

27 

28 

class Concept(RDFParser):
    """A concept is a node in a taxonomy defining a term."""

    _depth: int = 0
    """Depth of the concept in the taxonomy forest."""

    id: str
    """ID of the concept (should be IRI resolving to concept definition)."""

    prefLabel: str
    """Canonical name of the concept."""

    broader: Optional[Concept]
    """Unique more general concept in the taxonomy (unless it is a root)."""

    narrower: List[Concept]
    """A list of more specific sub-concepts."""

    def __eq__(self, other):
        """Return whether two concept objects are equal.

        For our purposes, concepts are the same if they come from the same graph
        and refer to the same IRI.
        """
        return (
            isinstance(other, Concept)
            and self.__wrapped__.graph == other.__wrapped__.graph
            and self.id == other.id
        )

    def __hash__(self):
        """Hash a concept by its IRI.

        Defining ``__eq__`` alone would make instances unhashable. Equal
        concepts always share the same ``id``, so hashing the id is consistent
        with ``__eq__``.
        """
        return hash(self.id)

    def new_subconcept(self, node: GraphNode):
        """Wrap ``node`` as a concept located one level below this one."""
        ret = Concept(node)
        ret._depth = self._depth + 1
        return ret

    def new_superconcept(self, node: GraphNode):
        """Wrap ``node`` as a concept located one level above this one."""
        ret = Concept(node)
        ret._depth = self._depth - 1
        return ret

    # ----

    def parse_id(self, node) -> str:
        """Return the IRI of the concept node as a Python string."""
        assert isinstance(node, rdflib.URIRef)
        return node.toPython()

    def parse_prefLabel(self, node: GraphNode) -> str:
        """Return the value of the ``skos:prefLabel`` literal of the node."""
        val = node.object(SKOS.prefLabel)
        assert val.is_literal()
        return val.value

    def parse_broader(self, node: GraphNode) -> Optional[Concept]:
        """Return the ``skos:broader`` concept, or None if there is none."""
        if v := node.object(SKOS.broader):
            return self.new_superconcept(v)
        return None

    def parse_narrower(self, node: GraphNode) -> List[Concept]:
        """Return all ``skos:narrower`` concepts as direct sub-concepts."""
        return [self.new_subconcept(sub) for sub in node.objects(SKOS.narrower)]

    # ----

    def pretty_print(
        self,
        *,
        max_depth: Optional[int] = None,
        indent: Optional[int] = None,
        indent_unit: str = "\t",
    ) -> str:
        """Return the concept and its sub-concepts as an indented text tree.

        Args:
            max_depth: Number of levels of sub-concepts to include
                (None means unlimited, 0 prints only this concept).
            indent: Indentation level of this concept (None is treated as 0).
            indent_unit: String repeated once per indentation level.

        Returns:
            One line per concept (``<prefLabel> -> <id>``), joined by newlines.
        """
        indent = indent or 0
        lines = [f"{indent*indent_unit}{self.prefLabel} -> {self.id}"]

        if max_depth is None or max_depth > 0:
            max_depth_next = None if max_depth is None else max_depth - 1
            lines.extend(
                sub.pretty_print(
                    indent=indent + 1,
                    max_depth=max_depth_next,
                    # propagate the caller's unit; otherwise nested levels
                    # would silently fall back to the default tab
                    indent_unit=indent_unit,
                )
                for sub in self.narrower
            )

        return "\n".join(lines)

    def __str__(self):
        """Return the full (unlimited depth) pretty-printed tree."""
        return self.pretty_print()

    # ----

    @property
    def term(self) -> Tuple[str, str]:
        """Return (ID, string) pair for this concept."""
        return (self.id, pythonize_name(self.prefLabel))

    def sub_terms(self, *, deep: bool = False) -> dict:
        """Return dict of subconcepts (recursively, if desired).

        The dict maps the ID of each sub-concept to its pythonized label.
        With ``deep=True`` the terms of all transitive sub-concepts are merged
        in as well (entries collected later override earlier ones with the
        same ID).
        """
        children = self.narrower
        ret = dict(sub.term for sub in children)
        if deep:
            for sub in children:
                ret.update(sub.sub_terms(deep=True))
        return ret

    def to_enum(self, deep: bool = False) -> Type[SemanticEnum]:
        """Return Enum with immediate child concepts as possible values.

        Enum member names are the pythonized labels and member values are the
        concept IDs. The (ID, name) pair of this concept is attached to the
        returned enum class as ``__self_term__``.

        Raises:
            NotImplementedError: If ``deep`` is true (not supported yet).
        """
        if deep:  # pragma: no cover
            # TODO: think how to combine the enums in the best way
            raise NotImplementedError

        ts = self.sub_terms(deep=deep)
        # expect that human values are also unique (they become member names)
        assert len(ts) == len(set(ts.values()))
        ret = cast(
            Type[SemanticEnum],
            SemanticEnum(  # type: ignore
                f"{self.term[1].capitalize()}_Enum", {v: k for k, v in ts.items()}
            ),
        )
        # useful information for "deep" mode:
        ret.__self_term__ = self.term  # type: ignore
        return ret

154 

155 

class ConceptScheme(RDFParser):
    """A concept scheme points to the roots of a taxonomy forest.

    The top level concepts are assumed to be unrelated
    (otherwise they should be united by the broader super-concept).

    For this reason, you cannot generate an enum on this level.
    """

    id: str
    hasTopConcept: List[Concept]

    def parse_id(self, node):
        """Return the IRI of the scheme node as a Python string."""
        assert isinstance(node, rdflib.URIRef)
        return node.toPython()

    def parse_hasTopConcept(self, node: GraphNode):
        """Return the ``skos:hasTopConcept`` objects wrapped as concepts."""
        return [Concept(top) for top in node.objects(SKOS.hasTopConcept)]

    # ----

    def pretty_print(self, **kwargs):
        """Return the pretty-printed trees of all top concepts, one after another.

        Keyword arguments are forwarded unchanged to ``Concept.pretty_print``.
        """
        rendered = (top.pretty_print(**kwargs) for top in self.hasTopConcept)
        return "\n".join(rendered)

    def __str__(self):
        """Return the pretty-printed forest with default settings."""
        return self.pretty_print()