Coverage for src/metador_core/schema/common/schemaorg.py: 100%

161 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-11-02 09:33 +0000

1"""Schema.org-compatible common metadata schemas. 

2 

3Supports a subset of commonly useful fields. 

4 

5Adds almost no constraints beyond the spec, except for fixing a multiplicity for fields. 

6 

7Intended to serve as the basis for more specific schemas. 

8 

9Note that these schemas ARE NOT able to parse arbitrary schema.org-aligned metadata, 

10their purpose is to ensure that successfully parsed input is semantically enriched. 

11 

12See schema.org official documentation for full explanation and list of all fields. 

13""" 

14from __future__ import annotations 

15 

16from datetime import date, datetime, time 

17from typing import List, Optional, Set, Union 

18 

19from pydantic import AnyHttpUrl, NonNegativeInt 

20from typing_extensions import TypeAlias 

21 

22from ..ld import LDIdRef, LDOrRef, LDSchema, ld_decorator 

23from ..types import Bool, Duration, Float, Int, NonEmptyStr 

24 

# Context URL shared by every schema declared in this module.
CTX_URL_SCHEMAORG = "https://schema.org"

# Decorator factory pre-bound to the schema.org context; applied below as
# ``@schemaorg(type="...")``. What exactly it attaches to the decorated class
# is defined in ``..ld`` and is not visible from this module.
schemaorg = ld_decorator(context=CTX_URL_SCHEMAORG)

# ----

# Short aliases, presumably named after the corresponding schema.org
# data types (URL, Text, Number) -- confirm against the schema.org docs.
URL: TypeAlias = AnyHttpUrl
Text: TypeAlias = NonEmptyStr
Number: TypeAlias = Union[Int, Float]

# Unions for fields that accept either of two temporal types.
# NOTE(review): the order of the union members may influence which type the
# validation library picks for ambiguous input -- confirm before reordering.
DateOrDatetime: TypeAlias = Union[date, datetime]
TimeOrDatetime: TypeAlias = Union[time, datetime]

37 

38# ---- 

39 

40 

@schemaorg(type="Thing")
class Thing(LDSchema):
    """See https://schema.org/Thing for field documentation."""

    # Every field declared here is Optional. All other schema classes in this
    # module derive (directly or indirectly) from Thing and inherit these fields.

    name: Optional[Text]
    """Name, title or caption of the entity."""

    identifier: Optional[Union[URL, Text]]  # can't put PropertyValue here, weird bug
    """Arbitrary identifier of the entity.

    Prefer @id if the identifier is web-resolvable, or use more
    specific fields if available."""

    url: Optional[URL]
    """URL of the entity."""

    description: Optional[Text]
    """Description of the entity."""

    # ----
    # The remaining fields are list-valued.

    alternateName: Optional[List[Text]]
    """Known aliases of the entity."""

    # Undocumented in the original. Per https://schema.org/sameAs this is
    # presumably a list of URLs of pages that unambiguously identify the
    # entity -- confirm against the schema.org documentation.
    sameAs: Optional[List[URL]]

66 

67 

# NOTE(review): unlike the schemas around it, this class carries no
# @schemaorg(type=...) decorator -- presumably because, as its docstring says,
# schema.org has no corresponding common ancestor type. Confirm.
class ValueCommon(Thing):
    """Common properties of multiple *Value classes.

    For some reason these have no common ancestor in schema.org.
    """

    # The value itself: a Bool, a Number, a Text, or a nested StructuredValue.
    # StructuredValue is declared later in this module, so this is a forward
    # reference (the module uses `from __future__ import annotations`).
    value: Optional[Union[Bool, Number, Text, StructuredValue]]

    # Placeholder left in the original: valueReference is not modelled here.
    # valueReference: Optional[]

    minValue: Optional[Number]
    """Minimal value of property this value corresponds to."""

    maxValue: Optional[Number]
    """Maximal value of property this value corresponds to."""

    unitCode: Optional[Union[URL, Text]]
    """UN/CEFACT Common Code (3 characters) or URL.

    Other codes may be used with a prefix followed by a colon."""

    unitText: Optional[Text]
    """String indicating the unit of measurement.

    Useful if no standard unitCode can be provided.
    """

94 

95 

@schemaorg(type="StructuredValue")
class StructuredValue(ValueCommon):
    """See https://schema.org/StructuredValue for field documentation.

    Declares no fields of its own; all fields are inherited from ValueCommon
    (and, through it, from Thing).
    """

99 

100 

@schemaorg(type="QuantitativeValue")
class QuantitativeValue(StructuredValue):
    """See https://schema.org/QuantitativeValue for field documentation.

    Declares no fields of its own; all fields are inherited from
    StructuredValue.
    """

104 

105 

@schemaorg(type="PropertyValue")
class PropertyValue(StructuredValue):
    """Use 'name' for the property name and 'description' for alternative human-readable value.

    See https://schema.org/PropertyValue for field documentation.
    """

    # 'name' and 'description' mentioned above are inherited from Thing; the
    # value/unit fields are inherited via StructuredValue from ValueCommon.
    # Both fields declared here accept either a URL or a Text.

    propertyID: Optional[Union[URL, Text]]
    """A commonly used identifier for the characteristic represented by the property,
    e.g. a manufacturer or a standard code for a property."""

    measurementTechnique: Optional[Union[URL, Text]]
    """A technique or technology used in a Dataset (or DataDownload, DataCatalog),
    corresponding to the method used for measuring the corresponding variable(s)
    (described using variableMeasured).

    This is oriented towards scientific and scholarly dataset publication but
    may have broader applicability; it is not intended as a full representation
    of measurement, but rather as a high level summary for dataset discovery.
    """

126 

127 

128# ---- 

129 

130 

@schemaorg(type="Organization")
class Organization(Thing):
    """See https://schema.org/Organization for field documentation."""

    # Only a free-text address is modelled here, in addition to the fields
    # inherited from Thing.
    address: Optional[Text]
    """Address of the organization."""

137 

138 

@schemaorg(type="Person")
class Person(Thing):
    """See https://schema.org/Person for field documentation."""

    givenName: Optional[Text]
    """Given name, typically the first name of a Person."""

    familyName: Optional[Text]
    """Family name of a Person."""

    additionalName: Optional[Text]
    """Additional name for a Person, e.g. for a middle name."""

    # NOTE(review): annotated as plain Text, so as far as this annotation
    # shows, the e-mail format itself is not checked -- confirm if intended.
    email: Optional[Text]
    """E-mail address."""

    # A single Organization (not a list). LDOrRef presumably permits either an
    # embedded object or a reference to one -- confirm in ``..ld``.
    affiliation: Optional[LDOrRef[Organization]]
    """An organization this person is affiliated with."""

157 

158 

# Union used for fields whose value may be either a Person or an Organization.
OrgOrPerson: TypeAlias = Union[Person, Organization]

160 

161# ---- 

162 

163 

@schemaorg(type="CreativeWork")
class CreativeWork(Thing):
    """See https://schema.org/CreativeWork for field documentation."""

    # All fields are Optional. LDOrRef[...] presumably permits either an
    # embedded object or a reference to one -- confirm in ``..ld``.

    version: Optional[Union[NonNegativeInt, Text]]
    """Version of this work.

    Either an integer, or a version string, e.g. "1.0.5".

    When using version strings, follow https://semver.org
    whenever applicable.
    """

    citation: Optional[Set[Union[LDOrRef[CreativeWork], Text]]]
    """Citation or reference to another creative work, e.g.
    another publication, scholarly article, etc."""

    # search

    abstract: Optional[Text]
    """A short description that summarizes the creative work."""

    keywords: Optional[Set[Text]]
    """Keywords or tags to describe this creative work."""

    # people

    author: Optional[List[LDOrRef[OrgOrPerson]]]
    """People responsible for the work, e.g. in research,
    the people who would be authors on the relevant paper."""

    contributor: Optional[List[LDOrRef[OrgOrPerson]]]
    """Additional people who contributed to the work, e.g.
    in research, the people who would be in the acknowledgements
    section of the relevant paper."""

    # The following roles are undocumented in the original; see the schema.org
    # CreativeWork page for their meaning. Each is a list; all accept persons
    # or organizations except `editor`, which accepts persons only.
    maintainer: Optional[List[LDOrRef[OrgOrPerson]]]
    producer: Optional[List[LDOrRef[OrgOrPerson]]]
    provider: Optional[List[LDOrRef[OrgOrPerson]]]
    publisher: Optional[List[LDOrRef[OrgOrPerson]]]
    sponsor: Optional[List[LDOrRef[OrgOrPerson]]]
    editor: Optional[List[LDOrRef[Person]]]

    # date
    # Each of these accepts either a date or a datetime.

    dateCreated: Optional[DateOrDatetime]
    dateModified: Optional[DateOrDatetime]
    datePublished: Optional[DateOrDatetime]

    # legal
    # copyrightHolder is a single person or organization (not a list);
    # license is either a URL or a CreativeWork.

    copyrightHolder: Optional[LDOrRef[OrgOrPerson]]
    copyrightYear: Optional[Int]
    copyrightNotice: Optional[Text]
    license: Optional[Union[URL, LDOrRef[CreativeWork]]]

    # provenance
    # All set-valued. isPartOf and isBasedOn additionally accept plain URLs,
    # whereas about, subjectOf and hasPart accept only objects/references.

    about: Optional[Set[LDOrRef[Thing]]]
    subjectOf: Optional[Set[LDOrRef[CreativeWork]]]
    hasPart: Optional[Set[LDOrRef[CreativeWork]]]
    isPartOf: Optional[Set[Union[URL, LDOrRef[CreativeWork]]]]
    isBasedOn: Optional[Set[Union[URL, LDOrRef[CreativeWork]]]]

227 

228 

229# ---- 

230 

231 

@schemaorg(type="DefinedTermSet")
class DefinedTermSet(CreativeWork):
    """See https://schema.org/DefinedTermSet for field documentation."""

    # Not wrapped in Optional, unlike most fields in this module.
    # DefinedTerm is declared after this class, so this is a forward reference
    # (the module uses `from __future__ import annotations`).
    hasDefinedTerm: List[LDOrRef[DefinedTerm]]

237 

238 

@schemaorg(type="DefinedTerm")
class DefinedTerm(Thing):
    """See https://schema.org/DefinedTerm for field documentation."""

    # NOTE: also use name and description
    # (both are inherited from Thing).

    # Not wrapped in Optional, unlike most fields in this module.
    termCode: Text
    """A code that identifies this DefinedTerm within a DefinedTermSet."""

    # Typed as a URL or a bare LDIdRef rather than as the DefinedTermSet
    # class itself.
    inDefinedTermSet: Optional[Union[URL, LDIdRef]]  # ref to a DefinedTermSet
    """A DefinedTermSet that contains this term."""

250 

251 

@schemaorg(type="CategoryCodeSet")
class CategoryCodeSet(DefinedTermSet):
    """See https://schema.org/CategoryCodeSet for field documentation."""

    # Not wrapped in Optional. CategoryCode is declared after this class
    # (forward reference).
    # NOTE(review): the inherited hasDefinedTerm is also declared without
    # Optional, so presumably both lists have to be supplied -- confirm.
    hasCategoryCode: List[LDOrRef[CategoryCode]]

257 

258 

@schemaorg(type="CategoryCode")
class CategoryCode(DefinedTerm):
    """See https://schema.org/CategoryCode for field documentation."""

    # Not wrapped in Optional.
    # NOTE(review): the inherited termCode is also declared without Optional,
    # so presumably both codes have to be supplied -- confirm.
    codeValue: Text
    """A short textual code that uniquely identifies the value."""

    # Typed as a URL or a bare LDIdRef rather than as the CategoryCodeSet
    # class itself.
    inCodeSet: Optional[Union[URL, LDIdRef]]  # ref to a CategoryCodeSet
    """A CategoryCodeSet that contains this category code."""

268 

269 

270# ---- 

271 

272 

@schemaorg(type="MediaObject")
class MediaObject(CreativeWork):
    """See https://schema.org/MediaObject for field documentation."""

    # All fields are Optional and are added on top of those inherited from
    # CreativeWork.

    contentSize: Optional[Int]
    """Size of the object in bytes."""

    sha256: Optional[Text]
    """Sha256 hashsum string of the object."""

    encodingFormat: Optional[Union[URL, Text]]
    """MIME type, or if the format is too niche or no standard MIME type is
    defined, an URL pointing to a description of the format."""

    # width and height take a QuantitativeValue object directly
    # (not wrapped in LDOrRef).
    width: Optional[QuantitativeValue]
    """Width of the entity."""

    height: Optional[QuantitativeValue]
    """Height of the entity."""

    # Typed as Text, not as a number.
    bitrate: Optional[Text]
    """Bitrate of the entity (e.g. for audio or video)."""

    duration: Optional[Duration]
    """Duration of the entity (e.g. for audio or video)."""

    # startTime and endTime accept either a time or a datetime.
    startTime: Optional[TimeOrDatetime]
    """Physical starting time, e.g. of a recording or measurement."""

    endTime: Optional[TimeOrDatetime]
    """Physical ending time, e.g. of a recording or measurement."""

304 

305 

@schemaorg(type="Dataset")
class Dataset(CreativeWork):
    """See https://schema.org/Dataset for field documentation."""

    # Modelled as a single plain URL here.
    distribution: Optional[URL]  # NOTE: for top level description could link to repo
    """Downloadable form of this dataset, at a specific location, in a specific format."""

    # Each entry is either a Text or a PropertyValue object.
    variableMeasured: Optional[List[Union[Text, PropertyValue]]]
    """Variables that are measured in the dataset."""

315 

316 

317# ---- 

318 

319 

@schemaorg(type="Product")
class Product(Thing):
    """See https://schema.org/Product for field documentation."""

    productID: Optional[Text]
    """The product identifier, such as ISBN."""

    # properties

    category: Optional[Union[Text, URL, CategoryCode, Thing]]
    """A category for the item.

    Greater signs or slashes can be used to informally indicate a category hierarchy.
    """

    material: Optional[Union[URL, Text, Product]]
    """A material that something is made from, e.g. leather, wool, cotton, paper."""

    pattern: Optional[Union[DefinedTerm, Text]]
    """A pattern that something has, for example 'polka dot', 'striped', 'Canadian flag'.

    Values are typically expressed as text, although links to controlled value schemes are also supported.
    """

    # Physical dimensions and weight, each a QuantitativeValue object.
    # Undocumented in the original; see the schema.org Product page.
    width: Optional[QuantitativeValue]
    height: Optional[QuantitativeValue]
    depth: Optional[QuantitativeValue]

    weight: Optional[QuantitativeValue]
    # Colour as free text (undocumented in the original).
    color: Optional[Text]

    additionalProperty: Optional[List[PropertyValue]]
    """A property-value pair representing an additional characteristic of the entity, e.g. a product feature or another characteristic for which there is no matching property in schema.org."""

    # meta
    # Both dates accept either a date or a datetime.

    productionDate: Optional[DateOrDatetime]
    releaseDate: Optional[DateOrDatetime]

    # Each holds a single Product (not a list). Undocumented in the original;
    # see the schema.org Product page for the intended meaning.
    isRelatedTo: Optional[LDOrRef[Product]]
    isSimilarTo: Optional[LDOrRef[Product]]