Coverage for src/somesy/core/models.py: 95%

214 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2024-04-30 09:42 +0000

1"""Core models for the somesy package.""" 

2from __future__ import annotations 

3 

4import functools 

5import json 

6import re 

7from datetime import date 

8from pathlib import Path 

9from typing import Any, Dict, List, Optional 

10 

11from pydantic import BaseModel, Field, PrivateAttr, field_validator, model_validator 

12from rich.pretty import pretty_repr 

13from typing_extensions import Annotated 

14 

15from .core import get_input_content 

16from .log import SomesyLogLevel 

17from .types import ContributionTypeEnum, Country, HttpUrlStr, LicenseEnum 

18 

19# -------- 

20# Somesy configuration model 

21 

22 

class SomesyBaseModel(BaseModel):
    """Customized pydantic BaseModel for somesy.

    Apart from some general tweaks for better defaults,
    adds a private `_key_order` field, which is used to track the
    preferred order for serialization (usually coming from some existing input).

    It can be set on an instance using the set_key_order method,
    and is preserved by `model_copy()`.

    NOTE: The custom order is intended for leaf models (no further nested models),
    custom order will not work correctly across nesting layers.
    """

    model_config = dict(
        extra="forbid",
        validate_assignment=True,
        populate_by_name=True,
        str_strip_whitespace=True,
        str_min_length=1,
    )

    # ----
    # Key order magic

    _key_order: List[str] = PrivateAttr([])
    """List of field names (NOT aliases!) in the order they should be written in."""

    @classmethod
    @functools.lru_cache()  # compute once per class
    def _aliases(cls) -> Dict[str, str]:
        """Map back from alias field names to internal field names."""
        return {v.alias or k: k for k, v in cls.model_fields.items()}

    @classmethod
    def make_partial(cls, dct):
        """Construct unvalidated partial model from dict.

        Keys of `dct` that are known aliases are translated to the internal
        field names before being handed to `model_construct`; all other keys
        are passed through unchanged.
        """
        un_alias = cls._aliases()
        return cls.model_construct(**{un_alias.get(k) or k: v for k, v in dct.items()})

    def set_key_order(self, keys: List[str]):
        """Setter for custom key order used in serialization.

        Args:
            keys: Field names or aliases, in the preferred output order.
        """
        un_alias = self._aliases()
        # make sure we use the _actual_ field names
        self._key_order = [un_alias.get(k) or k for k in keys]

    def model_copy(self, *args, **kwargs):
        """Patched copy method (to preserve custom key order)."""
        ret = super().model_copy(*args, **kwargs)
        ret.set_key_order(list(self._key_order))
        return ret

    @staticmethod
    def _patch_kwargs_defaults(kwargs):
        """Default the `exclude_*` dump options to True, in place.

        Only options that are missing (or explicitly `None`) are filled in,
        so a caller passing e.g. `exclude_defaults=False` is respected.
        """
        for key in ["exclude_defaults", "exclude_none", "exclude_unset"]:
            if kwargs.get(key) is None:
                kwargs[key] = True

    def _reorder_dict(self, dct):
        """Return dict with patched key order (according to `self._key_order`).

        Keys in `dct` not listed in `self._key_order` come after all others,
        in the order in which they appear in `dct` (deterministic).

        Used to patch up `model_dump()` and `model_dump_json()`.
        """
        # dict.fromkeys drops duplicates while keeping the first occurrence
        preferred = [k for k in dict.fromkeys(self._key_order or []) if k in dct]
        seen = set(preferred)
        remaining = [k for k in dct if k not in seen]
        return {k: dct[k] for k in preferred + remaining}

    def _alias_keys(self, dct):
        """Return `dct` with top-level field names replaced by their aliases.

        Keys that are not declared fields, or fields without alias, are kept.
        """
        fields = type(self).model_fields
        return {
            (fields[k].alias or k) if k in fields else k: v for k, v in dct.items()
        }

    def model_dump(self, *args, **kwargs):
        """Patched dict method (to preserve custom key order).

        Unless overridden by the caller, defaults, `None` values and unset
        fields are excluded. `by_alias` is applied to the top-level keys only.
        """
        self._patch_kwargs_defaults(kwargs)
        by_alias = kwargs.pop("by_alias", False)

        # always dump by field name, because the key order uses field names
        dct = super().model_dump(*args, **kwargs, by_alias=False)
        ret = self._reorder_dict(dct)

        if by_alias:
            ret = self._alias_keys(ret)
        return ret

    def model_dump_json(self, *args, **kwargs):
        """Patched json method (to preserve custom key order).

        Unless overridden by the caller, defaults, `None` values and unset
        fields are excluded. `by_alias` is applied to the top-level keys only.
        The `indent` option is honored by the final serialization step.
        """
        self._patch_kwargs_defaults(kwargs)
        by_alias = kwargs.pop("by_alias", False)
        # the pydantic output is re-serialized below, so indent must be applied there
        indent = kwargs.pop("indent", None)

        # loop back json through dict to apply custom key order
        dct = json.loads(super().model_dump_json(*args, **kwargs, by_alias=False))
        ret = self._reorder_dict(dct)

        if by_alias:
            ret = self._alias_keys(ret)
        return json.dumps(ret, ensure_ascii=False, indent=indent)

121 

122 

# Names of all supported sync targets; each one has a matching
# `no_sync_<target>` flag and `<target>_file` path in `SomesyConfig`.
_SOMESY_TARGETS = [
    "cff", "pyproject", "package_json",
    "codemeta", "julia", "fortran",
    "pom_xml", "mkdocs", "rust",
]


class SomesyConfig(SomesyBaseModel):
    """Pydantic model for somesy tool configuration.

    Note that all fields match CLI options, and CLI options will override the
    values declared in a somesy input file (such as `somesy.toml`).
    """

    @model_validator(mode="before")
    @classmethod
    def at_least_one_target(cls, values):
        """Check that at least one output file is enabled.

        Raises:
            ValueError: if the `no_sync_*` flag of every known target is set.
        """
        any_enabled = any(
            not values.get(f"no_sync_{target}") for target in _SOMESY_TARGETS
        )
        if any_enabled:
            return values

        msg = "No sync target enabled, nothing to do. Probably this is a mistake?"
        raise ValueError(msg)

    # cli flags
    show_info: Annotated[
        bool, Field(description="Show basic information messages on run (-v flag).")
    ] = False
    verbose: Annotated[
        bool, Field(description="Show verbose messages on run (-vv flag).")
    ] = False
    debug: Annotated[
        bool, Field(description="Show debug messages on run (-vvv flag).")
    ] = False

    input_file: Annotated[
        Path, Field(description="Project metadata input file path.")
    ] = Path("somesy.toml")

    no_sync_pyproject: Annotated[
        bool, Field(description="Do not sync with pyproject.toml.")
    ] = False
    pyproject_file: Annotated[
        Path, Field(description="pyproject.toml file path.")
    ] = Path("pyproject.toml")

    no_sync_package_json: Annotated[
        bool, Field(description="Do not sync with package.json.")
    ] = False
    package_json_file: Annotated[
        Path, Field(description="package.json file path.")
    ] = Path("package.json")

    no_sync_julia: Annotated[
        bool, Field(description="Do not sync with Project.toml.")
    ] = False
    julia_file: Annotated[
        Path, Field(description="Project.toml file path.")
    ] = Path("Project.toml")

    no_sync_fortran: Annotated[
        bool, Field(description="Do not sync with fpm.toml.")
    ] = False
    fortran_file: Annotated[
        Path, Field(description="fpm.toml file path.")
    ] = Path("fpm.toml")

    no_sync_pom_xml: Annotated[
        bool, Field(description="Do not sync with pom.xml.")
    ] = False
    pom_xml_file: Annotated[
        Path, Field(description="pom.xml file path.")
    ] = Path("pom.xml")

    no_sync_mkdocs: Annotated[
        bool, Field(description="Do not sync with mkdocs.yml.")
    ] = False
    mkdocs_file: Annotated[
        Path, Field(description="mkdocs.yml file path.")
    ] = Path("mkdocs.yml")

    no_sync_rust: Annotated[
        bool, Field(description="Do not sync with Cargo.toml.")
    ] = False
    rust_file: Annotated[
        Path, Field(description="Cargo.toml file path.")
    ] = Path("Cargo.toml")

    no_sync_cff: Annotated[
        bool, Field(description="Do not sync with CFF.")
    ] = False
    cff_file: Annotated[
        Path, Field(description="CFF file path.")
    ] = Path("CITATION.cff")

    no_sync_codemeta: Annotated[
        bool, Field(description="Do not sync with codemeta.json.")
    ] = False
    codemeta_file: Annotated[
        Path, Field(description="codemeta.json file path.")
    ] = Path("codemeta.json")

    def log_level(self) -> SomesyLogLevel:
        """Return log level derived from this configuration."""
        flags = dict(info=self.show_info, verbose=self.verbose, debug=self.debug)
        return SomesyLogLevel.from_flags(**flags)

    def update_log_level(self, log_level: SomesyLogLevel):
        """Update config flags according to passed log level.

        Exactly the flag matching `log_level` ends up True (none of them if
        the level is neither INFO, VERBOSE nor DEBUG).
        """
        flag_for_level = (
            ("show_info", SomesyLogLevel.INFO),
            ("verbose", SomesyLogLevel.VERBOSE),
            ("debug", SomesyLogLevel.DEBUG),
        )
        for flag_name, level in flag_for_level:
            setattr(self, flag_name, log_level == level)

    def get_input(self) -> SomesyInput:
        """Based on the somesy config, load the complete somesy input."""
        # metadata + config as declared in the specified input file
        loaded = SomesyInput.from_input_file(self.input_file)

        # merge settings: values from this config (cli) override the input file
        merged: Dict[str, Any] = {}
        merged.update(loaded.config or {})
        merged.update(self.model_dump())

        loaded.config = SomesyConfig(**merged)
        return loaded

254 

255 

256# -------- 

257# Project metadata model (modified from CITATION.cff) 

258 

259 

class Person(SomesyBaseModel):
    """Metadata about a person in the context of a software project.

    This schema is based on CITATION.cff 1.2, modified and extended for the needs of somesy.
    """

    # NOTE: we rely on the defined aliases for direct CITATION.cff interoperability.

    orcid: Annotated[
        Optional[HttpUrlStr],
        Field(
            description="The person's ORCID url **(not required, but highly suggested)**."
        ),
    ] = None

    email: Annotated[
        str,
        Field(
            pattern=r"^[\S]+@[\S]+\.[\S]{2,}$",
            description="The person's email address.",
        ),
    ]

    family_names: Annotated[
        str, Field(alias="family-names", description="The person's family names.")
    ]
    given_names: Annotated[
        str, Field(alias="given-names", description="The person's given names.")
    ]

    name_particle: Annotated[
        Optional[str],
        Field(
            alias="name-particle",
            description="The person's name particle, e.g., a nobiliary particle or a preposition meaning 'of' or 'from'"
            " (for example 'von' in 'Alexander von Humboldt').",
            examples=["von"],
        ),
    ] = None
    name_suffix: Annotated[
        Optional[str],
        Field(
            alias="name-suffix",
            description="The person's name-suffix, e.g. 'Jr.' for Sammy Davis Jr. or 'III' for Frank Edwin Wright III.",
            examples=["Jr.", "III"],
        ),
    ] = None
    alias: Annotated[Optional[str], Field(description="The person's alias.")] = None

    affiliation: Annotated[
        Optional[str], Field(description="The person's affiliation.")
    ] = None

    address: Annotated[Optional[str], Field(description="The person's address.")] = None
    city: Annotated[Optional[str], Field(description="The person's city.")] = None
    country: Annotated[
        Optional[Country], Field(description="The person's country.")
    ] = None
    fax: Annotated[Optional[str], Field(description="The person's fax number.")] = None
    post_code: Annotated[
        Optional[str], Field(alias="post-code", description="The person's post-code.")
    ] = None
    region: Annotated[Optional[str], Field(description="The person's region.")] = None
    tel: Annotated[
        Optional[str], Field(description="The person's phone number.")
    ] = None

    # ----
    # somesy-specific extensions
    author: Annotated[
        bool,
        Field(
            description="Indicates whether the person is an author of the project (i.e. significant contributor)."
        ),
    ] = False
    publication_author: Annotated[
        Optional[bool],
        Field(
            description="Indicates whether the person is to be listed as an author in academic citations."
        ),
    ] = None
    maintainer: Annotated[
        bool,
        Field(
            description="Indicates whether the person is a maintainer of the project (i.e. for contact)."
        ),
    ] = False

    # NOTE: CFF 1.3 (once done) might provide ways for refined contributor description. That should be implemented here.
    contribution: Annotated[
        Optional[str],
        Field(description="Summary of how the person contributed to the project."),
    ] = None
    contribution_types: Annotated[
        Optional[List[ContributionTypeEnum]],
        Field(
            description="Relevant types of contributions (see https://allcontributors.org/docs/de/emoji-key).",
            min_length=1,
        ),
    ] = None
    contribution_begin: Annotated[
        Optional[date], Field(description="Beginning date of the contribution.")
    ] = None
    contribution_end: Annotated[
        Optional[date], Field(description="Ending date of the contribution.")
    ] = None

    @model_validator(mode="before")
    @classmethod
    def author_implies_publication(cls, values):
        """Ensure consistency of author and publication_author.

        An author is always a publication author: the flag is set to True if
        missing, and an explicit `publication_author=False` is rejected.

        Raises:
            ValueError: if `author` is truthy and `publication_author` is False.
        """
        if values.get("author"):
            # NOTE: explicitly check for False (different case from None = missing!)
            if values.get("publication_author") is False:
                msg = "Combining author=true and publication_author=false is invalid!"
                raise ValueError(msg)
            values["publication_author"] = True
        return values

    # helper methods

    @property
    def full_name(self) -> str:
        """Return the full name of the person.

        Joins given names, name particle, family names and name suffix
        (in that order) with single spaces, skipping empty parts.
        """
        parts = (
            self.given_names,
            self.name_particle,
            self.family_names,
            self.name_suffix,
        )
        return " ".join(part for part in parts if part)

    def to_name_email_string(self) -> str:
        """Convert project metadata person object to poetry string for person format `full name <x@y.z>`."""
        return f"{self.full_name} <{self.email}>"

    @classmethod
    def from_name_email_string(cls, person: str) -> Person:
        """Return a `Person` based on an name/e-mail string like `full name <x@y.z>`.

        If the name is `A B C`, then `A B` will be the given names and `C` will be the family name.

        Raises:
            ValueError: if the string does not have the shape `name <email>`
                or contains no name before the e-mail part.
        """
        m = re.match(r"\s*([^<]+)<([^>]+)>", person)
        # str.split() without arguments already strips surrounding whitespace
        names = m.group(1).split() if m is not None else []
        if not names:
            msg = f"Invalid person string (expected `full name <x@y.z>`): {person!r}"
            raise ValueError(msg)
        mail = m.group(2).strip()

        # NOTE: for our purposes, does not matter what are given or family names,
        # we only compare on full_name anyway.
        return cls(
            **{
                "given-names": " ".join(names[:-1]),
                "family-names": names[-1],
                "email": mail,
            }
        )

    def same_person(self, other) -> bool:
        """Return whether two Person metadata records are about the same real person.

        Uses heuristic match based on orcid, email and name (whichever are provided):
        if both have an ORCID, only the ORCIDs are compared; otherwise equal
        e-mail addresses are a match, and as last resort the full names are compared.
        """
        if self.orcid is not None and other.orcid is not None:
            # having orcids is the best case, a real identifier
            # NOTE: converting to str from pydantic-internal Url object for == !
            return str(self.orcid) == str(other.orcid)

        # otherwise, try to match according to mail/name
        # sourcery skip: merge-nested-ifs
        if self.email is not None and other.email is not None:
            if self.email == other.email:
                # an email address belongs to exactly one person
                # => same email -> same person
                return True
            # otherwise, need to check name
            # (a person often has multiple email addresses)

        # no orcids, no/distinct email address
        # -> decide based on full_name (which is always present)
        return self.full_name == other.full_name

448 

449 

class ProjectMetadata(SomesyBaseModel):
    """Pydantic model for Project Metadata Input."""

    model_config = dict(extra="ignore")

    @field_validator("people")
    @classmethod
    def ensure_distinct_people(cls, people):
        """Make sure that no person is listed twice in the same person list.

        Raises:
            ValueError: for the first pair of entries that `same_person`
                considers to be the same person.
        """
        for idx, first in enumerate(people):
            # only compare with later entries, so each pair is checked once
            for second in people[idx + 1 :]:
                if not first.same_person(second):
                    continue
                p1 = pretty_repr(json.loads(first.model_dump_json()))
                p2 = pretty_repr(json.loads(second.model_dump_json()))
                msg = f"Same person is listed twice:\n{p1}\n{p2}"
                raise ValueError(msg)
        return people

    @field_validator("people")
    @classmethod
    def at_least_one_author(cls, people):
        """Make sure there is at least one author.

        Raises:
            ValueError: if no entry has a truthy `author` flag.
        """
        for person in people:
            if person.author:
                return people
        raise ValueError("At least one person must be an author of this project.")

    name: Annotated[str, Field(description="Project name.")]
    description: Annotated[str, Field(description="Project description.")]
    version: Annotated[str, Field(description="Project version.")]
    license: Annotated[LicenseEnum, Field(description="SPDX License string.")]

    homepage: Annotated[
        Optional[HttpUrlStr],
        Field(description="URL of the project homepage."),
    ] = None
    repository: Annotated[
        Optional[HttpUrlStr],
        Field(description="URL of the project source code repository."),
    ] = None
    documentation: Annotated[
        Optional[HttpUrlStr],
        Field(description="URL of the project documentation."),
    ] = None

    keywords: Annotated[
        Optional[List[str]],
        Field(min_length=1, description="Keywords that describe the project."),
    ] = None

    people: Annotated[
        List[Person],
        Field(
            min_length=1,
            description="Project authors, maintainers and contributors.",
        ),
    ]

    def authors(self):
        """Return people explicitly marked as authors."""
        return [person for person in self.people if person.author]

    def publication_authors(self):
        """Return people marked as publication authors.

        This always includes people marked as authors.
        The result is an empty list if nobody is marked as publication author.
        """
        return [person for person in self.people if person.publication_author]

    def maintainers(self):
        """Return people marked as maintainers."""
        return [person for person in self.people if person.maintainer]

    def contributors(self):
        """Return only people not marked as authors."""
        return [person for person in self.people if not person.author]

525 

526 

class SomesyInput(SomesyBaseModel):
    """The complete somesy input file (`somesy.toml`) or section (`pyproject.toml`)."""

    # Path of the file this input was loaded from; None if it was not loaded
    # via `from_input_file`. The explicit default keeps the attribute readable
    # on instances that were constructed directly.
    _origin: Optional[Path] = PrivateAttr(default=None)

    project: Annotated[
        ProjectMetadata,
        Field(description="Project metadata to be used and synchronized."),
    ]
    config: Annotated[
        Optional[SomesyConfig],
        Field(
            description="somesy tool configuration (matches CLI flags).",
            default_factory=lambda: SomesyConfig(),
        ),
    ]

    def is_somesy_file(self) -> bool:
        """Return whether this somesy input is from a somesy config file.

        That means, returns False if it is from pyproject.toml or package.json.
        An input without known origin is checked as if its path was `.`.
        """
        return self.is_somesy_file_path(self._origin or Path("."))

    @classmethod
    def is_somesy_file_path(cls, path: Path) -> bool:
        """Return whether the path looks like a somesy config file.

        That means, returns False if it is e.g. pyproject.toml or package.json.
        The check is purely textual: the path must end with `somesy.toml`.
        """
        return str(path).endswith("somesy.toml")

    @classmethod
    def from_input_file(cls, path: Path) -> SomesyInput:
        """Load somesy input from given file.

        The content returned by `get_input_content(path)` is validated as
        model input, and `path` is remembered as origin of the instance.
        """
        content = get_input_content(path)
        ret = cls(**content)
        ret._origin = path
        return ret