Coverage for src/somesy/core/models.py: 95%

214 statements  

« prev     ^ index     » next       coverage.py v7.6.0, created at 2024-07-29 07:42 +0000

1"""Core models for the somesy package.""" 

2 

3from __future__ import annotations 

4 

5import functools 

6import json 

7import re 

8from datetime import date 

9from pathlib import Path 

10from typing import Any, Dict, List, Optional 

11 

12from pydantic import BaseModel, Field, PrivateAttr, field_validator, model_validator 

13from rich.pretty import pretty_repr 

14from typing_extensions import Annotated 

15 

16from .core import get_input_content 

17from .log import SomesyLogLevel 

18from .types import ContributionTypeEnum, Country, HttpUrlStr, LicenseEnum 

19 

20# -------- 

21# Somesy configuration model 

22 

23 

class SomesyBaseModel(BaseModel):
    """Customized pydantic BaseModel for somesy.

    Apart from some general tweaks for better defaults,
    adds a private `_key_order` field, which is used to track the
    preferred order for serialization (usually coming from some existing input).

    It can be set on an instance using the set_key_order method,
    and is preserved by `model_copy()`.

    NOTE: The custom order is intended for leaf models (no further nested models),
    custom order will not work correctly across nesting layers.
    """

    model_config = dict(
        extra="forbid",
        validate_assignment=True,
        populate_by_name=True,
        str_strip_whitespace=True,
        str_min_length=1,
    )

    # ----
    # Key order magic

    _key_order: List[str] = PrivateAttr([])
    """List of field names (NOT aliases!) in the order they should be written in."""

    @classmethod
    @functools.lru_cache()  # compute once per class
    def _aliases(cls) -> Dict[str, str]:
        """Map back from alias field names to internal field names."""
        return {v.alias or k: k for k, v in cls.model_fields.items()}

    @classmethod
    def make_partial(cls, dct: Dict[str, Any]):
        """Construct unvalidated partial model from dict.

        Keys of `dct` may be field names or aliases; aliases are translated
        to field names before being handed to `model_construct`.
        """
        un_alias = cls._aliases()
        return cls.model_construct(**{un_alias.get(k) or k: v for k, v in dct.items()})

    def set_key_order(self, keys: List[str]):
        """Setter for custom key order used in serialization.

        Args:
            keys: Field names or aliases, in the preferred output order.
        """
        un_alias = self._aliases()
        # make sure we use the _actual_ field names
        self._key_order = [un_alias.get(k) or k for k in keys]

    def model_copy(self, *args, **kwargs):
        """Patched copy method (to preserve custom key order)."""
        ret = super().model_copy(*args, **kwargs)
        ret.set_key_order(list(self._key_order))
        return ret

    @staticmethod
    def _patch_kwargs_defaults(kwargs: Dict[str, Any]) -> None:
        """Force the `exclude_*` dump options on (modifies `kwargs` in place).

        NOTE: any falsy value is replaced, so an explicitly passed `False`
        is overridden as well, not only a missing key.
        """
        for key in ["exclude_defaults", "exclude_none", "exclude_unset"]:
            if not kwargs.get(key):
                kwargs[key] = True

    def _reorder_dict(self, dct: Dict[str, Any]) -> Dict[str, Any]:
        """Return dict with patched key order (according to `self._key_order`).

        Keys in `dct` not listed in `self._key_order` come after all others,
        in the order in which they appear in `dct`.

        Used to patch up `model_dump()` and `model_dump_json()`.
        """
        preferred = [k for k in (self._key_order or []) if k in dct]
        listed = set(preferred)
        # Iterate over `dct` itself (not over a set difference) so that the
        # trailing keys have a deterministic order across interpreter runs.
        trailing = [k for k in dct if k not in listed]
        return {k: dct[k] for k in preferred + trailing}

    def _rename_to_aliases(self, dct: Dict[str, Any]) -> Dict[str, Any]:
        """Return a copy of `dct` with field names replaced by their aliases."""
        # Look the fields up on the class; instance access is deprecated in
        # newer pydantic 2.x releases.
        fields = type(self).model_fields
        return {fields[k].alias or k: v for k, v in dct.items()}

    def model_dump(self, *args, **kwargs):
        """Patched dict method (to preserve custom key order).

        Accepts the same arguments as pydantic's `model_dump`; the
        `exclude_defaults`, `exclude_none` and `exclude_unset` options are
        forced on (see `_patch_kwargs_defaults`).
        """
        self._patch_kwargs_defaults(kwargs)
        by_alias = kwargs.pop("by_alias", False)

        # Dump by field name first: the custom key order is stored in terms
        # of field names, aliases are applied afterwards.
        dct = super().model_dump(*args, **kwargs, by_alias=False)
        ret = self._reorder_dict(dct)

        if by_alias:
            ret = self._rename_to_aliases(ret)
        return ret

    def model_dump_json(self, *args, **kwargs):
        """Patched json method (to preserve custom key order).

        Accepts the same arguments as pydantic's `model_dump_json`; the
        `exclude_defaults`, `exclude_none` and `exclude_unset` options are
        forced on (see `_patch_kwargs_defaults`). A passed `indent` is
        applied to the final output.
        """
        self._patch_kwargs_defaults(kwargs)
        by_alias = kwargs.pop("by_alias", False)
        # The JSON is re-serialized below, so the indentation has to be
        # applied there; otherwise it would be lost in the round trip.
        indent = kwargs.pop("indent", None)

        # loop back json through dict to apply custom key order
        dct = json.loads(super().model_dump_json(*args, **kwargs, by_alias=False))
        ret = self._reorder_dict(dct)

        if by_alias:
            ret = self._rename_to_aliases(ret)
        return json.dumps(ret, ensure_ascii=False, indent=indent)

122 

123 

124_SOMESY_TARGETS = [ 

125 "cff", 

126 "pyproject", 

127 "package_json", 

128 "codemeta", 

129 "julia", 

130 "fortran", 

131 "pom_xml", 

132 "mkdocs", 

133 "rust", 

134] 

135 

136 

class SomesyConfig(SomesyBaseModel):
    """Pydantic model for somesy tool configuration.

    Note that all fields match CLI options, and CLI options will override the
    values declared in a somesy input file (such as `somesy.toml`).
    """

    @model_validator(mode="before")
    @classmethod
    def at_least_one_target(cls, values):
        """Reject a configuration in which every sync target is switched off.

        Raises:
            ValueError: if the `no_sync_*` flag of each known target is set.
        """
        disabled_flags = (
            values.get(f"no_sync_{target}") for target in _SOMESY_TARGETS
        )
        if all(disabled_flags):
            raise ValueError(
                "No sync target enabled, nothing to do. Probably this is a mistake?"
            )
        return values

    # ---- logging-related CLI flags ----
    show_info: Annotated[
        bool, Field(description="Show basic information messages on run (-v flag).")
    ] = False
    verbose: Annotated[
        bool, Field(description="Show verbose messages on run (-vv flag).")
    ] = False
    debug: Annotated[
        bool, Field(description="Show debug messages on run (-vvv flag).")
    ] = False

    # ---- metadata source ----
    input_file: Annotated[
        Path, Field(description="Project metadata input file path.")
    ] = Path("somesy.toml")

    # ---- sync targets: one on/off flag and one file path per target ----
    no_sync_pyproject: Annotated[
        bool, Field(description="Do not sync with pyproject.toml.")
    ] = False
    pyproject_file: Annotated[
        Path, Field(description="pyproject.toml file path.")
    ] = Path("pyproject.toml")

    no_sync_package_json: Annotated[
        bool, Field(description="Do not sync with package.json.")
    ] = False
    package_json_file: Annotated[
        Path, Field(description="package.json file path.")
    ] = Path("package.json")

    no_sync_julia: Annotated[
        bool, Field(description="Do not sync with Project.toml.")
    ] = False
    julia_file: Annotated[
        Path, Field(description="Project.toml file path.")
    ] = Path("Project.toml")

    no_sync_fortran: Annotated[
        bool, Field(description="Do not sync with fpm.toml.")
    ] = False
    fortran_file: Annotated[
        Path, Field(description="fpm.toml file path.")
    ] = Path("fpm.toml")

    no_sync_pom_xml: Annotated[
        bool, Field(description="Do not sync with pom.xml.")
    ] = False
    pom_xml_file: Annotated[
        Path, Field(description="pom.xml file path.")
    ] = Path("pom.xml")

    no_sync_mkdocs: Annotated[
        bool, Field(description="Do not sync with mkdocs.yml.")
    ] = False
    mkdocs_file: Annotated[
        Path, Field(description="mkdocs.yml file path.")
    ] = Path("mkdocs.yml")

    no_sync_rust: Annotated[
        bool, Field(description="Do not sync with Cargo.toml.")
    ] = False
    rust_file: Annotated[
        Path, Field(description="Cargo.toml file path.")
    ] = Path("Cargo.toml")

    no_sync_cff: Annotated[
        bool, Field(description="Do not sync with CFF.")
    ] = False
    cff_file: Annotated[
        Path, Field(description="CFF file path.")
    ] = Path("CITATION.cff")

    no_sync_codemeta: Annotated[
        bool, Field(description="Do not sync with codemeta.json.")
    ] = False
    codemeta_file: Annotated[
        Path, Field(description="codemeta.json file path.")
    ] = Path("codemeta.json")

    def log_level(self) -> SomesyLogLevel:
        """Translate the three verbosity flags into a `SomesyLogLevel`."""
        flags = dict(info=self.show_info, verbose=self.verbose, debug=self.debug)
        return SomesyLogLevel.from_flags(**flags)

    def update_log_level(self, log_level: SomesyLogLevel):
        """Set the verbosity flags so that they reflect the given log level.

        Each flag is true only if `log_level` equals the corresponding level.
        """
        is_info = log_level == SomesyLogLevel.INFO
        is_verbose = log_level == SomesyLogLevel.VERBOSE
        is_debug = log_level == SomesyLogLevel.DEBUG

        self.show_info = is_info
        self.verbose = is_verbose
        self.debug = is_debug

    def get_input(self) -> SomesyInput:
        """Load the somesy input file and merge this configuration into it.

        Settings from the file's config section are taken first and then
        overwritten by the dumped values of this object
        (cli overrides config file).
        """
        loaded = SomesyInput.from_input_file(self.input_file)

        # start from the file's settings, then layer our own values on top
        merged: Dict[str, Any] = dict(loaded.config or {})
        merged.update(self.model_dump())

        loaded.config = SomesyConfig(**merged)
        return loaded

255 

256 

257# -------- 

258# Project metadata model (modified from CITATION.cff) 

259 

260 

class Person(SomesyBaseModel):
    """Metadata about a person in the context of a software project.

    This schema is based on CITATION.cff 1.2, modified and extended for the needs of somesy.
    """

    # NOTE: we rely on the defined aliases for direct CITATION.cff interoperability.

    orcid: Annotated[
        Optional[HttpUrlStr],
        Field(
            description="The person's ORCID url **(not required, but highly suggested)**."
        ),
    ] = None

    email: Annotated[
        str,
        Field(
            pattern=r"^[\S]+@[\S]+\.[\S]{2,}$",
            description="The person's email address.",
        ),
    ]

    family_names: Annotated[
        str, Field(alias="family-names", description="The person's family names.")
    ]
    given_names: Annotated[
        str, Field(alias="given-names", description="The person's given names.")
    ]

    name_particle: Annotated[
        Optional[str],
        Field(
            alias="name-particle",
            description="The person's name particle, e.g., a nobiliary particle or a preposition meaning 'of' or 'from'"
            " (for example 'von' in 'Alexander von Humboldt').",
            examples=["von"],
        ),
    ] = None
    name_suffix: Annotated[
        Optional[str],
        Field(
            alias="name-suffix",
            description="The person's name-suffix, e.g. 'Jr.' for Sammy Davis Jr. or 'III' for Frank Edwin Wright III.",
            examples=["Jr.", "III"],
        ),
    ] = None
    alias: Annotated[Optional[str], Field(description="The person's alias.")] = None

    affiliation: Annotated[
        Optional[str], Field(description="The person's affiliation.")
    ] = None

    address: Annotated[Optional[str], Field(description="The person's address.")] = None
    city: Annotated[Optional[str], Field(description="The person's city.")] = None
    country: Annotated[
        Optional[Country], Field(description="The person's country.")
    ] = None
    fax: Annotated[Optional[str], Field(description="The person's fax number.")] = None
    post_code: Annotated[
        Optional[str], Field(alias="post-code", description="The person's post-code.")
    ] = None
    region: Annotated[Optional[str], Field(description="The person's region.")] = None
    tel: Annotated[Optional[str], Field(description="The person's phone number.")] = (
        None
    )

    # ----
    # somesy-specific extensions
    author: Annotated[
        bool,
        Field(
            description="Indicates whether the person is an author of the project (i.e. significant contributor)."
        ),
    ] = False
    publication_author: Annotated[
        Optional[bool],
        Field(
            description="Indicates whether the person is to be listed as an author in academic citations."
        ),
    ] = None
    maintainer: Annotated[
        bool,
        Field(
            description="Indicates whether the person is a maintainer of the project (i.e. for contact)."
        ),
    ] = False

    # NOTE: CFF 1.3 (once done) might provide ways for refined contributor description. That should be implemented here.
    contribution: Annotated[
        Optional[str],
        Field(description="Summary of how the person contributed to the project."),
    ] = None
    contribution_types: Annotated[
        Optional[List[ContributionTypeEnum]],
        Field(
            description="Relevant types of contributions (see https://allcontributors.org/docs/de/emoji-key).",
            min_length=1,
        ),
    ] = None
    contribution_begin: Annotated[
        Optional[date], Field(description="Beginning date of the contribution.")
    ] = None
    contribution_end: Annotated[
        Optional[date], Field(description="Ending date of the contribution.")
    ] = None

    @model_validator(mode="before")
    @classmethod
    def author_implies_publication(cls, values):
        """Ensure consistency of author and publication_author.

        An author is always a publication author. The flag is set on a copy
        of the input, the object passed in by the caller is left untouched.

        Raises:
            ValueError: if `author` is set while `publication_author` is
                explicitly `False`.
        """
        # Anything that is not a dict is passed on unchanged, so that the
        # regular validation reports it instead of an AttributeError here.
        if not isinstance(values, dict) or not values.get("author"):
            return values

        # NOTE: explicitly check for False (different case from None = missing!)
        if values.get("publication_author") is False:
            msg = "Combining author=true and publication_author=false is invalid!"
            raise ValueError(msg)
        return {**values, "publication_author": True}

    # helper methods

    @property
    def full_name(self) -> str:
        """Return the full name of the person.

        Joins given names, name particle, family names and name suffix
        (in that order) with single spaces, skipping empty parts.
        """
        parts = (
            self.given_names,
            self.name_particle,
            self.family_names,
            self.name_suffix,
        )
        return " ".join(part for part in parts if part)

    def to_name_email_string(self) -> str:
        """Convert project metadata person object to poetry string for person format `full name <x@y.z>`."""
        return f"{self.full_name} <{self.email}>"

    @classmethod
    def from_name_email_string(cls, person: str) -> Person:
        """Return a `Person` based on an name/e-mail string like `full name <x@y.z>`.

        If the name is `A B C`, then `A B` will be the given names and `C` will be the family name.

        Raises:
            ValueError: if the string does not have the form `name <email>`
                or the name part is empty. Invalid field values are reported
                by the model validation.
        """
        m = re.match(r"\s*([^<]+)<([^>]+)>", person)
        if m is None:
            raise ValueError(f"Not a valid 'full name <email>' string: {person!r}")

        names = m.group(1).split()
        mail = m.group(2).strip()
        if not names:
            # the name part consisted of whitespace only
            raise ValueError(f"No name found in person string: {person!r}")

        # NOTE: for our purposes, does not matter what are given or family names,
        # we only compare on full_name anyway.
        return cls(
            **{
                "given-names": " ".join(names[:-1]),
                "family-names": names[-1],
                "email": mail,
            }
        )

    def same_person(self, other) -> bool:
        """Return whether two Person metadata records are about the same real person.

        Uses heuristic match based on orcid, email and name (whichever are provided).
        """
        if self.orcid is not None and other.orcid is not None:
            # having orcids is the best case, a real identifier
            # NOTE: converting to str from pydantic-internal Url object for == !
            return str(self.orcid) == str(other.orcid)

        # otherwise, try to match according to mail/name
        have_emails = self.email is not None and other.email is not None
        if have_emails and self.email == other.email:
            # an email address belongs to exactly one person
            # => same email -> same person
            return True

        # no orcids, no/distinct email address
        # (a person often has multiple email addresses)
        # -> decide based on full_name (which is always present)
        return self.full_name == other.full_name

449 

450 

class ProjectMetadata(SomesyBaseModel):
    """Pydantic model for Project Metadata Input."""

    model_config = dict(extra="ignore")

    @field_validator("people")
    @classmethod
    def ensure_distinct_people(cls, people):
        """Reject a people list in which two entries describe the same person.

        Every pair of entries is compared with `same_person`.

        Raises:
            ValueError: for the first matching pair, showing both entries.
        """
        for idx, first in enumerate(people):
            for second in people[idx + 1 :]:
                if not first.same_person(second):
                    continue
                p1 = pretty_repr(json.loads(first.model_dump_json()))
                p2 = pretty_repr(json.loads(second.model_dump_json()))
                raise ValueError(f"Same person is listed twice:\n{p1}\n{p2}")
        return people

    @field_validator("people")
    @classmethod
    def at_least_one_author(cls, people):
        """Require that some entry of the people list has `author` set.

        Raises:
            ValueError: if nobody is marked as author.
        """
        for person in people:
            if person.author:
                return people
        raise ValueError("At least one person must be an author of this project.")

    name: Annotated[str, Field(description="Project name.")]
    description: Annotated[str, Field(description="Project description.")]
    version: Annotated[str, Field(description="Project version.")]
    license: Annotated[LicenseEnum, Field(description="SPDX License string.")]

    homepage: Annotated[
        Optional[HttpUrlStr],
        Field(description="URL of the project homepage."),
    ] = None
    repository: Annotated[
        Optional[HttpUrlStr],
        Field(description="URL of the project source code repository."),
    ] = None
    documentation: Annotated[
        Optional[HttpUrlStr],
        Field(description="URL of the project documentation."),
    ] = None

    keywords: Annotated[
        Optional[List[str]],
        Field(min_length=1, description="Keywords that describe the project."),
    ] = None

    people: Annotated[
        List[Person],
        Field(
            min_length=1,
            description="Project authors, maintainers and contributors.",
        ),
    ]

    def authors(self):
        """Return the people whose `author` flag is set."""
        return list(filter(lambda person: person.author, self.people))

    def publication_authors(self):
        """Return the people whose `publication_author` flag is set.

        The list is empty if nobody carries the flag.
        """
        return list(filter(lambda person: person.publication_author, self.people))

    def maintainers(self):
        """Return the people whose `maintainer` flag is set."""
        return list(filter(lambda person: person.maintainer, self.people))

    def contributors(self):
        """Return the people whose `author` flag is not set."""
        return list(filter(lambda person: not person.author, self.people))

526 

527 

class SomesyInput(SomesyBaseModel):
    """The complete somesy input file (`somesy.toml`) or section (`pyproject.toml`)."""

    # Path of the file this input was loaded from (set by `from_input_file`).
    # The explicit default makes the attribute readable on instances that
    # were constructed directly; without it the access raises AttributeError.
    _origin: Optional[Path] = PrivateAttr(default=None)

    project: Annotated[
        ProjectMetadata,
        Field(description="Project metadata to be used and synchronized."),
    ]
    config: Annotated[
        Optional[SomesyConfig],
        Field(
            description="somesy tool configuration (matches CLI flags).",
            default_factory=lambda: SomesyConfig(),
        ),
    ]

    def is_somesy_file(self) -> bool:
        """Return whether this somesy input is from a somesy config file.

        That means, returns False if it is from pyproject.toml or package.json.
        If the origin is not known, `Path(".")` is checked instead.
        """
        return self.is_somesy_file_path(self._origin or Path("."))

    @classmethod
    def is_somesy_file_path(cls, path: Path) -> bool:
        """Return whether the path looks like a somesy config file.

        That means, returns False if it is e.g. pyproject.toml or package.json.
        The check is a plain suffix test for `somesy.toml` on the path string.
        """
        return str(path).endswith("somesy.toml")

    @classmethod
    def from_input_file(cls, path: Path) -> SomesyInput:
        """Load somesy input from given file.

        The content returned by `get_input_content` is validated into a
        model instance, and `path` is remembered as its origin.
        """
        content = get_input_content(path)
        # use `cls` so that subclasses get an instance of their own type
        ret = cls(**content)
        ret._origin = path
        return ret

565 ret._origin = path 

566 return ret