Coverage for src/somesy/core/models.py: 95%
219 statements
« prev ^ index » next coverage.py v7.6.0, created at 2025-01-15 10:14 +0000
« prev ^ index » next coverage.py v7.6.0, created at 2025-01-15 10:14 +0000
1"""Core models for the somesy package."""
3from __future__ import annotations
5import functools
6import json
7import re
8from datetime import date
9from pathlib import Path
10from typing import Any, Dict, List, Optional
12from pydantic import BaseModel, Field, PrivateAttr, field_validator, model_validator
13from rich.pretty import pretty_repr
14from typing_extensions import Annotated
16from .core import get_input_content
17from .log import SomesyLogLevel
18from .types import ContributionTypeEnum, Country, HttpUrlStr, LicenseEnum
20# --------
21# Somesy configuration model
class SomesyBaseModel(BaseModel):
    """Customized pydantic BaseModel for somesy.

    Apart from some general tweaks for better defaults,
    adds a private `_key_order` field, which is used to track the
    preferred order for serialization (usually coming from some existing input).

    It can be set on an instance using the set_key_order method,
    and is preserved by `model_copy()`.

    NOTE: The custom order is intended for leaf models (no further nested models),
    custom order will not work correctly across nesting layers.
    """

    model_config = dict(
        extra="forbid",
        validate_assignment=True,
        populate_by_name=True,
        str_strip_whitespace=True,
        str_min_length=1,
    )

    # ----
    # Key order magic

    _key_order: List[str] = PrivateAttr([])
    """List of field names (NOT aliases!) in the order they should be written in."""

    @classmethod
    @functools.lru_cache()  # compute once per class
    def _aliases(cls) -> Dict[str, str]:
        """Map back from alias field names to internal field names.

        Fields without an alias map their own name to itself.
        """
        return {v.alias or k: k for k, v in cls.model_fields.items()}

    @classmethod
    def make_partial(cls, dct):
        """Construct unvalidated partial model from dict.

        Handles aliases correctly, unlike `construct`: keys given as aliases
        are translated to the internal field names before construction.
        """
        un_alias = cls._aliases()
        return cls.model_construct(**{un_alias.get(k) or k: v for k, v in dct.items()})

    def set_key_order(self, keys: List[str]):
        """Setter for custom key order used in serialization.

        `keys` may contain field names or aliases; aliases are converted so
        that only the _actual_ field names are stored.
        """
        un_alias = self._aliases()
        self._key_order = [un_alias.get(k) or k for k in keys]

    def model_copy(self, *args, **kwargs):
        """Patched copy method (to preserve custom key order)."""
        ret = super().model_copy(*args, **kwargs)
        ret.set_key_order(list(self._key_order))
        return ret

    @staticmethod
    def _patch_kwargs_defaults(kwargs):
        """Switch on the `exclude_*` dump options the caller did not specify.

        Mutates `kwargs` in place. Only missing (or `None`) entries are set to
        `True`; an explicitly passed `False` is respected instead of being
        silently overridden.
        """
        for key in ("exclude_defaults", "exclude_none", "exclude_unset"):
            if kwargs.get(key) is None:
                kwargs[key] = True

    def _reorder_dict(self, dct):
        """Return dict with patched key order (according to `self._key_order`).

        Keys in `dct` not listed in `self._key_order` come after all others,
        in the order in which they appear in `dct`.

        Used to patch up `model_dump()` and `model_dump_json()`.
        """
        preferred = self._key_order or []
        ordered = {k: dct[k] for k in preferred if k in dct}
        # Append the remaining keys following the order of `dct` itself.
        # Collecting them through a set would make the result depend on
        # string hashing and therefore differ between interpreter runs.
        for k, v in dct.items():
            ordered.setdefault(k, v)
        return ordered

    def _rename_to_aliases(self, dct):
        """Return `dct` with each top-level field name replaced by its alias.

        Fields without an alias keep their name. The field definitions are
        looked up on the class rather than on the instance.
        """
        fields = type(self).model_fields
        return {fields[k].alias or k: v for k, v in dct.items()}

    def model_dump(self, *args, **kwargs):
        """Patched dict method (to preserve custom key order).

        Unless requested otherwise, unset, default and `None` values are
        excluded. `by_alias` is applied after reordering and only renames the
        top-level keys of the result.
        """
        self._patch_kwargs_defaults(kwargs)
        by_alias = kwargs.pop("by_alias", False)

        # always dump with internal names, because the key order uses them
        dct = super().model_dump(*args, **kwargs, by_alias=False)
        ret = self._reorder_dict(dct)

        if by_alias:
            ret = self._rename_to_aliases(ret)
        return ret

    def model_dump_json(self, *args, **kwargs):
        """Patched json method (to preserve custom key order).

        Same defaults and `by_alias` handling as `model_dump()`. The result is
        re-encoded with `json.dumps(..., ensure_ascii=False)`.
        """
        self._patch_kwargs_defaults(kwargs)
        by_alias = kwargs.pop("by_alias", False)

        # loop back json through dict to apply custom key order
        dct = json.loads(super().model_dump_json(*args, **kwargs, by_alias=False))
        ret = self._reorder_dict(dct)

        if by_alias:
            ret = self._rename_to_aliases(ret)
        return json.dumps(ret, ensure_ascii=False)
# Identifiers of the sync targets known to the configuration model.
# `SomesyConfig.at_least_one_target` builds the flag name `no_sync_<target>`
# from each entry to check whether every target has been disabled.
_SOMESY_TARGETS = [
    "cff",
    "pyproject",
    "package_json",
    "codemeta",
    "julia",
    "fortran",
    "pom_xml",
    "mkdocs",
    "rust",
]
class SomesyConfig(SomesyBaseModel):
    """Pydantic model for somesy tool configuration.

    Note that all fields match CLI options, and CLI options will override the
    values declared in a somesy input file (such as `somesy.toml`).
    """

    @model_validator(mode="before")
    @classmethod
    def at_least_one_target(cls, values):
        """Reject a configuration in which every sync target is switched off."""
        everything_disabled = all(
            values.get(f"no_sync_{target}") for target in _SOMESY_TARGETS
        )
        if everything_disabled:
            msg = "No sync target enabled, nothing to do. Probably this is a mistake?"
            raise ValueError(msg)

        return values

    # cli flags
    show_info: Annotated[
        bool, Field(description="Show basic information messages on run (-v flag).")
    ] = False
    verbose: Annotated[
        bool, Field(description="Show verbose messages on run (-vv flag).")
    ] = False
    debug: Annotated[
        bool, Field(description="Show debug messages on run (-vvv flag).")
    ] = False

    input_file: Annotated[
        Path, Field(description="Project metadata input file path.")
    ] = Path("somesy.toml")

    no_sync_pyproject: Annotated[
        bool, Field(description="Do not sync with pyproject.toml.")
    ] = False
    pyproject_file: Annotated[
        Path, Field(description="pyproject.toml file path.")
    ] = Path("pyproject.toml")

    no_sync_package_json: Annotated[
        bool, Field(description="Do not sync with package.json.")
    ] = False
    package_json_file: Annotated[
        Path, Field(description="package.json file path.")
    ] = Path("package.json")

    no_sync_julia: Annotated[
        bool, Field(description="Do not sync with Project.toml.")
    ] = False
    julia_file: Annotated[
        Path, Field(description="Project.toml file path.")
    ] = Path("Project.toml")

    no_sync_fortran: Annotated[
        bool, Field(description="Do not sync with fpm.toml.")
    ] = False
    fortran_file: Annotated[
        Path, Field(description="fpm.toml file path.")
    ] = Path("fpm.toml")

    no_sync_pom_xml: Annotated[
        bool, Field(description="Do not sync with pom.xml.")
    ] = False
    pom_xml_file: Annotated[
        Path, Field(description="pom.xml file path.")
    ] = Path("pom.xml")

    no_sync_mkdocs: Annotated[
        bool, Field(description="Do not sync with mkdocs.yml.")
    ] = False
    mkdocs_file: Annotated[
        Path, Field(description="mkdocs.yml file path.")
    ] = Path("mkdocs.yml")

    no_sync_rust: Annotated[
        bool, Field(description="Do not sync with Cargo.toml.")
    ] = False
    rust_file: Annotated[
        Path, Field(description="Cargo.toml file path.")
    ] = Path("Cargo.toml")

    no_sync_cff: Annotated[
        bool, Field(description="Do not sync with CFF.")
    ] = False
    cff_file: Annotated[
        Path, Field(description="CFF file path.")
    ] = Path("CITATION.cff")

    no_sync_codemeta: Annotated[
        bool, Field(description="Do not sync with codemeta.json.")
    ] = False
    codemeta_file: Annotated[
        Path, Field(description="codemeta.json file path.")
    ] = Path("codemeta.json")

    def log_level(self) -> SomesyLogLevel:
        """Translate the three verbosity flags into a `SomesyLogLevel`."""
        flags = {
            "info": self.show_info,
            "verbose": self.verbose,
            "debug": self.debug,
        }
        return SomesyLogLevel.from_flags(**flags)

    def update_log_level(self, log_level: SomesyLogLevel):
        """Set the verbosity flags so that they reflect the passed log level.

        Each flag is true exactly if `log_level` equals its associated level.
        """
        flag_levels = (
            ("show_info", SomesyLogLevel.INFO),
            ("verbose", SomesyLogLevel.VERBOSE),
            ("debug", SomesyLogLevel.DEBUG),
        )
        for flag, level in flag_levels:
            setattr(self, flag, log_level == level)

    def get_input(self) -> SomesyInput:
        """Load the somesy input selected by this config and merge both configs.

        The input is read from `self.input_file`. Its config section provides
        the base values, which are then overridden by the dumped values of
        this instance (cli overrides config file).
        """
        loaded = SomesyInput.from_input_file(self.input_file)

        merged: Dict[str, Any] = dict(loaded.config or {})
        merged.update(self.model_dump())

        loaded.config = SomesyConfig(**merged)
        return loaded
257# --------
258# Project metadata model (modified from CITATION.cff)
class Person(SomesyBaseModel):
    """Metadata about a person in the context of a software project.

    This schema is based on CITATION.cff 1.2, modified and extended for the needs of somesy.
    """

    # NOTE: we rely on the defined aliases for direct CITATION.cff interoperability.

    orcid: Annotated[
        Optional[HttpUrlStr],
        Field(
            description="The person's ORCID url **(not required, but highly suggested)**."
        ),
    ] = None

    email: Annotated[
        Optional[str],
        Field(
            pattern=r"^[\S]+@[\S]+\.[\S]{2,}$",
            description="The person's email address.",
        ),
    ] = None

    family_names: Annotated[
        str, Field(alias="family-names", description="The person's family names.")
    ]
    given_names: Annotated[
        str, Field(alias="given-names", description="The person's given names.")
    ]

    name_particle: Annotated[
        Optional[str],
        Field(
            alias="name-particle",
            description="The person's name particle, e.g., a nobiliary particle or a preposition meaning 'of' or 'from'"
            " (for example 'von' in 'Alexander von Humboldt').",
            examples=["von"],
        ),
    ] = None
    name_suffix: Annotated[
        Optional[str],
        Field(
            alias="name-suffix",
            description="The person's name-suffix, e.g. 'Jr.' for Sammy Davis Jr. or 'III' for Frank Edwin Wright III.",
            examples=["Jr.", "III"],
        ),
    ] = None
    alias: Annotated[Optional[str], Field(description="The person's alias.")] = None

    affiliation: Annotated[
        Optional[str], Field(description="The person's affiliation.")
    ] = None

    address: Annotated[Optional[str], Field(description="The person's address.")] = None
    city: Annotated[Optional[str], Field(description="The person's city.")] = None
    country: Annotated[
        Optional[Country], Field(description="The person's country.")
    ] = None
    fax: Annotated[Optional[str], Field(description="The person's fax number.")] = None
    post_code: Annotated[
        Optional[str], Field(alias="post-code", description="The person's post-code.")
    ] = None
    region: Annotated[Optional[str], Field(description="The person's region.")] = None
    tel: Annotated[Optional[str], Field(description="The person's phone number.")] = (
        None
    )

    # ----
    # somesy-specific extensions
    author: Annotated[
        bool,
        Field(
            description="Indicates whether the person is an author of the project (i.e. significant contributor)."
        ),
    ] = False
    publication_author: Annotated[
        Optional[bool],
        Field(
            description="Indicates whether the person is to be listed as an author in academic citations."
        ),
    ] = None
    maintainer: Annotated[
        bool,
        Field(
            description="Indicates whether the person is a maintainer of the project (i.e. for contact)."
        ),
    ] = False

    # NOTE: CFF 1.3 (once done) might provide ways for refined contributor description. That should be implemented here.
    contribution: Annotated[
        Optional[str],
        Field(description="Summary of how the person contributed to the project."),
    ] = None
    contribution_types: Annotated[
        Optional[List[ContributionTypeEnum]],
        Field(
            description="Relevant types of contributions (see https://allcontributors.org/docs/de/emoji-key).",
            min_length=1,
        ),
    ] = None
    contribution_begin: Annotated[
        Optional[date], Field(description="Beginning date of the contribution.")
    ] = None
    contribution_end: Annotated[
        Optional[date], Field(description="Ending date of the contribution.")
    ] = None

    @model_validator(mode="before")
    @classmethod
    def author_implies_publication(cls, values):
        """Ensure consistency of author and publication_author.

        An author is always a publication author, so `publication_author` is
        forced to `True` whenever `author` is truthy.

        Raises:
            ValueError: If `author` is truthy while `publication_author` is
                explicitly `False`.
        """
        if values.get("author"):
            # NOTE: explicitly check for False (different case from None = missing!)
            if values.get("publication_author") is False:
                msg = "Combining author=true and publication_author=false is invalid!"
                raise ValueError(msg)
            # Build a new mapping instead of writing into the input, which may
            # be an object that still belongs to the caller.
            values = {**values, "publication_author": True}
        return values

    # helper methods

    @property
    def full_name(self) -> str:
        """Return the full name of the person.

        Joins given names, name particle, family names and name suffix (in
        that order) with single spaces, skipping the parts that are empty.
        """
        parts = (
            self.given_names,
            self.name_particle,
            self.family_names,
            self.name_suffix,
        )
        return " ".join(part for part in parts if part)

    def to_name_email_string(self) -> str:
        """Convert project metadata person object to poetry string for person format `full name <x@y.z>`.

        Without an email address only the full name is returned.
        """
        if self.email:
            return f"{self.full_name} <{self.email}>"
        return self.full_name

    @classmethod
    def from_name_email_string(cls, person: str) -> Person:
        """Return a `Person` based on a name/e-mail string like `full name <x@y.z>`.

        If the name is `A B C`, then `A B` will be the given names and `C` will be the family name.
        A string without an `<...>` part is treated as a name only.

        Raises:
            ValueError: If the string contains no name at all. Names that the
                model itself rejects still fail in model validation.
        """
        m = re.match(r"\s*([^<]+)<([^>]+)>", person)
        if m is None:
            name_part, mail = person, None
        else:
            name_part, mail = m.group(1), m.group(2).strip()

        names = name_part.split()
        if not names:
            # fail with a clear message instead of an IndexError below
            raise ValueError(f"Cannot extract a name from person string {person!r}.")

        # NOTE: for our purposes, does not matter what are given or family names,
        # we only compare on full_name anyway.
        fields = {
            "given-names": " ".join(names[:-1]),
            "family-names": names[-1],
        }
        if mail is not None:
            fields["email"] = mail
        return Person(**fields)

    def same_person(self, other) -> bool:
        """Return whether two Person metadata records are about the same real person.

        Uses heuristic match based on orcid, email and name (whichever are provided).
        """
        if self.orcid is not None and other.orcid is not None:
            # having orcids is the best case, a real identifier
            # NOTE: converting to str from pydantic-internal Url object for == !
            return str(self.orcid) == str(other.orcid)

        # otherwise, try to match according to mail/name
        # sourcery skip: merge-nested-ifs
        if self.email is not None and other.email is not None:
            if self.email == other.email:
                # an email address belongs to exactly one person
                # => same email -> same person
                return True
            # otherwise, need to check name
            # (a person often has multiple email addresses)

        # no orcids, no/distinct email address
        # -> decide based on full_name (which is always present)
        return self.full_name == other.full_name
class ProjectMetadata(SomesyBaseModel):
    """Pydantic model for Project Metadata Input."""

    model_config = {"extra": "ignore"}

    @field_validator("people")
    @classmethod
    def ensure_distinct_people(cls, people):
        """Reject a person list in which two entries describe the same person.

        Every pair of entries is compared using `Person.same_person`.
        """
        for pos, first in enumerate(people):
            for second in people[pos + 1 :]:
                if not first.same_person(second):
                    continue
                p1, p2 = (
                    pretty_repr(json.loads(entry.model_dump_json()))
                    for entry in (first, second)
                )
                raise ValueError(f"Same person is listed twice:\n{p1}\n{p2}")
        return people

    @field_validator("people")
    @classmethod
    def at_least_one_author(cls, people):
        """Reject a person list that does not contain any author."""
        if any(person.author for person in people):
            return people
        raise ValueError("At least one person must be an author of this project.")

    name: Annotated[str, Field(description="Project name.")]
    description: Annotated[str, Field(description="Project description.")]
    version: Annotated[str, Field(description="Project version.")]
    license: Annotated[LicenseEnum, Field(description="SPDX License string.")]

    homepage: Annotated[
        Optional[HttpUrlStr],
        Field(description="URL of the project homepage."),
    ] = None
    repository: Annotated[
        Optional[HttpUrlStr],
        Field(description="URL of the project source code repository."),
    ] = None
    documentation: Annotated[
        Optional[HttpUrlStr],
        Field(description="URL of the project documentation."),
    ] = None

    keywords: Annotated[
        Optional[List[str]],
        Field(min_length=1, description="Keywords that describe the project."),
    ] = None

    people: Annotated[
        List[Person],
        Field(
            min_length=1,
            description="Project authors, maintainers and contributors.",
        ),
    ]

    def authors(self):
        """Return the people whose `author` flag is set."""
        return [person for person in self.people if person.author]

    def publication_authors(self):
        """Return the people whose `publication_author` flag is set.

        The list is empty if nobody is marked as publication author.
        """
        return [person for person in self.people if person.publication_author]

    def maintainers(self):
        """Return the people whose `maintainer` flag is set."""
        return [person for person in self.people if person.maintainer]

    def contributors(self):
        """Return the people whose `author` flag is not set."""
        return [person for person in self.people if not person.author]
class SomesyInput(SomesyBaseModel):
    """The complete somesy input file (`somesy.toml`) or section (`pyproject.toml`)."""

    # Path of the file this input was loaded from. It is only filled in by
    # `from_input_file`; the explicit default keeps the attribute readable
    # (as None) on instances that were created in any other way.
    _origin: Optional[Path] = PrivateAttr(default=None)

    project: Annotated[
        ProjectMetadata,
        Field(description="Project metadata to be used and synchronized."),
    ]
    config: Annotated[
        Optional[SomesyConfig],
        Field(
            description="somesy tool configuration (matches CLI flags).",
            default_factory=lambda: SomesyConfig(),
        ),
    ]

    def is_somesy_file(self) -> bool:
        """Return whether this somesy input is from a somesy config file.

        That means, returns False if it is from pyproject.toml or package.json.
        If the origin of the input is not known, `Path(".")` is checked
        instead, which yields False.
        """
        return self.is_somesy_file_path(self._origin or Path("."))

    @classmethod
    def is_somesy_file_path(cls, path: Path) -> bool:
        """Return whether the path looks like a somesy config file.

        That means, returns False if it is e.g. pyproject.toml or package.json.
        The check is purely textual: the path has to end with `somesy.toml`.
        """
        return str(path).endswith("somesy.toml")

    @classmethod
    def from_input_file(cls, path: Path) -> SomesyInput:
        """Load somesy input from given file and remember the file as its origin."""
        content = get_input_content(path)
        ret = SomesyInput(**content)
        ret._origin = path
        return ret