Coverage for src/metador_core/schema/base.py: 100%

45 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-11-02 09:33 +0000

1import json 

2from pathlib import Path 

3from typing import Union 

4 

5from pydantic import BaseModel, Extra, ValidationError 

6from pydantic_yaml import parse_yaml_file_as, parse_yaml_raw_as, to_yaml_str 

7 

8from .encoder import DynEncoderModelMetaclass 

9from .parser import ParserMixin 

10 

11 

12def _mod_def_dump_args(kwargs): 

13 """Set `by_alias=True` in given kwargs dict, if not set explicitly.""" 

14 if "by_alias" not in kwargs: 

15 kwargs["by_alias"] = True # e.g. so we get correct @id, etc fields 

16 if "exclude_none" not in kwargs: 

17 kwargs["exclude_none"] = True # we treat None as "missing" so leave it out 

18 return kwargs 

19 

20 

class BaseModelPlus(ParserMixin, BaseModel, metaclass=DynEncoderModelMetaclass):
    """Extended pydantic BaseModel with some good defaults.

    Used as basis for various entities, including:
    * Metadata schemas
    * Harvester arguments

    All serialization helpers (``dict``, ``json`` and everything built on
    top of them) default to ``by_alias=True`` and ``exclude_none=True``
    unless the caller passes these options explicitly.
    """

    class Config:
        # keep extra fields by default
        extra = Extra.allow
        # make PrivateAttr wrappers not always needed
        underscore_attrs_are_private = True
        # serialize enums properly
        use_enum_values = True
        # when alias is set, still allow using field name
        # (we use aliases for invalid attribute names in Python)
        allow_population_by_field_name = True
        # users should jump through hoops to add invalid stuff
        validate_assignment = True
        # defaults should also be validated
        validate_all = True
        # for JSON compat
        allow_inf_nan = False
        # pydantic anystr config: non-empty, non-whitespace
        # (but we prefer NonEmptyStr anyway for inheritance)
        anystr_strip_whitespace = True
        min_anystr_length = 1

    def dict(self, *args, **kwargs):
        """Return a dict.

        Note that this will eliminate all pydantic models,
        but might still contain complex value types.
        """
        return super().dict(*args, **_mod_def_dump_args(kwargs))

    def json(self, *args, **kwargs) -> str:
        """Return serialized JSON as string."""
        return super().json(*args, **_mod_def_dump_args(kwargs))

    def json_dict(self, **kwargs):
        """Return a JSON-compatible dict.

        Uses round-trip through JSON serialization, so ``kwargs`` are the
        same options that ``json`` accepts.
        """
        return json.loads(self.json(**kwargs))

    def yaml(self, **kwargs) -> str:
        """Return serialized YAML as string.

        Keyword arguments are forwarded to ``to_yaml_str`` instead of being
        silently dropped. Calling without arguments behaves as before.
        """
        # NOTE(review): pydantic_yaml is expected to hand keywords it does not
        # consume itself on to this model's ``json()`` (which applies our dump
        # defaults) -- confirm against the installed pydantic_yaml version.
        return to_yaml_str(self, **kwargs)

    @classmethod
    def parse_file(cls, path: Union[str, Path]):
        """Load and validate an instance of this model from the file at ``path``.

        The file is read with the YAML parser of pydantic_yaml.
        """
        return parse_yaml_file_as(cls, path)

    @classmethod
    def parse_raw(cls, dat: Union[str, bytes], **kwargs):
        """Parse an instance of this model from a raw string or bytes.

        First tries the regular pydantic ``parse_raw`` (receiving ``kwargs``).
        If that raises a ``ValidationError``, the data is parsed again as
        YAML; ``kwargs`` are not used for the YAML attempt. An error raised
        by the YAML attempt propagates to the caller.
        """
        try:
            return super().parse_raw(dat, **kwargs)
        except ValidationError:
            return parse_yaml_raw_as(cls, dat)

    def __bytes__(self) -> bytes:
        """Serialize to JSON and return UTF-8 encoded bytes to be written in a file."""
        # add a newline, as otherwise behaviour with text editors will be confusing
        # (e.g. vim automatically adds a trailing newline that it hides)
        # https://stackoverflow.com/questions/729692/why-should-text-files-end-with-a-newline
        return (self.json() + "\n").encode(encoding="utf-8")

    def __str__(self) -> str:
        """Return the JSON serialization, pretty-printed with an indent of 2."""
        return self.json(indent=2)