Coverage for src/dirschema/json/parse.py: 100%

56 statements  

« prev     ^ index     » next       coverage.py v7.2.5, created at 2023-05-08 15:24 +0000

1"""Helper functions to allow using JSON and YAML interchangeably + take care of $refs."""

2 

3import io 

4import json 

5from pathlib import Path 

6from typing import Any, Dict, Optional 

7from urllib.request import urlopen 

8 

9from jsonref import JsonLoader, JsonRef 

10from ruamel.yaml import YAML 

11 

12yaml = YAML(typ="safe") 

13 

14 

def to_uri(
    path: str, local_basedir: Optional[Path] = None, relative_prefix: str = ""
) -> str:
    """Given a path or URI, normalize it to an absolute URI.

    If the path is relative and without protocol, it is prefixed with
    `relative_prefix` before attempting to resolve it (the default empty prefix
    behaves like prepending `cwd://`).

    If path is already a http(s):// or file:// URI, it is returned unchanged.
    If the path is absolute (starts with a slash), file:// is prepended.
    If the path is cwd://, it is resolved based on the CWD (even if the part
    after the protocol starts with a slash).
    If the path is local://, it is resolved based on `local_basedir`
    (if missing, the CWD is used).

    Args:
        path: Path or URI to normalize.
        local_basedir: Base directory for local:// paths.
        relative_prefix: String prepended to relative, protocol-less paths.

    Returns:
        Either the unchanged http(s)://, file:// URI or a file:// URI
        with an absolute path.

    Raises:
        ValueError: If the protocol is not one of the supported ones.
    """
    path = str(path)
    local_basedir = local_basedir or Path("")

    # startswith() instead of indexing, so an empty path is treated as a
    # relative path instead of raising IndexError.
    if not path.startswith("/") and "://" not in path:
        path = relative_prefix + path

    # Split at the FIRST "://" only: a URI may legitimately contain further
    # occurrences (e.g. a URL inside a query string), which must stay in `rest`.
    prot, sep, rest = path.partition("://")
    if not sep:
        # no protocol present at all, the whole string is the path
        prot, rest = "", path

    if prot.startswith(("http", "file")):
        return path  # nothing to do
    if prot == "local":
        # relative, but not to CWD, but a custom path
        rest = str((local_basedir / rest.lstrip("/")).absolute())
    elif prot == "cwd":
        # like normal resolution of relative paths, but absolute paths are
        # still interpreted as relative, so cwd:// and cwd:/// lead to the
        # same result
        rest = str(Path(rest.lstrip("/")).absolute())
    elif prot == "":
        # relative paths are made absolute, absolute ones are kept as they are
        if not Path(rest).is_absolute():
            rest = str(Path(rest).absolute())
    else:
        raise ValueError(f"Unknown protocol: {prot}")

    return f"file://{rest}"

59 

60 

class ExtJsonLoader(JsonLoader):
    """Extends JsonLoader with capabilities.

    Adds support for:

    * loading YAML
    * resolving relative paths
    """

    def __init__(
        self, *, local_basedir: Optional[Path] = None, relative_prefix: str = ""
    ):
        """Initialize loader with URI resolution arguments.

        Both arguments are stored and handed to `to_uri` on every load.
        """
        super().__init__()
        self.local_basedir = local_basedir
        self.rel_prefix = relative_prefix

    def __call__(self, uri: str, **kwargs):
        """Try loading passed uri as YAML if loading as JSON fails.

        The uri is normalized with `to_uri` first. `kwargs` are forwarded to
        the parent loader only; they are not used for the YAML fallback.
        """
        uri = to_uri(uri, self.local_basedir, self.rel_prefix)  # normalize path/uri
        try:
            return super().__call__(uri, **kwargs)
        except json.JSONDecodeError:
            # Fetch the document again as text. The response is used as a
            # context manager so the handle is closed deterministically.
            with urlopen(uri) as response:  # nosec
                strval = response.read().decode("utf-8")
            # kwargs must NOT be passed to StringIO (it only accepts
            # initial_value/newline and raises TypeError on anything else).
            res = yaml.load(io.StringIO(strval))
            # mirror the caching of the parent loader for the YAML result
            if self.cache_results:
                self.store[uri] = res
            return res

89 

90 

def loads_json_or_yaml(dat: str):
    """Parse a string as JSON, falling back to YAML if it is not valid JSON.

    Returns whatever object the successful parser produced.
    """
    try:
        parsed = json.loads(dat)
    except json.JSONDecodeError:
        # Not valid JSON, so hand the very same text to the YAML parser.
        stream = io.StringIO(dat)
        parsed = yaml.load(stream)
    return parsed

97 

98 

def init_loader(kwargs):
    """Build an ExtJsonLoader from the loader-specific entries of `kwargs`.

    The keys `local_basedir` and `relative_prefix` are popped from the passed
    dict (i.e. the dict is modified in place), everything else is left alone.
    """
    basedir = kwargs.pop("local_basedir", None)
    prefix = kwargs.pop("relative_prefix", "")
    return ExtJsonLoader(local_basedir=basedir, relative_prefix=prefix)

105 

106 

def loads_json(dat: str, **kwargs) -> Dict[str, Any]:
    """Parse a YAML/JSON string and resolve all its refs, local and remote.

    Loader arguments (see `init_loader`) are taken out of `kwargs`, the
    remaining ones are passed on to `JsonRef.replace_refs`.
    """
    loader = init_loader(kwargs)  # must run first, it strips its keys from kwargs
    document = loads_json_or_yaml(dat)
    return JsonRef.replace_refs(document, loader=loader, **kwargs)

111 

112 

def load_json(uri: str, **kwargs) -> Dict[str, Any]:
    """Load YAML/JSON from a file or the network and resolve all its refs.

    Loader arguments (see `init_loader`) are taken out of `kwargs`, the
    remaining ones are passed on to `JsonRef.replace_refs`.
    """
    loader = init_loader(kwargs)  # must run first, it strips its keys from kwargs
    document = loader(str(uri))
    return JsonRef.replace_refs(document, loader=loader, **kwargs)