Coverage for src/metador_core/util/hashsums.py: 50%
64 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-11-02 09:33 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-11-02 09:33 +0000
1from __future__ import annotations
3import hashlib
4import os
5from io import BytesIO
6from pathlib import Path
7from typing import Any, BinaryIO, Dict, Optional, Union
# Registry mapping an algorithm name to its hashlib constructor.
# "md5" and "sha1" are currently not registered.
_hash_alg: Dict[str, Any] = {
    name: getattr(hashlib, name) for name in ("sha256", "sha512")
}
"""Supported hashsum algorithms."""
def hashsum(data: Union[bytes, BinaryIO], alg: str) -> str:
    """Compute the hex digest of binary data using the selected algorithm.

    Args:
        data: Either in-memory binary data, or a binary stream. A stream is
            consumed from its current position until `read` returns nothing.
        alg: Name of the algorithm; must be a key of `_hash_alg`.

    Returns:
        The hexadecimal digest string (without any algorithm prefix).

    Raises:
        ValueError: If `alg` is not a supported algorithm.
    """
    try:
        h = _hash_alg[alg]()
    except KeyError:
        # The KeyError carries no extra information, so do not chain it.
        raise ValueError(f"Unsupported hashsum: {alg}") from None

    # In-memory data can be fed to the hash object directly.
    if isinstance(data, (bytes, bytearray, memoryview)):
        h.update(data)
        return h.hexdigest()

    # Read in reasonably large chunks: the digest block size (64-128 bytes)
    # would cause a needless number of read() calls on big files.
    chunk_size = 64 * 1024
    while True:
        chunk = data.read(chunk_size)
        if not chunk:
            break
        h.update(chunk)

    return h.hexdigest()
DEF_HASH_ALG: str = "sha256"
"""Default hashsum algorithm; its name is also the prefix of a qualified hashsum."""
def qualified_hashsum(data: Union[bytes, BinaryIO], alg: str = DEF_HASH_ALG):
    """Return the hashsum of `data` prefixed by the algorithm name.

    The result has the shape `<alg>:<hex digest>`, where the digest is
    whatever `hashsum` returns for the same arguments.
    """
    digest = hashsum(data, alg)
    return f"{alg}:{digest}"
def file_hashsum(path: Path, alg: str = DEF_HASH_ALG):
    """Return the qualified hashsum of the contents of the file at `path`.

    The file is opened in binary mode and handed to `qualified_hashsum`.
    """
    with open(path, "rb") as stream:
        digest = qualified_hashsum(stream, alg)
    return digest
DirHashsums = Dict[str, Any]
"""Nested dict mirroring a directory tree.

Keys are entry names. A str value stands for a file (its checksum) or a
symlink (its target path); a dict value stands for a sub-directory.
"""
def rel_symlink(base: Path, dir: Path) -> Optional[Path]:
    """Express the target of the symlink `dir` as a path relative to `base`.

    The link target is interpreted relative to the directory containing the
    link, then both it and `base` are fully resolved, which also gets rid of
    any `..` segments.

    Returns None if the resolved target is not located below `base`.
    """
    link_target = os.readlink(str(dir))
    unresolved = dir.parent / link_target
    try:
        resolved_target = unresolved.resolve()
        resolved_base = base.resolve()
        return resolved_target.relative_to(resolved_base)
    except ValueError:
        # the link leads to a location outside of the base directory
        return None
def dir_hashsums(dir: Path, alg: str = DEF_HASH_ALG) -> DirHashsums:
    """Return hashsums of all files in a directory as a nested dict.

    Resulting paths are relative to the provided `dir`: each sub-directory
    becomes a nested dict, each file maps to its qualified hashsum.

    In-directory symlinks are treated like files and the target is stored
    (as `symlink:<target relative to dir>`) instead of computing a checksum.

    Out-of-directory symlinks are not allowed.

    Raises:
        ValueError: If a symlink inside `dir` points outside of `dir`.
    """
    ret: Dict[str, Any] = {}
    for path in dir.rglob("*"):
        relpath = path.relative_to(dir)

        # Symlinks must be detected before regular files: is_file() follows
        # links, so a link to a file would otherwise be hashed like a file
        # and an out-of-directory target would go unnoticed.
        is_sym = path.is_symlink()
        is_leaf = is_sym or path.is_file()

        # Leaves need dicts up to their parent, anything else gets a dict
        # for its own path as well. Using `parts` keeps this independent of
        # the platform path separator.
        dir_parts = relpath.parent.parts if is_leaf else relpath.parts

        # create nested dicts, if not existing yet
        curr = ret
        for seg in dir_parts:
            curr = curr.setdefault(seg, {})

        # store symlink target or file hashsum
        if is_sym:
            sym_trg = rel_symlink(dir, path)
            if sym_trg is None:
                raise ValueError(f"Symlink inside '{dir}' points to the outside!")
            curr[relpath.name] = "symlink:" + str(sym_trg)
        elif is_leaf:
            curr[relpath.name] = file_hashsum(path, alg)

    return ret
118# ----