Coverage for src/metador_core/harvester/common.py: 100%
30 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-11-02 09:33 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-11-02 09:33 +0000
1import magic
2from PIL import Image
4from ..harvester import FileHarvester
5from ..plugins import schemas
6from ..util.hashsums import hashsum
# Schema classes looked up by name from the schema plugin group, bound once
# at import time under short module-level aliases.
FileMeta = schemas["core.file"]  # schema registered as "core.file"
ImageFileMeta = schemas["core.imagefile"]  # schema registered as "core.imagefile"
BibMeta = schemas["core.bib"]  # schema registered as "core.bib"
TableMeta = schemas["core.table"]  # schema registered as "core.table"
class FileMetaHarvester(FileHarvester):
    """Default harvester for basic ``core.file`` metadata.

    Harvests the file name, the file size, the MIME type and the SHA-256
    hashsum of the file given by ``self.args.filepath``.
    """

    class Plugin:
        # Plugin name and version of this harvester.
        name = "core.file.generic"
        version = (0, 1, 0)
        # Schema plugin that the harvested metadata is an instance of.
        returns = schemas.PluginRef(name="core.file", version=(0, 1, 0))

    def run(self):
        """Collect the metadata and return it as an instance of ``self.schema``.

        Returns:
            ``self.schema(...)`` populated with ``filename``, ``contentSize``,
            ``sha256`` and ``encodingFormat``.

        Raises:
            OSError: if the file cannot be stat'ed or opened for reading.
        """
        path = self.args.filepath

        size = path.stat().st_size
        # Open the file in a context manager so the handle is closed as soon
        # as the digest has been computed, rather than whenever the garbage
        # collector gets around to it.
        with open(path, "rb") as stream:
            digest = hashsum(stream, "sha256")
        mimetype = magic.from_file(path, mime=True)

        return self.schema(
            filename=path.name,
            contentSize=size,
            sha256=digest,
            encodingFormat=mimetype,
        )
class ImageFileMetaHarvester(FileHarvester):
    """Harvester that reads the pixel dimensions of an image file.

    The image at ``self.args.filepath`` is opened with Pillow and its
    ``size`` is reported as ``width`` and ``height``.
    """

    class Plugin:
        # Plugin name and version of this harvester.
        name = "core.imagefile.dim"
        version = (0, 1, 0)
        # Schema plugin that the harvested metadata is an instance of.
        returns = schemas.PluginRef(name="core.imagefile", version=(0, 1, 0))

    def run(self):
        """Return ``self.schema`` filled with the image's width and height."""
        image_path = self.args.filepath

        with Image.open(image_path) as image:
            img_width, img_height = image.size

        return self.schema(width=img_width, height=img_height)