Coverage for src/metador_core/harvester/common.py: 100%

30 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-11-02 09:33 +0000

1import magic 

2from PIL import Image 

3 

4from ..harvester import FileHarvester 

5from ..plugins import schemas 

6from ..util.hashsums import hashsum 

7 

# Schema lookups by plugin name from the ``schemas`` plugin group.
# FileMeta / ImageFileMeta correspond to the schemas the harvesters in this
# module declare as their ``returns`` target.
FileMeta = schemas["core.file"]
ImageFileMeta = schemas["core.imagefile"]

# Looked up here as well; not referenced by the harvesters visible in this
# part of the module.
BibMeta = schemas["core.bib"]
TableMeta = schemas["core.table"]

12 

13 

class FileMetaHarvester(FileHarvester):
    """Default harvester for basic ``core.file`` metadata.

    Harvests file name, file size, mimetype and hashsum of the file.
    """

    class Plugin:
        name = "core.file.generic"
        version = (0, 1, 0)
        returns = schemas.PluginRef(name="core.file", version=(0, 1, 0))

    def run(self):
        """Collect the basic metadata of the file at ``self.args.filepath``.

        Returns:
            An instance of ``self.schema`` populated with ``filename``,
            ``contentSize`` (taken from ``stat().st_size``), ``sha256``
            (result of ``hashsum`` over the file contents) and
            ``encodingFormat`` (MIME type reported by ``magic``).
        """
        # NOTE(review): filepath is used like a pathlib.Path (.stat(), .name)
        # -- confirm against the FileHarvester argument model.
        path = self.args.filepath

        sz = path.stat().st_size

        # Use a context manager so the file handle is closed deterministically
        # instead of being left open until garbage collection.
        # Assumes hashsum fully consumes the stream before returning.
        with open(path, "rb") as stream:
            hs = hashsum(stream, "sha256")

        mt = magic.from_file(path, mime=True)
        return self.schema(
            filename=path.name, contentSize=sz, sha256=hs, encodingFormat=mt
        )

34 

35 

class ImageFileMetaHarvester(FileHarvester):
    """Harvester that reports the width and height of an image file."""

    class Plugin:
        name = "core.imagefile.dim"
        version = (0, 1, 0)
        returns = schemas.PluginRef(name="core.imagefile", version=(0, 1, 0))

    def run(self):
        """Open the file at ``self.args.filepath`` with PIL and read its size.

        Returns:
            An instance of ``self.schema`` with ``width`` and ``height`` set
            from the image's ``size`` attribute.
        """
        # Only the size tuple is needed, so grab it and let the image close.
        with Image.open(self.args.filepath) as image:
            dimensions = image.size

        w, h = dimensions
        return self.schema(width=w, height=h)