VERIDEX.V1 / backend /utils /metadata_extractor.py
shadow55gh
VERIDEX initial commit
2edcc50
"""VERIDEX — Metadata Extractor v4.0"""
import io
import logging
_log = logging.getLogger(__name__)

# EXIF tag names that are copied into the result; every other tag is ignored.
_EXIF_TAGS_OF_INTEREST = frozenset({
    "Make", "Model", "DateTime", "GPSInfo", "Software",
    "ExifVersion", "FocalLength", "ISOSpeedRatings",
    "ExposureTime", "FNumber",
})

# Stringified metadata values are truncated to this many characters.
_MAX_VALUE_LEN = 120


def _add_image_metadata(meta: dict, content: bytes) -> None:
    """Add format, mode, dimensions and selected EXIF tags of an image to *meta*.

    *meta* is filled in place so that fields gathered before a failure are
    kept by the caller. Errors while opening the image propagate; errors
    while reading EXIF data are logged and ignored.
    """
    # Imported lazily so the module can be imported without Pillow installed.
    from PIL import Image
    from PIL.ExifTags import TAGS

    # The context manager releases the image's resources once we are done.
    with Image.open(io.BytesIO(content)) as img:
        meta["format"] = img.format or "Unknown"
        meta["mode"] = img.mode
        meta["dimensions"] = f"{img.width}x{img.height}"
        try:
            # NOTE(review): `_getexif` is a private Pillow helper that not
            # every image class appears to define; images without it simply
            # get no EXIF fields. Consider the public `getexif()` API.
            read_exif = getattr(img, "_getexif", None)
            exif = read_exif() if read_exif is not None else None
            if exif:
                for tag_id, value in exif.items():
                    # Unknown numeric tag ids fall back to the id itself.
                    tag = TAGS.get(tag_id, tag_id)
                    if tag in _EXIF_TAGS_OF_INTEREST:
                        meta[str(tag)] = str(value)[:_MAX_VALUE_LEN]
        except Exception:
            # EXIF is optional: keep the basic fields collected above.
            _log.debug("Could not read EXIF data", exc_info=True)


def _add_pdf_metadata(meta: dict, content: bytes) -> None:
    """Add the document-info entries and the page count of a PDF to *meta*.

    *meta* is filled in place so that entries gathered before a failure are
    kept by the caller. Errors propagate to the caller.
    """
    # Imported lazily so the module can be imported without PyPDF2 installed.
    import PyPDF2

    reader = PyPDF2.PdfReader(io.BytesIO(content))
    info = reader.metadata
    if info:
        for key, value in info.items():
            # Drop the leading "/" from the info key (e.g. "/Author").
            meta[key.lstrip("/")] = str(value)[:_MAX_VALUE_LEN]
    meta["pages"] = str(len(reader.pages))


def extract_metadata(content: bytes, content_type: str) -> dict:
    """Extract best-effort metadata from an image or a PDF document.

    Args:
        content: Raw bytes of the file.
        content_type: Media type of *content*. Matching ignores case,
            surrounding whitespace and any ";"-separated parameters.
            Types starting with "image" are handled with Pillow and
            "application/pdf" with PyPDF2; anything else yields no metadata.

    Returns:
        A dict of string values. Images get "format", "mode", "dimensions"
        and any of the selected EXIF tags that are present; PDFs get their
        document-info entries plus "pages". Values taken from EXIF or PDF
        info are truncated to 120 characters. The dict is empty (or only
        partially filled) when the type is unsupported, a required library
        is missing, or the content cannot be parsed.

    This function never raises: failures are logged at DEBUG level.
    """
    meta = {}
    if not isinstance(content_type, str):
        # A missing or non-string content type cannot be matched.
        return meta

    # "Application/PDF; charset=binary" -> "application/pdf"
    media_type = content_type.split(";", 1)[0].strip().lower()
    try:
        if media_type.startswith("image"):
            _add_image_metadata(meta, content)
        elif media_type == "application/pdf":
            _add_pdf_metadata(meta, content)
    except Exception:
        # Best-effort by design: return whatever was gathered so far.
        _log.debug(
            "Metadata extraction failed for content type %r",
            content_type,
            exc_info=True,
        )
    return meta