omarirfa's picture
feat: adding test uploading feature
9e61e4e verified
Raw
History Blame Contribute Delete
10.4 kB
"""
Gradio EXIF / GPS metadata viewer — multi-image edition.
Features:
- Upload MANY JPEGs at once.
- Extract the ENTIRE metadata set: file info, PIL container info (icc/jfif/dpi),
the base IFD, the Exif sub-IFD (camera/exposure), GPS IFD, and Interop IFD.
- Live progress bar that advances per image, naming the file in progress.
- Per-image NSFW gate + malformed/oversized guards (bad files are reported,
not fatal — the batch keeps going).
- Stateless handler: nothing persisted, nothing shared between users.
Run locally: python app.py
Share with testers: GRADIO_SHARE=1 python app.py
"""
import io
import os
from fractions import Fraction
import gradio as gr
from PIL import Image, ExifTags, UnidentifiedImageError
from PIL.TiffImagePlugin import IFDRational
# ----------------------------------------------------------------------------
# Config
# ----------------------------------------------------------------------------
# Largest accepted upload, in bytes (15 MiB).
MAX_BYTES = 15 * 2**20
# Pillow format identifiers that validation lets through.
ALLOWED_FORMATS = {"MPO", "JPEG"}
# Upper bound, in pixels, on the longer side of an image.
MAX_DIMENSION = 12_000
# Classifier score at or above which an image is rejected.
NSFW_THRESHOLD = 0.8
# A share link is requested only when GRADIO_SHARE is exactly "1".
SHARE = os.getenv("GRADIO_SHARE", "0") == "1"
# Process-wide Pillow pixel-count limit (its decompression-bomb guard).
Image.MAX_IMAGE_PIXELS = 10**8
# ----------------------------------------------------------------------------
# NSFW classifier (optional: skipped if transformers/torch absent)
# ----------------------------------------------------------------------------
# Cache for the classifier: None = not tried yet, False = unavailable,
# anything else = the loaded pipeline object.
_nsfw_pipe = None


def _get_nsfw_pipe():
    """Return the shared NSFW classifier, creating it on first use.

    Returns:
        The cached pipeline object, or ``False`` when building it failed.
        A failure is remembered, so loading is not retried on later calls.
    """
    global _nsfw_pipe
    if _nsfw_pipe is not None:
        return _nsfw_pipe
    try:
        # Imported lazily so the app still starts without transformers.
        from transformers import pipeline

        _nsfw_pipe = pipeline(
            "image-classification", model="Falconsai/nsfw_image_detection"
        )
    except Exception as exc:  # noqa: BLE001
        print(f"[nsfw] classifier unavailable, skipping gate: {exc}")
        _nsfw_pipe = False
    return _nsfw_pipe
def nsfw_score(img: Image.Image) -> float:
    """Return the classifier's score for the "nsfw" label of *img*.

    Returns ``0.0`` when no classifier is available or when none of the
    returned predictions carries the "nsfw" label.
    """
    classifier = _get_nsfw_pipe()
    if not classifier:
        return 0.0
    predictions = classifier(img.convert("RGB"))
    # Label comparison is case-insensitive; the first match wins.
    return next(
        (
            float(pred["score"])
            for pred in predictions
            if pred["label"].lower() == "nsfw"
        ),
        0.0,
    )
# ----------------------------------------------------------------------------
# JSON sanitizing — EXIF values are full of rationals, bytes, nested tuples
# ----------------------------------------------------------------------------
def _clean(v, depth=0):
    """Convert an EXIF/PIL value into something JSON-serializable.

    Args:
        v: The raw value (rational, bytes, sequence, mapping, scalar, ...).
        depth: Current nesting level; beyond 6 levels the value is
            stringified instead of being descended into.

    Returns:
        A float for rationals (or the string ``"n/d"`` when the denominator
        is zero), a decoded string for short byte strings, a
        ``"<N bytes>"`` placeholder for byte strings longer than 64 bytes,
        cleaned lists/dicts for containers, the value itself for plain
        scalars and ``None``, and ``str(v)`` for anything else.
    """
    if depth > 6:
        return str(v)
    if isinstance(v, IFDRational):
        # Pillow turns a zero denominator into NaN instead of raising, so
        # float() alone would leak NaN into the JSON output. Report the raw
        # fraction instead.
        if v.denominator == 0:
            return f"{v.numerator}/{v.denominator}"
        try:
            return float(v)
        except (ZeroDivisionError, ValueError):
            return f"{v.numerator}/{v.denominator}"
    if isinstance(v, bytes):
        if len(v) <= 64:
            # errors="replace" never raises, so no fallback is needed here.
            return v.decode("utf-8", "replace").rstrip("\x00")
        return f"<{len(v)} bytes>"
    if isinstance(v, (tuple, list)):
        return [_clean(x, depth + 1) for x in v]
    if isinstance(v, dict):
        return {str(k): _clean(val, depth + 1) for k, val in v.items()}
    if isinstance(v, (int, float, str, bool)) or v is None:
        return v
    return str(v)
# ----------------------------------------------------------------------------
# GPS decoding (needs raw rationals, so done before _clean)
# ----------------------------------------------------------------------------
def _ratio_to_float(x) -> float:
    """Coerce a rational-like EXIF value to ``float``.

    ``float(x)`` is tried first. If that raises ``TypeError`` or
    ``ValueError``, a 2-tuple is treated as ``(numerator, denominator)``
    and anything else is parsed through ``Fraction``.
    """
    try:
        value = float(x)
    except (TypeError, ValueError):
        is_pair = isinstance(x, tuple) and len(x) == 2
        value = x[0] / x[1] if is_pair else float(Fraction(x))
    return value
def _dms_to_decimal(dms, ref) -> float:
    """Convert a (degrees, minutes, seconds) triple to decimal degrees.

    The result is negated when *ref* is "S" or "W" and rounded to six
    decimal places.
    """
    degrees, minutes, seconds = map(_ratio_to_float, dms)
    magnitude = degrees + minutes / 60.0 + seconds / 3600.0
    if ref in ("S", "W"):
        magnitude = -magnitude
    return round(magnitude, 6)
def _decode_gps(gps_ifd):
    """Decode a raw GPS IFD into decimal coordinates.

    Args:
        gps_ifd: Mapping of numeric GPS tag ids to raw values.

    Returns:
        ``{"latitude": ..., "longitude": ...}`` plus ``"altitude_m"`` when
        a usable altitude is present, or ``None`` when latitude/longitude
        are missing, cannot be parsed, or fall outside the valid range.
    """
    named = {ExifTags.GPSTAGS.get(k, k): v for k, v in gps_ifd.items()}
    if "GPSLatitude" not in named or "GPSLongitude" not in named:
        return None
    try:
        lat = _dms_to_decimal(
            named["GPSLatitude"], named.get("GPSLatitudeRef", "N")
        )
        lon = _dms_to_decimal(
            named["GPSLongitude"], named.get("GPSLongitudeRef", "E")
        )
    except Exception as e:  # noqa: BLE001
        print(f"[gps] parse error: {e}")
        return None

    # Zero-denominator rationals decode to NaN, which fails both range
    # comparisons, so they are rejected here along with out-of-range values
    # rather than being reported as a location.
    if not (-90.0 <= lat <= 90.0 and -180.0 <= lon <= 180.0):
        print(f"[gps] ignoring implausible coordinates: lat={lat}, lon={lon}")
        return None

    out = {"latitude": lat, "longitude": lon}

    # Altitude is optional: a bad altitude must not discard good coordinates.
    if "GPSAltitude" in named:
        try:
            alt = _ratio_to_float(named["GPSAltitude"])
        except Exception as e:  # noqa: BLE001
            print(f"[gps] altitude parse error: {e}")
        else:
            if alt == alt:  # False only for NaN
                # Exif: GPSAltitudeRef 1 means below sea level. The value
                # may arrive as a single byte or as an int.
                alt_ref = named.get("GPSAltitudeRef")
                if isinstance(alt_ref, bytes):
                    alt_ref = alt_ref[0] if alt_ref else 0
                if alt_ref == 1:
                    alt = -alt
                out["altitude_m"] = round(alt, 1)
    return out
# ----------------------------------------------------------------------------
# Full metadata extraction — every IFD plus PIL container info
# ----------------------------------------------------------------------------
def extract_full_metadata(img: Image.Image, raw_size: int, fname: str):
    """Collect every metadata section available for an opened image.

    Args:
        img: The opened image.
        raw_size: Size of the source file in bytes.
        fname: File name recorded in the "file" section.

    Returns:
        ``(meta, gps_decoded)``. ``meta`` always has the keys "file",
        "info", "exif_base" and "exif_photo", and gains "gps_raw", "gps"
        and "interop" when those sections are present. ``gps_decoded`` is
        the decoded coordinate dict, or ``None``.
    """

    def _named(ifd, names):
        # Swap numeric tag ids for readable names (unknown ids become
        # str(id)) and sanitize every value for JSON.
        return {names.get(tag, str(tag)): _clean(val) for tag, val in ifd.items()}

    file_section = {
        "name": fname,
        "size_bytes": raw_size,
        "format": img.format,
        "format_description": getattr(img, "format_description", None),
        "mode": img.mode,
        "width": img.width,
        "height": img.height,
    }
    # PIL container info: jfif, dpi, icc_profile, progression, etc.
    container_section = {key: _clean(val) for key, val in img.info.items()}
    meta = {"file": file_section, "info": container_section}

    exif = img.getexif()
    meta["exif_base"] = _named(exif, ExifTags.TAGS)

    # Exif sub-IFD: the photographic metadata (ISO, exposure, lens, dates…)
    try:
        meta["exif_photo"] = _named(exif.get_ifd(ExifTags.IFD.Exif), ExifTags.TAGS)
    except Exception:  # noqa: BLE001
        meta["exif_photo"] = {}

    # GPS IFD: raw (named) values plus decoded decimal coordinates
    try:
        gps_ifd = exif.get_ifd(ExifTags.IFD.GPSInfo)
    except Exception:  # noqa: BLE001
        gps_ifd = {}

    gps_decoded = None
    if gps_ifd:
        meta["gps_raw"] = _named(gps_ifd, ExifTags.GPSTAGS)
        gps_decoded = _decode_gps(gps_ifd)
        if gps_decoded:
            meta["gps"] = gps_decoded

    # Interop IFD: best effort, any failure simply leaves the section out
    try:
        interop = exif.get_ifd(ExifTags.IFD.Interop)
        if interop:
            meta["interop"] = _named(interop, ExifTags.TAGS)
    except Exception:  # noqa: BLE001
        pass

    return meta, gps_decoded
# ----------------------------------------------------------------------------
# Validation
# ----------------------------------------------------------------------------
def load_and_validate(path: str):
    """Read an uploaded file and return it as a fully loaded image.

    Args:
        path: Filesystem path of the uploaded file.

    Returns:
        ``(img, size)``: the loaded image and the file size in bytes.

    Raises:
        ValueError: If the file is empty, exceeds ``MAX_BYTES``, fails
            image verification, is not in ``ALLOWED_FORMATS``, or has a
            side longer than ``MAX_DIMENSION``.
    """
    size = os.path.getsize(path)
    if size == 0:
        raise ValueError("Empty file.")
    if size > MAX_BYTES:
        raise ValueError(f"Too large ({size/1e6:.1f} MB, max {MAX_BYTES/1e6:.0f} MB).")

    with open(path, "rb") as f:
        raw = f.read()

    try:
        # The verified Image object is discarded and the data is opened
        # again below (Pillow requires a reopen after verify()).
        Image.open(io.BytesIO(raw)).verify()
    except (UnidentifiedImageError, OSError, ValueError) as e:
        # Chain the cause so the underlying decoder error stays in the traceback.
        raise ValueError(f"Not a valid image: {e}") from e

    img = Image.open(io.BytesIO(raw))
    if img.format not in ALLOWED_FORMATS:
        raise ValueError(f"Only JPEG accepted (got {img.format}).")
    # Checked before load() so oversized images are never decoded.
    if max(img.size) > MAX_DIMENSION:
        raise ValueError(f"Dimensions too large ({img.size}).")
    img.load()
    return img, size
# ----------------------------------------------------------------------------
# Handler — multi-image, with a per-image progress bar
# ----------------------------------------------------------------------------
def process(files, progress=gr.Progress()):
    """Validate, screen and extract metadata from a batch of uploads.

    A file that fails any step is recorded as rejected and the batch
    continues with the next one.

    Args:
        files: Paths of the uploaded files; may be ``None`` or empty.
        progress: Gradio progress tracker, advanced once per file.

    Returns:
        ``(gallery, all_meta, coords, summary)``:
        - gallery: list of ``(RGB image, caption)`` for accepted files.
        - all_meta: dict keyed by file label, holding either the
          extracted metadata or ``{"error": reason}``.
        - coords: list of ``[label, latitude, longitude]`` rows, or
          ``None`` when no accepted file has GPS data.
        - summary: Markdown status text.
    """
    if not files:
        return [], {}, None, "Upload one or more JPEGs to begin."

    pin = "\U0001F4CD"  # round pushpin emoji
    warning = "\u26a0\ufe0f"  # warning sign emoji

    gallery, all_meta, coords, rejected = [], {}, [], []
    seen = {}
    n = len(files)

    for i, path in enumerate(files):
        base = os.path.basename(path)
        # Different uploads can share a basename. Suffix repeats so a later
        # file does not overwrite an earlier one's entry in all_meta.
        seen[base] = seen.get(base, 0) + 1
        name = base if seen[base] == 1 else f"{base} ({seen[base]})"
        progress(i / n, desc=f"Processing {name} ({i + 1}/{n})")

        try:
            img, raw_size = load_and_validate(path)
        except ValueError as e:
            rejected.append((name, str(e)))
            all_meta[name] = {"error": str(e)}
            continue
        except Exception as e:  # noqa: BLE001
            rejected.append((name, f"unreadable: {e}"))
            all_meta[name] = {"error": f"unreadable: {e}"}
            continue

        try:
            score = nsfw_score(img)
            if score >= NSFW_THRESHOLD:
                rejected.append((name, f"content filter ({score:.2f})"))
                all_meta[name] = {
                    "error": f"rejected by content filter (score={score:.2f})"
                }
                continue
            meta, gps = extract_full_metadata(img, raw_size, base)
            preview = img.convert("RGB")
        except Exception as e:  # noqa: BLE001
            # A failure on one image must not abort the rest of the batch.
            rejected.append((name, f"processing failed: {e}"))
            all_meta[name] = {"error": f"processing failed: {e}"}
            continue

        all_meta[name] = meta
        gallery.append((preview, f"{name} {pin}" if gps else name))
        if gps:
            coords.append([name, gps["latitude"], gps["longitude"]])

    progress(1.0, desc="Done")

    lines = [f"### Processed {len(gallery)}/{n} image(s)"]
    if coords:
        lines.append(f"- {pin} {len(coords)} with GPS location")
    if rejected:
        lines.append(f"- {warning} {len(rejected)} rejected:")
        lines.extend(f" - **{nm}**: {why}" for nm, why in rejected)
    summary = "\n".join(lines) + "\n"

    return gallery, all_meta, (coords or None), summary
# ----------------------------------------------------------------------------
# UI
# ----------------------------------------------------------------------------
# Page layout. Components are created in the order they appear here.
# NOTE(review): the indentation of this section was reconstructed; confirm
# which components belong inside the Row before relying on the layout.
with gr.Blocks(title="JPEG Metadata Viewer") as demo:
    # Page heading and a short usage blurb.
    gr.Markdown(
        "# JPEG Metadata & Location Viewer\n"
        "Upload one or more JPEGs to inspect their full EXIF metadata. "
        "The tool reads embedded GPS location data when present."
    )
    with gr.Row():
        # Multi-file picker; type="filepath" hands the handler file paths.
        files_in = gr.File(
            label="Upload JPEGs",
            file_types=[".jpg", ".jpeg"],
            file_count="multiple",
            type="filepath",
        )
    # Receives the Markdown summary returned by process().
    status = gr.Markdown()
    run_btn = gr.Button("Extract metadata", variant="primary")
    gallery = gr.Gallery(label="Images", columns=4, height="auto")
    coords_df = gr.Dataframe(headers=["file", "lat", "lon"], label="GPS locations")
    meta_out = gr.JSON(label="Full metadata (per image)")
    # The outputs are listed in the same order as process()'s return tuple:
    # (gallery, all_meta, coords, summary).
    run_btn.click(
        process,
        inputs=files_in,
        outputs=[gallery, meta_out, coords_df, status],
    )
# Enable request queuing with the limits given below.
demo.queue(default_concurrency_limit=4, max_size=64)
if __name__ == "__main__":
    # Script entry point. The upload cap passed to Gradio is MAX_BYTES
    # expressed in whole mebibytes.
    demo.launch(
        show_error=True,
        max_file_size=f"{MAX_BYTES // 1024 // 1024}mb",
        share=SHARE,
    )