"""Gradio Space: document-type classifier with GPU-first model selection.

Uses a larger DiT model when CUDA is available and a tiny ViT fallback on
CPU. Accepts a PDF (rendered page by page) or a single image, and returns
per-page predictions plus scores aggregated across pages.
"""

import os

import fitz  # PyMuPDF
import gradio as gr
import pandas as pd
import torch
from PIL import Image
from transformers import pipeline

# Model ids are overridable via Space environment variables.
GPU_MODEL_ID = os.getenv("GPU_MODEL_ID", "microsoft/dit-base-finetuned-rvlcdip")
CPU_MODEL_ID = os.getenv("CPU_MODEL_ID", "HAMMALE/vit-tiny-classifier-rvlcdip")
# Optional override: set FORCE_CPU=1 in Space variables
FORCE_CPU = os.getenv("FORCE_CPU", "0") == "1"


def pick_device_and_model():
    """Select the inference device and the matching model id.

    Returns:
        tuple[int, str, str]: (pipeline device index, model id, backend name).
        Device 0 with the GPU model when CUDA is available and not forced
        off; otherwise device -1 with the lightweight CPU model.
    """
    if torch.cuda.is_available() and not FORCE_CPU:
        return 0, GPU_MODEL_ID, "cuda"
    return -1, CPU_MODEL_ID, "cpu"


DEVICE, ACTIVE_MODEL_ID, ACTIVE_BACKEND = pick_device_and_model()

# Single shared classifier instance, loaded once at startup.
clf = pipeline(
    task="image-classification",
    model=ACTIVE_MODEL_ID,
    device=DEVICE,
)


def pdf_to_images(pdf_path: str, max_pages: int = 6, dpi: int = 150):
    """Render the first ``max_pages`` pages of a PDF to RGB PIL images.

    Args:
        pdf_path: Path to the PDF file.
        max_pages: Maximum number of pages to render.
        dpi: Render resolution; 72 dpi is the PDF native scale.

    Returns:
        list[Image.Image]: One RGB image per rendered page (may be empty
        for a zero-page document).
    """
    images = []
    zoom = dpi / 72.0  # fitz renders at 72 dpi by default; scale up to `dpi`
    mat = fitz.Matrix(zoom, zoom)
    doc = fitz.open(pdf_path)
    try:
        # try/finally so the document handle is released even if a page
        # fails to render (the original leaked it on exception).
        for i in range(min(len(doc), max_pages)):
            page = doc.load_page(i)
            pix = page.get_pixmap(matrix=mat, alpha=False)
            img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
            images.append(img)
    finally:
        doc.close()
    return images


def run_infer(file_obj, max_pages: int = 6, top_k: int = 5):
    """Classify an uploaded document (PDF or image) page by page.

    Args:
        file_obj: Gradio upload — either a tempfile wrapper with ``.name``
            or (newer Gradio versions) a plain path string.
        max_pages: Maximum number of PDF pages to process.
        top_k: Number of labels per page and in the aggregate table.

    Returns:
        tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]: runtime metadata,
        per-item predictions, and label scores summed across pages.
    """
    # Accept both the tempfile-wrapper and plain-string forms of the upload.
    path = getattr(file_obj, "name", file_obj)
    ext = os.path.splitext(path)[1].lower()

    if ext == ".pdf":
        images = pdf_to_images(path, max_pages=max_pages)
        items = [f"page_{i + 1}" for i in range(len(images))]
    else:
        images = [Image.open(path).convert("RGB")]
        items = ["image"]

    rows = []
    agg = {}  # sum scores by label across pages
    for item, img in zip(items, images):
        preds = clf(img, top_k=top_k)
        for p in preds:
            lab = p["label"]
            sc = float(p["score"])
            rows.append({"item": item, "label": lab, "score": sc})
            agg[lab] = agg.get(lab, 0.0) + sc

    if rows:
        per_item = (
            pd.DataFrame(rows)
            .sort_values(["item", "score"], ascending=[True, False])
            .reset_index(drop=True)
        )
    else:
        # Zero-page PDF: keep the output schema stable instead of raising
        # KeyError from sort_values on a column-less frame.
        per_item = pd.DataFrame(columns=["item", "label", "score"])

    agg_df = (
        pd.DataFrame(
            [{"label": k, "score_sum": v} for k, v in agg.items()],
            columns=["label", "score_sum"],  # explicit columns: safe when empty
        )
        .sort_values("score_sum", ascending=False)
        .head(top_k)
        .reset_index(drop=True)
    )

    meta = pd.DataFrame([{
        "backend": ACTIVE_BACKEND,
        "model_id": ACTIVE_MODEL_ID,
        "torch_cuda_available": torch.cuda.is_available(),
        "force_cpu": FORCE_CPU,
    }])
    return meta, per_item, agg_df


demo = gr.Interface(
    fn=run_infer,
    inputs=[
        gr.File(label="Upload PDF / PNG / JPG"),
        gr.Slider(1, 50, value=6, step=1, label="Max PDF pages"),
        gr.Slider(1, 20, value=5, step=1, label="Top-K labels"),
    ],
    outputs=[
        gr.Dataframe(label="Runtime (device/model)"),
        gr.Dataframe(label="Per-page / per-image predictions"),
        gr.Dataframe(label="Aggregated across pages (sum of scores)"),
    ],
    title="Document Type Classifier (GPU-first, CPU fallback)",
    description=(
        "GPU model if available; otherwise CPU model. "
        "Set GPU_MODEL_ID / CPU_MODEL_ID / FORCE_CPU=1 as Space variables."
    ),
)

if __name__ == "__main__":
    demo.launch()