# Hugging Face Space app — Document Type Classifier (GPU-first, CPU fallback)
import os

import torch
import gradio as gr
import pandas as pd
import fitz  # PyMuPDF
from PIL import Image
from transformers import pipeline

# Two checkpoints: a larger DiT model when CUDA is present, a tiny ViT
# model for CPU-only Spaces. Both can be overridden via Space variables.
GPU_MODEL_ID = os.getenv("GPU_MODEL_ID", "microsoft/dit-base-finetuned-rvlcdip")
CPU_MODEL_ID = os.getenv("CPU_MODEL_ID", "HAMMALE/vit-tiny-classifier-rvlcdip")
# Optional override: set FORCE_CPU=1 in Space variables
FORCE_CPU = os.getenv("FORCE_CPU", "0") == "1"
def pick_device_and_model():
    """Choose the runtime backend.

    Returns a ``(device, model_id, backend)`` triple where ``device`` is
    the ``transformers.pipeline`` device index (0 = first GPU, -1 = CPU),
    ``model_id`` the checkpoint to load, and ``backend`` a display label.
    CUDA is preferred unless unavailable or disabled via ``FORCE_CPU``.
    """
    if FORCE_CPU or not torch.cuda.is_available():
        return -1, CPU_MODEL_ID, "cpu"
    return 0, GPU_MODEL_ID, "cuda"
# Resolve the backend once at import time, then build the classifier with it.
DEVICE, ACTIVE_MODEL_ID, ACTIVE_BACKEND = pick_device_and_model()

clf = pipeline(
    task="image-classification",
    model=ACTIVE_MODEL_ID,
    device=DEVICE,
)
def pdf_to_images(pdf_path: str, max_pages: int = 6, dpi: int = 150):
    """Render the first pages of a PDF as RGB PIL images.

    Parameters
    ----------
    pdf_path : path to the PDF file.
    max_pages : cap on the number of pages rendered (from the front).
    dpi : render resolution; PDF user space is 72 dpi, so the zoom
        factor is ``dpi / 72``.

    Returns
    -------
    list of ``PIL.Image.Image``, one per rendered page.
    """
    images = []
    zoom = dpi / 72.0
    mat = fitz.Matrix(zoom, zoom)
    doc = fitz.open(pdf_path)
    try:
        # try/finally so the document handle is released even when a
        # page fails to render mid-loop (the original leaked it here).
        for i in range(min(len(doc), max_pages)):
            page = doc.load_page(i)
            pix = page.get_pixmap(matrix=mat, alpha=False)
            images.append(
                Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
            )
    finally:
        doc.close()
    return images
def run_infer(file_obj, max_pages: int = 6, top_k: int = 5):
    """Classify an uploaded document (PDF or single image).

    Parameters
    ----------
    file_obj : value from ``gr.File`` — either an object with a ``.name``
        path attribute or a plain path string, depending on Gradio
        version/config. ``None`` when nothing was uploaded.
    max_pages : maximum PDF pages to classify.
    top_k : labels kept per item and in the aggregate table.

    Returns
    -------
    (meta, per_item, agg_df) : three DataFrames — runtime metadata,
    per-page/per-image predictions, and scores summed across pages.

    Raises
    ------
    gr.Error if no file was uploaded.
    """
    if file_obj is None:
        raise gr.Error("Please upload a PDF or image file.")
    # gr.File may hand us a tempfile-like object (.name) or a bare path str.
    path = file_obj if isinstance(file_obj, str) else file_obj.name
    # Slider values can arrive as floats; downstream APIs expect ints.
    max_pages = int(max_pages)
    top_k = int(top_k)

    ext = os.path.splitext(path)[1].lower()
    if ext == ".pdf":
        images = pdf_to_images(path, max_pages=max_pages)
        items = [f"page_{i+1}" for i in range(len(images))]
    else:
        images = [Image.open(path).convert("RGB")]
        items = ["image"]

    rows = []
    agg = {}  # label -> score summed across pages/items
    for item, img in zip(items, images):
        for p in clf(img, top_k=top_k):
            lab = p["label"]
            sc = float(p["score"])
            rows.append({"item": item, "label": lab, "score": sc})
            agg[lab] = agg.get(lab, 0.0) + sc

    # Explicit columns keep sort_values from raising KeyError when a PDF
    # yields zero pages (empty DataFrame would otherwise have no columns).
    per_item = (
        pd.DataFrame(rows, columns=["item", "label", "score"])
        .sort_values(["item", "score"], ascending=[True, False])
        .reset_index(drop=True)
    )
    agg_df = (
        pd.DataFrame(
            [{"label": k, "score_sum": v} for k, v in agg.items()],
            columns=["label", "score_sum"],
        )
        .sort_values("score_sum", ascending=False)
        .head(top_k)
        .reset_index(drop=True)
    )
    meta = pd.DataFrame([{
        "backend": ACTIVE_BACKEND,
        "model_id": ACTIVE_MODEL_ID,
        "torch_cuda_available": torch.cuda.is_available(),
        "force_cpu": FORCE_CPU,
    }])
    return meta, per_item, agg_df
# UI wiring: one file input plus two tuning sliders, three table outputs.
_INPUTS = [
    gr.File(label="Upload PDF / PNG / JPG"),
    gr.Slider(1, 50, value=6, step=1, label="Max PDF pages"),
    gr.Slider(1, 20, value=5, step=1, label="Top-K labels"),
]
_OUTPUTS = [
    gr.Dataframe(label="Runtime (device/model)"),
    gr.Dataframe(label="Per-page / per-image predictions"),
    gr.Dataframe(label="Aggregated across pages (sum of scores)"),
]

demo = gr.Interface(
    fn=run_infer,
    inputs=_INPUTS,
    outputs=_OUTPUTS,
    title="Document Type Classifier (GPU-first, CPU fallback)",
    description=(
        "GPU model if available; otherwise CPU model. "
        "Set GPU_MODEL_ID / CPU_MODEL_ID / FORCE_CPU=1 as Space variables."
    ),
)

if __name__ == "__main__":
    demo.launch()