# classifier / app.py
# (Hugging Face Space page residue: author rgr4y, commit 4d921db "CPU fallback")
import os
import gradio as gr
import pandas as pd
from PIL import Image
import fitz # PyMuPDF
import torch
from transformers import pipeline
# Model selection: a larger DiT model for GPU runs and a lightweight ViT
# fallback for CPU-only Spaces. Both can be overridden via Space variables.
GPU_MODEL_ID = os.getenv("GPU_MODEL_ID", "microsoft/dit-base-finetuned-rvlcdip")
CPU_MODEL_ID = os.getenv("CPU_MODEL_ID", "HAMMALE/vit-tiny-classifier-rvlcdip")
# Optional override: set FORCE_CPU=1 in Space variables to skip CUDA even
# when it is available.
FORCE_CPU = os.getenv("FORCE_CPU", "0") == "1"
def pick_device_and_model():
    """Select the inference device and the matching model checkpoint.

    Returns a ``(device_index, model_id, backend_name)`` tuple, where
    ``device_index`` follows the ``transformers.pipeline`` convention:
    ``0`` for the first CUDA device, ``-1`` for CPU.
    """
    # Guard clause: fall back to CPU when CUDA is absent or explicitly disabled.
    if FORCE_CPU or not torch.cuda.is_available():
        return -1, CPU_MODEL_ID, "cpu"
    return 0, GPU_MODEL_ID, "cuda"
# Resolve the runtime configuration once at import time, then load the
# image-classification pipeline (downloads model weights on first run).
DEVICE, ACTIVE_MODEL_ID, ACTIVE_BACKEND = pick_device_and_model()
clf = pipeline(
    task="image-classification",
    model=ACTIVE_MODEL_ID,
    device=DEVICE,  # 0 = first CUDA device, -1 = CPU
)
def pdf_to_images(pdf_path: str, max_pages: int = 6, dpi: int = 150):
    """Rasterize the first ``max_pages`` pages of a PDF into PIL images.

    Parameters
    ----------
    pdf_path : str
        Path to the PDF file.
    max_pages : int
        Upper bound on the number of pages rendered.
    dpi : int
        Render resolution. PDF user space is 72 dpi, hence the
        ``dpi / 72`` zoom factor.

    Returns
    -------
    list[PIL.Image.Image]
        One RGB image per rendered page (possibly empty for a
        zero-page document).
    """
    doc = fitz.open(pdf_path)
    try:
        zoom = dpi / 72.0
        mat = fitz.Matrix(zoom, zoom)
        images = []
        for i in range(min(len(doc), max_pages)):
            page = doc.load_page(i)
            # alpha=False yields packed RGB samples, matching "RGB" below.
            pix = page.get_pixmap(matrix=mat, alpha=False)
            images.append(
                Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
            )
        return images
    finally:
        # Close the document even if rendering a page raises, so the
        # file handle is never leaked.
        doc.close()
def run_infer(file_obj, max_pages: int = 6, top_k: int = 5):
    """Classify an uploaded document (PDF or single image).

    Parameters
    ----------
    file_obj :
        Gradio file wrapper; its ``.name`` attribute is the path of the
        uploaded temp file. ``None`` when nothing was uploaded.
    max_pages : int
        For PDFs, how many leading pages to classify.
    top_k : int
        Number of labels kept per item and in the aggregate table.

    Returns
    -------
    tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]
        ``(meta, per_item, agg_df)``: runtime/device info, per-page or
        per-image predictions, and label scores summed across pages.
    """
    if file_obj is None:
        # Without this guard the handler dies with AttributeError on .name.
        raise gr.Error("Please upload a PDF or image file.")
    # Gradio sliders may deliver floats; downstream code expects ints.
    max_pages = int(max_pages)
    top_k = int(top_k)
    path = file_obj.name
    ext = os.path.splitext(path)[1].lower()
    if ext == ".pdf":
        images = pdf_to_images(path, max_pages=max_pages)
        items = [f"page_{i+1}" for i in range(len(images))]
    else:
        images = [Image.open(path).convert("RGB")]
        items = ["image"]
    rows = []
    agg = {}  # label -> score summed across pages/images
    for item, img in zip(items, images):
        preds = clf(img, top_k=top_k)
        for p in preds:
            lab = p["label"]
            sc = float(p["score"])
            rows.append({"item": item, "label": lab, "score": sc})
            agg[lab] = agg.get(lab, 0.0) + sc
    if rows:
        per_item = (
            pd.DataFrame(rows)
            .sort_values(["item", "score"], ascending=[True, False])
            .reset_index(drop=True)
        )
        agg_df = (
            pd.DataFrame([{"label": k, "score_sum": v} for k, v in agg.items()])
            .sort_values("score_sum", ascending=False)
            .head(top_k)
            .reset_index(drop=True)
        )
    else:
        # A zero-page PDF leaves rows empty; sort_values on a column-less
        # frame would raise KeyError, so return empty typed frames instead.
        per_item = pd.DataFrame(columns=["item", "label", "score"])
        agg_df = pd.DataFrame(columns=["label", "score_sum"])
    meta = pd.DataFrame([{
        "backend": ACTIVE_BACKEND,
        "model_id": ACTIVE_MODEL_ID,
        "torch_cuda_available": torch.cuda.is_available(),
        "force_cpu": FORCE_CPU,
    }])
    return meta, per_item, agg_df
# Gradio UI: one file input plus two sliders; the three DataFrame outputs
# mirror the (meta, per_item, agg_df) tuple returned by run_infer.
demo = gr.Interface(
    fn=run_infer,
    inputs=[
        gr.File(label="Upload PDF / PNG / JPG"),
        gr.Slider(1, 50, value=6, step=1, label="Max PDF pages"),
        gr.Slider(1, 20, value=5, step=1, label="Top-K labels"),
    ],
    outputs=[
        gr.Dataframe(label="Runtime (device/model)"),
        gr.Dataframe(label="Per-page / per-image predictions"),
        gr.Dataframe(label="Aggregated across pages (sum of scores)"),
    ],
    title="Document Type Classifier (GPU-first, CPU fallback)",
    description=(
        "GPU model if available; otherwise CPU model. "
        "Set GPU_MODEL_ID / CPU_MODEL_ID / FORCE_CPU=1 as Space variables."
    ),
)
# Start the Gradio server only when executed as a script (Spaces run this
# file directly; importing it elsewhere must not launch a server).
if __name__ == "__main__":
    demo.launch()