rgr4y committed on
Commit
cc8abd5
·
verified ·
1 Parent(s): 4b3fd0d

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -0
app.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ import pandas as pd
4
+ from PIL import Image
5
+ import fitz # PyMuPDF
6
+ from transformers import pipeline
7
+
8
# Pick a lightweight doc classifier. Swap to your preferred HF model by
# setting the MODEL_ID environment variable.
MODEL_ID = os.getenv("MODEL_ID", "HAMMALE/vit-tiny-classifier-rvlcdip")


def _default_device() -> int:
    """Return the pipeline device index: 0 for the first GPU, -1 for CPU.

    Asks torch whether CUDA is actually usable instead of trusting the
    CUDA_VISIBLE_DEVICES variable alone: the variable can be unset on a GPU
    machine, or set on a machine without a working CUDA runtime.
    """
    try:
        import torch
    except ImportError:
        # No torch to ask: fall back to the environment-variable heuristic.
        visible = os.getenv("CUDA_VISIBLE_DEVICES")
        return 0 if visible not in (None, "", "-1") else -1
    return 0 if torch.cuda.is_available() else -1


# Loaded once at import time so every request reuses the same model.
clf = pipeline(
    task="image-classification",
    model=MODEL_ID,
    device=_default_device(),
)
16
+
17
def pdf_to_images(pdf_path: str, max_pages: int = 6, dpi: int = 150):
    """Render the leading pages of a PDF as RGB PIL images.

    Args:
        pdf_path: Path of the PDF file to open.
        max_pages: Maximum number of pages to render, starting from the
            first page. Coerced to ``int``; values below 1 yield no images.
        dpi: Target render resolution, applied as a zoom factor of
            ``dpi / 72`` on both axes.

    Returns:
        A list of ``PIL.Image.Image`` objects in page order.
    """
    zoom = dpi / 72.0
    mat = fitz.Matrix(zoom, zoom)
    images = []
    # The context manager closes the document even if rendering a page fails.
    with fitz.open(pdf_path) as doc:
        page_count = min(len(doc), int(max_pages))
        for page_index in range(page_count):
            pix = doc.load_page(page_index).get_pixmap(matrix=mat, alpha=False)
            # alpha=False above gives 3 samples per pixel, matching "RGB".
            images.append(
                Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
            )
    return images
29
+
30
def run_infer(file_obj, max_pages: int = 6, top_k: int = 5):
    """Classify an uploaded PDF or image and tabulate the predictions.

    Args:
        file_obj: The upload, either a path string or an object exposing the
            path as ``.name`` (presumably what the Gradio File component
            passes in; confirm against the installed Gradio version).
        max_pages: Maximum number of PDF pages to classify. Coerced to int.
        top_k: Number of labels requested per page and kept in the
            aggregate table. Coerced to int.

    Returns:
        A ``(per_page, agg_df)`` tuple of DataFrames. ``per_page`` has the
        columns ``item``, ``label`` and ``score``, ordered by page and then
        by descending score. ``agg_df`` has ``label`` and ``score_sum``: the
        ``top_k`` labels with the largest score summed over all pages.

    Raises:
        ValueError: If no file was provided.
    """
    if file_obj is None:
        raise ValueError("No file provided: upload a PDF or an image first.")

    path = file_obj if isinstance(file_obj, str) else file_obj.name
    max_pages = int(max_pages)
    top_k = int(top_k)
    ext = os.path.splitext(path)[1].lower()

    if ext == ".pdf":
        images = pdf_to_images(path, max_pages=max_pages)
        page_labels = [f"page_{n}" for n in range(1, len(images) + 1)]
    else:
        # convert() loads the pixel data, so the file can be closed right away.
        with Image.open(path) as src:
            images = [src.convert("RGB")]
        page_labels = ["image"]

    rows = []
    # Aggregate by summing scores per label across pages (simple + robust)
    agg = {}

    for order, (label, img) in enumerate(zip(page_labels, images)):
        for pred in clf(img, top_k=top_k):
            score = float(pred["score"])
            rows.append(
                {
                    "_order": order,
                    "item": label,
                    "label": pred["label"],
                    "score": score,
                }
            )
            agg[pred["label"]] = agg.get(pred["label"], 0.0) + score

    # Sort on the numeric page order, not the "page_N" string, so page_10 does
    # not land before page_2. Explicit columns keep an empty result sortable.
    per_page = (
        pd.DataFrame(rows, columns=["_order", "item", "label", "score"])
        .sort_values(["_order", "score"], ascending=[True, False])
        .drop(columns="_order")
        .reset_index(drop=True)
    )

    agg_df = (
        pd.DataFrame(list(agg.items()), columns=["label", "score_sum"])
        .sort_values("score_sum", ascending=False)
        .head(top_k)
        .reset_index(drop=True)
    )

    return per_page, agg_df
61
+
62
# Gradio UI: a file upload and two sliders feed run_infer, whose two
# DataFrames are shown as the per-page and aggregated tables.
demo = gr.Interface(
    fn=run_infer,
    inputs=[
        gr.File(label="Upload PDF/PNG/JPG"),
        gr.Slider(minimum=1, maximum=30, value=6, step=1, label="Max PDF pages"),
        gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Top-K labels"),
    ],
    outputs=[
        gr.Dataframe(label="Per-page predictions"),
        gr.Dataframe(label="Aggregated across pages (sum of scores)"),
    ],
    title="Document Classifier (PDF/PNG)",
    description=f"Model: {MODEL_ID}. Upload a PDF or image to classify document type.",
)


if __name__ == "__main__":
    # Launch the interface only when this file is run as a script.
    demo.launch()