Jorick-python committed on
Commit
1a1604b
·
1 Parent(s): d19baf9

Add slide extraction server

Browse files
Files changed (2) hide show
  1. app.py +107 -0
  2. requirements.txt +8 -0
app.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io, os, json
2
+ from typing import Dict, List, Any
3
+ import gradio as gr
4
+ from PIL import Image
5
+ import pytesseract
6
+ import pdfplumber
7
+ from pptx import Presentation # pip: python-pptx
8
+ from transformers import BlipProcessor, BlipForConditionalGeneration
9
+ import torch
10
+
11
# --------- Image Caption Model (BLIP base) -----------
# Loaded once at import time so every request reuses the same weights.
_DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")

# fp16 halves GPU memory; CPU kernels generally lack half-precision support,
# so stay in fp32 there.  BUG FIX: the original selected fp16 whenever CUDA
# was available but never moved the model off the CPU, leaving half-precision
# weights on a device that cannot run them -- the .to(_DEVICE) below fixes it.
blip_model = (
    BlipForConditionalGeneration.from_pretrained(
        "Salesforce/blip-image-captioning-base",
        torch_dtype=torch.float16 if _DEVICE == "cuda" else torch.float32,
    )
    .to(_DEVICE)
    .eval()
)
17
+
18
def _caption_image(img: Image.Image) -> str:
    """Return a one-sentence BLIP caption for a PIL image.

    Args:
        img: Any PIL image; it is converted to RGB before preprocessing.

    Returns:
        The decoded caption string with special tokens stripped.
    """
    inputs = processor(img.convert("RGB"), return_tensors="pt")
    device, dtype = blip_model.device, blip_model.dtype
    # Move tensors to the model's device AND cast floating tensors to the
    # model's dtype: feeding fp32 pixel_values into an fp16 model raises a
    # dtype-mismatch error, which the original did not guard against.
    inputs = {
        k: v.to(device, dtype=dtype) if v.is_floating_point() else v.to(device)
        for k, v in inputs.items()
    }
    with torch.no_grad():
        out = blip_model.generate(**inputs)
    return processor.decode(out[0], skip_special_tokens=True)
24
+
25
# --------- Core analysis function -----------
def analyze_slidepack(file: gr.File) -> Dict[str, Any]:
    """
    Extract **all** text + AI-generated image captions from a PPTX or PDF.

    Args:
        file (File): Any `.pptx` or `.pdf` uploaded by the user/agent.

    Returns:
        dict: {
            "file_name": str,
            "slides": [
                {
                    "slide_index": int,
                    "textBlocks": List[str],
                    "imageCaptions": List[str]
                }, ...
            ]
        }

    Raises:
        gr.Error: If the uploaded file is neither a .pptx nor a .pdf.
    """
    fname = os.path.basename(file.name)
    ext = os.path.splitext(fname)[1].lower()

    if ext == ".pptx":
        slides = _extract_pptx(file.name)
    elif ext == ".pdf":
        slides = _extract_pdf(file.name)
    else:
        raise gr.Error("Unsupported file type. Upload a .pptx or .pdf.")

    return {"file_name": fname, "slides": slides}


def _iter_shapes(shapes):
    """Yield every shape, descending recursively into group shapes.

    The original loop only walked top-level shapes, so text and pictures
    inside grouped shapes were silently skipped.
    """
    for shape in shapes:
        if shape.shape_type == 6:  # 6 == MSO_SHAPE_TYPE.GROUP
            yield from _iter_shapes(shape.shapes)
        else:
            yield shape


def _extract_pptx(path: str) -> List[Dict[str, Any]]:
    """Per-slide text blocks and BLIP image captions from a .pptx file."""
    slides_out: List[Dict[str, Any]] = []
    pres = Presentation(path)
    for idx, slide in enumerate(pres.slides, start=1):
        texts: List[str] = []
        caps: List[str] = []
        for shape in _iter_shapes(slide.shapes):
            # Collect text
            if hasattr(shape, "text"):
                text = shape.text.strip()
                if text:
                    texts.append(text)
            # Collect images
            if shape.shape_type == 13:  # 13 == MSO_SHAPE_TYPE.PICTURE
                try:
                    img = Image.open(io.BytesIO(shape.image.blob))
                    caps.append(_caption_image(img))
                except Exception:
                    # Some embedded formats (WMF/EMF) cannot be decoded by
                    # Pillow; skip the image instead of aborting the whole
                    # extraction as the original code did.
                    continue
        slides_out.append({
            "slide_index": idx,
            "textBlocks": texts,
            "imageCaptions": caps,
        })
    return slides_out


def _extract_pdf(path: str) -> List[Dict[str, Any]]:
    """Per-page text blocks and BLIP image captions from a .pdf file."""
    pages_out: List[Dict[str, Any]] = []
    with pdfplumber.open(path) as pdf:
        for idx, page in enumerate(pdf.pages, start=1):
            texts = [page.extract_text() or ""]
            # Render the page to an image for captioning & OCR
            img = page.to_image(resolution=200).original
            caps = [_caption_image(img)]
            # OCR any text that extract_text missed (diagrams)
            ocr_text = pytesseract.image_to_string(img)
            if ocr_text.strip():
                texts.append(ocr_text)
            pages_out.append({
                "slide_index": idx,
                "textBlocks": [t for t in texts if t.strip()],
                "imageCaptions": caps,
            })
    return pages_out
93
+
94
# --------- Gradio Interface -----------
# Raw-extraction UI: one file in, one JSON document out.
_DESCRIPTION = (
    "Returns **every** text fragment and BLIP-generated image caption in JSON. "
    "No summarisation – perfect for downstream quiz agents."
)

demo = gr.Interface(
    fn=analyze_slidepack,
    inputs=gr.File(label="Upload PPTX or PDF"),
    outputs=gr.JSON(),
    title="Slide-Pack Full Extractor",
    description=_DESCRIPTION,
)

if __name__ == "__main__":
    # mcp_server=True also exposes analyze_slidepack as an MCP tool.
    demo.launch(mcp_server=True)
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ gradio[mcp]
2
+ python-pptx
3
+ pdfplumber
4
+ pillow
5
+ pytesseract
6
+ torch>=2.2,<3.0
7
+ torchvision
8
+ transformers