Jorick-python committed on
Commit
c99a3fd
·
1 Parent(s): 3dc6559

Update app.py with latest MCP-compatible changes

Browse files
Files changed (1) hide show
  1. app.py +26 -27
app.py CHANGED
@@ -1,12 +1,16 @@
 
1
  import io, os, json
2
  from typing import Dict, List, Any
3
  import gradio as gr
 
 
4
  from PIL import Image
5
  import pytesseract
6
  import pdfplumber
7
- from pptx import Presentation # pip: python-pptx
8
  from transformers import BlipProcessor, BlipForConditionalGeneration
9
  import torch
 
10
 
11
  # --------- Image Caption Model (BLIP base) -----------
12
  processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
@@ -23,41 +27,21 @@ def _caption_image(img: Image.Image) -> str:
23
  return processor.decode(out[0], skip_special_tokens=True)
24
 
25
  # --------- Core analysis function -----------
26
- def analyze_slidepack(file: gr.File) -> Dict[str, Any]:
27
- """
28
- Extract **all** text + AI-generated image captions from a PPTX or PDF.
29
-
30
- Args:
31
- file (File): Any `.pptx` or `.pdf` uploaded by the user/agent.
32
-
33
- Returns:
34
- dict: {
35
- "file_name": str,
36
- "slides": [
37
- {
38
- "slide_index": int,
39
- "textBlocks": List[str],
40
- "imageCaptions": List[str]
41
- }, ...
42
- ]
43
- }
44
- """
45
  fname = os.path.basename(file.name)
46
  slides_out: List[Dict[str, Any]] = []
47
 
48
- # ---------- PPTX ----------
49
  if fname.lower().endswith(".pptx"):
50
  pres = Presentation(file.name)
51
  for idx, slide in enumerate(pres.slides, start=1):
52
  texts, caps = [], []
53
- # Collect text
54
  for shape in slide.shapes:
55
  if hasattr(shape, "text"):
56
  text = shape.text.strip()
57
  if text:
58
  texts.append(text)
59
- # Collect images
60
- if shape.shape_type == 13: # picture
61
  img_blob = shape.image.blob
62
  img = Image.open(io.BytesIO(img_blob))
63
  caps.append(_caption_image(img))
@@ -73,10 +57,8 @@ def analyze_slidepack(file: gr.File) -> Dict[str, Any]:
73
  for idx, page in enumerate(pdf.pages, start=1):
74
  texts = [page.extract_text() or ""]
75
  caps = []
76
- # Render page to image for captioning & OCR
77
  img = page.to_image(resolution=200).original
78
  caps.append(_caption_image(img))
79
- # OCR any text that extract_text missed (diagrams)
80
  ocr_text = pytesseract.image_to_string(img)
81
  if ocr_text.strip():
82
  texts.append(ocr_text)
@@ -103,5 +85,22 @@ demo = gr.Interface(
103
  )
104
  )
105
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  if __name__ == "__main__":
107
- demo.launch(mcp_server=True)
 
1
# app.py (complete and updated)
import io
import json
import os
import tempfile
from types import SimpleNamespace
from typing import Any, Dict, List

import gradio as gr
import pdfplumber
import pytesseract
import torch
import uvicorn
from fastapi import FastAPI, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from PIL import Image
from pptx import Presentation
from transformers import BlipProcessor, BlipForConditionalGeneration
14
 
15
  # --------- Image Caption Model (BLIP base) -----------
16
  processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
 
27
  return processor.decode(out[0], skip_special_tokens=True)
28
 
29
  # --------- Core analysis function -----------
30
+ def analyze_slidepack(file: Any) -> Dict[str, Any]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  fname = os.path.basename(file.name)
32
  slides_out: List[Dict[str, Any]] = []
33
 
34
+ # ---------- PPTX ----------
35
  if fname.lower().endswith(".pptx"):
36
  pres = Presentation(file.name)
37
  for idx, slide in enumerate(pres.slides, start=1):
38
  texts, caps = [], []
 
39
  for shape in slide.shapes:
40
  if hasattr(shape, "text"):
41
  text = shape.text.strip()
42
  if text:
43
  texts.append(text)
44
+ if shape.shape_type == 13:
 
45
  img_blob = shape.image.blob
46
  img = Image.open(io.BytesIO(img_blob))
47
  caps.append(_caption_image(img))
 
57
  for idx, page in enumerate(pdf.pages, start=1):
58
  texts = [page.extract_text() or ""]
59
  caps = []
 
60
  img = page.to_image(resolution=200).original
61
  caps.append(_caption_image(img))
 
62
  ocr_text = pytesseract.image_to_string(img)
63
  if ocr_text.strip():
64
  texts.append(ocr_text)
 
85
  )
86
  )
87
 
88
# --------- FastAPI Tool Endpoint -----------
# Plain-HTTP mirror of the Gradio/MCP tool so non-MCP agents can call it too.
api = FastAPI()
api.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # NOTE(review): wide-open CORS is fine for a demo Space; lock down in production
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


@api.post("/extract_slidepack")
async def extract_slidepack(file: UploadFile) -> Dict[str, Any]:
    """Persist an uploaded .pptx/.pdf and return its extracted content.

    Args:
        file: Multipart upload from the client; only its bytes and
            filename are used.

    Returns:
        dict: The ``analyze_slidepack`` result (file name plus per-slide
        text blocks and image captions).
    """
    # basename() strips any client-supplied directory components
    # (e.g. "../../etc/passwd"), preventing path traversal outside the
    # temp directory; fall back to a fixed name if the filename is empty.
    safe_name = os.path.basename(file.filename or "upload")
    path = os.path.join(tempfile.gettempdir(), safe_name)
    with open(path, "wb") as f:
        f.write(await file.read())
    # analyze_slidepack only reads `.name` from its argument, so a
    # SimpleNamespace is the minimal, explicit stand-in for a Gradio File.
    return analyze_slidepack(SimpleNamespace(name=path))
104
+
105
if __name__ == "__main__":
    # Launch the Gradio UI as an MCP server, bound to all interfaces on
    # port 7860 (the standard Hugging Face Spaces port).
    # NOTE(review): the FastAPI `api` app defined above is never served here
    # and `uvicorn` is imported but unused — confirm whether `api` should be
    # mounted into the Gradio app or run separately via uvicorn.
    demo.launch(mcp_server=True, server_name="0.0.0.0", server_port=7860)