Spaces:

Jorick-python
/

mcp-slidedeck

Sleeping

App Files Files Community

Jorick-python committed on May 28, 2025

Commit

c99a3fd

1 Parent(s): 3dc6559

Update app.py with latest MCP-compatible changes

Browse files

Files changed (1) hide show

app.py +26 -27

app.py CHANGED Viewed

@@ -1,12 +1,16 @@
 import io, os, json
 from typing import Dict, List, Any
 import gradio as gr
 from PIL import Image
 import pytesseract
 import pdfplumber
-from pptx import Presentation          # pip: python-pptx
 from transformers import BlipProcessor, BlipForConditionalGeneration
 import torch
 # ---------  Image Caption Model (BLIP base) -----------
 processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
@@ -23,41 +27,21 @@ def _caption_image(img: Image.Image) -> str:
     return processor.decode(out[0], skip_special_tokens=True)
 # ---------  Core analysis function -----------
-def analyze_slidepack(file: gr.File) -> Dict[str, Any]:
-    """
-    Extract **all** text + AI-generated image captions from a PPTX or PDF.
-    Args:
-        file (File): Any `.pptx` or `.pdf` uploaded by the user/agent.
-    Returns:
-        dict: {
-            "file_name": str,
-            "slides": [
-                {
-                  "slide_index": int,
-                  "textBlocks": List[str],
-                  "imageCaptions": List[str]
-                }, ...
-            ]
-        }
-    """
     fname = os.path.basename(file.name)
     slides_out: List[Dict[str, Any]] = []
-     # ---------- PPTX ----------
     if fname.lower().endswith(".pptx"):
         pres = Presentation(file.name)
         for idx, slide in enumerate(pres.slides, start=1):
             texts, caps = [], []
-            # Collect text
             for shape in slide.shapes:
                 if hasattr(shape, "text"):
                     text = shape.text.strip()
                     if text:
                         texts.append(text)
-                # Collect images
-                if shape.shape_type == 13:                        # picture
                     img_blob = shape.image.blob
                     img = Image.open(io.BytesIO(img_blob))
                     caps.append(_caption_image(img))
@@ -73,10 +57,8 @@ def analyze_slidepack(file: gr.File) -> Dict[str, Any]:
             for idx, page in enumerate(pdf.pages, start=1):
                 texts = [page.extract_text() or ""]
                 caps = []
-                # Render page to image for captioning & OCR
                 img = page.to_image(resolution=200).original
                 caps.append(_caption_image(img))
-                # OCR any text that extract_text missed (diagrams)
                 ocr_text = pytesseract.image_to_string(img)
                 if ocr_text.strip():
                     texts.append(ocr_text)
@@ -103,5 +85,22 @@ demo = gr.Interface(
     )
 )
 if __name__ == "__main__":
-    demo.launch(mcp_server=True)

+# app.py (complete and updated)
 import io, os, json
 from typing import Dict, List, Any
 import gradio as gr
+from fastapi import FastAPI, UploadFile
+from fastapi.middleware.cors import CORSMiddleware
 from PIL import Image
 import pytesseract
 import pdfplumber
+from pptx import Presentation
 from transformers import BlipProcessor, BlipForConditionalGeneration
 import torch
+import uvicorn
 # ---------  Image Caption Model (BLIP base) -----------
 processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
     return processor.decode(out[0], skip_special_tokens=True)
 # ---------  Core analysis function -----------
+def analyze_slidepack(file: Any) -> Dict[str, Any]:
     fname = os.path.basename(file.name)
     slides_out: List[Dict[str, Any]] = []
+    # ---------- PPTX ----------
     if fname.lower().endswith(".pptx"):
         pres = Presentation(file.name)
         for idx, slide in enumerate(pres.slides, start=1):
             texts, caps = [], []
             for shape in slide.shapes:
                 if hasattr(shape, "text"):
                     text = shape.text.strip()
                     if text:
                         texts.append(text)
+                if shape.shape_type == 13:
                     img_blob = shape.image.blob
                     img = Image.open(io.BytesIO(img_blob))
                     caps.append(_caption_image(img))
             for idx, page in enumerate(pdf.pages, start=1):
                 texts = [page.extract_text() or ""]
                 caps = []
                 img = page.to_image(resolution=200).original
                 caps.append(_caption_image(img))
                 ocr_text = pytesseract.image_to_string(img)
                 if ocr_text.strip():
                     texts.append(ocr_text)
     )
 )
+# ---------  FastAPI Tool Endpoint -----------
+api = FastAPI()
+api.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+@api.post("/extract_slidepack")
+async def extract_slidepack(file: UploadFile):
+    """Save an uploaded slide deck to disk and return its analysis.
+
+    The upload is written into a private, per-request temporary directory
+    and handed to ``analyze_slidepack`` as an object exposing a ``name``
+    attribute (the path on disk), which is the only attribute that function
+    reads. The temporary directory and its contents are removed once the
+    analysis has returned.
+
+    Args:
+        file: The uploaded file; its client-supplied filename determines
+            the name (and therefore the extension) of the saved copy.
+
+    Returns:
+        Whatever ``analyze_slidepack`` returns for the saved file.
+    """
+    # Function-scope imports keep this block self-contained.
+    import tempfile
+    from types import SimpleNamespace
+    # The filename is client-controlled: keep only its final path component
+    # so names like "../../etc/cron.d/x" cannot escape the target directory.
+    safe_name = os.path.basename(file.filename or "")
+    if safe_name in ("", ".", ".."):
+        safe_name = "upload"
+    # A fresh directory per request avoids collisions between concurrent
+    # uploads that share a filename and guarantees cleanup on exit.
+    with tempfile.TemporaryDirectory() as tmp_dir:
+        path = os.path.join(tmp_dir, safe_name)
+        with open(path, "wb") as f:
+            f.write(await file.read())
+        # Analysis must finish inside the ``with`` block, before the saved
+        # file is deleted.
+        return analyze_slidepack(SimpleNamespace(name=path))
 if __name__ == "__main__":
+    demo.launch(mcp_server=True, server_name="0.0.0.0", server_port=7860)