Spaces:

randusertry
/

pdf_analysis

Sleeping

randusertry committed on Apr 14

Commit

a96d54a

verified ·

1 Parent(s): 775a142

Create app.py

Files changed (1) hide show

app.py ADDED Viewed

+import io
+
+import pdfplumber
+from fastapi import FastAPI, File, HTTPException, UploadFile
+from fastapi.concurrency import run_in_threadpool
+# FastAPI application instance; the route decorators below register their handlers on it.
+app = FastAPI()
+def _extract_stream(pdf_bytes):
+    """Parse PDF bytes into an ordered list of text and table items.
+
+    Pages are visited in document order. For each page, one
+    ``{"type": "text", "content": ...}`` item is appended when the page
+    yields non-empty text, followed by one
+    ``{"type": "table", "content": ...}`` item per table found on that page.
+
+    This function is synchronous and does all of the parsing work, so the
+    async route handler runs it on a worker thread instead of calling it
+    directly.
+    """
+    stream = []
+    with pdfplumber.open(io.BytesIO(pdf_bytes)) as pdf:
+        for page in pdf.pages:
+            text = page.extract_text()
+            # Pages without extractable text produce no "text" item.
+            if text:
+                stream.append({"type": "text", "content": text})
+            stream.extend(
+                {"type": "table", "content": table}
+                for table in page.extract_tables()
+            )
+    return stream
+
+
+@app.post("/extract")
+async def extract(file: UploadFile = File(...)):
+    """Extract the text and tables of an uploaded PDF.
+
+    Args:
+        file: The uploaded PDF, sent as multipart form data.
+
+    Returns:
+        ``{"stream": [...]}`` where each element is a dict with a ``"type"``
+        of ``"text"`` or ``"table"`` and the extracted ``"content"``, in
+        page order.
+
+    Raises:
+        HTTPException: 400 when the upload is empty or cannot be parsed
+            as a PDF.
+    """
+    pdf_bytes = await file.read()
+    if not pdf_bytes:
+        raise HTTPException(status_code=400, detail="Uploaded file is empty")
+    try:
+        # Parsing is blocking, CPU-bound work; running it on the thread pool
+        # keeps the event loop free to serve other requests meanwhile.
+        stream = await run_in_threadpool(_extract_stream, pdf_bytes)
+    except Exception as err:
+        # Deliberately broad at the request boundary: the parser's exception
+        # types for malformed input are not visible here (presumably they
+        # vary by pdfplumber/pdfminer version - TODO confirm and narrow).
+        raise HTTPException(
+            status_code=400,
+            detail="Could not parse the uploaded file as a PDF",
+        ) from err
+    return {"stream": stream}
+@app.get("/health")
+async def health():
+    """Report that the service is up by returning a fixed ``{"status": "ok"}`` payload."""
+    return {"status": "ok"}