randusertry committed on
Commit
a96d54a
·
verified ·
1 Parent(s): 775a142

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -0
app.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, File, UploadFile
2
+ import pdfplumber
3
+ import io
4
+
5
# Application object; the route decorators in this file register their handlers on it.
app = FastAPI()
6
+
7
def _parse_pdf(pdf_bytes):
    """Collect the text and tables of every page of a PDF, in page order.

    Kept synchronous on purpose: the pdfplumber calls below are blocking, so
    the endpoint runs this helper in a worker thread instead of calling it
    directly on the event loop.

    Args:
        pdf_bytes: Raw bytes of the PDF document.

    Returns:
        A list of ``{"type": ..., "content": ...}`` dicts. For each page, the
        page text (when non-empty) comes first, followed by one entry per
        table returned by ``page.extract_tables()``.
    """
    stream = []
    with pdfplumber.open(io.BytesIO(pdf_bytes)) as pdf:
        for page in pdf.pages:
            # Text first, so it precedes the page's tables in the stream.
            text = page.extract_text()
            if text:
                stream.append({"type": "text", "content": text})

            stream.extend(
                {"type": "table", "content": table}
                for table in page.extract_tables()
            )
    return stream


@app.post("/extract")
async def extract(file: UploadFile = File(...)):
    """Extract text and tables from an uploaded PDF.

    Args:
        file: The uploaded PDF, sent as multipart form data.

    Returns:
        ``{"stream": [...]}`` where each item is either
        ``{"type": "text", "content": <str>}`` or
        ``{"type": "table", "content": <table rows>}``, in page order.

    Raises:
        HTTPException: 400 when the upload is empty or cannot be parsed as a
            PDF.
    """
    # Local imports: these names are not part of the module's import block.
    from fastapi import HTTPException
    from fastapi.concurrency import run_in_threadpool

    pdf_bytes = await file.read()
    if not pdf_bytes:
        raise HTTPException(status_code=400, detail="Uploaded file is empty.")

    try:
        # Parsing is blocking work; keep it off the event loop so other
        # requests (e.g. /health) stay responsive meanwhile.
        stream = await run_in_threadpool(_parse_pdf, pdf_bytes)
    except Exception as err:
        # NOTE(review): deliberately broad -- the concrete exception type
        # raised for malformed input appears to depend on the installed
        # pdfplumber/pdfminer version; narrow this once that is pinned.
        raise HTTPException(
            status_code=400,
            detail="Could not parse the uploaded file as a PDF.",
        ) from err

    return {"stream": stream}
35
+
36
@app.get("/health")
async def health():
    """Health-check endpoint; always answers with a constant "ok" status."""
    payload = {"status": "ok"}
    return payload