redhairedshanks1 committed on
Commit
14da8ca
·
verified ·
1 Parent(s): 85ac02c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -7
app.py CHANGED
@@ -1,13 +1,25 @@
1
  import os
2
  import tempfile
 
3
  from fastapi import FastAPI, UploadFile, File, Form
4
  from fastapi.responses import JSONResponse
5
- from services.extract_text import extract_text_from_file
 
 
 
 
 
 
 
 
 
 
 
 
6
  from services.vector_store import get_entry, upsert_entry
7
  from services.s3_utils import upload_to_s3
8
 
9
- api = FastAPI()
10
-
11
  def save_temp_file(file: UploadFile) -> str:
12
  tmp = tempfile.NamedTemporaryFile(delete=False)
13
  tmp.write(file.file.read())
@@ -16,6 +28,32 @@ def save_temp_file(file: UploadFile) -> str:
16
  print(f"📤 Uploaded {file.filename} to S3")
17
  return tmp.name
18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  @api.post("/api/text")
20
  async def extract_text_api(
21
  file: UploadFile = File(...),
@@ -26,17 +64,32 @@ async def extract_text_api(
26
  cache = get_entry(filename) or {}
27
  if "text" in cache:
28
  return {"text": cache["text"]}
29
-
30
  path = save_temp_file(file)
31
-
32
  with open(path, "rb") as fh:
33
  cache["text"] = extract_text_from_file(fh, start_page, end_page, filename)
34
-
35
  os.remove(path)
36
  cache.pop("filename", None)
37
  upsert_entry(filename, **cache)
38
  return {"text": cache["text"]}
39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  if __name__ == "__main__":
41
  import uvicorn
42
- uvicorn.run("app:api", host="0.0.0.0", port=3001, reload=True)
 
1
  import os
2
  import tempfile
3
+ from typing import List
4
  from fastapi import FastAPI, UploadFile, File, Form
5
  from fastapi.responses import JSONResponse
6
+ from pydantic import BaseModel
7
+ from fastapi import Body
8
+ import traceback
9
+
10
+ from typing import TypedDict, Dict, Any
11
+
12
+ # === LangGraph ===
13
+ from langgraph.graph import StateGraph
14
+ # from langgraph.checkpoint import MemorySaver
15
+
16
+ # === Service Imports ===
17
+ from services.extract_text import extract_text_from_file, extract_images_with_fitz
18
+ from services.extract_table import extract_tables_from_file
19
  from services.vector_store import get_entry, upsert_entry
20
  from services.s3_utils import upload_to_s3
21
 
22
+ # === Shared helpers ===
 
23
  def save_temp_file(file: UploadFile) -> str:
24
  tmp = tempfile.NamedTemporaryFile(delete=False)
25
  tmp.write(file.file.read())
 
28
  print(f"📤 Uploaded {file.filename} to S3")
29
  return tmp.name
30
 
31
+ # === LangGraph Nodes ===
32
def extract_text_node(state):
    """LangGraph node that extracts text for the file named in ``state``.

    Reads ``state["filename"]`` and looks up that file's temporary path in
    ``state["temp_files"]``. ``start_page`` and ``end_page`` are read with
    ``.get`` and are passed through as ``None`` when absent.

    The value returned by ``extract_text_from_file`` is stored under
    ``state["text"]``; the same (mutated) state object is returned.
    """
    name = state["filename"]
    tmp_path = state["temp_files"][name]
    first_page, last_page = state.get("start_page"), state.get("end_page")

    with open(tmp_path, "rb") as stream:
        extracted = extract_text_from_file(stream, first_page, last_page, name)

    state["text"] = extracted
    return state
40
+
41
def extract_tables_node(state):
    """LangGraph node that extracts tables for the file named in ``state``.

    Uses ``state["filename"]`` as the key into ``state["temp_files"]`` to find
    the temporary file to open. The page bounds come from the ``start_page``
    and ``end_page`` keys and default to ``None`` when those keys are missing.

    Stores the result of ``extract_tables_from_file`` under
    ``state["tables"]`` and returns the same state object it was given.
    """
    source_name = state["filename"]
    page_from = state.get("start_page")
    page_to = state.get("end_page")

    with open(state["temp_files"][source_name], "rb") as handle:
        tables = extract_tables_from_file(handle, page_from, page_to, source_name)

    state["tables"] = tables
    return state
49
+
50
# Lookup table from an extraction kind ("text" / "table") to the node
# function that performs it.
node_map = {
    "text": extract_text_node,
    "table": extract_tables_node,
}

# === Individual APIs ===

# FastAPI application instance. This commit removed the previous
# ``api = FastAPI()`` line, but the ``@api.post(...)`` route decorators and
# the ``uvicorn.run("app:api", ...)`` entry point still resolve this name;
# without it the module raises NameError at import time.
api = FastAPI()
56
+
57
  @api.post("/api/text")
58
  async def extract_text_api(
59
  file: UploadFile = File(...),
 
64
  cache = get_entry(filename) or {}
65
  if "text" in cache:
66
  return {"text": cache["text"]}
 
67
  path = save_temp_file(file)
 
68
  with open(path, "rb") as fh:
69
  cache["text"] = extract_text_from_file(fh, start_page, end_page, filename)
 
70
  os.remove(path)
71
  cache.pop("filename", None)
72
  upsert_entry(filename, **cache)
73
  return {"text": cache["text"]}
74
 
75
@api.post("/api/tables")
async def extract_table_api(
    file: UploadFile = File(...),
    filename: str = Form(...),
    start_page: int = Form(...),
    end_page: int = Form(...),
):
    """Extract tables from an uploaded file, caching the result by filename.

    Args:
        file: The uploaded document.
        filename: Key used for the cache lookup and passed to the extractor.
        start_page: First page bound forwarded to ``extract_tables_from_file``.
        end_page: Last page bound forwarded to ``extract_tables_from_file``.

    Returns:
        ``{"tables": ...}`` holding either the cached value or the freshly
        extracted one.
    """
    cache = get_entry(filename) or {}
    # NOTE(review): the cache is keyed by filename only, so a hit is returned
    # regardless of the requested page range -- confirm this is intended.
    if "tables" in cache:
        return {"tables": cache["tables"]}

    path = save_temp_file(file)
    try:
        with open(path, "rb") as fh:
            cache["tables"] = extract_tables_from_file(
                fh, start_page, end_page, filename
            )
    finally:
        # save_temp_file creates the file with delete=False, so it must be
        # removed here even when extraction raises; otherwise every failed
        # request leaks a temp file on disk.
        os.remove(path)

    # Presumably the stored entry carries its own "filename" field, which
    # would collide with the positional argument below -- verify against
    # services.vector_store.
    cache.pop("filename", None)
    upsert_entry(filename, **cache)
    return {"tables": cache["tables"]}
92
+
93
if __name__ == "__main__":
    # Imported here so uvicorn is only required when running as a script.
    import uvicorn

    # "app:api" is an import string: module ``app``, attribute ``api``.
    uvicorn.run(
        "app:api",
        host="0.0.0.0",
        port=7860,
        reload=True,
    )