redhairedshanks1 committed on
Commit
8900ccf
·
verified ·
1 Parent(s): a0113c0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +112 -16
app.py CHANGED
@@ -1,3 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import tempfile
3
  from typing import List
@@ -16,7 +116,7 @@ from langgraph.graph import StateGraph
16
  # === Service Imports ===
17
  from services.extract_text import extract_text_from_file, extract_images_with_fitz
18
  from services.extract_table import extract_tables_from_file
19
- from services.vector_store import get_entry, upsert_entry
20
  from services.s3_utils import upload_to_s3
21
 
22
  # === FastAPI Init ===
@@ -64,16 +164,14 @@ async def extract_text_api(
64
  start_page: int = Form(...),
65
  end_page: int = Form(...)
66
  ):
67
- cache = get_entry(filename) or {}
68
- if "text" in cache:
69
- return {"text": cache["text"]}
70
  path = save_temp_file(file)
71
  with open(path, "rb") as fh:
72
- cache["text"] = extract_text_from_file(fh, start_page, end_page, filename)
73
  os.remove(path)
74
- cache.pop("filename", None)
75
- upsert_entry(filename, **cache)
76
- return {"text": cache["text"]}
77
 
78
  @api.post("/api/tables")
79
  async def extract_table_api(
@@ -82,17 +180,15 @@ async def extract_table_api(
82
  start_page: int = Form(...),
83
  end_page: int = Form(...)
84
  ):
85
- cache = get_entry(filename) or {}
86
- if "tables" in cache:
87
- return {"tables": cache["tables"]}
88
  path = save_temp_file(file)
89
  with open(path, "rb") as fh:
90
- cache["tables"] = extract_tables_from_file(fh, start_page, end_page, filename)
91
  os.remove(path)
92
- cache.pop("filename", None)
93
- upsert_entry(filename, **cache)
94
- return {"tables": cache["tables"]}
95
 
96
  if __name__ == "__main__":
97
  import uvicorn
98
- uvicorn.run("app:api", host="0.0.0.0", port=7860, reload=True)
 
1
+ # import os
2
+ # import tempfile
3
+ # from typing import List
4
+ # from fastapi import FastAPI, UploadFile, File, Form
5
+ # from fastapi.responses import JSONResponse
6
+ # from pydantic import BaseModel
7
+ # from fastapi import Body
8
+ # import traceback
9
+
10
+ # from typing import TypedDict, Dict, Any
11
+
12
+ # # === LangGraph ===
13
+ # from langgraph.graph import StateGraph
14
+ # # from langgraph.checkpoint import MemorySaver
15
+
16
+ # # === Service Imports ===
17
+ # from services.extract_text import extract_text_from_file, extract_images_with_fitz
18
+ # from services.extract_table import extract_tables_from_file
19
+ # from services.vector_store import get_entry, upsert_entry
20
+ # from services.s3_utils import upload_to_s3
21
+
22
+ # # === FastAPI Init ===
23
+ # api = FastAPI()
24
+
25
+ # # === Shared helpers ===
26
+ # def save_temp_file(file: UploadFile) -> str:
27
+ # tmp = tempfile.NamedTemporaryFile(delete=False)
28
+ # tmp.write(file.file.read())
29
+ # tmp.flush()
30
+ # upload_to_s3(tmp.name, f"documents/{file.filename}")
31
+ # print(f"📤 Uploaded {file.filename} to S3")
32
+ # return tmp.name
33
+
34
+ # # === LangGraph Nodes ===
35
+ # def extract_text_node(state):
36
+ # filename = state["filename"]
37
+ # path = state["temp_files"][filename]
38
+ # start_page = state.get("start_page")
39
+ # end_page = state.get("end_page")
40
+ # with open(path, "rb") as fh:
41
+ # state["text"] = extract_text_from_file(fh, start_page, end_page, filename)
42
+ # return state
43
+
44
+ # def extract_tables_node(state):
45
+ # filename = state["filename"]
46
+ # path = state["temp_files"][filename]
47
+ # start_page = state.get("start_page")
48
+ # end_page = state.get("end_page")
49
+ # with open(path, "rb") as fh:
50
+ # state["tables"] = extract_tables_from_file(fh, start_page, end_page, filename)
51
+ # return state
52
+
53
+ # node_map = {
54
+ # "text": extract_text_node,
55
+ # "table": extract_tables_node
56
+ # }
57
+
58
+ # # === Individual APIs ===
59
+
60
+ # @api.post("/api/text")
61
+ # async def extract_text_api(
62
+ # file: UploadFile = File(...),
63
+ # filename: str = Form(...),
64
+ # start_page: int = Form(...),
65
+ # end_page: int = Form(...)
66
+ # ):
67
+ # cache = get_entry(filename) or {}
68
+ # if "text" in cache:
69
+ # return {"text": cache["text"]}
70
+ # path = save_temp_file(file)
71
+ # with open(path, "rb") as fh:
72
+ # cache["text"] = extract_text_from_file(fh, start_page, end_page, filename)
73
+ # os.remove(path)
74
+ # cache.pop("filename", None)
75
+ # upsert_entry(filename, **cache)
76
+ # return {"text": cache["text"]}
77
+
78
+ # @api.post("/api/tables")
79
+ # async def extract_table_api(
80
+ # file: UploadFile = File(...),
81
+ # filename: str = Form(...),
82
+ # start_page: int = Form(...),
83
+ # end_page: int = Form(...)
84
+ # ):
85
+ # cache = get_entry(filename) or {}
86
+ # if "tables" in cache:
87
+ # return {"tables": cache["tables"]}
88
+ # path = save_temp_file(file)
89
+ # with open(path, "rb") as fh:
90
+ # cache["tables"] = extract_tables_from_file(fh, start_page, end_page, filename)
91
+ # os.remove(path)
92
+ # cache.pop("filename", None)
93
+ # upsert_entry(filename, **cache)
94
+ # return {"tables": cache["tables"]}
95
+
96
+ # if __name__ == "__main__":
97
+ # import uvicorn
98
+ # uvicorn.run("app:api", host="0.0.0.0", port=7860, reload=True)
99
+
100
+
101
  import os
102
  import tempfile
103
  from typing import List
 
116
  # === Service Imports ===
117
  from services.extract_text import extract_text_from_file, extract_images_with_fitz
118
  from services.extract_table import extract_tables_from_file
119
+ # from services.vector_store import get_entry, upsert_entry # ❌ Disabled cache
120
  from services.s3_utils import upload_to_s3
121
 
122
  # === FastAPI Init ===
 
164
  start_page: int = Form(...),
165
  end_page: int = Form(...)
166
  ):
167
+ # cache = get_entry(filename) or {} # ❌ disabled
 
 
168
  path = save_temp_file(file)
169
  with open(path, "rb") as fh:
170
+ text = extract_text_from_file(fh, start_page, end_page, filename)
171
  os.remove(path)
172
+ # cache.pop("filename", None)
173
+ # upsert_entry(filename, **cache) # ❌ disabled
174
+ return {"text": text}
175
 
176
  @api.post("/api/tables")
177
  async def extract_table_api(
 
180
  start_page: int = Form(...),
181
  end_page: int = Form(...)
182
  ):
183
+ # cache = get_entry(filename) or {} # ❌ disabled
 
 
184
  path = save_temp_file(file)
185
  with open(path, "rb") as fh:
186
+ tables = extract_tables_from_file(fh, start_page, end_page, filename)
187
  os.remove(path)
188
+ # cache.pop("filename", None)
189
+ # upsert_entry(filename, **cache) # ❌ disabled
190
+ return {"tables": tables}
191
 
192
  if __name__ == "__main__":
193
  import uvicorn
194
+ uvicorn.run("app:api", host="0.0.0.0", port=7860, reload=True)