mallik-amaan commited on
Commit
f423540
·
verified ·
1 Parent(s): a1653cd

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +51 -49
main.py CHANGED
@@ -45,10 +45,6 @@ OUTPUT_DIR.mkdir(exist_ok=True)
45
  # Initialize redactor
46
  redactor = PDFRedactor()
47
 
48
- # ---------------- In-Memory Redaction Status Tracker ----------------
49
- # request_id -> status (pending | processing | completed | failed)
50
- redaction_status: Dict[str, str] = {}
51
-
52
  # ---------------- Response Models ----------------
53
  class RedactionEntity(BaseModel):
54
  entity_type: str
@@ -69,15 +65,35 @@ class RedactionStatusResponse(BaseModel):
69
  files: List[str]
70
  message: str
71
 
72
-
73
  class HealthResponse(BaseModel):
74
  status: str
75
  version: str
76
  model_loaded: bool
77
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  # ---------------- Helper Functions ----------------
79
  def get_public_url(bucket: str, storage_path: str) -> str:
80
  return f"{os.getenv('SUPABASE_URL')}/storage/v1/object/public/{bucket}/{storage_path}"
 
81
  def cleanup_files(job_id: str):
82
  """Clean up temporary files after a delay"""
83
  try:
@@ -103,10 +119,8 @@ def download_file_from_supabase(bucket: str, storage_path: str, local_path: Path
103
 
104
  def upload_file_to_supabase(bucket: str, storage_path: str, local_path: Path):
105
  logger.info(f"Uploading {local_path} to {storage_path}")
106
-
107
  with local_path.open("rb") as f:
108
  content = f.read()
109
-
110
  supabase.storage.from_(bucket).upload(
111
  path=storage_path,
112
  file=content,
@@ -117,11 +131,15 @@ def upload_file_to_supabase(bucket: str, storage_path: str, local_path: Path):
117
  )
118
 
119
  def redact_request(request_id: str, bucket: str = "doc_storage"):
120
- """Background task: redact all files for a given request_id"""
 
 
 
 
121
  try:
122
- redaction_status[request_id] = "processing"
 
123
 
124
- # Fetch all files for this request_id
125
  response = (
126
  supabase
127
  .from_("request_files")
@@ -131,9 +149,6 @@ def redact_request(request_id: str, bucket: str = "doc_storage"):
131
  )
132
 
133
  files = response.data
134
-
135
- if not files:
136
- raise Exception(f"No files found for request {request_id}")
137
  if not files:
138
  raise Exception(f"No files found for request {request_id}")
139
 
@@ -142,23 +157,18 @@ def redact_request(request_id: str, bucket: str = "doc_storage"):
142
  local_upload = UPLOAD_DIR / f"{uuid.uuid4()}.pdf"
143
  local_output = OUTPUT_DIR / f"{uuid.uuid4()}_redacted.pdf"
144
 
145
- # Download from Supabase
146
  download_file_from_supabase(bucket, storage_path, local_upload)
147
-
148
- # Redact
149
  redactor.redact_document(pdf_path=str(local_upload), output_path=str(local_output))
150
-
151
- # Upload redacted back to same path
152
  upload_file_to_supabase(bucket, storage_path, local_output)
153
-
154
- # Cleanup local files
155
  cleanup_temp_files([local_upload, local_output])
156
 
157
- redaction_status[request_id] = "completed"
 
158
 
159
  except Exception as e:
160
  logger.error(f"Redaction failed for {request_id}: {str(e)}")
161
- redaction_status[request_id] = "failed"
 
162
 
163
  # ----------------- Existing Endpoints -----------------
164
  @app.get("/", response_model=HealthResponse)
@@ -266,15 +276,21 @@ async def get_stats():
266
  # ----------------- NEW Endpoints -----------------
267
  @app.post("/redact_by_request/{request_id}", response_model=RedactionStatusResponse)
268
  async def redact_by_request(request_id: str, background_tasks: BackgroundTasks):
269
- if redaction_status.get(request_id) == "processing":
 
 
 
270
  return RedactionStatusResponse(
271
  request_id=request_id,
272
- status="processing",
273
  files=[],
274
  message="Redaction already in progress"
275
  )
276
- redaction_status[request_id] = "pending"
 
 
277
  background_tasks.add_task(redact_request, request_id)
 
278
  return RedactionStatusResponse(
279
  request_id=request_id,
280
  status="pending",
@@ -284,13 +300,11 @@ async def redact_by_request(request_id: str, background_tasks: BackgroundTasks):
284
 
285
  @app.get("/redaction_status/{request_id}", response_model=RedactionStatusResponse)
286
  async def get_redaction_status(request_id: str):
287
- status = redaction_status.get(request_id, "not_found")
288
 
289
- # Default empty response
290
  files: List[str] = []
291
 
292
- if status == "completed":
293
- # Fetch file paths from DB
294
  response = (
295
  supabase
296
  .from_("request_files")
@@ -298,35 +312,23 @@ async def get_redaction_status(request_id: str):
298
  .eq("request_id", request_id)
299
  .execute()
300
  )
301
-
302
  if response.data:
303
  files = [
304
  get_public_url("doc_storage", row["storage_path"])
305
  for row in response.data
306
  ]
307
 
308
- message = (
309
- "Redaction completed"
310
- if status == "completed"
311
- else "Redaction pending"
312
- if status == "pending"
313
- else "Redaction failed"
314
- if status == "failed"
315
- else "Request not found"
316
- )
317
 
318
  return RedactionStatusResponse(
319
  request_id=request_id,
320
  status=status,
321
  files=files,
322
  message=message
323
- )
324
-
325
- # ----------------- Run Server -----------------
326
- # if __name__ == "__main__":
327
- # uvicorn.run(
328
- # "main:app",
329
- # host="localhost",
330
- # port=2700,
331
- # reload=False
332
- # )
 
45
  # Initialize redactor
46
  redactor = PDFRedactor()
47
 
 
 
 
 
48
  # ---------------- Response Models ----------------
49
  class RedactionEntity(BaseModel):
50
  entity_type: str
 
65
  files: List[str]
66
  message: str
67
 
 
68
  class HealthResponse(BaseModel):
69
  status: str
70
  version: str
71
  model_loaded: bool
72
 
73
+ # ---------------- DB Status Helpers ----------------
74
+ def set_request_status(request_id: str, status: str):
75
+ """Update the status column in document_requests for the given request_id."""
76
+ supabase.from_("document_requests").update({"status": status}).eq("id", request_id).execute()
77
+ logger.info(f"Request {request_id} status -> {status}")
78
+
79
+ def get_request_status(request_id: str) -> str:
80
+ """Fetch current status from document_requests."""
81
+ response = (
82
+ supabase
83
+ .from_("document_requests")
84
+ .select("status")
85
+ .eq("id", request_id)
86
+ .maybe_single()
87
+ .execute()
88
+ )
89
+ if response.data:
90
+ return response.data["status"]
91
+ return "not_found"
92
+
93
  # ---------------- Helper Functions ----------------
94
  def get_public_url(bucket: str, storage_path: str) -> str:
95
  return f"{os.getenv('SUPABASE_URL')}/storage/v1/object/public/{bucket}/{storage_path}"
96
+
97
  def cleanup_files(job_id: str):
98
  """Clean up temporary files after a delay"""
99
  try:
 
119
 
120
  def upload_file_to_supabase(bucket: str, storage_path: str, local_path: Path):
121
  logger.info(f"Uploading {local_path} to {storage_path}")
 
122
  with local_path.open("rb") as f:
123
  content = f.read()
 
124
  supabase.storage.from_(bucket).upload(
125
  path=storage_path,
126
  file=content,
 
131
  )
132
 
133
  def redact_request(request_id: str, bucket: str = "doc_storage"):
134
+ """
135
+ Background task: redact all files for a given request_id.
136
+ DB writes: 2 total — one at start (redacting), one at end (redacted | failed).
137
+ The 'pending' write is done by the endpoint before this task is dispatched.
138
+ """
139
  try:
140
+ # Write 1: mark as redacting
141
+ set_request_status(request_id, "redacting")
142
 
 
143
  response = (
144
  supabase
145
  .from_("request_files")
 
149
  )
150
 
151
  files = response.data
 
 
 
152
  if not files:
153
  raise Exception(f"No files found for request {request_id}")
154
 
 
157
  local_upload = UPLOAD_DIR / f"{uuid.uuid4()}.pdf"
158
  local_output = OUTPUT_DIR / f"{uuid.uuid4()}_redacted.pdf"
159
 
 
160
  download_file_from_supabase(bucket, storage_path, local_upload)
 
 
161
  redactor.redact_document(pdf_path=str(local_upload), output_path=str(local_output))
 
 
162
  upload_file_to_supabase(bucket, storage_path, local_output)
 
 
163
  cleanup_temp_files([local_upload, local_output])
164
 
165
+ # Write 2: mark as redacted
166
+ set_request_status(request_id, "redacted")
167
 
168
  except Exception as e:
169
  logger.error(f"Redaction failed for {request_id}: {str(e)}")
170
+ # Write 2 (error path): mark as failed
171
+ set_request_status(request_id, "failed")
172
 
173
  # ----------------- Existing Endpoints -----------------
174
  @app.get("/", response_model=HealthResponse)
 
276
  # ----------------- NEW Endpoints -----------------
277
  @app.post("/redact_by_request/{request_id}", response_model=RedactionStatusResponse)
278
  async def redact_by_request(request_id: str, background_tasks: BackgroundTasks):
279
+ # Check current DB status to avoid re-triggering an in-progress job
280
+ current_status = get_request_status(request_id)
281
+
282
+ if current_status == "redacting":
283
  return RedactionStatusResponse(
284
  request_id=request_id,
285
+ status="redacting",
286
  files=[],
287
  message="Redaction already in progress"
288
  )
289
+
290
+ # Write 1: set pending before dispatching background task
291
+ set_request_status(request_id, "pending")
292
  background_tasks.add_task(redact_request, request_id)
293
+
294
  return RedactionStatusResponse(
295
  request_id=request_id,
296
  status="pending",
 
300
 
301
  @app.get("/redaction_status/{request_id}", response_model=RedactionStatusResponse)
302
  async def get_redaction_status(request_id: str):
303
+ status = get_request_status(request_id)
304
 
 
305
  files: List[str] = []
306
 
307
+ if status == "redacted":
 
308
  response = (
309
  supabase
310
  .from_("request_files")
 
312
  .eq("request_id", request_id)
313
  .execute()
314
  )
 
315
  if response.data:
316
  files = [
317
  get_public_url("doc_storage", row["storage_path"])
318
  for row in response.data
319
  ]
320
 
321
+ message = {
322
+ "redacted": "Redaction completed",
323
+ "pending": "Redaction pending",
324
+ "redacting": "Redaction in progress",
325
+ "failed": "Redaction failed",
326
+ "not_found": "Request not found",
327
+ }.get(status, status)
 
 
328
 
329
  return RedactionStatusResponse(
330
  request_id=request_id,
331
  status=status,
332
  files=files,
333
  message=message
334
+ )