krishnachoudhary-hclguvi committed on
Commit
47e8500
·
unverified ·
1 Parent(s): a181751

Make /api/v1/extract compatible with bot payload variants

Browse files
Files changed (1) hide show
  1. main.py +43 -9
main.py CHANGED
@@ -6,8 +6,8 @@ import os
6
  import uuid
7
  import time
8
  import asyncio
9
- from typing import Dict, Optional
10
- from fastapi import FastAPI, UploadFile, File, HTTPException, Depends, Header
11
  from fastapi.staticfiles import StaticFiles
12
  from fastapi.responses import FileResponse, JSONResponse
13
  from fastapi.middleware.cors import CORSMiddleware
@@ -289,30 +289,64 @@ async def upload_and_process(file: UploadFile = File(...)):
289
 
290
 
291
  @app.post("/api/v1/extract", response_model=ProcessingResult, dependencies=[Depends(get_api_key)])
292
- async def synchronous_extract(file: UploadFile = File(...)):
 
 
 
 
 
 
293
  """
294
  Synchronous extraction endpoint for API testers and bots.
295
  Directly returns the extraction results.
296
  """
297
- # 1. Validation
298
- filename = file.filename or "unknown"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
299
  ext = filename.rsplit(".", 1)[-1].lower() if "." in filename else ""
300
  if ext not in ALLOWED_EXTENSIONS:
301
  raise HTTPException(status_code=400, detail=f"Unsupported file type: .{ext}")
302
 
303
- content = await file.read()
304
  if len(content) > MAX_FILE_SIZE_BYTES:
305
  raise HTTPException(status_code=400, detail="File too large.")
306
  if len(content) == 0:
307
  raise HTTPException(status_code=400, detail="Empty file.")
308
 
309
- # 2. Save temporary file
310
  file_id = f"sync_{str(uuid.uuid4())[:8]}"
311
  file_path = os.path.join(UPLOAD_DIR, f"{file_id}_{filename}")
312
  with open(file_path, "wb") as f:
313
  f.write(content)
314
 
315
- # 3. Process
316
  file_type = _get_file_type(filename)
317
  start_time = time.time()
318
 
@@ -326,7 +360,7 @@ async def synchronous_extract(file: UploadFile = File(...)):
326
  None, _perform_extraction_and_analysis, task, file_path, file_type, start_time
327
  )
328
 
329
- # 4. Cleanup
330
  try:
331
  if os.path.exists(file_path):
332
  os.remove(file_path)
 
6
  import uuid
7
  import time
8
  import asyncio
9
+ from typing import Dict, Optional, List
10
+ from fastapi import FastAPI, UploadFile, File, HTTPException, Depends, Header, Body
11
  from fastapi.staticfiles import StaticFiles
12
  from fastapi.responses import FileResponse, JSONResponse
13
  from fastapi.middleware.cors import CORSMiddleware
 
289
 
290
 
291
  @app.post("/api/v1/extract", response_model=ProcessingResult, dependencies=[Depends(get_api_key)])
292
+ async def synchronous_extract(
293
+ file: Optional[UploadFile] = File(None),
294
+ document: Optional[UploadFile] = File(None),
295
+ upload: Optional[UploadFile] = File(None),
296
+ files: Optional[List[UploadFile]] = File(None),
297
+ data: Optional[Dict[str, str]] = Body(None),
298
+ ):
299
  """
300
  Synchronous extraction endpoint for API testers and bots.
301
  Directly returns the extraction results.
302
  """
303
+ # 1. Resolve the input source (supports common bot field names)
304
+ selected_file = file or document or upload
305
+ if not selected_file and files:
306
+ selected_file = files[0]
307
+
308
+ # URL payload fallback for bots that send JSON to this endpoint.
309
+ if not selected_file and data and data.get("url"):
310
+ url = data.get("url", "")
311
+ if not url.startswith(("http://", "https://")):
312
+ raise HTTPException(status_code=400, detail="Invalid URL format. Must start with http:// or https://")
313
+
314
+ file_id = f"sync_{str(uuid.uuid4())[:8]}"
315
+ filename = url.split('/')[2] if '//' in url else url.split('/')[0]
316
+ task = ProcessingResult.create_pending(file_id=file_id, filename=filename, file_type="url")
317
+ start_time = time.time()
318
+ await asyncio.get_event_loop().run_in_executor(
319
+ None, _perform_extraction_and_analysis, task, url, "url", start_time
320
+ )
321
+ if task.status == TaskStatus.ERROR:
322
+ raise HTTPException(status_code=500, detail=task.error_message or "Processing failed.")
323
+ return task
324
+
325
+ if not selected_file:
326
+ raise HTTPException(
327
+ status_code=400,
328
+ detail="No input provided. Send multipart file field 'file' (or 'document'/'upload') or JSON with {'url': 'https://...'}"
329
+ )
330
+
331
+ # 2. Validation
332
+ filename = selected_file.filename or "unknown"
333
  ext = filename.rsplit(".", 1)[-1].lower() if "." in filename else ""
334
  if ext not in ALLOWED_EXTENSIONS:
335
  raise HTTPException(status_code=400, detail=f"Unsupported file type: .{ext}")
336
 
337
+ content = await selected_file.read()
338
  if len(content) > MAX_FILE_SIZE_BYTES:
339
  raise HTTPException(status_code=400, detail="File too large.")
340
  if len(content) == 0:
341
  raise HTTPException(status_code=400, detail="Empty file.")
342
 
343
+ # 3. Save temporary file
344
  file_id = f"sync_{str(uuid.uuid4())[:8]}"
345
  file_path = os.path.join(UPLOAD_DIR, f"{file_id}_{filename}")
346
  with open(file_path, "wb") as f:
347
  f.write(content)
348
 
349
+ # 4. Process
350
  file_type = _get_file_type(filename)
351
  start_time = time.time()
352
 
 
360
  None, _perform_extraction_and_analysis, task, file_path, file_type, start_time
361
  )
362
 
363
+ # 5. Cleanup
364
  try:
365
  if os.path.exists(file_path):
366
  os.remove(file_path)