Spaces:
Sleeping
Sleeping
krishnachoudhary-hclguvi committed on
Make /api/v1/extract compatible with bot payload variants
Browse files
main.py
CHANGED
|
@@ -6,8 +6,8 @@ import os
|
|
| 6 |
import uuid
|
| 7 |
import time
|
| 8 |
import asyncio
|
| 9 |
-
from typing import Dict, Optional
|
| 10 |
-
from fastapi import FastAPI, UploadFile, File, HTTPException, Depends, Header
|
| 11 |
from fastapi.staticfiles import StaticFiles
|
| 12 |
from fastapi.responses import FileResponse, JSONResponse
|
| 13 |
from fastapi.middleware.cors import CORSMiddleware
|
|
@@ -289,30 +289,64 @@ async def upload_and_process(file: UploadFile = File(...)):
|
|
| 289 |
|
| 290 |
|
| 291 |
@app.post("/api/v1/extract", response_model=ProcessingResult, dependencies=[Depends(get_api_key)])
|
| 292 |
-
async def synchronous_extract(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 293 |
"""
|
| 294 |
Synchronous extraction endpoint for API testers and bots.
|
| 295 |
Directly returns the extraction results.
|
| 296 |
"""
|
| 297 |
-
# 1.
|
| 298 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 299 |
ext = filename.rsplit(".", 1)[-1].lower() if "." in filename else ""
|
| 300 |
if ext not in ALLOWED_EXTENSIONS:
|
| 301 |
raise HTTPException(status_code=400, detail=f"Unsupported file type: .{ext}")
|
| 302 |
|
| 303 |
-
content = await
|
| 304 |
if len(content) > MAX_FILE_SIZE_BYTES:
|
| 305 |
raise HTTPException(status_code=400, detail="File too large.")
|
| 306 |
if len(content) == 0:
|
| 307 |
raise HTTPException(status_code=400, detail="Empty file.")
|
| 308 |
|
| 309 |
-
#
|
| 310 |
file_id = f"sync_{str(uuid.uuid4())[:8]}"
|
| 311 |
file_path = os.path.join(UPLOAD_DIR, f"{file_id}_{filename}")
|
| 312 |
with open(file_path, "wb") as f:
|
| 313 |
f.write(content)
|
| 314 |
|
| 315 |
-
#
|
| 316 |
file_type = _get_file_type(filename)
|
| 317 |
start_time = time.time()
|
| 318 |
|
|
@@ -326,7 +360,7 @@ async def synchronous_extract(file: UploadFile = File(...)):
|
|
| 326 |
None, _perform_extraction_and_analysis, task, file_path, file_type, start_time
|
| 327 |
)
|
| 328 |
|
| 329 |
-
#
|
| 330 |
try:
|
| 331 |
if os.path.exists(file_path):
|
| 332 |
os.remove(file_path)
|
|
|
|
| 6 |
import uuid
|
| 7 |
import time
|
| 8 |
import asyncio
|
| 9 |
+
from typing import Dict, Optional, List
|
| 10 |
+
from fastapi import FastAPI, UploadFile, File, HTTPException, Depends, Header, Body
|
| 11 |
from fastapi.staticfiles import StaticFiles
|
| 12 |
from fastapi.responses import FileResponse, JSONResponse
|
| 13 |
from fastapi.middleware.cors import CORSMiddleware
|
|
|
|
| 289 |
|
| 290 |
|
| 291 |
@app.post("/api/v1/extract", response_model=ProcessingResult, dependencies=[Depends(get_api_key)])
|
| 292 |
+
async def synchronous_extract(
|
| 293 |
+
file: Optional[UploadFile] = File(None),
|
| 294 |
+
document: Optional[UploadFile] = File(None),
|
| 295 |
+
upload: Optional[UploadFile] = File(None),
|
| 296 |
+
files: Optional[List[UploadFile]] = File(None),
|
| 297 |
+
data: Optional[Dict[str, str]] = Body(None),
|
| 298 |
+
):
|
| 299 |
"""
|
| 300 |
Synchronous extraction endpoint for API testers and bots.
|
| 301 |
Directly returns the extraction results.
|
| 302 |
"""
|
| 303 |
+
# 1. Resolve the input source (supports common bot field names)
|
| 304 |
+
selected_file = file or document or upload
|
| 305 |
+
if not selected_file and files:
|
| 306 |
+
selected_file = files[0]
|
| 307 |
+
|
| 308 |
+
# URL payload fallback for bots that send JSON to this endpoint.
|
| 309 |
+
if not selected_file and data and data.get("url"):
|
| 310 |
+
url = data.get("url", "")
|
| 311 |
+
if not url.startswith(("http://", "https://")):
|
| 312 |
+
raise HTTPException(status_code=400, detail="Invalid URL format. Must start with http:// or https://")
|
| 313 |
+
|
| 314 |
+
file_id = f"sync_{str(uuid.uuid4())[:8]}"
|
| 315 |
+
filename = url.split('/')[2] if '//' in url else url.split('/')[0]
|
| 316 |
+
task = ProcessingResult.create_pending(file_id=file_id, filename=filename, file_type="url")
|
| 317 |
+
start_time = time.time()
|
| 318 |
+
await asyncio.get_event_loop().run_in_executor(
|
| 319 |
+
None, _perform_extraction_and_analysis, task, url, "url", start_time
|
| 320 |
+
)
|
| 321 |
+
if task.status == TaskStatus.ERROR:
|
| 322 |
+
raise HTTPException(status_code=500, detail=task.error_message or "Processing failed.")
|
| 323 |
+
return task
|
| 324 |
+
|
| 325 |
+
if not selected_file:
|
| 326 |
+
raise HTTPException(
|
| 327 |
+
status_code=400,
|
| 328 |
+
detail="No input provided. Send multipart file field 'file' (or 'document'/'upload') or JSON with {'url': 'https://...'}"
|
| 329 |
+
)
|
| 330 |
+
|
| 331 |
+
# 2. Validation
|
| 332 |
+
filename = selected_file.filename or "unknown"
|
| 333 |
ext = filename.rsplit(".", 1)[-1].lower() if "." in filename else ""
|
| 334 |
if ext not in ALLOWED_EXTENSIONS:
|
| 335 |
raise HTTPException(status_code=400, detail=f"Unsupported file type: .{ext}")
|
| 336 |
|
| 337 |
+
content = await selected_file.read()
|
| 338 |
if len(content) > MAX_FILE_SIZE_BYTES:
|
| 339 |
raise HTTPException(status_code=400, detail="File too large.")
|
| 340 |
if len(content) == 0:
|
| 341 |
raise HTTPException(status_code=400, detail="Empty file.")
|
| 342 |
|
| 343 |
+
# 3. Save temporary file
|
| 344 |
file_id = f"sync_{str(uuid.uuid4())[:8]}"
|
| 345 |
file_path = os.path.join(UPLOAD_DIR, f"{file_id}_{filename}")
|
| 346 |
with open(file_path, "wb") as f:
|
| 347 |
f.write(content)
|
| 348 |
|
| 349 |
+
# 4. Process
|
| 350 |
file_type = _get_file_type(filename)
|
| 351 |
start_time = time.time()
|
| 352 |
|
|
|
|
| 360 |
None, _perform_extraction_and_analysis, task, file_path, file_type, start_time
|
| 361 |
)
|
| 362 |
|
| 363 |
+
# 5. Cleanup
|
| 364 |
try:
|
| 365 |
if os.path.exists(file_path):
|
| 366 |
os.remove(file_path)
|