Spaces:

Krish-05
/

text-extraction-api

Sleeping

App Files Files Community

krishnachoudhary-hclguvi committed on Apr 4

Commit

47e8500

unverified ·

1 Parent(s): a181751

Make /api/v1/extract compatible with bot payload variants

Browse files

Files changed (1) hide show

main.py +43 -9

main.py CHANGED Viewed

@@ -6,8 +6,8 @@ import os
 import uuid
 import time
 import asyncio
-from typing import Dict, Optional
-from fastapi import FastAPI, UploadFile, File, HTTPException, Depends, Header
 from fastapi.staticfiles import StaticFiles
 from fastapi.responses import FileResponse, JSONResponse
 from fastapi.middleware.cors import CORSMiddleware
@@ -289,30 +289,64 @@ async def upload_and_process(file: UploadFile = File(...)):
 @app.post("/api/v1/extract", response_model=ProcessingResult, dependencies=[Depends(get_api_key)])
-async def synchronous_extract(file: UploadFile = File(...)):
     """
     Synchronous extraction endpoint for API testers and bots.
     Directly returns the extraction results.
     """
-    # 1. Validation
-    filename = file.filename or "unknown"
     ext = filename.rsplit(".", 1)[-1].lower() if "." in filename else ""
     if ext not in ALLOWED_EXTENSIONS:
         raise HTTPException(status_code=400, detail=f"Unsupported file type: .{ext}")
-    content = await file.read()
     if len(content) > MAX_FILE_SIZE_BYTES:
         raise HTTPException(status_code=400, detail="File too large.")
     if len(content) == 0:
         raise HTTPException(status_code=400, detail="Empty file.")
-    # 2. Save temporary file
     file_id = f"sync_{str(uuid.uuid4())[:8]}"
     file_path = os.path.join(UPLOAD_DIR, f"{file_id}_{filename}")
     with open(file_path, "wb") as f:
         f.write(content)
-    # 3. Process
     file_type = _get_file_type(filename)
     start_time = time.time()
@@ -326,7 +360,7 @@ async def synchronous_extract(file: UploadFile = File(...)):
         None, _perform_extraction_and_analysis, task, file_path, file_type, start_time
     )
-    # 4. Cleanup
     try:
         if os.path.exists(file_path):
             os.remove(file_path)

 import uuid
 import time
 import asyncio
+from typing import Dict, Optional, List
+from fastapi import FastAPI, UploadFile, File, HTTPException, Depends, Header, Body
 from fastapi.staticfiles import StaticFiles
 from fastapi.responses import FileResponse, JSONResponse
 from fastapi.middleware.cors import CORSMiddleware
 @app.post("/api/v1/extract", response_model=ProcessingResult, dependencies=[Depends(get_api_key)])
+async def synchronous_extract(
+    file: Optional[UploadFile] = File(None),
+    document: Optional[UploadFile] = File(None),
+    upload: Optional[UploadFile] = File(None),
+    files: Optional[List[UploadFile]] = File(None),
+    data: Optional[Dict[str, str]] = Body(None),
+):
     """
     Synchronous extraction endpoint for API testers and bots.
     Directly returns the extraction results.
     """
+    # 1. Resolve the input source (supports common bot field names)
+    selected_file = file or document or upload
+    if not selected_file and files:
+        selected_file = files[0]
+    # URL payload fallback for bots that send JSON to this endpoint.
+    if not selected_file and data and data.get("url"):
+        url = data.get("url", "")
+        if not url.startswith(("http://", "https://")):
+            raise HTTPException(status_code=400, detail="Invalid URL format. Must start with http:// or https://")
+        file_id = f"sync_{str(uuid.uuid4())[:8]}"
+        filename = url.split('/')[2] if '//' in url else url.split('/')[0]
+        task = ProcessingResult.create_pending(file_id=file_id, filename=filename, file_type="url")
+        start_time = time.time()
+        await asyncio.get_event_loop().run_in_executor(
+            None, _perform_extraction_and_analysis, task, url, "url", start_time
+        )
+        if task.status == TaskStatus.ERROR:
+            raise HTTPException(status_code=500, detail=task.error_message or "Processing failed.")
+        return task
+    if not selected_file:
+        raise HTTPException(
+            status_code=400,
+            detail="No input provided. Send multipart file field 'file' (or 'document'/'upload') or JSON with {'url': 'https://...'}"
+        )
+    # 2. Validation
+    filename = selected_file.filename or "unknown"
     ext = filename.rsplit(".", 1)[-1].lower() if "." in filename else ""
     if ext not in ALLOWED_EXTENSIONS:
         raise HTTPException(status_code=400, detail=f"Unsupported file type: .{ext}")
+    content = await selected_file.read()
     if len(content) > MAX_FILE_SIZE_BYTES:
         raise HTTPException(status_code=400, detail="File too large.")
     if len(content) == 0:
         raise HTTPException(status_code=400, detail="Empty file.")
+    # 3. Save temporary file
     file_id = f"sync_{str(uuid.uuid4())[:8]}"
     file_path = os.path.join(UPLOAD_DIR, f"{file_id}_{filename}")
     with open(file_path, "wb") as f:
         f.write(content)
+    # 4. Process
     file_type = _get_file_type(filename)
     start_time = time.time()
         None, _perform_extraction_and_analysis, task, file_path, file_type, start_time
     )
+    # 5. Cleanup
     try:
         if os.path.exists(file_path):
             os.remove(file_path)