Spaces:

LovnishVerma
/

ResumeDataExtractor

Sleeping

App Files Community

LovnishVerma committed on 18 days ago

Commit

197a073

verified ·

1 Parent(s): 7305455

Update main.py

Browse files

Files changed (1) hide show

main.py +42 -11

main.py CHANGED Viewed

@@ -1,27 +1,58 @@
 from fastapi import FastAPI, UploadFile, File, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 from parser_logic import extract_text_from_stream, parse_resume_with_ai
-app = FastAPI()
 app.add_middleware(
     CORSMiddleware,
-    allow_origins=["*"],
     allow_methods=["POST"],
     allow_headers=["*"],
 )
 @app.post("/process-resume")
 async def process_resume(file: UploadFile = File(...)):
     if file.content_type != "application/pdf":
-        raise HTTPException(400, "Invalid file type.")
-    content = await file.read()
-    if len(content) > 5 * 1024 * 1024:
-        raise HTTPException(413, "File too large.")
-    text = extract_text_from_stream(content)
-    if not text.strip():
-        raise HTTPException(400, "PDF is empty or image-based.")
-    return parse_resume_with_ai(text)

+import logging
 from fastapi import FastAPI, UploadFile, File, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 from parser_logic import extract_text_from_stream, parse_resume_with_ai
+# Configure Logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+app = FastAPI(title="Resume Parser API", version="1.0.0")
+# CORS Middleware (Crucial for production when frontend/backend are on different ports/domains)
 app.add_middleware(
     CORSMiddleware,
+    allow_origins=["*"],  # In strict production, replace "*" with specific frontend domain
+    allow_credentials=True,
     allow_methods=["POST"],
     allow_headers=["*"],
 )
+MAX_FILE_SIZE = 5 * 1024 * 1024  # 5 MB limit
 @app.post("/process-resume")
 async def process_resume(file: UploadFile = File(...)):
+    """
+    Endpoint to process PDF resumes.
+    Validates file type and size, then processes in-memory.
+    """
     if file.content_type != "application/pdf":
+        raise HTTPException(status_code=400, detail="Invalid file type. Only PDF is allowed.")
+    try:
+        # Read file into memory (Async read)
+        file_content = await file.read()
+        # Validation: Check file size
+        if len(file_content) > MAX_FILE_SIZE:
+            raise HTTPException(status_code=413, detail="File too large. Max size is 5MB.")
+        logger.info(f"Processing file: {file.filename} ({len(file_content)} bytes)")
+        # Extract text (CPU bound task, but fast enough for small PDFs to run synchronously here)
+        # For very heavy loads, this should be offloaded to a background task (Celery/RQ)
+        raw_text = extract_text_from_stream(file_content)
+        if not raw_text.strip():
+            raise HTTPException(status_code=400, detail="Could not extract text. PDF might be image-based.")
+        # AI Processing
+        structured_result = parse_resume_with_ai(raw_text)
+        return structured_result
+    except HTTPException as he:
+        raise he
+    except Exception as e:
+        logger.error(f"Unexpected Error: {e}")
+        raise HTTPException(status_code=500, detail="Internal Server Error")