Upload 4 files
Browse files- app.py +88 -0
- main.py +58 -0
- parser_logic.py +71 -0
- requirements.txt +8 -0
app.py
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
import requests
import os
from dotenv import load_dotenv

load_dotenv()

# Backend endpoint: localhost for local dev, overridable via env (e.g., Docker).
BACKEND_URL = os.getenv("BACKEND_URL", "http://127.0.0.1:8000/process-resume")

st.set_page_config(page_title="AI Resume Analyzer", page_icon="📄", layout="centered")

st.title("📄 Intelligent Resume Parser")
st.markdown("---")
st.write("Upload a professional resume in PDF format to extract key insights using AI.")

# Show which backend this UI talks to.
with st.sidebar:
    st.info(f"Connected to Backend: `{BACKEND_URL}`")

uploaded_file = st.file_uploader("Upload PDF Resume", type="pdf")

# Client-side 5 MB guard; the server enforces its own limit as well.
MAX_UPLOAD_BYTES = 5 * 1024 * 1024

if uploaded_file:
    if uploaded_file.size > MAX_UPLOAD_BYTES:
        st.error("File is too large! Please upload a file smaller than 5MB.")
    elif st.button("Analyze Resume", type="primary"):
        with st.spinner("Processing with AI..."):
            try:
                upload = {
                    "file": (uploaded_file.name, uploaded_file.getvalue(), "application/pdf"),
                }

                # 30s timeout so the UI never hangs on a stuck backend.
                resp = requests.post(BACKEND_URL, files=upload, timeout=30)

                if resp.status_code == 200:
                    payload = resp.json()

                    # The backend may complete the HTTP call but still report
                    # an AI-side failure via an "error" key.
                    if "error" in payload:
                        st.error(payload["error"])
                    else:
                        st.success("Extraction Complete!")

                        # Summary Section
                        st.markdown("### 📝 Professional Summary")
                        st.info(payload.get('summary', 'No summary available.'))

                        # Contact Info
                        st.markdown("### 📇 Contact Details")
                        name_col, email_col, phone_col = st.columns(3)
                        name_col.metric("Name", payload.get('name', 'N/A'))
                        email_col.metric("Email", payload.get('email', 'N/A'))
                        phone_col.metric("Phone", payload.get('phone', 'N/A'))

                        # Skills rendered as styled inline tags.
                        st.markdown("### 🛠 Technical Skills")
                        skills = payload.get('skills', [])
                        if isinstance(skills, list) and skills:
                            tag_style = (
                                'background-color: #e0f2f1; color: #00695c; '
                                'padding: 5px 10px; border-radius: 15px; font-size: 14px;'
                            )
                            tags = ''.join(
                                f'<span style="{tag_style}">{skill}</span>' for skill in skills
                            )
                            st.markdown(
                                f"""
<div style="display: flex; flex-wrap: wrap; gap: 10px;">
{tags}
</div>
""",
                                unsafe_allow_html=True
                            )
                        else:
                            st.write("No specific skills detected.")

                        with st.expander("View Raw JSON Data"):
                            st.json(payload)

                elif resp.status_code == 413:
                    st.error("The file is too large for the server to process.")
                else:
                    st.error(f"Server Error: {resp.status_code} - {resp.text}")

            except requests.exceptions.ConnectionError:
                st.error("🚨 Connection Failed: Could not reach the backend server.")
            except requests.exceptions.Timeout:
                st.error("🚨 Request Timed Out: The AI took too long to respond.")
            except Exception as e:
                st.error(f"An unexpected error occurred: {e}")
|
main.py
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from parser_logic import extract_text_from_stream, parse_resume_with_ai

# Configure Logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI(title="Resume Parser API", version="1.0.0")

# CORS Middleware (Crucial for production when frontend/backend are on different ports/domains)
# NOTE(review): per the Fetch/CORS spec, browsers reject a wildcard origin combined with
# allow_credentials=True. In strict production, replace "*" with the specific frontend
# domain (or drop allow_credentials).
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["POST"],
    allow_headers=["*"],
)

MAX_FILE_SIZE = 5 * 1024 * 1024  # 5 MB limit


@app.post("/process-resume")
async def process_resume(file: UploadFile = File(...)):
    """
    Endpoint to process PDF resumes.

    Validates file type and size, then processes in-memory.

    Raises:
        HTTPException: 400 for bad type/unreadable or empty PDFs,
                       413 for oversized uploads, 500 for anything unexpected.
    """
    if file.content_type != "application/pdf":
        raise HTTPException(status_code=400, detail="Invalid file type. Only PDF is allowed.")

    try:
        # Read file into memory (Async read)
        file_content = await file.read()

        # Validation: Check file size
        if len(file_content) > MAX_FILE_SIZE:
            raise HTTPException(status_code=413, detail="File too large. Max size is 5MB.")

        # Lazy %-args keep log formatting off the hot path.
        logger.info("Processing file: %s (%d bytes)", file.filename, len(file_content))

        # Extract text (CPU bound task, but fast enough for small PDFs to run synchronously here)
        # For very heavy loads, this should be offloaded to a background task (Celery/RQ)
        raw_text = extract_text_from_stream(file_content)

        if not raw_text.strip():
            raise HTTPException(status_code=400, detail="Could not extract text. PDF might be image-based.")

        # AI Processing
        return parse_resume_with_ai(raw_text)

    except HTTPException:
        # Re-raise our own HTTP errors untouched.
        raise
    except ValueError as ve:
        # extract_text_from_stream raises ValueError for corrupted/unparseable PDFs.
        # That is a client problem (bad upload), not a server fault -> 400, not 500.
        logger.error("Extraction failed: %s", ve)
        raise HTTPException(status_code=400, detail=str(ve)) from ve
    except Exception:
        # logger.exception records the traceback for post-mortem debugging.
        logger.exception("Unexpected Error")
        raise HTTPException(status_code=500, detail="Internal Server Error")
|
parser_logic.py
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
import json
import re
import logging
import fitz  # PyMuPDF
import google.generativeai as genai
from dotenv import load_dotenv

# Configure Logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Pull environment variables from a local .env file (no-op if absent).
load_dotenv()

# Secure Configuration: the Gemini key comes from the environment only,
# never from source. NOTE: this check runs at import time, so importing
# this module without GEMINI_API_KEY set aborts the whole process.
api_key = os.getenv("GEMINI_API_KEY")
if not api_key:
    logger.error("GEMINI_API_KEY not found in environment variables.")
    raise ValueError("GEMINI_API_KEY is missing.")

# Module-level model instance, shared by all calls to parse_resume_with_ai.
genai.configure(api_key=api_key)
model = genai.GenerativeModel('gemini-1.5-flash')
|
| 23 |
+
|
| 24 |
+
def extract_text_from_stream(file_bytes: bytes) -> str:
    """Extracts raw text content from PDF bytes directly in memory.

    Args:
        file_bytes: Raw bytes of a PDF document.

    Returns:
        The concatenated text of every page (may be empty for image-only PDFs).

    Raises:
        ValueError: If the bytes cannot be opened or parsed as a PDF.
    """
    try:
        # stream=file_bytes tells PyMuPDF to read from memory, not disk
        with fitz.open(stream=file_bytes, filetype="pdf") as doc:
            # join() avoids the quadratic cost of repeated `text +=` on large documents
            return "".join(page.get_text() for page in doc)
    except Exception as e:
        logger.error("PDF Extraction Error: %s", e)
        # Chain the original exception so the root cause survives in tracebacks.
        raise ValueError("Failed to extract text from PDF. File may be corrupted.") from e
|
| 36 |
+
|
| 37 |
+
def _strip_code_fences(raw_output: str) -> str:
    """Remove a leading ```/```json fence line and a trailing ``` fence, if present.

    Unlike a global regex substitution, this only touches the boundaries of the
    string, so backticks legitimately embedded inside JSON string values survive.
    """
    cleaned = raw_output.strip()
    if cleaned.startswith("```"):
        # Drop the whole opening fence line (handles both ``` and ```json).
        cleaned = cleaned.split("\n", 1)[1] if "\n" in cleaned else ""
    if cleaned.endswith("```"):
        cleaned = cleaned[:-3]
    return cleaned.strip()


def parse_resume_with_ai(resume_text: str) -> dict:
    """Uses GenAI to transform unstructured resume text into a JSON dict.

    Args:
        resume_text: Raw text extracted from the PDF.

    Returns:
        On success, a dict with name/email/phone/skills/summary keys.
        On failure, a dict containing an "error" key (callers check for it)
        rather than raising, so the API stays best-effort.
    """
    # Prompt Engineering: Added instructions for "null" values to keep schema consistent.
    # Truncate input to 10k chars to avoid token limits if user uploads a book.
    prompt = f"""
    Acting as an expert recruiter, extract the following data from this resume text:
    - name (string)
    - email (string)
    - phone (string)
    - skills (array of strings)
    - summary (string, max 2 sentences)

    If a field is not found, return null or an empty list.
    Return strictly valid JSON. Do not include markdown formatting.

    Resume Text:
    {resume_text[:10000]}
    """

    try:
        response = model.generate_content(prompt)

        # Robust Cleaning: strip Markdown fences only at the string boundaries
        # (a global sub would also delete backticks inside JSON values).
        clean_json = _strip_code_fences(response.text)

        return json.loads(clean_json)
    except json.JSONDecodeError:
        # `response` is always bound here: a decode error can only happen
        # after generate_content() returned successfully.
        logger.error("JSON Decode Error. Raw AI Output: %s", response.text)
        return {"error": "AI response was not valid JSON", "raw_output": response.text}
    except Exception as e:
        logger.error("AI Processing Error: %s", e)
        return {"error": f"AI Processing failed: {str(e)}"}
|
requirements.txt
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi==0.109.0
uvicorn==0.27.0
python-multipart==0.0.6
streamlit==1.31.0
requests==2.31.0
google-generativeai==0.3.2
pymupdf==1.23.8
python-dotenv==1.0.1
|