Upload 4 files
Browse files- app.py +88 -0
- main.py +58 -0
- parser_logic.py +71 -0
- requirements.txt +8 -0
app.py
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
import requests
import os
from dotenv import load_dotenv

load_dotenv()

# Backend endpoint: localhost for local dev, overridable via env (e.g., Docker).
BACKEND_URL = os.getenv("BACKEND_URL", "http://127.0.0.1:8000/process-resume")

st.set_page_config(page_title="AI Resume Analyzer", page_icon="📄", layout="centered")

st.title("📄 Intelligent Resume Parser")
st.markdown("---")
st.write("Upload a professional resume in PDF format to extract key insights using AI.")

# Show which backend this UI talks to.
with st.sidebar:
    st.info(f"Connected to Backend: `{BACKEND_URL}`")

uploaded_file = st.file_uploader("Upload PDF Resume", type="pdf")

# Client-side 5 MB guard; the server enforces its own limit as well.
MAX_UPLOAD_BYTES = 5 * 1024 * 1024

if uploaded_file:
    if uploaded_file.size > MAX_UPLOAD_BYTES:
        st.error("File is too large! Please upload a file smaller than 5MB.")
    elif st.button("Analyze Resume", type="primary"):
        with st.spinner("Processing with AI..."):
            try:
                upload = {
                    "file": (uploaded_file.name, uploaded_file.getvalue(), "application/pdf"),
                }

                # 30s timeout so the UI never hangs on a stuck backend.
                resp = requests.post(BACKEND_URL, files=upload, timeout=30)

                if resp.status_code == 200:
                    payload = resp.json()

                    # The backend may complete the HTTP call but still report
                    # an AI-side failure via an "error" key.
                    if "error" in payload:
                        st.error(payload["error"])
                    else:
                        st.success("Extraction Complete!")

                        # Summary Section
                        st.markdown("### 📝 Professional Summary")
                        st.info(payload.get('summary', 'No summary available.'))

                        # Contact Info
                        st.markdown("### 📇 Contact Details")
                        name_col, email_col, phone_col = st.columns(3)
                        name_col.metric("Name", payload.get('name', 'N/A'))
                        email_col.metric("Email", payload.get('email', 'N/A'))
                        phone_col.metric("Phone", payload.get('phone', 'N/A'))

                        # Skills rendered as styled inline tags.
                        st.markdown("### 🛠 Technical Skills")
                        skills = payload.get('skills', [])
                        if isinstance(skills, list) and skills:
                            tag_style = (
                                'background-color: #e0f2f1; color: #00695c; '
                                'padding: 5px 10px; border-radius: 15px; font-size: 14px;'
                            )
                            tags = ''.join(
                                f'<span style="{tag_style}">{skill}</span>' for skill in skills
                            )
                            st.markdown(
                                f"""
<div style="display: flex; flex-wrap: wrap; gap: 10px;">
{tags}
</div>
""",
                                unsafe_allow_html=True
                            )
                        else:
                            st.write("No specific skills detected.")

                        with st.expander("View Raw JSON Data"):
                            st.json(payload)

                elif resp.status_code == 413:
                    st.error("The file is too large for the server to process.")
                else:
                    st.error(f"Server Error: {resp.status_code} - {resp.text}")

            except requests.exceptions.ConnectionError:
                st.error("🚨 Connection Failed: Could not reach the backend server.")
            except requests.exceptions.Timeout:
                st.error("🚨 Request Timed Out: The AI took too long to respond.")
            except Exception as e:
                st.error(f"An unexpected error occurred: {e}")
|
main.py
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from parser_logic import extract_text_from_stream, parse_resume_with_ai

# Configure Logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI(title="Resume Parser API", version="1.0.0")

# CORS Middleware (Crucial for production when frontend/backend are on different ports/domains)
# NOTE(review): per the Fetch/CORS spec, browsers reject a wildcard origin combined with
# allow_credentials=True. In strict production, replace "*" with the specific frontend
# domain (or drop allow_credentials).
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["POST"],
    allow_headers=["*"],
)

MAX_FILE_SIZE = 5 * 1024 * 1024  # 5 MB limit


@app.post("/process-resume")
async def process_resume(file: UploadFile = File(...)):
    """
    Endpoint to process PDF resumes.

    Validates file type and size, then processes in-memory.

    Raises:
        HTTPException: 400 for bad type/unreadable or empty PDFs,
                       413 for oversized uploads, 500 for anything unexpected.
    """
    if file.content_type != "application/pdf":
        raise HTTPException(status_code=400, detail="Invalid file type. Only PDF is allowed.")

    try:
        # Read file into memory (Async read)
        file_content = await file.read()

        # Validation: Check file size
        if len(file_content) > MAX_FILE_SIZE:
            raise HTTPException(status_code=413, detail="File too large. Max size is 5MB.")

        # Lazy %-args keep log formatting off the hot path.
        logger.info("Processing file: %s (%d bytes)", file.filename, len(file_content))

        # Extract text (CPU bound task, but fast enough for small PDFs to run synchronously here)
        # For very heavy loads, this should be offloaded to a background task (Celery/RQ)
        raw_text = extract_text_from_stream(file_content)

        if not raw_text.strip():
            raise HTTPException(status_code=400, detail="Could not extract text. PDF might be image-based.")

        # AI Processing
        return parse_resume_with_ai(raw_text)

    except HTTPException:
        # Re-raise our own HTTP errors untouched.
        raise
    except ValueError as ve:
        # extract_text_from_stream raises ValueError for corrupted/unparseable PDFs.
        # That is a client problem (bad upload), not a server fault -> 400, not 500.
        logger.error("Extraction failed: %s", ve)
        raise HTTPException(status_code=400, detail=str(ve)) from ve
    except Exception:
        # logger.exception records the traceback for post-mortem debugging.
        logger.exception("Unexpected Error")
        raise HTTPException(status_code=500, detail="Internal Server Error")
|
parser_logic.py
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
import json
import re
import logging
import fitz  # PyMuPDF
import google.generativeai as genai
from dotenv import load_dotenv

# Configure Logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Pull environment variables from a local .env file (no-op if absent).
load_dotenv()

# Secure Configuration: the Gemini key comes from the environment only,
# never from source. NOTE: this check runs at import time, so importing
# this module without GEMINI_API_KEY set aborts the whole process.
api_key = os.getenv("GEMINI_API_KEY")
if not api_key:
    logger.error("GEMINI_API_KEY not found in environment variables.")
    raise ValueError("GEMINI_API_KEY is missing.")

# Module-level model instance, shared by all calls to parse_resume_with_ai.
genai.configure(api_key=api_key)
model = genai.GenerativeModel('gemini-1.5-flash')
|
| 23 |
+
|
| 24 |
+
def extract_text_from_stream(file_bytes: bytes) -> str:
    """Extracts raw text content from PDF bytes directly in memory.

    Args:
        file_bytes: Raw bytes of a PDF document.

    Returns:
        The concatenated text of every page (may be empty for image-only PDFs).

    Raises:
        ValueError: If the bytes cannot be opened or parsed as a PDF.
    """
    try:
        # stream=file_bytes tells PyMuPDF to read from memory, not disk
        with fitz.open(stream=file_bytes, filetype="pdf") as doc:
            # join() avoids the quadratic cost of repeated `text +=` on large documents
            return "".join(page.get_text() for page in doc)
    except Exception as e:
        logger.error("PDF Extraction Error: %s", e)
        # Chain the original exception so the root cause survives in tracebacks.
        raise ValueError("Failed to extract text from PDF. File may be corrupted.") from e
|
| 36 |
+
|
| 37 |
+
def _strip_code_fences(raw_output: str) -> str:
    """Remove a leading ```/```json fence line and a trailing ``` fence, if present.

    Unlike a global regex substitution, this only touches the boundaries of the
    string, so backticks legitimately embedded inside JSON string values survive.
    """
    cleaned = raw_output.strip()
    if cleaned.startswith("```"):
        # Drop the whole opening fence line (handles both ``` and ```json).
        cleaned = cleaned.split("\n", 1)[1] if "\n" in cleaned else ""
    if cleaned.endswith("```"):
        cleaned = cleaned[:-3]
    return cleaned.strip()


def parse_resume_with_ai(resume_text: str) -> dict:
    """Uses GenAI to transform unstructured resume text into a JSON dict.

    Args:
        resume_text: Raw text extracted from the PDF.

    Returns:
        On success, a dict with name/email/phone/skills/summary keys.
        On failure, a dict containing an "error" key (callers check for it)
        rather than raising, so the API stays best-effort.
    """
    # Prompt Engineering: Added instructions for "null" values to keep schema consistent.
    # Truncate input to 10k chars to avoid token limits if user uploads a book.
    prompt = f"""
    Acting as an expert recruiter, extract the following data from this resume text:
    - name (string)
    - email (string)
    - phone (string)
    - skills (array of strings)
    - summary (string, max 2 sentences)

    If a field is not found, return null or an empty list.
    Return strictly valid JSON. Do not include markdown formatting.

    Resume Text:
    {resume_text[:10000]}
    """

    try:
        response = model.generate_content(prompt)

        # Robust Cleaning: strip Markdown fences only at the string boundaries
        # (a global sub would also delete backticks inside JSON values).
        clean_json = _strip_code_fences(response.text)

        return json.loads(clean_json)
    except json.JSONDecodeError:
        # `response` is always bound here: a decode error can only happen
        # after generate_content() returned successfully.
        logger.error("JSON Decode Error. Raw AI Output: %s", response.text)
        return {"error": "AI response was not valid JSON", "raw_output": response.text}
    except Exception as e:
        logger.error("AI Processing Error: %s", e)
        return {"error": f"AI Processing failed: {str(e)}"}
|
requirements.txt
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi==0.109.0
uvicorn==0.27.0
python-multipart==0.0.6
streamlit==1.31.0
requests==2.31.0
google-generativeai==0.3.2
pymupdf==1.23.8
python-dotenv==1.0.1
|