LovnishVerma committed on
Commit
c751485
·
verified ·
1 Parent(s): 69ff9c0

Upload 4 files

Browse files
Files changed (4) hide show
  1. app.py +88 -0
  2. main.py +58 -0
  3. parser_logic.py +71 -0
  4. requirements.txt +8 -0
app.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Streamlit frontend for the AI Resume Analyzer.

Uploads a PDF to the FastAPI backend and renders the structured result.
"""
import html
import os

import requests
import streamlit as st
from dotenv import load_dotenv

load_dotenv()

# Configuration
# Defaults to localhost for dev, but can be overridden in production (e.g., Docker).
BACKEND_URL = os.getenv("BACKEND_URL", "http://127.0.0.1:8000/process-resume")

st.set_page_config(page_title="AI Resume Analyzer", page_icon="📄", layout="centered")

st.title("📄 Intelligent Resume Parser")
st.markdown("---")
st.write("Upload a professional resume in PDF format to extract key insights using AI.")

# Sidebar shows which backend this UI will call.
with st.sidebar:
    st.info(f"Connected to Backend: `{BACKEND_URL}`")

uploaded_file = st.file_uploader("Upload PDF Resume", type="pdf")

if uploaded_file:
    # Frontend validation mirrors the backend's 5 MB limit so users get
    # fast feedback without a network round trip (the server re-checks anyway).
    if uploaded_file.size > 5 * 1024 * 1024:
        st.error("File is too large! Please upload a file smaller than 5MB.")
    else:
        if st.button("Analyze Resume", type="primary"):
            with st.spinner("Processing with AI..."):
                try:
                    files = {
                        "file": (uploaded_file.name, uploaded_file.getvalue(), "application/pdf")
                    }

                    # Set a timeout to prevent hanging
                    response = requests.post(BACKEND_URL, files=files, timeout=30)

                    if response.status_code == 200:
                        data = response.json()

                        # Handle case where the backend returns an error key
                        if "error" in data:
                            st.error(data["error"])
                        else:
                            st.success("Extraction Complete!")

                            # Summary Section
                            st.markdown("### 📝 Professional Summary")
                            st.info(data.get('summary', 'No summary available.'))

                            # Contact Info
                            st.markdown("### 📇 Contact Details")
                            c1, c2, c3 = st.columns(3)
                            c1.metric("Name", data.get('name', 'N/A'))
                            c2.metric("Email", data.get('email', 'N/A'))
                            c3.metric("Phone", data.get('phone', 'N/A'))

                            # Skills Section
                            st.markdown("### 🛠 Technical Skills")
                            skills = data.get('skills', [])
                            if skills and isinstance(skills, list):
                                # SECURITY FIX: skill strings originate from an
                                # external AI response and are rendered with
                                # unsafe_allow_html, so escape them to prevent
                                # HTML/script injection into the page.
                                tags = ''.join(
                                    '<span style="background-color: #e0f2f1; color: #00695c; '
                                    'padding: 5px 10px; border-radius: 15px; font-size: 14px;">'
                                    f'{html.escape(str(skill))}</span>'
                                    for skill in skills
                                )
                                st.markdown(
                                    f'<div style="display: flex; flex-wrap: wrap; gap: 10px;">{tags}</div>',
                                    unsafe_allow_html=True
                                )
                            else:
                                st.write("No specific skills detected.")

                            with st.expander("View Raw JSON Data"):
                                st.json(data)

                    elif response.status_code == 413:
                        st.error("The file is too large for the server to process.")
                    else:
                        st.error(f"Server Error: {response.status_code} - {response.text}")

                except requests.exceptions.ConnectionError:
                    st.error("🚨 Connection Failed: Could not reach the backend server.")
                except requests.exceptions.Timeout:
                    st.error("🚨 Request Timed Out: The AI took too long to respond.")
                except Exception as e:
                    st.error(f"An unexpected error occurred: {e}")
main.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import logging
from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from parser_logic import extract_text_from_stream, parse_resume_with_ai

# Configure Logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI(title="Resume Parser API", version="1.0.0")

# CORS Middleware (Crucial for production when frontend/backend are on different ports/domains)
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # In strict production, replace "*" with specific frontend domain
    allow_credentials=True,
    allow_methods=["POST"],  # Only POST is needed: the API exposes a single upload endpoint.
    allow_headers=["*"],
)

# Hard cap on accepted upload size; the Streamlit frontend enforces the same
# 5 MB limit client-side, but this server-side check is the authoritative one.
MAX_FILE_SIZE = 5 * 1024 * 1024  # 5 MB limit
23
@app.post("/process-resume")
async def process_resume(file: UploadFile = File(...)):
    """
    Process an uploaded PDF resume and return structured data.

    Validates file type and size, extracts text in-memory, then delegates
    to the AI parser.

    Raises:
        HTTPException 400: non-PDF upload, or no extractable text.
        HTTPException 413: file exceeds MAX_FILE_SIZE.
        HTTPException 500: any unexpected failure.
    """
    if file.content_type != "application/pdf":
        raise HTTPException(status_code=400, detail="Invalid file type. Only PDF is allowed.")

    try:
        # Read file into memory (async read).
        file_content = await file.read()

        # Validation: enforce the size limit server-side — the frontend
        # check can be bypassed by any direct API client.
        if len(file_content) > MAX_FILE_SIZE:
            raise HTTPException(status_code=413, detail="File too large. Max size is 5MB.")

        # Lazy %-style args skip formatting when INFO logging is disabled.
        logger.info("Processing file: %s (%d bytes)", file.filename, len(file_content))

        # Extract text (CPU bound task, but fast enough for small PDFs to run synchronously here).
        # For very heavy loads, this should be offloaded to a background task (Celery/RQ).
        raw_text = extract_text_from_stream(file_content)

        if not raw_text.strip():
            raise HTTPException(status_code=400, detail="Could not extract text. PDF might be image-based.")

        # AI Processing
        return parse_resume_with_ai(raw_text)

    except HTTPException:
        # Bare `raise` re-raises the active exception with its original
        # traceback intact (idiomatic; `raise he` discards nothing here but
        # the bare form is clearer and lint-clean).
        raise
    except Exception as e:
        # logger.exception records the full stack trace, not just the message.
        logger.exception("Unexpected Error: %s", e)
        raise HTTPException(status_code=500, detail="Internal Server Error")
parser_logic.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import json
import re
import logging
import fitz  # PyMuPDF
import google.generativeai as genai
from dotenv import load_dotenv

# Configure Logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

load_dotenv()

# Secure Configuration: the API key comes only from the environment
# (optionally via a .env file), never from source control. Fail fast at
# import time so a misconfigured deployment is caught immediately.
api_key = os.getenv("GEMINI_API_KEY")
if not api_key:
    logger.error("GEMINI_API_KEY not found in environment variables.")
    raise ValueError("GEMINI_API_KEY is missing.")

genai.configure(api_key=api_key)
# Single shared model instance reused by parse_resume_with_ai for every request.
model = genai.GenerativeModel('gemini-1.5-flash')
23
+
24
def extract_text_from_stream(file_bytes: bytes) -> str:
    """Extract raw text content from PDF bytes directly in memory.

    Args:
        file_bytes: Complete contents of a PDF file.

    Returns:
        The concatenated text of all pages (may be empty for image-only PDFs).

    Raises:
        ValueError: If PyMuPDF cannot open or read the document.
    """
    try:
        # stream=file_bytes tells PyMuPDF to read from memory, not disk
        with fitz.open(stream=file_bytes, filetype="pdf") as doc:
            # join() is linear; the original `text +=` loop is potentially
            # quadratic on documents with many pages.
            return "".join(page.get_text() for page in doc)
    except Exception as e:
        logger.error("PDF Extraction Error: %s", e)
        # Chain the original exception so the root cause stays in the traceback.
        raise ValueError("Failed to extract text from PDF. File may be corrupted.") from e
36
+
37
def parse_resume_with_ai(resume_text: str) -> dict:
    """Use GenAI to transform unstructured resume text into JSON.

    Args:
        resume_text: Raw text extracted from the PDF (truncated to 10k chars).

    Returns:
        A dict with the extracted fields (name, email, phone, skills, summary)
        on success, or a dict containing an "error" key on failure.
    """

    # Prompt Engineering: Added instructions for "null" values to keep schema consistent
    prompt = f"""
    Acting as an expert recruiter, extract the following data from this resume text:
    - name (string)
    - email (string)
    - phone (string)
    - skills (array of strings)
    - summary (string, max 2 sentences)

    If a field is not found, return null or an empty list.
    Return strictly valid JSON. Do not include markdown formatting.

    Resume Text:
    {resume_text[:10000]}
    """
    # Truncate text to 10k chars to avoid token limits if user uploads a book

    try:
        response = model.generate_content(prompt)

        # Robust Cleaning: Remove Markdown, newlines, and non-json text
        raw_output = response.text.strip()
        # Remove ```json and ``` identifiers if present
        clean_json = re.sub(r'```json\s*|```', '', raw_output, flags=re.MULTILINE).strip()

        return json.loads(clean_json)
    except json.JSONDecodeError as e:
        # FIX: include the decode error itself (the original captured `e` but
        # never logged it); lazy %-args avoid formatting when logging is off.
        logger.error("JSON Decode Error: %s. Raw AI Output: %s", e, response.text)
        return {"error": "AI response was not valid JSON", "raw_output": response.text}
    except Exception as e:
        logger.error("AI Processing Error: %s", e)
        return {"error": f"AI Processing failed: {str(e)}"}
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ fastapi==0.109.0
2
+ uvicorn==0.27.0
3
+ python-multipart==0.0.6
4
+ streamlit==1.31.0
5
+ requests==2.31.0
6
+ google-generativeai==0.3.2
7
+ pymupdf==1.23.8
8
+ python-dotenv==1.0.1