# ResumeDataExtractor / parser_logic.py
# Hugging Face Space by LovnishVerma — "Update parser_logic.py" (commit 1f85315, verified)
import os
import json
import re
import logging
import fitz  # PyMuPDF
import google.generativeai as genai
from dotenv import load_dotenv

# Module-level logger for this parser.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Load GEMINI_API_KEY from a local .env file (if present) and fail fast when
# it is missing — every Gemini call below depends on a configured key.
load_dotenv()
api_key = os.getenv("GEMINI_API_KEY")
if not api_key:
    raise ValueError("GEMINI_API_KEY is missing.")
genai.configure(api_key=api_key)
def extract_text_from_stream(file_bytes: bytes) -> str:
    """Extract the plain text of every page of an in-memory PDF.

    Args:
        file_bytes: The PDF document as a raw byte string.

    Returns:
        The concatenated text of all pages (may be empty, e.g. for
        image-only PDFs with no text layer).

    Raises:
        ValueError: If PyMuPDF cannot open or read the document; the
            original fitz exception is chained as the cause.
    """
    try:
        with fitz.open(stream=file_bytes, filetype="pdf") as doc:
            # join is linear; repeated str += can be quadratic over many pages
            return "".join(page.get_text() for page in doc)
    except Exception as e:
        logger.error(f"PDF Extraction Error: {e}")
        # Chain the cause so tracebacks keep the underlying fitz error.
        raise ValueError("Failed to extract text from PDF.") from e
def get_available_model_name():
    """Dynamically find a working Gemini model for this API key.

    Returns:
        A fully-qualified model name (e.g. "models/gemini-1.5-flash"),
        or None when listing fails or no model supports generateContent.
    """
    try:
        # Only models that support generateContent are usable here.
        available_models = [
            m.name
            for m in genai.list_models()
            if "generateContent" in m.supported_generation_methods
        ]
        if not available_models:
            logger.error("No models found.")
            return None
        # Priority list: try these specific powerful models first.
        preferred_order = [
            "models/gemini-1.5-flash",
            "models/gemini-1.5-pro",
            "models/gemini-pro",
            "models/gemini-1.0-pro",
        ]
        # 1. Return the first preferred model that the account actually has.
        for preferred in preferred_order:
            if preferred in available_models:
                logger.info(f"Selected Preferred Model: {preferred}")
                return preferred
        # 2. None of the preferred models exist; take the first available one.
        fallback = available_models[0]
        logger.warning(f"Preferred models missing. Falling back to: {fallback}")
        return fallback
    except Exception as e:
        logger.error(f"Error listing models: {e}")
        return None
def analyze_resume(resume_text: str, job_description: str = None) -> dict:
    """Run Gemini over a resume and return structured JSON.

    Args:
        resume_text: Plain text extracted from the candidate's resume.
        job_description: Optional job-description text; when provided the
            model also scores the resume against it.

    Returns:
        The parsed model output as a dict, or {"error": ...} on any failure
        (no usable model, API error, or unparseable model reply).
    """
    # 1. FIND A WORKING MODEL (The Critical Fix)
    model_name = get_available_model_name()
    if not model_name:
        return {"error": "CRITICAL: No available AI models found for this API Key."}

    # 2. CONSTRUCT PROMPT (inputs are truncated inside the builders to
    #    bound token usage).
    if job_description:
        prompt = _build_match_prompt(resume_text, job_description)
    else:
        prompt = _build_extract_prompt(resume_text)

    # 3. GENERATE CONTENT and parse the JSON reply.
    try:
        model = genai.GenerativeModel(model_name)
        response = model.generate_content(prompt)
        return _parse_json_reply(response.text)
    except Exception as e:
        logger.error(f"Analysis failed with model {model_name}: {e}")
        return {"error": f"Analysis failed using {model_name}. Detail: {str(e)}"}


def _build_match_prompt(resume_text: str, job_description: str) -> str:
    """Prompt that scores the resume against a job description (JSON-only)."""
    return f"""
Act as a strict AI Recruiter. Compare the Resume against the Job Description.
RETURN JSON ONLY with this exact structure:
{{
"candidate": {{
"name": "string",
"email": "string",
"phone": "string",
"skills": ["list", "of", "candidate", "skills"],
"experience_years": "string or null"
}},
"match_analysis": {{
"score": integer_0_to_100,
"reasoning": "brief summary of why this score was given",
"matching_skills": ["skills in both resume and JD"],
"missing_skills": ["skills in JD but NOT in resume"],
"verdict": "Interview" | "Shortlist" | "Reject"
}}
}}
JOB DESCRIPTION:
{job_description[:5000]}
RESUME TEXT:
{resume_text[:10000]}
"""


def _build_extract_prompt(resume_text: str) -> str:
    """Prompt for plain structured extraction when no JD is supplied."""
    return f"""
Extract structured data from the resume. Return JSON:
{{
"candidate": {{
"name": "string",
"email": "string",
"phone": "string",
"skills": ["list", "of", "skills"],
"summary": "string"
}}
}}
RESUME TEXT:
{resume_text[:10000]}
"""


def _parse_json_reply(raw: str) -> dict:
    """Strip Markdown code fences the model may wrap around its JSON, then parse.

    Raises:
        json.JSONDecodeError: If the cleaned text is still not valid JSON
            (caught by the caller's broad handler).
    """
    clean_json = re.sub(r'```json\s*|```', '', raw.strip(), flags=re.MULTILINE).strip()
    return json.loads(clean_json)