LovnishVerma committed on
Commit
29fc8f7
·
verified ·
1 Parent(s): 6dab387

Update parser_logic.py

Browse files
Files changed (1) hide show
  1. parser_logic.py +72 -38
parser_logic.py CHANGED
@@ -6,66 +6,100 @@ import fitz # PyMuPDF
6
  import google.generativeai as genai
7
  from dotenv import load_dotenv
8
 
9
- # Configure Logging
10
  logging.basicConfig(level=logging.INFO)
11
  logger = logging.getLogger(__name__)
12
 
13
  load_dotenv()
14
 
15
- # Secure Configuration
16
  api_key = os.getenv("GEMINI_API_KEY")
17
  if not api_key:
18
- logger.error("GEMINI_API_KEY not found in environment variables.")
19
  raise ValueError("GEMINI_API_KEY is missing.")
20
 
21
  genai.configure(api_key=api_key)
22
- model = genai.GenerativeModel('gemini-1.5-flash')
23
 
24
  def extract_text_from_stream(file_bytes: bytes) -> str:
25
- """Extracts raw text content from PDF bytes directly in memory."""
26
  text = ""
27
  try:
28
- # stream=file_bytes tells PyMuPDF to read from memory, not disk
29
  with fitz.open(stream=file_bytes, filetype="pdf") as doc:
30
  for page in doc:
31
  text += page.get_text()
32
  except Exception as e:
33
  logger.error(f"PDF Extraction Error: {e}")
34
- raise ValueError("Failed to extract text from PDF. File may be corrupted.")
35
  return text
36
 
37
- def parse_resume_with_ai(resume_text: str) -> dict:
38
- """Uses GenAI to transform unstructured text into JSON."""
 
 
39
 
40
- # Prompt Engineering: Added instructions for "null" values to keep schema consistent
41
- prompt = f"""
42
- Acting as an expert recruiter, extract the following data from this resume text:
43
- - name (string)
44
- - email (string)
45
- - phone (string)
46
- - skills (array of strings)
47
- - summary (string, max 2 sentences)
48
-
49
- If a field is not found, return null or an empty list.
50
- Return strictly valid JSON. Do not include markdown formatting.
51
-
52
- Resume Text:
53
- {resume_text[:10000]}
54
- """
55
- # Truncate text to 10k chars to avoid token limits if user uploads a book
56
 
57
- try:
58
- response = model.generate_content(prompt)
 
 
59
 
60
- # Robust Cleaning: Remove Markdown, newlines, and non-json text
61
- raw_output = response.text.strip()
62
- # Remove ```json and ``` identifiers if present
63
- clean_json = re.sub(r'```json\s*|```', '', raw_output, flags=re.MULTILINE).strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
 
65
- return json.loads(clean_json)
66
- except json.JSONDecodeError as e:
67
- logger.error(f"JSON Decode Error. Raw AI Output: {response.text}")
68
- return {"error": "AI response was not valid JSON", "raw_output": response.text}
69
- except Exception as e:
70
- logger.error(f"AI Processing Error: {e}")
71
- return {"error": f"AI Processing failed: {str(e)}"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  import google.generativeai as genai
7
  from dotenv import load_dotenv
8
 
 
9
  # Module-level setup: runs once, at import time, before any function below is called.
  # Configure the root logger to emit INFO-and-above records.
  logging.basicConfig(level=logging.INFO)
10
  # Logger named after this module; used by the functions in this file.
  logger = logging.getLogger(__name__)
11
 
12
  # Presumably loads variables from a local .env file into the process
  # environment (python-dotenv) so the key lookup below can see them.
  load_dotenv()
13
 
 
14
  # os.getenv returns None when the variable is unset.
  api_key = os.getenv("GEMINI_API_KEY")
15
  # `not api_key` is also true for an empty string, so a blank key is rejected too.
  if not api_key:
 
16
  # Raised at module level: importing this module fails when the key is missing.
  raise ValueError("GEMINI_API_KEY is missing.")
17
 
18
  # Hand the key to the google.generativeai SDK for the model calls made later in this file.
  genai.configure(api_key=api_key)
 
19
 
20
  def extract_text_from_stream(file_bytes: bytes) -> str:
 
21
  text = ""
22
  try:
 
23
  with fitz.open(stream=file_bytes, filetype="pdf") as doc:
24
  for page in doc:
25
  text += page.get_text()
26
  except Exception as e:
27
  logger.error(f"PDF Extraction Error: {e}")
28
+ raise ValueError("Failed to extract text from PDF.")
29
  return text
30
 
31
def analyze_resume(resume_text: str, job_description: str = None) -> dict:
    """Extract candidate data from a resume, optionally scoring it against a job description.

    A prompt is built from the resume (and the job description, when one is
    given) and sent to each Gemini model in ``_MODEL_FALLBACK_ORDER`` until
    one returns a reply that parses as a JSON object.

    Args:
        resume_text: Plain text of the resume. Only the first 10,000
            characters are included in the prompt.
        job_description: Optional job description. When it contains
            non-whitespace text, the prompt also asks for a match analysis.
            Only the first 5,000 characters are included in the prompt.

    Returns:
        The JSON object parsed from the model reply. The prompt asks for a
        ``"candidate"`` object and, in matching mode, a ``"match_analysis"``
        object, but the reply is not validated against that schema.
        ``{"error": "..."}`` is returned when the resume text is empty or
        when every model fails.
    """
    # Nothing to analyze (e.g. a scanned PDF with no text layer): skip the API call.
    if not resume_text or not resume_text.strip():
        return {"error": "Analysis failed. Detail: resume text is empty."}

    prompt = _build_prompt(resume_text, job_description)

    last_error = None
    for model_name in _MODEL_FALLBACK_ORDER:
        try:
            model = genai.GenerativeModel(model_name)
            response = model.generate_content(prompt)
            return _parse_json_object(response.text)
        except Exception as e:
            # Boundary handler: any SDK or parsing failure moves on to the next model.
            logger.warning("Model %s failed: %s", model_name, e)
            last_error = e

    return {"error": f"Analysis failed. Detail: {last_error}"}


# Models are tried in this order; the first usable reply wins.
_MODEL_FALLBACK_ORDER = ("gemini-1.5-flash", "gemini-pro")

# Truncation limits that keep the prompt bounded for very long inputs.
_MAX_JOB_DESCRIPTION_CHARS = 5000
_MAX_RESUME_CHARS = 10000

_MATCH_INSTRUCTIONS = """\
Act as a strict AI Recruiter. Compare the Resume against the Job Description.

RETURN JSON ONLY with this exact structure:
{
    "candidate": {
        "name": "string",
        "email": "string",
        "phone": "string",
        "skills": ["list", "of", "candidate", "skills"],
        "experience_years": "string or null"
    },
    "match_analysis": {
        "score": integer_0_to_100,
        "reasoning": "brief summary of why this score was given",
        "matching_skills": ["skills in both resume and JD"],
        "missing_skills": ["skills in JD but NOT in resume"],
        "verdict": "Interview" | "Shortlist" | "Reject"
    }
}
"""

_EXTRACTION_INSTRUCTIONS = """\
Extract structured data from the resume. Return JSON:
{
    "candidate": {
        "name": "string",
        "email": "string",
        "phone": "string",
        "skills": ["list", "of", "skills"],
        "summary": "string"
    }
}
"""


def _build_prompt(resume_text, job_description):
    """Assemble the model prompt: matching mode with a job description, extraction-only without."""
    resume_section = "\nRESUME TEXT:\n" + resume_text[:_MAX_RESUME_CHARS] + "\n"
    if job_description and job_description.strip():
        jd_section = (
            "\nJOB DESCRIPTION:\n"
            + job_description[:_MAX_JOB_DESCRIPTION_CHARS]
            + "\n"
        )
        return _MATCH_INSTRUCTIONS + jd_section + resume_section
    return _EXTRACTION_INSTRUCTIONS + resume_section


def _parse_json_object(raw):
    """Parse the JSON object in a model reply, ignoring code fences or prose around it."""
    text = raw.strip()
    # Keep only the outermost {...} span so Markdown fences and any text
    # before or after the object do not break parsing.
    start, end = text.find("{"), text.rfind("}")
    if start != -1 and end > start:
        text = text[start:end + 1]
    parsed = json.loads(text)
    if not isinstance(parsed, dict):
        raise ValueError("Model reply was valid JSON but not a JSON object.")
    return parsed