Hitika111 committed on
Commit
b22629f
·
verified ·
1 Parent(s): a9ef1b6

Update utils/ai_analyzer.py

Browse files
Files changed (1) hide show
  1. utils/ai_analyzer.py +65 -95
utils/ai_analyzer.py CHANGED
@@ -13,63 +13,44 @@ if api_key:
13
  genai.configure(api_key=api_key)
14
 
15
 
16
- # ── Regex fallbacks ────────────────────────────────────────────────────────────
17
 
18
- def _regex_extract_email(text: str) -> str:
19
- """Extract first email address found in text."""
20
  match = re.search(r'[\w.\-+]+@[\w.\-]+\.[a-zA-Z]{2,}', text)
21
  return match.group(0).strip() if match else ""
22
 
23
- def _regex_extract_phone(text: str) -> str:
24
- """Extract first phone number found in text."""
25
- match = re.search(
26
- r'(\+?\d[\d\s\-().]{7,}\d)',
27
- text
28
- )
29
  return match.group(0).strip() if match else ""
30
 
31
- def _regex_extract_name(text: str) -> str:
32
- """
33
- Heuristic: the candidate's name is usually in the first 5 non-empty lines
34
- as a short (1-4 word) line that is NOT an email/phone/URL/address.
35
- """
36
- skip_patterns = re.compile(
37
- r'(@|http|www|linkedin|github|curriculum|resume|vitae|cv\b|'
38
  r'\d{6,}|\+\d|address|email|phone|mobile|tel:)',
39
  re.IGNORECASE
40
  )
41
  lines = [l.strip() for l in text.splitlines() if l.strip()]
42
  for line in lines[:15]:
43
  words = line.split()
44
- if 1 < len(words) <= 5 and not skip_patterns.search(line):
45
- # Looks like a proper name (each word title-cased or all caps)
46
  if all(w[0].isupper() for w in words if w.isalpha()):
47
  return line
48
  return ""
49
 
50
- def _ensure_fields(result: dict, resume_text: str) -> dict:
51
- """
52
- If AI returned empty/Unknown name or email, try regex fallback.
53
- """
54
  if not result.get("email"):
55
  result["email"] = _regex_extract_email(resume_text)
56
- if result["email"]:
57
- logger.info(f"[Fallback] Email extracted via regex: {result['email']}")
58
-
59
  if not result.get("phone"):
60
  result["phone"] = _regex_extract_phone(resume_text)
61
-
62
  name = result.get("name", "").strip()
63
  if not name or name.lower() in ("unknown", "n/a", ""):
64
- fallback_name = _regex_extract_name(resume_text)
65
- if fallback_name:
66
- result["name"] = fallback_name
67
- logger.info(f"[Fallback] Name extracted via heuristic: {fallback_name}")
68
-
69
  return result
70
 
71
 
72
- # ── Model selection ────────────────────────────────────────────────────────────
73
 
74
  def _get_model():
75
  preferred = [
@@ -85,17 +66,13 @@ def _get_model():
85
  return p
86
  return available[0] if available else None
87
  except Exception as e:
88
- logger.error(f"Model listing failed: {e}")
89
  return "models/gemini-1.5-flash"
90
 
91
 
92
- # ── Main analyzer ──────────────────────────────────────────────────────────────
93
 
94
- def analyze_resume_with_jd(resume_text: str, job_description: str = None, job_title: str = "") -> dict:
95
- """
96
- Analyze a resume against a job description using Gemini AI.
97
- Returns structured JSON with candidate info, score, and skill analysis.
98
- """
99
  if not api_key:
100
  logger.warning("No Gemini API key β€” returning mock data.")
101
  return _mock_analysis(resume_text)
@@ -104,80 +81,73 @@ def analyze_resume_with_jd(resume_text: str, job_description: str = None, job_ti
104
  if not model_name:
105
  return {"error": "No AI models available."}
106
 
107
- prompt = f"""
108
- You are an expert resume parser and AI recruiter. Your FIRST priority is to accurately extract contact information from the resume.
109
-
110
- CRITICAL EXTRACTION RULES:
111
- - "name": Look at the very top of the resume. It is almost always the largest/first text β€” usually the first 1-4 word line. Do NOT return "Unknown".
112
- - "email": Scan the ENTIRE resume for any text matching pattern user@domain.com. It is always present near the top. Do NOT return empty string if you see an email anywhere.
113
- - "phone": Look for any number sequence that looks like a phone number (with country code, dashes, spaces).
114
-
115
- {'Compare the resume against the Job Description to compute a match score.' if job_description else 'Score the resume on overall quality (0-100).'}
116
-
117
- Return ONLY a raw JSON object β€” no markdown fences, no explanation, no extra text before or after:
118
- {{
119
- "name": "<full name from top of resume β€” never Unknown if text exists>",
120
- "email": "<email found in resume β€” search carefully, never leave empty if present>",
121
- "phone": "<phone number or empty string>",
122
- "experience_years": "<e.g. 3 years or Fresher>",
123
- "current_role": "<current or most recent job title>",
124
- "skills": ["<skill1>", "<skill2>"],
125
- "education": "<highest degree and institution>",
126
- "score": <integer 0-100>,
127
- "reasoning": "<2-3 sentences explaining the score>",
128
- "matching_skills": ["<skills in both resume and JD>"],
129
- "missing_skills": ["<skills in JD but NOT in resume>"],
130
- "verdict": "<Interview | Shortlist | Reject>",
131
- "strengths": ["<strength1>", "<strength2>"],
132
- "red_flags": ["<concern1>"]
133
- }}
134
-
135
- {('JOB TITLE: ' + job_title) if job_title else ''}
136
- {('JOB DESCRIPTION:\n' + job_description[:4000]) if job_description else ''}
137
-
138
- RESUME TEXT (parse this carefully):
139
- ---
140
- {resume_text[:8000]}
141
- ---
142
- """
 
143
 
144
  try:
145
  model = genai.GenerativeModel(model_name)
146
  response = model.generate_content(prompt)
147
  raw = response.text.strip()
148
 
149
- # Strip markdown fences if present
150
  clean = re.sub(r'```(?:json)?\s*|\s*```', '', raw, flags=re.MULTILINE).strip()
151
-
152
- # Sometimes the model adds text before the JSON β€” find the first '{'
153
  brace_start = clean.find('{')
154
  brace_end = clean.rfind('}')
155
  if brace_start != -1 and brace_end != -1:
156
  clean = clean[brace_start:brace_end + 1]
157
 
158
  result = json.loads(clean)
159
-
160
- # Always apply regex fallback to catch anything the AI missed
161
  result = _ensure_fields(result, resume_text)
162
-
163
- logger.info(f"Analyzed: name={result.get('name')}, email={result.get('email')}, score={result.get('score')}")
164
  return result
165
 
166
  except json.JSONDecodeError as e:
167
- logger.error(f"JSON parse error: {e}\nRaw response: {raw[:600]}")
168
- # Return regex-extracted fields so at least name/email work
169
- fallback = {
170
- "error": "AI response parsing failed.",
171
- "score": 0,
172
  "name": _regex_extract_name(resume_text) or "Unknown",
173
  "email": _regex_extract_email(resume_text),
174
  "phone": _regex_extract_phone(resume_text),
175
  "matching_skills": [], "missing_skills": [],
176
  "reasoning": "", "verdict": "Reject"
177
  }
178
- return fallback
179
  except Exception as e:
180
- logger.error(f"AI analysis failed: {e}")
181
  return {
182
  "error": str(e), "score": 0,
183
  "name": _regex_extract_name(resume_text) or "Unknown",
@@ -187,20 +157,20 @@ RESUME TEXT (parse this carefully):
187
  "reasoning": "", "verdict": "Reject"
188
  }
189
 
190
- def _mock_analysis(resume_text: str) -> dict:
191
- """Fallback mock when no API key is set (for local dev/demo)."""
192
  import random
193
  score = random.randint(45, 95)
194
  return {
195
- "name": "Demo Candidate",
196
- "email": "demo@example.com",
197
- "phone": "+91-9876543210",
198
  "experience_years": "3 years",
199
  "current_role": "Software Engineer",
200
  "skills": ["Python", "Flask", "SQL", "REST APIs"],
201
  "education": "B.Tech Computer Science",
202
  "score": score,
203
- "reasoning": f"Candidate scored {score}/100 based on skills and experience alignment.",
204
  "matching_skills": ["Python", "Flask"],
205
  "missing_skills": ["AWS", "Docker"],
206
  "verdict": "Interview" if score >= 80 else "Shortlist" if score >= 60 else "Reject",
 
13
  genai.configure(api_key=api_key)
14
 
15
 
16
+ # ── Regex fallbacks ─────────────────────────────────────────────────────────
17
 
18
+ def _regex_extract_email(text):
 
19
  match = re.search(r'[\w.\-+]+@[\w.\-]+\.[a-zA-Z]{2,}', text)
20
  return match.group(0).strip() if match else ""
21
 
22
+ def _regex_extract_phone(text):
23
+ match = re.search(r'(\+?\d[\d\s\-().]{7,}\d)', text)
 
 
 
 
24
  return match.group(0).strip() if match else ""
25
 
26
+ def _regex_extract_name(text):
27
+ skip = re.compile(
28
+ r'(@|http|www|linkedin|github|curriculum|resume|vitae|\bcv\b|'
 
 
 
 
29
  r'\d{6,}|\+\d|address|email|phone|mobile|tel:)',
30
  re.IGNORECASE
31
  )
32
  lines = [l.strip() for l in text.splitlines() if l.strip()]
33
  for line in lines[:15]:
34
  words = line.split()
35
+ if 1 < len(words) <= 5 and not skip.search(line):
 
36
  if all(w[0].isupper() for w in words if w.isalpha()):
37
  return line
38
  return ""
39
 
40
+ def _ensure_fields(result, resume_text):
 
 
 
41
  if not result.get("email"):
42
  result["email"] = _regex_extract_email(resume_text)
 
 
 
43
  if not result.get("phone"):
44
  result["phone"] = _regex_extract_phone(resume_text)
 
45
  name = result.get("name", "").strip()
46
  if not name or name.lower() in ("unknown", "n/a", ""):
47
+ fallback = _regex_extract_name(resume_text)
48
+ if fallback:
49
+ result["name"] = fallback
 
 
50
  return result
51
 
52
 
53
+ # ── Model selection ──────────────────────────────────────────────────────────
54
 
55
  def _get_model():
56
  preferred = [
 
66
  return p
67
  return available[0] if available else None
68
  except Exception as e:
69
+ logger.error("Model listing failed: %s", e)
70
  return "models/gemini-1.5-flash"
71
 
72
 
73
+ # ── Main analyzer ────────────────────────────────────────────────────────────
74
 
75
+ def analyze_resume_with_jd(resume_text, job_description=None, job_title=""):
 
 
 
 
76
  if not api_key:
77
  logger.warning("No Gemini API key β€” returning mock data.")
78
  return _mock_analysis(resume_text)
 
81
  if not model_name:
82
  return {"error": "No AI models available."}
83
 
84
+ # Build prompt pieces WITHOUT backslashes inside f-string expressions
85
+ jd_title_line = ("JOB TITLE: " + job_title) if job_title else ""
86
+ jd_body_line = ("JOB DESCRIPTION:\n" + job_description[:4000]) if job_description else ""
87
+ compare_line = ("Compare the resume against the Job Description to compute a match score."
88
+ if job_description else "Score the resume on overall quality (0-100).")
89
+
90
+ prompt = (
91
+ "You are an expert resume parser and AI recruiter. "
92
+ "Your FIRST priority is to accurately extract contact information.\n\n"
93
+ "CRITICAL EXTRACTION RULES:\n"
94
+ '- "name": First 1-4 word line at top of resume. Never return Unknown.\n'
95
+ '- "email": Scan entire resume for user@domain.com pattern. Never leave empty if present.\n'
96
+ '- "phone": Any phone-like digit sequence.\n\n'
97
+ + compare_line + "\n\n"
98
+ "Return ONLY a raw JSON object β€” no markdown, no extra text:\n"
99
+ "{\n"
100
+ ' "name": "<full name>",\n'
101
+ ' "email": "<email>",\n'
102
+ ' "phone": "<phone or empty>",\n'
103
+ ' "experience_years": "<e.g. 3 years>",\n'
104
+ ' "current_role": "<job title>",\n'
105
+ ' "skills": ["skill1", "skill2"],\n'
106
+ ' "education": "<degree and institution>",\n'
107
+ ' "score": <0-100>,\n'
108
+ ' "reasoning": "<2-3 sentences>",\n'
109
+ ' "matching_skills": ["skill"],\n'
110
+ ' "missing_skills": ["skill"],\n'
111
+ ' "verdict": "<Interview|Shortlist|Reject>",\n'
112
+ ' "strengths": ["strength"],\n'
113
+ ' "red_flags": ["concern"]\n'
114
+ "}\n\n"
115
+ + jd_title_line + "\n"
116
+ + jd_body_line + "\n\n"
117
+ "RESUME TEXT:\n---\n"
118
+ + resume_text[:8000]
119
+ + "\n---"
120
+ )
121
 
122
  try:
123
  model = genai.GenerativeModel(model_name)
124
  response = model.generate_content(prompt)
125
  raw = response.text.strip()
126
 
 
127
  clean = re.sub(r'```(?:json)?\s*|\s*```', '', raw, flags=re.MULTILINE).strip()
 
 
128
  brace_start = clean.find('{')
129
  brace_end = clean.rfind('}')
130
  if brace_start != -1 and brace_end != -1:
131
  clean = clean[brace_start:brace_end + 1]
132
 
133
  result = json.loads(clean)
 
 
134
  result = _ensure_fields(result, resume_text)
135
+ logger.info("Analyzed: name=%s email=%s score=%s",
136
+ result.get('name'), result.get('email'), result.get('score'))
137
  return result
138
 
139
  except json.JSONDecodeError as e:
140
+ logger.error("JSON parse error: %s", e)
141
+ return {
142
+ "error": "AI response parsing failed.", "score": 0,
 
 
143
  "name": _regex_extract_name(resume_text) or "Unknown",
144
  "email": _regex_extract_email(resume_text),
145
  "phone": _regex_extract_phone(resume_text),
146
  "matching_skills": [], "missing_skills": [],
147
  "reasoning": "", "verdict": "Reject"
148
  }
 
149
  except Exception as e:
150
+ logger.error("AI analysis failed: %s", e)
151
  return {
152
  "error": str(e), "score": 0,
153
  "name": _regex_extract_name(resume_text) or "Unknown",
 
157
  "reasoning": "", "verdict": "Reject"
158
  }
159
 
160
+
161
+ def _mock_analysis(resume_text):
162
  import random
163
  score = random.randint(45, 95)
164
  return {
165
+ "name": _regex_extract_name(resume_text) or "Demo Candidate",
166
+ "email": _regex_extract_email(resume_text) or "demo@example.com",
167
+ "phone": _regex_extract_phone(resume_text) or "+91-9876543210",
168
  "experience_years": "3 years",
169
  "current_role": "Software Engineer",
170
  "skills": ["Python", "Flask", "SQL", "REST APIs"],
171
  "education": "B.Tech Computer Science",
172
  "score": score,
173
+ "reasoning": "Candidate scored %d/100 based on skills and experience." % score,
174
  "matching_skills": ["Python", "Flask"],
175
  "missing_skills": ["AWS", "Docker"],
176
  "verdict": "Interview" if score >= 80 else "Shortlist" if score >= 60 else "Reject",