nadish1210 committed on
Commit
2790b20
·
verified ·
1 Parent(s): 07c81c0

Update model.py

Browse files
Files changed (1) hide show
  1. model.py +25 -14
model.py CHANGED
@@ -6,7 +6,6 @@ from transformers import pipeline
6
  import pandas as pd
7
  from datetime import datetime
8
  import zipfile
9
- import shutil
10
  import re
11
  import imaplib
12
  import email
@@ -21,6 +20,7 @@ MODELS = {
21
  loaded_models = {}
22
  skills_classifier = None
23
 
 
24
  def get_model(name):
25
  if name not in loaded_models:
26
  loaded_models[name] = SentenceTransformer(MODELS[name])
@@ -29,17 +29,20 @@ def get_model(name):
29
  def get_classifier():
30
  global skills_classifier
31
  if skills_classifier is None:
32
- skills_classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
 
 
 
33
  return skills_classifier
34
 
35
- # ================== FILE READER ==================
36
  def extract_text(file_path):
37
  ext = os.path.splitext(file_path)[1].lower()
38
 
39
  try:
40
  if ext == ".pdf":
41
  doc = fitz.open(file_path)
42
- text = "\n".join([p.get_text() for p in doc])
43
  doc.close()
44
  return text.strip()
45
 
@@ -52,7 +55,7 @@ def extract_text(file_path):
52
 
53
  return ""
54
 
55
- # ================== GMAIL FETCH ==================
56
  def fetch_from_gmail(email_user, app_password):
57
  mail = imaplib.IMAP4_SSL("imap.gmail.com")
58
  mail.login(email_user, app_password)
@@ -81,12 +84,18 @@ def fetch_from_gmail(email_user, app_password):
81
 
82
  # ================== AI FEATURES ==================
83
  def extract_skills(text):
84
- labels = ["Python", "Machine Learning", "Deep Learning", "SQL", "AWS", "Docker", "Communication"]
 
 
 
85
 
86
  try:
87
  clf = get_classifier()
88
  res = clf(text[:2000], labels, multi_label=True)
89
- return ", ".join([l for l, s in zip(res["labels"], res["scores"]) if s > 0.4])
 
 
 
90
  except:
91
  return "N/A"
92
 
@@ -95,11 +104,11 @@ def extract_qualifications(text):
95
  found = re.findall(pattern, text.lower())
96
  return ", ".join(set(found)).upper() if found else "Not mentioned"
97
 
98
- # ================== MAIN FUNCTION ==================
99
- def screen_resumes(job_desc, files, model_name="Fast (MiniLM)", threshold=0.65,
100
- gmail=None, password=None):
101
 
102
- # Gmail integration
103
  if gmail and password:
104
  files = fetch_from_gmail(gmail, password)
105
 
@@ -107,11 +116,11 @@ def screen_resumes(job_desc, files, model_name="Fast (MiniLM)", threshold=0.65,
107
  job_emb = model.encode(job_desc, convert_to_tensor=True)
108
 
109
  results = []
110
-
111
  os.makedirs("outputs", exist_ok=True)
112
 
113
  for f in files:
114
- # safe file handling
 
115
  if hasattr(f, "read"):
116
  name = f.name
117
  path = f"temp_{name}"
@@ -134,16 +143,18 @@ def screen_resumes(job_desc, files, model_name="Fast (MiniLM)", threshold=0.65,
134
 
135
  results.append({
136
  "Candidate": name,
137
- "Score": round(score * 100, 2),
138
  "Skills": extract_skills(text),
139
  "Qualification": extract_qualifications(text),
140
  "Status": status
141
  })
142
 
 
143
  df = pd.DataFrame(results)
144
  report_path = f"outputs/report_{datetime.now().strftime('%Y%m%d_%H%M')}.csv"
145
  df.to_csv(report_path, index=False)
146
 
 
147
  zip_path = None
148
  shortlisted = df[df["Status"] == "Shortlisted"]
149
 
 
6
  import pandas as pd
7
  from datetime import datetime
8
  import zipfile
 
9
  import re
10
  import imaplib
11
  import email
 
20
  loaded_models = {}
21
  skills_classifier = None
22
 
23
+ # ================== LOAD MODEL ==================
24
  def get_model(name):
25
  if name not in loaded_models:
26
  loaded_models[name] = SentenceTransformer(MODELS[name])
 
29
  def get_classifier():
30
  global skills_classifier
31
  if skills_classifier is None:
32
+ skills_classifier = pipeline(
33
+ "zero-shot-classification",
34
+ model="facebook/bart-large-mnli"
35
+ )
36
  return skills_classifier
37
 
38
+ # ================== TEXT EXTRACTION ==================
39
  def extract_text(file_path):
40
  ext = os.path.splitext(file_path)[1].lower()
41
 
42
  try:
43
  if ext == ".pdf":
44
  doc = fitz.open(file_path)
45
+ text = "\n".join([page.get_text() for page in doc])
46
  doc.close()
47
  return text.strip()
48
 
 
55
 
56
  return ""
57
 
58
+ # ================== GMAIL FETCH (OPTIONAL) ==================
59
  def fetch_from_gmail(email_user, app_password):
60
  mail = imaplib.IMAP4_SSL("imap.gmail.com")
61
  mail.login(email_user, app_password)
 
84
 
85
  # ================== AI FEATURES ==================
86
  def extract_skills(text):
87
+ labels = [
88
+ "Python", "Machine Learning", "Deep Learning",
89
+ "SQL", "AWS", "Docker", "Communication"
90
+ ]
91
 
92
  try:
93
  clf = get_classifier()
94
  res = clf(text[:2000], labels, multi_label=True)
95
+
96
+ return ", ".join([
97
+ l for l, s in zip(res["labels"], res["scores"]) if s > 0.4
98
+ ])
99
  except:
100
  return "N/A"
101
 
 
104
  found = re.findall(pattern, text.lower())
105
  return ", ".join(set(found)).upper() if found else "Not mentioned"
106
 
107
+ # ================== MAIN FUNCTION (FIXED NAME) ==================
108
+ def screen_resumes_backend(job_desc, files, model_name="Fast (MiniLM)", threshold=0.65,
109
+ gmail=None, password=None):
110
 
111
+ # Gmail integration (optional)
112
  if gmail and password:
113
  files = fetch_from_gmail(gmail, password)
114
 
 
116
  job_emb = model.encode(job_desc, convert_to_tensor=True)
117
 
118
  results = []
 
119
  os.makedirs("outputs", exist_ok=True)
120
 
121
  for f in files:
122
+
123
+ # handle uploaded files (HF / Gradio)
124
  if hasattr(f, "read"):
125
  name = f.name
126
  path = f"temp_{name}"
 
143
 
144
  results.append({
145
  "Candidate": name,
146
+ "Score (%)": round(score * 100, 2),
147
  "Skills": extract_skills(text),
148
  "Qualification": extract_qualifications(text),
149
  "Status": status
150
  })
151
 
152
+ # save report
153
  df = pd.DataFrame(results)
154
  report_path = f"outputs/report_{datetime.now().strftime('%Y%m%d_%H%M')}.csv"
155
  df.to_csv(report_path, index=False)
156
 
157
+ # zip shortlisted
158
  zip_path = None
159
  shortlisted = df[df["Status"] == "Shortlisted"]
160