nadish1210 committed on
Commit
bbd6234
·
verified ·
1 Parent(s): 3566fce

Create model.py

Browse files
Files changed (1) hide show
  1. model.py +157 -0
model.py ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import fitz
3
+ from docx import Document
4
+ from sentence_transformers import SentenceTransformer, util
5
+ from transformers import pipeline
6
+ import pandas as pd
7
+ from datetime import datetime
8
+ import zipfile
9
+ import shutil
10
+ import re
11
+ import imaplib
12
+ import email
13
+
14
+ # ================== MODELS ==================
15
# Display label -> Hugging Face model id accepted by SentenceTransformer.
MODELS = {
    "Fast (MiniLM)": "sentence-transformers/all-MiniLM-L6-v2",
    "Balanced (Recommended)": "sentence-transformers/all-mpnet-base-v2",
    "High Accuracy": "sentence-transformers/multi-qa-mpnet-base-dot-v1",
}

# Lazily filled cache of instantiated embedding models, keyed by display label.
loaded_models = {}

# Zero-shot classification pipeline; created on first use by get_classifier().
skills_classifier = None
23
+
24
def get_model(name):
    """Return the embedding model registered under *name*, loading it once.

    Args:
        name: A key of ``MODELS`` (the human-readable model label).

    Returns:
        The cached ``SentenceTransformer`` instance for that label.

    Raises:
        KeyError: If *name* is not a key of ``MODELS``.
    """
    if name in loaded_models:
        return loaded_models[name]

    # First request for this label: build the model and remember it.
    encoder = SentenceTransformer(MODELS[name])
    loaded_models[name] = encoder
    return encoder
28
+
29
def get_classifier():
    """Return the shared zero-shot classification pipeline, creating it on first call.

    The pipeline is stored in the module-level ``skills_classifier`` so that
    later calls reuse the same object instead of building it again.
    """
    global skills_classifier

    if skills_classifier is not None:
        return skills_classifier

    skills_classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
    return skills_classifier
34
+
35
+ # ================== FILE READER ==================
36
def extract_text(file_path):
    """Extract plain text from a PDF or Word document.

    The file type is chosen from the (case-insensitive) extension of
    *file_path*. Extraction is best-effort: any failure while opening or
    parsing the file yields an empty string instead of an exception.

    Args:
        file_path: Path to the document on disk.

    Returns:
        The stripped text content, or ``""`` when the extension is not
        ``.pdf`` / ``.docx`` / ``.doc`` or the file could not be read.
    """
    ext = os.path.splitext(file_path)[1].lower()

    try:
        if ext == ".pdf":
            # The context manager closes the document even if a page fails
            # to parse, so the file handle is never leaked.
            with fitz.open(file_path) as doc:
                return "\n".join(page.get_text() for page in doc).strip()

        if ext in (".docx", ".doc"):
            # NOTE(review): python-docx targets the .docx format; legacy
            # binary .doc files will presumably fail here and fall through
            # to the empty-string result below - confirm if .doc matters.
            doc = Document(file_path)
            return "\n".join(p.text for p in doc.paragraphs).strip()
    except Exception:
        # Deliberate best-effort: an unreadable resume is skipped by the
        # caller. `Exception` (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        return ""

    return ""
54
+
55
+ # ================== GMAIL FETCH ==================
56
def fetch_from_gmail(email_user, app_password):
    """Download attachments from recent Gmail messages whose subject contains "resume".

    Connects to ``imap.gmail.com`` over SSL, searches the inbox for
    ``SUBJECT "resume"``, and saves every attachment of the last ten matching
    messages to the current working directory as ``temp_<filename>``.

    Args:
        email_user: Gmail address used to log in.
        app_password: App password for that account.

    Returns:
        List of paths of the attachment files that were written.

    Raises:
        imaplib.IMAP4.error: If login or mailbox selection fails.
    """
    files = []

    # The context manager logs out of the IMAP session on exit, including
    # when a command raises, so the connection is not leaked.
    with imaplib.IMAP4_SSL("imap.gmail.com") as mail:
        mail.login(email_user, app_password)
        mail.select("inbox")

        result, data = mail.search(None, '(SUBJECT "resume")')
        if result != "OK" or not data or not data[0]:
            return files
        ids = data[0].split()

        # Only the ten most recent matches are processed.
        for msg_id in ids[-10:]:
            result, msg_data = mail.fetch(msg_id, "(RFC822)")
            # A successful fetch yields a (header, body) tuple first; skip
            # anything else instead of crashing on the index below.
            if result != "OK" or not msg_data or not isinstance(msg_data[0], tuple):
                continue
            msg = email.message_from_bytes(msg_data[0][1])

            for part in msg.walk():
                if part.get_content_disposition() != "attachment":
                    continue

                filename = part.get_filename()
                if not filename:
                    continue

                # decode=True returns None for multipart containers.
                payload = part.get_payload(decode=True)
                if payload is None:
                    continue

                # The filename comes from the sender (untrusted): drop any
                # directory components so it cannot escape the working dir.
                safe_name = os.path.basename(filename.replace("\\", "/"))
                if not safe_name:
                    continue

                path = f"temp_{safe_name}"
                with open(path, "wb") as f:
                    f.write(payload)
                files.append(path)

    return files
81
+
82
+ # ================== AI FEATURES ==================
83
def extract_skills(text):
    """Tag resume text with the skills the zero-shot classifier finds likely.

    Only the first 2000 characters of *text* are classified, against a fixed
    list of candidate skill labels, in multi-label mode.

    Args:
        text: Resume text.

    Returns:
        Comma-separated labels whose score is above 0.4 (possibly an empty
        string when none qualify), or ``"N/A"`` when *text* is blank or the
        classifier fails.
    """
    labels = ["Python", "Machine Learning", "Deep Learning", "SQL", "AWS", "Docker", "Communication"]

    # Nothing to classify: avoid loading the (large) model for blank input.
    if not text or not text.strip():
        return "N/A"

    try:
        clf = get_classifier()
        res = clf(text[:2000], labels, multi_label=True)
        return ", ".join(
            label for label, score in zip(res["labels"], res["scores"]) if score > 0.4
        )
    except Exception:
        # Best-effort feature: a classifier failure must not abort screening.
        # `Exception` (not a bare except) lets KeyboardInterrupt propagate.
        return "N/A"
92
+
93
def extract_qualifications(text):
    """List the degree keywords mentioned in *text*.

    Matching is case-insensitive and whole-word only, over a fixed keyword
    set (bba, bs, bsc, ba, mba, msc, phd, bachelor, master).

    Args:
        text: Resume text; ``None`` is treated as empty.

    Returns:
        The distinct keywords found, upper-cased and comma-separated in order
        of first appearance, or ``"Not mentioned"`` when none are found.
    """
    pattern = r'\b(bba|bs|bsc|ba|mba|msc|phd|bachelor|master)\b'
    found = re.findall(pattern, (text or "").lower())
    if not found:
        return "Not mentioned"

    # dict.fromkeys de-duplicates while keeping first-occurrence order, so the
    # output is stable across runs (a set's order varies with hash seeding).
    return ", ".join(dict.fromkeys(found)).upper()
97
+
98
+ # ================== MAIN FUNCTION ==================
99
def screen_resumes(job_desc, files, model_name="Fast (MiniLM)", threshold=0.65,
                   gmail=None, password=None):
    """Score resumes against a job description and write a report.

    Each resume is embedded with the selected sentence-transformer model and
    compared to the job description by cosine similarity. Resumes yielding
    fewer than 50 characters of extracted text are skipped.

    Side effects: creates ``outputs/``, writes a timestamped CSV report there,
    writes ``outputs/shortlisted.zip`` when at least one resume is
    shortlisted, and writes uploaded file objects to ``temp_<name>`` in the
    working directory.

    Args:
        job_desc: Job description text.
        files: Iterable of file paths and/or file-like objects exposing
            ``read()`` (and usually ``name``). May be ``None``.
        model_name: Key of ``MODELS`` selecting the embedding model.
        threshold: Minimum cosine similarity (0-1 scale) to be "Shortlisted".
        gmail: Optional Gmail address; with *password*, attachments fetched
            from Gmail replace *files*.
        password: App password for *gmail*.

    Returns:
        Tuple ``(results, report_path, zip_path)``: the list of per-candidate
        dicts, the CSV path, and the zip path (``None`` if nobody was
        shortlisted).
    """
    # Gmail integration: fetched attachments replace whatever was passed in.
    if gmail and password:
        files = fetch_from_gmail(gmail, password)

    model = get_model(model_name)
    job_emb = model.encode(job_desc, convert_to_tensor=True)

    os.makedirs("outputs", exist_ok=True)

    results = []
    shortlisted_paths = []

    for f in files or []:
        if hasattr(f, "read"):
            # Upload objects may expose a full path (or nothing) as `.name`;
            # keep only the final component so `temp_<name>` is a valid path.
            raw_name = str(getattr(f, "name", "") or "upload")
            name = os.path.basename(raw_name) or "upload"
            fpath = f"temp_{name}"
            with open(fpath, "wb") as out:
                out.write(f.read())
        else:
            fpath = f
            name = os.path.basename(f)

        text = extract_text(fpath)

        # Too little text to score meaningfully (or extraction failed).
        if len(text) < 50:
            continue

        emb = model.encode(text, convert_to_tensor=True)
        score = util.cos_sim(job_emb, emb)[0][0].item()

        status = "Shortlisted" if score >= threshold else "Rejected"
        if status == "Shortlisted":
            shortlisted_paths.append(fpath)

        results.append({
            "Candidate": name,
            "Score": round(score * 100, 2),
            "Skills": extract_skills(text),
            "Qualification": extract_qualifications(text),
            "Status": status,
        })

    # Explicit columns keep the report header present even with zero rows.
    df = pd.DataFrame(
        results,
        columns=["Candidate", "Score", "Skills", "Qualification", "Status"],
    )
    report_path = f"outputs/report_{datetime.now().strftime('%Y%m%d_%H%M')}.csv"
    df.to_csv(report_path, index=False)

    zip_path = None
    if shortlisted_paths:
        zip_path = "outputs/shortlisted.zip"
        with zipfile.ZipFile(zip_path, "w") as z:
            # Archive only the shortlisted resumes, each path at most once.
            for path in dict.fromkeys(shortlisted_paths):
                if os.path.exists(path):
                    z.write(path)

    return results, report_path, zip_path