indhupamula committed on
Commit
37c0d1a
·
verified ·
1 Parent(s): 188eec8

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +285 -0
app.py ADDED
@@ -0,0 +1,285 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import sqlite3
3
+ import re
4
+ import numpy as np
5
+ import pandas as pd
6
+ from PyPDF2 import PdfReader
7
+ from docx import Document
8
+ from sentence_transformers import SentenceTransformer
9
+ from sklearn.metrics.pairwise import cosine_similarity
10
+ import spacy
11
+ from datetime import datetime
12
+ from fpdf import FPDF
13
+ import hashlib
14
+
15
# ---------------------------
# Load models
# ---------------------------
# spaCy English pipeline. NOTE(review): loaded here but not referenced anywhere
# else in this file's visible code — confirm it is actually needed.
nlp = spacy.load("en_core_web_sm")
# Sentence-embedding model used by compute_scores for resume/JD similarity.
model = SentenceTransformer('all-MiniLM-L6-v2')

# ---------------------------
# SQLite DB setup
# ---------------------------
# check_same_thread=False because Gradio may invoke handlers from worker
# threads; the single shared connection/cursor is used module-wide.
conn = sqlite3.connect('resumes.db', check_same_thread=False)
cursor = conn.cursor()

# One row per account; password stored as a SHA-256 hex digest (see hash_password).
cursor.execute("""
CREATE TABLE IF NOT EXISTS users (
id INTEGER PRIMARY KEY,
username TEXT UNIQUE,
password_hash TEXT
)
""")

# One row per (user, job-description) analysis; section_scores and tips are
# stored as plain strings.
cursor.execute("""
CREATE TABLE IF NOT EXISTS analyses (
id INTEGER PRIMARY KEY,
user_id INTEGER,
resume_text TEXT,
jd_text TEXT,
final_score REAL,
keyword_score REAL,
semantic_score REAL,
section_scores TEXT,
tips TEXT,
date TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY(user_id) REFERENCES users(id)
)
""")
conn.commit()
51
+
52
+ # ---------------------------
53
+ # Authentication Functions
54
+ # ---------------------------
55
def hash_password(password):
    """Return the hex-encoded SHA-256 digest of *password*."""
    digest = hashlib.sha256(password.encode())
    return digest.hexdigest()
57
+
58
def signup(username, password):
    """Insert a new user row; return a status message for the UI.

    Duplicate usernames are rejected by the UNIQUE constraint on users.username.
    """
    try:
        cursor.execute(
            "INSERT INTO users (username, password_hash) VALUES (?,?)",
            (username, hash_password(password)),
        )
    except sqlite3.IntegrityError:
        # UNIQUE constraint violation → the name is already taken.
        return "❌ Username already exists. Try a different one."
    conn.commit()
    return "✅ Signup successful! Please login."
66
+
67
def login(username, password):
    """Verify credentials; return (status message, user_id or None)."""
    cursor.execute("SELECT id, password_hash FROM users WHERE username=?", (username,))
    row = cursor.fetchone()
    # Guard clause: unknown user or wrong password hash.
    if not row or row[1] != hash_password(password):
        return "❌ Invalid username or password", None
    return f"✅ Login successful! User ID: {row[0]}", row[0]
74
+
75
+ # ---------------------------
76
+ # Resume Parsing
77
+ # ---------------------------
78
def extract_text_from_pdf(file):
    """Concatenate the extracted text of every page of a PDF file object.

    Pages for which extraction yields None contribute an empty string.
    """
    reader = PdfReader(file)
    return "".join(page.extract_text() or "" for page in reader.pages)
84
+
85
def extract_text_from_docx(file):
    """Return all paragraph texts of a .docx file, newline-separated."""
    document = Document(file)
    return "\n".join(paragraph.text for paragraph in document.paragraphs)
89
+
90
def extract_skills(jd_text):
    """Tokenize a job description into skills, split on commas, semicolons, newlines."""
    candidates = (token.strip() for token in re.split(r"[,\n;]", jd_text))
    return [token for token in candidates if token]
93
+
94
def split_sections(resume_text):
    """Carve the resume into Education / Experience / Skills chunks.

    Headers are matched in Title-case or ALL-CAPS only; a missing section
    yields an empty string for that key.
    """
    sections = {"Education": "", "Experience": "", "Skills": ""}
    patterns = {
        "Education": r'(Education|EDUCATION)(.*?)(Experience|EXPERIENCE|Skills|SKILLS|$)',
        "Experience": r'(Experience|EXPERIENCE)(.*?)(Skills|SKILLS|$)',
        "Skills": r'(Skills|SKILLS)(.*)',
    }
    for name, pattern in patterns.items():
        match = re.search(pattern, resume_text, re.DOTALL)
        if match:
            sections[name] = match.group(2).strip()
    return sections
103
+
104
def compute_scores(resume_text, jd_text, required_skills):
    """Score a resume against one JD.

    Returns (final_score, keyword_score, semantic_score, section_scores, tips):
    keyword coverage, embedding cosine similarity, per-section keyword
    coverage, and one tip per missing skill. final = 0.6*keyword + 0.4*semantic.
    """
    resume_lower = resume_text.lower()

    matched = [kw for kw in required_skills if kw.lower() in resume_lower]
    keyword_score = len(matched) / max(len(required_skills), 1)

    # Embedding-based similarity between the whole resume and the JD.
    semantic_score = cosine_similarity(
        [model.encode(resume_text)], [model.encode(jd_text)]
    )[0][0]

    # Keyword coverage per resume section.
    section_scores = {}
    for name, body in split_sections(resume_text).items():
        hits = [kw for kw in required_skills if kw.lower() in body.lower()]
        section_scores[name] = len(hits) / max(len(required_skills), 1)

    final_score = 0.6 * keyword_score + 0.4 * semantic_score
    tips = [
        f"⚠️ Add '{skill}' to improve ATS match"
        for skill in required_skills
        if skill.lower() not in resume_lower
    ]
    return final_score, keyword_score, semantic_score, section_scores, tips
118
+
119
+ # ---------------------------
120
+ # CSV/PDF Export
121
+ # ---------------------------
122
def export_csv(df, filename="ats_report.csv"):
    """Persist the analysis table as CSV (index omitted); return the file name."""
    df.to_csv(path_or_buf=filename, index=False)
    return filename
125
+
126
def export_pdf(df, filename="ats_report.pdf"):
    """Render the analysis table into a simple PDF report; return the file name."""
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    pdf.cell(200, 10, txt="ATS Resume Screening Report", ln=True, align="C")
    pdf.ln(10)
    for idx, row in df.iterrows():
        # One-line score fields first …
        for line in (
            f"JD {idx+1}: {row['JD']}",
            f"Final Score: {row['Final Score']}",
            f"Keyword Score: {row['Keyword Score']}",
            f"Semantic Score: {row['Semantic Score']}",
        ):
            pdf.cell(200, 10, txt=line, ln=True)
        # … then the multi-line section-score and tip blobs.
        pdf.cell(200, 10, txt="Section Scores:", ln=True)
        pdf.multi_cell(0, 10, row["Section Scores"])
        pdf.cell(200, 10, txt="Tips:", ln=True)
        pdf.multi_cell(0, 10, row["Tips"])
        pdf.ln(5)
    pdf.output(filename)
    return filename
144
+
145
+ # ---------------------------
146
+ # AI Resume Rewriter
147
+ # ---------------------------
148
def ai_resume_rewriter(resume_text, jd_text):
    """Append a bullet line for every JD skill the resume does not mention.

    Missing-skill checks are case-insensitive against the ORIGINAL resume
    text; the original content is returned unchanged with additions at the end.
    """
    resume_lower = resume_text.lower()
    additions = [
        f"\n- Experience with {skill}"
        for skill in extract_skills(jd_text)
        if skill.lower() not in resume_lower
    ]
    return resume_text + "".join(additions)
155
+
156
+ # ---------------------------
157
+ # Feedback Generator
158
+ # ---------------------------
159
# Course suggestions offered when a skill is missing from the resume.
# Keys must match the skill strings exactly as written in the job description
# (exact-match lookup in generate_feedback).
skill_course_mapping = {
    "Python": ["Complete 'Python for Everybody' on Coursera", "Try Python projects on GitHub"],
    "Machine Learning": ["Take 'Machine Learning' by Andrew Ng on Coursera", "Kaggle ML competitions"],
    "Deep Learning": ["DeepLearning.AI TensorFlow Developer Course", "Build neural network projects"],
    "SQL": ["SQL for Data Science - Coursera", "Practice on LeetCode SQL problems"],
    "AWS": ["AWS Certified Solutions Architect - Associate", "AWS Free Tier practice"],
    "TensorFlow": ["TensorFlow in Practice Specialization - Coursera", "Hands-on DL projects"]
}

# One recommended certification per skill keyword (exact-match lookup).
certification_mapping = {
    "AWS": "AWS Certified Solutions Architect",
    "ML": "Machine Learning by Andrew Ng",
    "Python": "PCAP: Python Certified Associate Programmer",
    "TensorFlow": "TensorFlow Developer Certificate"
}
174
+
175
def generate_feedback(resume_text, jd_text):
    """Build a markdown feedback report for a resume against one JD text.

    Sections: missing skills, suggested courses, suggested certifications,
    and generic structure tips. Returns a single markdown string.
    """
    required_skills = extract_skills(jd_text)
    resume_lower = resume_text.lower()

    missing_skills = [s for s in required_skills if s.lower() not in resume_lower]
    skill_suggestions = []
    cert_suggestions = []
    for skill in missing_skills:
        # Exact-match lookups against the curated mapping tables.
        if skill in skill_course_mapping:
            skill_suggestions.append(f"{skill}: {', '.join(skill_course_mapping[skill])}")
        if skill in certification_mapping:
            cert_suggestions.append(f"Consider certification: {certification_mapping[skill]}")

    resume_tips = []
    # BUG FIX: the section checks were case-sensitive ("Education" not in
    # resume_text), so an all-caps "EDUCATION" header wrongly triggered the
    # tip even though split_sections accepts both casings. Compare lowercase,
    # consistent with how skills are matched everywhere else in this file.
    if "education" not in resume_lower:
        resume_tips.append("Include an Education section if missing.")
    if "experience" not in resume_lower:
        resume_tips.append("Include an Experience section with quantified achievements.")
    if "skills" not in resume_lower:
        resume_tips.append("Add a Skills section highlighting relevant skills.")
    if len(resume_text.split()) < 200:
        resume_tips.append("Consider adding more details to increase resume length and content richness.")

    parts = [
        "### Missing Skills:\n",
        "\n".join(missing_skills) if missing_skills else "None",
        "\n\n### Suggested Courses:\n",
        "\n".join(skill_suggestions) if skill_suggestions else "No suggestions",
        "\n\n### Suggested Certifications:\n",
        "\n".join(cert_suggestions) if cert_suggestions else "No suggestions",
        "\n\n### Resume Optimization Tips:\n",
        "\n".join(resume_tips) if resume_tips else "Your resume looks well-structured.",
    ]
    return "".join(parts)
209
+
210
+ # ---------------------------
211
+ # Multi-JD Analysis
212
+ # ---------------------------
213
def analyze_multi_jd(user_id, resume_file, jd_texts):
    """Score one resume against several job descriptions.

    jd_texts is a single string; individual JDs are separated by blank lines.
    Each JD's analysis is persisted to the `analyses` table for *user_id*, and
    the combined results are exported to ats_report.csv / ats_report.pdf in
    the working directory.

    Returns a pandas DataFrame of per-JD results, or None when the resume
    file has an unsupported extension.
    """
    # Choose the extractor from the uploaded file's extension.
    if resume_file.name.endswith(".pdf"):
        resume_text = extract_text_from_pdf(resume_file)
    elif resume_file.name.endswith(".docx"):
        resume_text = extract_text_from_docx(resume_file)
    else:
        return None  # unsupported file type

    # Split into individual JDs on blank lines; drop empty fragments.
    jd_list = [jd.strip() for jd in jd_texts.split("\n\n") if jd.strip()]
    results = []
    for jd in jd_list:
        required_skills = extract_skills(jd)
        final_score, keyword_score, semantic_score, section_scores, tips = compute_scores(resume_text, jd, required_skills)
        section_scores_str = "\n".join([f"{k}: {v:.2%}" for k,v in section_scores.items()])
        tips_str = "\n".join(tips) if tips else "No suggestions"
        results.append({
            # Long JDs are truncated so the table column stays readable.
            "JD": jd[:50]+"..." if len(jd)>50 else jd,
            "Final Score": f"{final_score:.2%}",
            "Keyword Score": f"{keyword_score:.2%}",
            "Semantic Score": f"{semantic_score:.2%}",
            "Section Scores": section_scores_str,
            "Tips": tips_str
        })
        # Persist this JD's analysis; one commit per row.
        cursor.execute("""
        INSERT INTO analyses (user_id, resume_text, jd_text, final_score, keyword_score, semantic_score, section_scores, tips)
        VALUES (?,?,?,?,?,?,?,?)""",
        (user_id, resume_text, jd, final_score, keyword_score, semantic_score, str(section_scores), tips_str))
        conn.commit()

    df = pd.DataFrame(results)
    export_csv(df)
    export_pdf(df)
    return df
246
+
247
+ # ---------------------------
248
+ # Gradio SDK Interface
249
+ # ---------------------------
250
def analyze_gradio(resume_file, jd_text, username, password):
    """Gradio entry point: authenticate, analyze, and return the 4 outputs.

    Returns (csv_path, pdf_path, feedback_markdown, rewritten_resume).
    On login failure or an unsupported file type, the file outputs are None
    and the feedback slot carries the error message.
    """
    login_msg, user_id = login(username, password)
    if not user_id:
        return None, None, login_msg, ""

    # BUG FIX: extract the resume text once, with an explicit fallback.
    # Previously an unsupported extension left `resume_text` unbound and the
    # function crashed with a NameError; the file was also re-read even
    # though analyze_multi_jd already parsed it.
    if resume_file.name.endswith(".pdf"):
        resume_text = extract_text_from_pdf(resume_file)
    elif resume_file.name.endswith(".docx"):
        resume_text = extract_text_from_docx(resume_file)
    else:
        return None, None, "❌ Unsupported file type. Upload a PDF or DOCX resume.", ""

    # Runs the scoring and writes ats_report.csv / ats_report.pdf to disk.
    analyze_multi_jd(user_id, resume_file, jd_text)

    feedback = generate_feedback(resume_text, jd_text)
    rewritten_resume = ai_resume_rewriter(resume_text, jd_text)

    return "ats_report.csv", "ats_report.pdf", feedback, rewritten_resume
266
+
267
# Single-page Gradio UI: resume file + JD text + credentials in,
# downloadable reports and textual feedback out. Each submission is handled
# by analyze_gradio, which performs login itself (no persistent session).
iface = gr.Interface(
    fn=analyze_gradio,
    inputs=[
        gr.File(label="Upload Resume (PDF/DOCX)"),
        gr.Textbox(label="Paste Job Description(s) (Separate multiple JDs with double line breaks)", lines=10),
        gr.Textbox(label="Username"),
        gr.Textbox(label="Password", type="password")
    ],
    outputs=[
        gr.File(label="Download CSV Report"),
        gr.File(label="Download PDF Report"),
        gr.Textbox(label="Personalized Feedback", lines=15),
        gr.Textbox(label="AI Suggested Resume Revisions", lines=15)
    ],
    title="AI-Powered Resume Screening System",
    description="Upload your resume, paste job descriptions, and get ATS scoring, personalized feedback, and AI suggestions."
)

# Blocks and serves the app (script entry point; no __main__ guard in this file).
iface.launch()