indhupamula committed on
Commit
a7d4e72
·
verified ·
1 Parent(s): 75fa1b2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +203 -22
app.py CHANGED
@@ -1,33 +1,214 @@
1
  import gradio as gr
 
 
2
  import pandas as pd
3
  from PyPDF2 import PdfReader
 
4
  from sentence_transformers import SentenceTransformer
 
5
  import spacy
 
 
6
 
7
# Load models once at import time.
# NOTE(review): assumes en_core_web_sm is pre-installed; there is no
# runtime-download fallback in this version.
nlp = spacy.load("en_core_web_sm")
embedder = SentenceTransformer('all-MiniLM-L6-v2') # lightweight embedding model
 
 
 
 
 
10
 
11
def extract_text(file):
    """Return the full text of an uploaded PDF, or an error string otherwise.

    `file` is the Gradio upload object; `.name` is its path on disk.
    """
    if file.name.endswith(".pdf"):
        reader = PdfReader(file.name)
        text = ""
        for page in reader.pages:
            # NOTE(review): PyPDF2's extract_text() can return None for
            # image-only pages, which would raise TypeError here — confirm.
            text += page.extract_text()
        return text
    else:
        # Only PDF uploads are supported in this version.
        return "Unsupported file type"
20
-
21
def analyze_text(text):
    """Sentence-segment `text` with spaCy and return the first 5 sentences."""
    doc = nlp(text)
    sentences = [sent.text for sent in doc.sents]
    # NOTE(review): embeddings are computed but never used or returned.
    embeddings = embedder.encode(sentences)
    return "\n".join(sentences[:5]) # first 5 sentences for demo
26
-
27
# Minimal Gradio UI: upload a PDF, show its extracted text.
with gr.Blocks() as demo:
    gr.Markdown("# PDF Text Extractor & Analyzer")
    pdf_input = gr.File(label="Upload PDF")
    output_text = gr.Textbox(label="Extracted Text")
    # On upload, run extract_text and display the result in the textbox.
    pdf_input.upload(extract_text, pdf_input, output_text)

demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ import re
3
+ import numpy as np
4
  import pandas as pd
5
  from PyPDF2 import PdfReader
6
+ from docx import Document
7
  from sentence_transformers import SentenceTransformer
8
+ from sklearn.metrics.pairwise import cosine_similarity
9
  import spacy
10
+ from fpdf import FPDF
11
+ import subprocess
12
 
13
# ---------------------------
# Load SpaCy model (runtime download if needed)
# ---------------------------
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # Use the interpreter that is actually running this app rather than
    # whatever "python" resolves to on PATH (they can differ in venvs and
    # HF Spaces), and fail loudly if the download itself fails.
    import sys
    subprocess.run(
        [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
        check=True,
    )
    nlp = spacy.load("en_core_web_sm")

# Load sentence-transformers model (lightweight general-purpose embedder).
model = SentenceTransformer('all-MiniLM-L6-v2')
24
+
25
# ---------------------------
# Resume Parsing Functions
# ---------------------------
def extract_text_from_pdf(file):
    """Return the concatenated text of every page of a PDF, or "" on failure.

    `file` is anything PyPDF2.PdfReader accepts (path or file-like object).
    Pages with no extractable text contribute nothing — extract_text() can
    return None, hence the `or ""`.
    """
    try:
        reader = PdfReader(file)
        text = ""
        for page in reader.pages:
            text += page.extract_text() or ""
        return text
    except Exception:
        # Best-effort: a corrupt/unreadable PDF yields an empty string
        # instead of crashing the analysis. (Narrowed from a bare `except:`,
        # which also swallowed KeyboardInterrupt/SystemExit.)
        return ""
37
+
38
def extract_text_from_docx(file):
    """Return all paragraph text of a .docx file (newline-joined), or "" on failure."""
    try:
        doc = Document(file)
        text = "\n".join([p.text for p in doc.paragraphs])
        return text
    except Exception:
        # Best-effort, matching extract_text_from_pdf. (Narrowed from a bare
        # `except:`, which also swallowed KeyboardInterrupt/SystemExit.)
        return ""
45
+
46
def extract_skills(jd_text):
    """Split a job description into candidate skill tokens.

    Tokens are separated by commas, semicolons, or newlines; surrounding
    whitespace is trimmed and empty tokens are dropped.
    """
    cleaned = (part.strip() for part in re.split(r"[,\n;]", jd_text))
    return [part for part in cleaned if part]
49
+
50
def split_sections(resume_text):
    """Carve a resume into Education / Experience / Skills sections.

    Matching is heuristic: headings must contain "Education"/"EDUCATION",
    "Experience"/"EXPERIENCE", or "Skills"/"SKILLS" verbatim; each section's
    text runs from its heading to the next recognized heading (or end of
    text). Sections that are not found stay "".
    """
    sections = {"Education": "", "Experience": "", "Skills": ""}
    try:
        edu = re.search(r'(Education|EDUCATION)(.*?)(Experience|EXPERIENCE|Skills|SKILLS|$)', resume_text, re.DOTALL)
        exp = re.search(r'(Experience|EXPERIENCE)(.*?)(Skills|SKILLS|$)', resume_text, re.DOTALL)
        skills = re.search(r'(Skills|SKILLS)(.*)', resume_text, re.DOTALL)
        if edu:
            sections["Education"] = edu.group(2).strip()
        if exp:
            sections["Experience"] = exp.group(2).strip()
        if skills:
            sections["Skills"] = skills.group(2).strip()
    except Exception:
        # Defensive only — these regexes shouldn't raise on arbitrary text.
        # (Narrowed from a bare `except:`.)
        pass
    return sections
62
+
63
def compute_scores(resume_text, jd_text, required_skills):
    """Score a resume against one job description.

    Returns (final_score, keyword_score, semantic_score, section_scores, tips):
      * keyword_score  — fraction of required_skills found (case-insensitive
        substring) in the resume.
      * semantic_score — cosine similarity between the sentence-transformer
        embeddings of resume and JD.
      * section_scores — the same keyword fraction, per resume section.
      * tips           — one warning string per missing skill.
      * final_score    — 0.6 * keyword_score + 0.4 * semantic_score.
    On any failure, returns all-zero scores and no tips (best-effort).
    """
    try:
        resume_lower = resume_text.lower()  # hoisted: reused for every skill
        present_skills = [kw for kw in required_skills if kw.lower() in resume_lower]
        # max(..., 1) guards against an empty skill list (division by zero).
        keyword_score = len(present_skills) / max(len(required_skills), 1)

        res_vec = model.encode(resume_text)
        jd_vec = model.encode(jd_text)
        # float() so callers get a plain Python float, not a numpy scalar.
        semantic_score = float(cosine_similarity([res_vec], [jd_vec])[0][0])

        sections = split_sections(resume_text)
        section_scores = {}
        for sec, text in sections.items():
            sec_present = [kw for kw in required_skills if kw.lower() in text.lower()]
            section_scores[sec] = len(sec_present) / max(len(required_skills), 1)

        final_score = 0.6 * keyword_score + 0.4 * semantic_score
        tips = [f"⚠️ Add '{skill}' to improve ATS match" for skill in required_skills if skill.lower() not in resume_lower]
        return final_score, keyword_score, semantic_score, section_scores, tips
    except Exception:
        # Narrowed from a bare `except:`; kept best-effort so one bad JD
        # doesn't abort the whole multi-JD run.
        return 0, 0, 0, {"Education": 0, "Experience": 0, "Skills": 0}, []
80
+
81
# ---------------------------
# CSV & PDF Export
# ---------------------------
def export_csv(df, filename="ats_report.csv"):
    """Write `df` to `filename` as CSV (best-effort) and return the filename.

    The filename is returned even if the write fails so the caller's download
    widget still has a path to offer.
    """
    try:
        df.to_csv(filename, index=False)
    except Exception:
        # Best-effort: an unwritable location shouldn't kill the analysis.
        # (Narrowed from a bare `except:`.)
        pass
    return filename
90
+
91
def export_pdf(df, filename="ats_report.pdf"):
    """Render the results DataFrame as a simple PDF report (best-effort).

    One section per row: JD snippet, the three scores, per-section scores,
    and improvement tips. Returns `filename` even when rendering fails —
    e.g. text containing glyphs outside Latin-1, which FPDF's built-in core
    fonts cannot encode.
    """
    try:
        pdf = FPDF()
        pdf.add_page()
        pdf.set_font("Arial", size=12)
        pdf.cell(200, 10, txt="ATS Resume Screening Report", ln=True, align="C")
        pdf.ln(10)
        for i, row in df.iterrows():
            pdf.cell(200, 10, txt=f"JD {i+1}: {row['JD']}", ln=True)
            pdf.cell(200, 10, txt=f"Final Score: {row['Final Score']}", ln=True)
            pdf.cell(200, 10, txt=f"Keyword Score: {row['Keyword Score']}", ln=True)
            pdf.cell(200, 10, txt=f"Semantic Score: {row['Semantic Score']}", ln=True)
            pdf.cell(200, 10, txt="Section Scores:", ln=True)
            pdf.multi_cell(0, 10, row["Section Scores"])
            pdf.cell(200, 10, txt="Tips:", ln=True)
            pdf.multi_cell(0, 10, row["Tips"])
            pdf.ln(5)
        pdf.output(filename)
    except Exception:
        # Best-effort, matching export_csv. (Narrowed from a bare `except:`.)
        pass
    return filename
112
+
113
# ---------------------------
# AI Resume Rewriter & Feedback
# ---------------------------
def ai_resume_rewriter(resume_text, jd_text):
    """Append a "Suggested Skills to Add" section listing every JD skill not
    found (case-insensitive) in the resume; return the resume unchanged when
    nothing is missing."""
    lowered = resume_text.lower()
    missing = [
        skill for skill in extract_skills(jd_text)
        if skill.lower() not in lowered
    ]
    if not missing:
        return resume_text
    bullet_list = "\n".join(f"- {skill}" for skill in missing)
    return resume_text + "\n\n### Suggested Skills to Add:\n" + bullet_list
123
+
124
# Maps a skill name (as it appears in a JD) to concrete learning resources
# suggested when that skill is missing from the resume.
skill_course_mapping = {
    "Python": ["Complete 'Python for Everybody' on Coursera", "Try Python projects on GitHub"],
    "Machine Learning": ["Take 'Machine Learning' by Andrew Ng on Coursera", "Kaggle ML competitions"],
    "Deep Learning": ["DeepLearning.AI TensorFlow Developer Course", "Build neural network projects"],
    "SQL": ["SQL for Data Science - Coursera", "Practice on LeetCode SQL problems"],
    "AWS": ["AWS Certified Solutions Architect - Associate", "AWS Free Tier practice"],
    "TensorFlow": ["TensorFlow in Practice Specialization - Coursera", "Hands-on DL projects"]
}

# Maps a skill keyword to a well-known certification recommended for it.
# NOTE(review): keys must match the skill strings extracted from the JD for
# a suggestion to fire (e.g. "ML" only matches a JD that literally says "ML").
certification_mapping = {
    "AWS": "AWS Certified Solutions Architect",
    "ML": "Machine Learning by Andrew Ng",
    "Python": "PCAP: Python Certified Associate Programmer",
    "TensorFlow": "TensorFlow Developer Certificate"
}
139
+
140
def generate_feedback(resume_text, jd_text):
    """Build a markdown feedback report for the resume: missing JD skills,
    course and certification suggestions, and generic structure tips."""
    resume_lower = resume_text.lower()
    missing_skills = [
        skill for skill in extract_skills(jd_text)
        if skill.lower() not in resume_lower
    ]
    skill_suggestions = [
        f"{s}: {', '.join(skill_course_mapping[s])}"
        for s in missing_skills if s in skill_course_mapping
    ]
    cert_suggestions = [
        f"Consider certification: {certification_mapping[s]}"
        for s in missing_skills if s in certification_mapping
    ]

    # Structural tips: heading checks are case-sensitive on purpose — they
    # look for the conventional capitalized section names.
    resume_tips = []
    if "Education" not in resume_text:
        resume_tips.append("Include an Education section if missing.")
    if "Experience" not in resume_text:
        resume_tips.append("Include an Experience section with quantified achievements.")
    if "Skills" not in resume_text:
        resume_tips.append("Add a Skills section highlighting relevant skills.")
    if len(resume_text.split()) < 200:
        resume_tips.append("Consider adding more details to increase resume length and content richness.")

    report_sections = [
        "### Missing Skills:\n" + ("\n".join(missing_skills) if missing_skills else "None"),
        "### Suggested Courses:\n" + ("\n".join(skill_suggestions) if skill_suggestions else "No suggestions"),
        "### Suggested Certifications:\n" + ("\n".join(cert_suggestions) if cert_suggestions else "No suggestions"),
        "### Resume Optimization Tips:\n" + ("\n".join(resume_tips) if resume_tips else "Your resume looks well-structured."),
    ]
    return "\n\n".join(report_sections)
160
+
161
# ---------------------------
# Multi-JD Analysis
# ---------------------------
def analyze_multi_jd(resume_file, jd_texts):
    """Score one resume against one or more job descriptions.

    Args:
        resume_file: Gradio upload object (has `.name`, its on-disk path),
            or None when no file was uploaded.
        jd_texts: one or more JDs in a single string, separated by blank
            lines (double newline).

    Returns a 4-tuple matching the Gradio outputs: CSV report path, PDF
    report path, feedback text, and AI-rewritten resume text.
    """
    # Robustness fix: Gradio passes None when no file was uploaded; the
    # original crashed here with AttributeError on `.name`.
    if resume_file is None:
        resume_text = ""
    else:
        file_ext = resume_file.name.split('.')[-1].lower()
        if file_ext == "pdf":
            resume_text = extract_text_from_pdf(resume_file)
        elif file_ext == "docx":
            resume_text = extract_text_from_docx(resume_file)
        else:
            # Unsupported extension: score against an empty resume.
            resume_text = ""

    jd_list = [jd.strip() for jd in jd_texts.split("\n\n") if jd.strip()]
    results = []
    for jd in jd_list:
        required_skills = extract_skills(jd)
        final_score, keyword_score, semantic_score, section_scores, tips = compute_scores(resume_text, jd, required_skills)
        section_scores_str = "\n".join([f"{k}: {v:.2%}" for k, v in section_scores.items()])
        tips_str = "\n".join(tips) if tips else "No suggestions"
        results.append({
            "JD": jd[:50] + "..." if len(jd) > 50 else jd,  # snippet for the report
            "Final Score": f"{final_score:.2%}",
            "Keyword Score": f"{keyword_score:.2%}",
            "Semantic Score": f"{semantic_score:.2%}",
            "Section Scores": section_scores_str,
            "Tips": tips_str
        })

    df = pd.DataFrame(results)
    # Both exports are best-effort and always return these fixed paths.
    export_csv(df)
    export_pdf(df)
    feedback = generate_feedback(resume_text, jd_texts)
    rewritten_resume = ai_resume_rewriter(resume_text, jd_texts)
    return "ats_report.csv", "ats_report.pdf", feedback, rewritten_resume
194
+
195
# ---------------------------
# Gradio Interface
# ---------------------------
# The four outputs correspond positionally to analyze_multi_jd's return
# tuple: CSV path, PDF path, feedback text, rewritten resume text.
iface = gr.Interface(
    fn=analyze_multi_jd,
    inputs=[
        gr.File(label="Upload Resume (PDF/DOCX)"),
        gr.Textbox(label="Paste Job Description(s) (Separate multiple JDs with double line breaks)", lines=10)
    ],
    outputs=[
        gr.File(label="Download CSV Report"),
        gr.File(label="Download PDF Report"),
        gr.Textbox(label="Personalized Feedback", lines=15),
        gr.Textbox(label="AI Suggested Resume Revisions", lines=15)
    ],
    title="AI-Powered Resume Screening System",
    description="Upload your resume, paste job descriptions, and get ATS scoring, personalized feedback, and AI suggestions."
)

iface.launch()