indhupamula committed on
Commit
f7e38ff
·
verified ·
1 Parent(s): f86db50

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -206
app.py CHANGED
@@ -1,214 +1,33 @@
1
  import gradio as gr
2
- import sqlite3
3
- import re
4
- import numpy as np
5
  import pandas as pd
6
  from PyPDF2 import PdfReader
7
- from docx import Document
8
  from sentence_transformers import SentenceTransformer
9
- from sklearn.metrics.pairwise import cosine_similarity
10
  import spacy
11
- from datetime import datetime
12
- from fpdf import FPDF
13
- import hashlib
14
- import subprocess
15
 
16
# ---------------------------
# Load SpaCy model
# ---------------------------
# Download the model on first run. Use sys.executable so the download runs
# in the same interpreter/virtualenv as this app — a bare "python" on PATH
# may resolve to a different installation and install the model elsewhere.
import sys

try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    subprocess.run([sys.executable, "-m", "spacy", "download", "en_core_web_sm"])
    nlp = spacy.load("en_core_web_sm")

# ---------------------------
# Load Sentence Transformer
# ---------------------------
# Small general-purpose embedding model used for resume/JD semantic similarity.
model = SentenceTransformer('all-MiniLM-L6-v2')
29
-
30
# ---------------------------
# SQLite DB setup
# ---------------------------
# One shared connection for the whole app; check_same_thread=False lets the
# Gradio worker threads reuse it.
conn = sqlite3.connect('resumes.db', check_same_thread=False)
cursor = conn.cursor()

# Account table: one row per registered user; passwords stored as SHA-256 hashes.
cursor.execute("""
CREATE TABLE IF NOT EXISTS users (
    id INTEGER PRIMARY KEY,
    username TEXT UNIQUE,
    password_hash TEXT
)
""")

# One row per resume/JD analysis, linked back to the user who ran it.
cursor.execute("""
CREATE TABLE IF NOT EXISTS analyses (
    id INTEGER PRIMARY KEY,
    user_id INTEGER,
    resume_text TEXT,
    jd_text TEXT,
    final_score REAL,
    keyword_score REAL,
    semantic_score REAL,
    section_scores TEXT,
    tips TEXT,
    date TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    FOREIGN KEY(user_id) REFERENCES users(id)
)
""")
conn.commit()
60
-
61
- # ---------------------------
62
- # Authentication Functions
63
- # ---------------------------
64
def hash_password(password):
    """Return the hex SHA-256 digest of *password* (stored instead of plaintext)."""
    digest = hashlib.sha256(password.encode())
    return digest.hexdigest()
66
-
67
def signup(username, password):
    """Register a new account; return a user-facing status message."""
    credentials = (username, hash_password(password))
    try:
        cursor.execute("INSERT INTO users (username, password_hash) VALUES (?,?)",
                       credentials)
        conn.commit()
    except sqlite3.IntegrityError:
        # UNIQUE constraint on username fired — account already exists.
        return "❌ Username already exists. Try a different one."
    return "✅ Signup successful! Please login."
75
-
76
def login(username, password):
    """Check credentials; return (status message, user_id) — user_id is None on failure."""
    cursor.execute("SELECT id, password_hash FROM users WHERE username=?", (username,))
    record = cursor.fetchone()
    # Guard clause: unknown user or wrong password hash.
    if record is None or record[1] != hash_password(password):
        return " Invalid username or password", None
    return f"✅ Login successful! User ID: {record[0]}", record[0]
83
-
84
- # ---------------------------
85
- # Resume Parsing
86
- # ---------------------------
87
def extract_text_from_pdf(file):
    """Concatenate the extracted text of every page in a PDF file."""
    reader = PdfReader(file)
    # extract_text() may return None for image-only pages; substitute "".
    return "".join(page.extract_text() or "" for page in reader.pages)
93
-
94
def extract_text_from_docx(file):
    """Return the paragraph text of a .docx file, newline-joined."""
    paragraphs = Document(file).paragraphs
    return "\n".join(p.text for p in paragraphs)
98
-
99
def extract_skills(jd_text):
    """Split a job description into skill tokens on commas, semicolons and newlines."""
    tokens = (t.strip() for t in re.split(r"[,\n;]", jd_text))
    return [t for t in tokens if t]
102
-
103
def split_sections(resume_text):
    """Carve *resume_text* into Education / Experience / Skills chunks by header regexes.

    Each pattern captures everything between its header and the next known
    header (or end of text); sections not found stay "".
    """
    sections = {"Education": "", "Experience": "", "Skills": ""}
    patterns = {
        "Education": r'(Education|EDUCATION)(.*?)(Experience|EXPERIENCE|Skills|SKILLS|$)',
        "Experience": r'(Experience|EXPERIENCE)(.*?)(Skills|SKILLS|$)',
        "Skills": r'(Skills|SKILLS)(.*)',
    }
    for name, pattern in patterns.items():
        match = re.search(pattern, resume_text, re.DOTALL)
        if match:
            sections[name] = match.group(2).strip()
    return sections
112
-
113
def compute_scores(resume_text, jd_text, required_skills):
    """Score a resume against a JD.

    Returns (final_score, keyword_score, semantic_score, section_scores, tips):
    keyword coverage, embedding cosine similarity, per-section keyword
    coverage, and improvement tips for each missing skill.
    """
    resume_lower = resume_text.lower()
    denom = max(len(required_skills), 1)

    # Fraction of required skills that literally appear in the resume.
    keyword_score = sum(1 for kw in required_skills if kw.lower() in resume_lower) / denom

    # Embedding cosine similarity between the full resume and the JD.
    semantic_score = cosine_similarity([model.encode(resume_text)],
                                       [model.encode(jd_text)])[0][0]

    # Same keyword coverage, computed per resume section.
    section_scores = {}
    for name, body in split_sections(resume_text).items():
        body_lower = body.lower()
        section_scores[name] = sum(1 for kw in required_skills if kw.lower() in body_lower) / denom

    # Weighted blend: exact keyword hits dominate, semantics refine.
    final_score = 0.6 * keyword_score + 0.4 * semantic_score
    tips = [f"⚠️ Add '{skill}' to improve ATS match"
            for skill in required_skills if skill.lower() not in resume_lower]
    return final_score, keyword_score, semantic_score, section_scores, tips
127
-
128
- # ---------------------------
129
- # CSV/PDF Export
130
- # ---------------------------
131
def export_csv(df, filename="ats_report.csv"):
    """Persist the report dataframe as CSV (no index) and return the file path."""
    path = filename
    df.to_csv(path, index=False)
    return path
134
-
135
def export_pdf(df, filename="ats_report.pdf"):
    """Render the report dataframe as a simple PDF report and return the file path."""
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    pdf.cell(200, 10, txt="ATS Resume Screening Report", ln=True, align="C")
    pdf.ln(10)
    for i, row in df.iterrows():
        # One labelled line per scalar score; the long text fields use
        # multi_cell so they wrap.
        for line in (f"JD {i+1}: {row['JD']}",
                     f"Final Score: {row['Final Score']}",
                     f"Keyword Score: {row['Keyword Score']}",
                     f"Semantic Score: {row['Semantic Score']}",
                     "Section Scores:"):
            pdf.cell(200, 10, txt=line, ln=True)
        pdf.multi_cell(0, 10, row["Section Scores"])
        pdf.cell(200, 10, txt="Tips:", ln=True)
        pdf.multi_cell(0, 10, row["Tips"])
        pdf.ln(5)
    pdf.output(filename)
    return filename
153
-
154
- # ---------------------------
155
- # AI Resume Rewriter
156
- # ---------------------------
157
def ai_resume_rewriter(resume_text, jd_text):
    """Append a bullet for every JD skill the resume does not already mention.

    Presence is tested against the ORIGINAL resume text only, so appended
    bullets never suppress later ones.
    """
    resume_lower = resume_text.lower()
    missing = [s for s in extract_skills(jd_text) if s.lower() not in resume_lower]
    additions = "".join(f"\n- Experience with {skill}" for skill in missing)
    return resume_text + additions
164
-
165
- # ---------------------------
166
- # Feedback Generator
167
- # ---------------------------
168
# Missing-skill -> recommended learning resources (courses / hands-on practice).
skill_course_mapping = {
    "Python": ["Complete 'Python for Everybody' on Coursera", "Try Python projects on GitHub"],
    "Machine Learning": ["Take 'Machine Learning' by Andrew Ng on Coursera", "Kaggle ML competitions"],
    "Deep Learning": ["DeepLearning.AI TensorFlow Developer Course", "Build neural network projects"],
    "SQL": ["SQL for Data Science - Coursera", "Practice on LeetCode SQL problems"],
    "AWS": ["AWS Certified Solutions Architect - Associate", "AWS Free Tier practice"],
    "TensorFlow": ["TensorFlow in Practice Specialization - Coursera", "Hands-on DL projects"]
}

# Skill keyword -> a recognised certification worth suggesting for it.
certification_mapping = {
    "AWS": "AWS Certified Solutions Architect",
    "ML": "Machine Learning by Andrew Ng",
    "Python": "PCAP: Python Certified Associate Programmer",
    "TensorFlow": "TensorFlow Developer Certificate"
}
183
-
184
def generate_feedback(resume_text, jd_text):
    """Build a feedback dict for a resume against a job description.

    Returns a dict with keys:
    - "Missing Skills": JD skills absent from the resume (substring match).
    - "Skill Suggestions": course/practice resources for known missing skills.
    - "Certifications": certification suggestions for known missing skills.
    - "Resume Tips": generic structural tips (missing sections, length).
    """
    required_skills = extract_skills(jd_text)
    resume_lower = resume_text.lower()

    missing_skills = [skill for skill in required_skills if skill.lower() not in resume_lower]
    skill_suggestions = []
    cert_suggestions = []
    for skill in missing_skills:
        # Mapping lookups stay case-sensitive: keys are canonical skill names.
        if skill in skill_course_mapping:
            skill_suggestions.append(f"{skill}: {', '.join(skill_course_mapping[skill])}")
        if skill in certification_mapping:
            cert_suggestions.append(f"Consider certification: {certification_mapping[skill]}")

    resume_tips = []
    # Fix: section checks are case-insensitive. The previous exact-case test
    # ("Education" not in resume_text) wrongly flagged resumes using ALL-CAPS
    # headers, which split_sections elsewhere in this file explicitly accepts.
    if "education" not in resume_lower:
        resume_tips.append("Include an Education section if missing.")
    if "experience" not in resume_lower:
        resume_tips.append("Include an Experience section with quantified achievements.")
    if "skills" not in resume_lower:
        resume_tips.append("Add a Skills section highlighting relevant skills.")
    if len(resume_text.split()) < 200:
        resume_tips.append("Consider expanding your resume to at least 1 page (200+ words).")

    feedback = {
        "Missing Skills": missing_skills,
        "Skill Suggestions": skill_suggestions,
        "Certifications": cert_suggestions,
        "Resume Tips": resume_tips
    }
    return feedback
 
1
  import gradio as gr
 
 
 
2
  import pandas as pd
3
  from PyPDF2 import PdfReader
 
4
  from sentence_transformers import SentenceTransformer
 
5
  import spacy
 
 
 
 
6
 
7
# Load models once at startup.
# Fix: spacy.load raises OSError when en_core_web_sm is not installed, which
# crashed a fresh deployment. Restore the download-and-retry fallback the
# previous revision of this file had, using sys.executable so the download
# targets the interpreter actually running this app.
import subprocess
import sys

try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    subprocess.run([sys.executable, "-m", "spacy", "download", "en_core_web_sm"])
    nlp = spacy.load("en_core_web_sm")

embedder = SentenceTransformer('all-MiniLM-L6-v2')  # lightweight embedding model
10
+
11
def extract_text(file):
    """Extract the text of an uploaded PDF; other file types get a notice string.

    *file* is the Gradio File payload; its .name attribute is the temp-file path.
    """
    if file.name.endswith(".pdf"):
        reader = PdfReader(file.name)
        text = ""
        for page in reader.pages:
            # Fix: extract_text() returns None for image-only pages, which
            # made `text +=` raise TypeError. Fall back to "" (the previous
            # revision of this file guarded this the same way).
            text += page.extract_text() or ""
        return text
    else:
        return "Unsupported file type"
20
+
21
def analyze_text(text):
    """Return the first 5 sentences of *text*, one per line (spaCy sentence split).

    Fix: the previous body also computed `embedder.encode(sentences)` and then
    discarded the result — an expensive dead call with no effect on the output.
    It has been removed; behavior is otherwise unchanged.
    """
    doc = nlp(text)
    sentences = [sent.text for sent in doc.sents]
    return "\n".join(sentences[:5])  # first 5 sentences for demo
26
+
27
# ---------------------------
# Gradio UI
# ---------------------------
with gr.Blocks() as demo:
    gr.Markdown("# PDF Text Extractor & Analyzer")
    uploaded_file = gr.File(label="Upload PDF")
    extracted_box = gr.Textbox(label="Extracted Text")
    # Run extraction as soon as a file is uploaded.
    uploaded_file.upload(extract_text, uploaded_file, extracted_box)

demo.launch()