mkshari committed on
Commit
a25590b
·
verified ·
1 Parent(s): ed1f07a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +133 -219
app.py CHANGED
@@ -3,257 +3,171 @@ import spacy
3
  import pdfplumber
4
  from docx import Document
5
  from sentence_transformers import SentenceTransformer, util
6
- import pandas as pd
7
  import re
 
8
 
9
- # Load models
10
- print("Loading models...")
11
  try:
12
  nlp = spacy.load("en_core_web_sm")
13
- print("spaCy model loaded successfully.")
14
- except Exception as e:
15
- print(f"spaCy load error: {e}. Trying direct import...")
16
- try:
17
- import en_core_web_sm
18
- nlp = en_core_web_sm.load()
19
- except:
20
- print("Model not found. Using fallback keyword matching only.")
21
- nlp = None
22
 
23
  model = SentenceTransformer('all-MiniLM-L6-v2')
24
- print("Sentence Transformer loaded.")
25
 
26
- # Common Skill Dictionary (Expanded)
27
- SKILLS_DB = [
28
  "python", "javascript", "react", "fastapi", "aws", "docker", "kubernetes", "sql",
29
- "git", "machine learning", "nlp", "tensorflow", "pytorch", "java", "c++", "golang",
30
- "postgresql", "mongodb", "redis", "cloud computing", "devops", "rest api", "graphql",
31
- "scikit-learn", "pandas", "numpy", "django", "flask", "typescript", "angular", "vue",
32
- "html", "css", "node.js", "express", "azure", "gcp", "linux", "bash", "jenkins",
33
- "terraform", "ansible", "prompt engineering", "openai", "llm", "bert", "transformer",
34
- "tableau", "powerbi", "excel", "dynamic programming", "data structures", "algorithms"
35
  ]
36
 
37
  ROADMAP_DB = {
38
- # (previous content kept, adding more)
39
- "python": "Master Python: [Real Python](https://realpython.com/) | [Programming with Mosh](https://www.youtube.com/user/programmingwithmosh)",
40
- "react": "Build UI with React: [Official Docs](https://react.dev/) | [FreeCodeCamp React Course](https://www.freecodecamp.org/news/free-react-course-2024/)",
41
- "aws": "Cloud Mastery: [AWS Skill Builder](https://explore.skillbuilder.aws/) | [Cloud Guru](https://www.pluralsight.com/cloud-computing/aws)",
42
- "docker": "Containerization: [Docker Get Started](https://docs.docker.com/get-started/) | [Docker Tutorial for Beginners](https://www.youtube.com/watch?v=pg19Z8LL06w)",
43
- "kubernetes": "Orchestration: [K8s Basics](https://kubernetes.io/docs/tutorials/kubernetes-basics/) | [Nana's K8s Course](https://www.youtube.com/c/TechWorldwithNana)",
44
- "fastapi": "Modern APIs: [FastAPI Docs](https://fastapi.tiangolo.com/) | [TestDriven.io FastAPI](https://testdriven.io/blog/fastapi-crud/)",
45
- "nlp": "Language Processing: [Hugging Face NLP Course](https://huggingface.co/learn/nlp-course/) | [Stanford CS224N](https://web.stanford.edu/class/cs224n/)",
46
- "machine learning": "AI Fundamentals: [ML Specialization by Andrew Ng](https://www.coursera.org/specializations/machine-learning-introduction)",
47
- "sql": "Database Management: [SQLZoo](https://sqlzoo.net/) | [Mode SQL Tutorial](https://mode.com/sql-tutorial/)",
48
- "git": "Version Control: [Git Immersion](https://gitimmersion.com/) | [GitHub Learning Path](https://skills.github.com/)",
49
- "javascript": "JS Deep Dive: [MDN Web Docs](https://developer.mozilla.org/en-US/docs/Web/JavaScript) | [JavaScript.info](https://javascript.info/)",
50
- "typescript": "Strict Typing: [TypeScript Handbook](https://www.typescriptlang.org/docs/handbook/intro.html)",
51
- "postgresql": "Advanced Data: [Postgres Tutorial](https://www.postgresqltutorial.com/)",
52
- "rest api": "API Design: [RESTful API Guide](https://restfulapi.net/)",
53
- "html": "Web Basics: [W3Schools HTML](https://www.w3schools.com/html/)",
54
- "css": "Styling: [CSS-Tricks](https://css-tricks.com/)",
55
- "node.js": "Backend JS: [Node.js Guide](https://nodejs.dev/en/learn/)"
56
  }
57
 
58
- def extract_text_from_pdf(pdf_file):
59
- try:
60
- with pdfplumber.open(pdf_file) as pdf:
61
- text = ""
62
- for page in pdf.pages:
63
- text += page.extract_text() or ""
64
- return text
65
- except Exception as e:
66
- print(f"PDF Extraction Error: {e}")
67
  return ""
68
-
69
- def extract_text_from_docx(docx_file):
 
 
70
  try:
71
- doc = Document(docx_file)
72
- text = ""
73
- for para in doc.paragraphs:
74
- text += para.text + "\n"
75
- return text
 
76
  except Exception as e:
77
- print(f"DOCX Extraction Error: {e}")
78
- return ""
79
-
80
- def clean_text(text):
81
- text = re.sub(r'[^a-zA-Z0-9\s#\.\+]', ' ', text) # Preserve # for C#, . for Node.js, + for C++
82
- return text.lower().strip()
83
-
84
- def get_skills(text):
85
- clean_t = clean_text(text)
86
- found_skills = set()
87
- for skill in SKILLS_DB:
88
- # Improved regex to handle skills with dots or pluses
89
- pattern = r'\b' + re.escape(skill) + r'\b'
90
- if re.search(pattern, clean_t):
91
- found_skills.add(skill)
92
- return found_skills
93
-
94
- def analyze_resume(resume_file, jd_text):
95
- print("Analysis started...")
96
- if resume_file is None or not jd_text.strip():
97
- return "Please upload a resume and provide a job description.", "", "", 0, []
 
 
 
 
 
 
 
 
98
 
99
- # Step 1: Extract text
100
- resume_path = resume_file.name
101
- print(f"Extracting text from: {resume_path}")
102
-
103
- if resume_path.lower().endswith('.pdf'):
104
- resume_text = extract_text_from_pdf(resume_path)
105
- elif resume_path.lower().endswith('.docx'):
106
- resume_text = extract_text_from_docx(resume_path)
107
- else:
108
- return "Unsupported file format. Please upload PDF or DOCX.", "", "", 0, []
109
 
 
 
110
  if not resume_text.strip():
111
- return "Could not extract text from the file. Please check the file content.", "", "", 0, []
112
-
113
- # Step 2: Skill Extraction
114
- resume_skills = get_skills(resume_text)
115
- jd_skills = get_skills(jd_text)
116
-
117
- print(f"Resume Skills: {resume_skills}")
118
- print(f"JD Skills: {jd_skills}")
119
-
120
- present_skills = list(resume_skills.intersection(jd_skills))
121
- missing_skills = list(jd_skills - resume_skills)
122
-
123
- # Step 3: Similarity Score
124
- embeddings1 = model.encode(resume_text, convert_to_tensor=True)
125
- embeddings2 = model.encode(jd_text, convert_to_tensor=True)
126
- cosine_score = util.pytorch_cos_sim(embeddings1, embeddings2)
127
- match_percentage = round(cosine_score.item() * 100, 2)
128
-
129
- # Adjust score if no skills overlap but similarity is high
130
- if not present_skills and match_percentage > 50:
131
- match_percentage -= 20 # Penalize for lack of keyword match
132
-
133
- present_str = ", ".join([s.capitalize() for s in present_skills]) if present_skills else "No matching skills found."
134
- missing_str = ", ".join([s.capitalize() for s in missing_skills]) if missing_skills else "All JD skills found in resume!"
135
-
136
- print(f"Match: {match_percentage}%")
137
- return f"{match_percentage}%", present_str, missing_str, match_percentage, missing_skills
138
-
139
- def get_roadmap(missing_skills):
140
- if not missing_skills:
141
- return "### 🎉 Perfect Match!\nYou already possess all the key skills mentioned in the job description. Tip: ensure you've highlighted these clearly in your experience sections."
142
 
143
- roadmap_items = []
144
- for skill in missing_skills:
145
- resource = ROADMAP_DB.get(skill.lower(), f"Suggested resource for {skill}: Check out specialized courses on Coursera, Udemy, or YouTube.")
146
- roadmap_items.append(f"#### 📖 {skill.capitalize()}\n{resource}")
147
-
148
- return "\n\n".join(roadmap_items)
149
-
150
- # Custom CSS for Premium Look
151
- custom_css = """
152
- #logo-img {
153
- margin: auto;
154
- display: block;
155
- max-width: 150px;
156
- }
157
- .gradio-container {
158
- background-color: #f0f2f5;
159
- font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
160
- }
161
- .main-header {
162
- text-align: center;
163
- color: #003366;
164
- margin-bottom: 5px;
165
- }
166
- .sub-header {
167
- text-align: center;
168
- color: #b8860b;
169
- margin-top: 0;
170
- font-style: italic;
171
- }
172
- .sastra-text {
173
- text-align: center;
174
- font-size: 1em;
175
- color: #444;
176
- font-weight: bold;
177
- }
178
- #analyze-btn {
179
- background: linear-gradient(135deg, #003366 0%, #00509d 100%) !important;
180
- border: none !important;
181
- color: white !important;
182
- height: 50px;
183
- font-size: 1.1em;
184
- }
185
- #roadmap-btn {
186
- background: linear-gradient(135deg, #b8860b 0%, #daa520 100%) !important;
187
- border: none !important;
188
- color: white !important;
189
- height: 45px;
190
- }
191
- .output-label {
192
- text-align: center;
193
- font-size: 2em;
194
- }
195
- """
196
-
197
- with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo"), css=custom_css) as demo:
198
  with gr.Row():
199
  with gr.Column(scale=1):
200
- try:
201
- gr.Image("logo.png", show_label=False, height=130, container=False, elem_id="logo-img")
202
- except:
203
- gr.Markdown("### [LOGO MISSING]")
204
- with gr.Column(scale=3):
205
- gr.Markdown("# SETHU AI", elem_classes=["main-header"])
206
- gr.Markdown("### From Resume to Career Readiness", elem_classes=["sub-header"])
207
- gr.Markdown("SASTRA DEEMED UNIVERSITY", elem_classes=["sastra-text"])
208
-
209
  gr.Markdown("---")
210
-
211
  with gr.Row():
212
  with gr.Column(scale=1):
213
- gr.Markdown("### 📥 Step 1: Upload & Paste")
214
- resume_input = gr.File(label="Upload Resume (PDF/DOCX)", file_types=[".pdf", ".docx"], type="filepath")
215
- jd_input = gr.Textbox(label="Job Description", placeholder="Paste the job requirements here...", lines=10)
216
- analyze_btn = gr.Button("Analyze Match", variant="primary", elem_id="analyze-btn")
217
 
218
  with gr.Column(scale=1):
219
- gr.Markdown("### 📊 Step 2: Evaluation")
220
- match_score_output = gr.Label(label="Match Quality", elem_classes=["output-label"])
221
-
222
- with gr.Tabs():
223
- with gr.TabItem("✅ Skills Matched"):
224
- present_skills_output = gr.Textbox(label="", interactive=False, lines=4)
225
- with gr.TabItem("❌ Missing Skills"):
226
- missing_skills_output = gr.Textbox(label="", interactive=False, lines=4)
227
-
228
  gr.Markdown("---")
229
- roadmap_btn = gr.Button("🚀 Generate Learning Roadmap", interactive=True, elem_id="roadmap-btn")
 
230
 
231
- roadmap_output = gr.Markdown(visible=False)
232
- missing_skills_state = gr.State([])
233
 
234
- def on_analyze(resume, jd):
235
- score_str, present, missing, score_val, missing_list = analyze_resume(resume, jd)
236
- return {
237
- match_score_output: score_str,
238
- present_skills_output: present,
239
- missing_skills_output: missing,
240
- missing_skills_state: missing_list,
241
- roadmap_output: gr.update(visible=False)
242
- }
243
-
244
- def on_roadmap(missing_list):
245
- content = get_roadmap(missing_list)
246
- return gr.update(value=content, visible=True)
247
-
248
- analyze_btn.click(
249
- on_analyze,
250
- inputs=[resume_input, jd_input],
251
- outputs=[match_score_output, present_skills_output, missing_skills_output, missing_skills_state, roadmap_output]
252
  )
253
-
254
  roadmap_btn.click(
255
- on_roadmap,
256
- inputs=[missing_skills_state],
257
  outputs=[roadmap_output]
258
  )
259
 
 
3
  import pdfplumber
4
  from docx import Document
5
  from sentence_transformers import SentenceTransformer, util
 
6
  import re
7
+ import plotly.graph_objects as go
8
 
9
+ # Initialize Models once at startup
10
+ print("🚀 Initializing SETHU AI Engine...")
11
  try:
12
  nlp = spacy.load("en_core_web_sm")
13
+ except:
14
+ import os
15
+ os.system("python -m spacy download en_core_web_sm")
16
+ nlp = spacy.load("en_core_web_sm")
 
 
 
 
 
17
 
18
  model = SentenceTransformer('all-MiniLM-L6-v2')
 
19
 
20
+ TECH_SKILLS = [
 
21
  "python", "javascript", "react", "fastapi", "aws", "docker", "kubernetes", "sql",
22
+ "git", "ml", "nlp", "tensorflow", "pytorch", "java", "golang", "postgresql",
23
+ "mongodb", "redis", "devops", "rest api", "graphql", "scikit-learn", "pandas",
24
+ "numpy", "django", "flask", "typescript", "angular", "vue", "html", "css",
25
+ "node.js", "express", "azure", "gcp", "linux", "terraform", "ansible", "jenkins",
26
+ "prompt engineering", "openai", "llm", "bert", "tableau", "powerbi", "excel",
27
+ "c#", "c++", "rust", "r", "spark", "hadoop", "kafka", "mysql", "oracle", "snowflake"
28
  ]
29
 
30
  ROADMAP_DB = {
31
+ "python": "🐍 [Master Python](https://realpython.com/) - Focus on Backend Automation & Data Science.",
32
+ "react": "⚛️ [React.dev](https://react.dev/) - Master Hooks & State Management.",
33
+ "aws": "☁️ [AWS Builder](https://explore.skillbuilder.aws/) - Get Certified (Solutions Architect).",
34
+ "docker": "🐳 [Docker Guide](https://docs.docker.com/) - Learn Container Architecture.",
35
+ "kubernetes": "☸️ [K8s Certification](https://kubernetes.io/docs/tutorials/) - Master Orchestration.",
36
+ "ml": "🤖 [ML Specialization](https://www.coursera.org/specializations/machine-learning-introduction) - Focus on Scikit-Learn.",
37
+ "nlp": "✍️ [HF NLP Course](https://huggingface.co/learn/nlp-course/) - Master Transformers.",
38
+ "sql": "💾 [SQL Practice](https://sqlzoo.net/) - Master Joins & Query Optmization.",
39
+ "javascript": "📜 [JS.info](https://javascript.info/) - Master ES6+ Features.",
40
+ "devops": "⚙️ [Roadmap.sh/devops](https://roadmap.sh/devops/) - Learn CI/CD & Infrastructure as Code."
 
 
 
 
 
 
 
 
41
  }
42
 
43
+ def extract_text(file_obj):
44
+ """Robust text extraction for PDF and DOCX."""
45
+ if file_obj is None:
 
 
 
 
 
 
46
  return ""
47
+
48
+ # Gradio might pass a file-like object or a string path
49
+ file_path = file_obj.name if hasattr(file_obj, 'name') else str(file_obj)
50
+
51
  try:
52
+ if file_path.lower().endswith('.pdf'):
53
+ with pdfplumber.open(file_path) as pdf:
54
+ return "".join([page.extract_text() or "" for page in pdf.pages])
55
+ elif file_path.lower().endswith('.docx'):
56
+ doc = Document(file_path)
57
+ return "\n".join([p.text for p in doc.paragraphs])
58
  except Exception as e:
59
+ print(f"Extraction error on {file_path}: {e}")
60
+ return ""
61
+
62
+ def discover_skills(text):
63
+ if not text: return set()
64
+ found = set()
65
+ text_l = text.lower()
66
+ for skill in TECH_SKILLS:
67
+ if re.search(r'\b' + re.escape(skill) + r'\b', text_l):
68
+ found.add(skill)
69
+ return found
70
+
71
+ def create_gauge(score):
72
+ fig = go.Figure(go.Indicator(
73
+ mode = "gauge+number",
74
+ value = score,
75
+ domain = {'x': [0, 1], 'y': [0, 1]},
76
+ gauge = {
77
+ 'axis': {'range': [0, 100]},
78
+ 'bar': {'color': "#003366"},
79
+ 'steps': [
80
+ {'range': [0, 50], 'color': "#ffcccc"},
81
+ {'range': [50, 80], 'color': "#fff3cd"},
82
+ {'range': [80, 100], 'color': "#d4edda"}
83
+ ],
84
+ }
85
+ ))
86
+ fig.update_layout(height=250, margin=dict(l=30, r=30, t=30, b=30), paper_bgcolor="rgba(0,0,0,0)")
87
+ return fig
88
 
89
+ def main_process(resume_file, jd_text):
90
+ print("--- New Analysis Request ---")
91
+ if not resume_file or not jd_text.strip():
92
+ return "⚠️ Error: Please upload a resume and paste the JD.", "", None, [], gr.update(visible=False)
 
 
 
 
 
 
93
 
94
+ # 1. Extraction
95
+ resume_text = extract_text(resume_file)
96
  if not resume_text.strip():
97
+ return "⚠️ Error: Failed to extract text from resume. Ensure it's not and image-only PDF.", "", None, [], gr.update(visible=False)
98
+
99
+ # 2. Skill Matching
100
+ r_skills = discover_skills(resume_text)
101
+ j_skills = discover_skills(jd_text)
102
+ match_skills = sorted(list(r_skills.intersection(j_skills)))
103
+ gap_skills = sorted(list(j_skills - r_skills))
104
+
105
+ # 3. AI scoring
106
+ emb1 = model.encode(resume_text, convert_to_tensor=True)
107
+ emb2 = model.encode(jd_text, convert_to_tensor=True)
108
+ score = round(util.pytorch_cos_sim(emb1, emb2).item() * 100, 1)
109
+
110
+ # 4. Results Formatting
111
+ present_str = ", ".join([s.upper() for s in match_skills]) if match_skills else "No direct skill matches found."
112
+ gap_str = ", ".join([s.upper() for s in gap_skills]) if gap_skills else "No major skill gaps detected!"
113
+ plot = create_gauge(score)
114
+
115
+ print(f"Analysis Complete. Score: {score}")
116
+ return present_str, gap_str, plot, gap_skills, gr.update(visible=True)
117
+
118
+ def generate_roadmap(gap_skills):
119
+ if not gap_skills:
120
+ return "### 🌟 Career Ready!\nYour profile is an excellent match for this role. Focus on practicing behavioral interview questions."
 
 
 
 
 
 
 
121
 
122
+ roadmap = "### 🛤️ Personalized Readiness Roadmap\n\n"
123
+ for s in gap_skills:
124
+ res = ROADMAP_DB.get(s.lower(), f"Learn **{s.upper()}** through hands-on projects and documentation.")
125
+ roadmap += f"- **{s.upper()}**: {res}\n"
126
+ return roadmap
127
+
128
+ # UI Layout
129
+ with gr.Blocks(theme=gr.themes.Soft(), title="SETHU AI") as demo:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
  with gr.Row():
131
  with gr.Column(scale=1):
132
+ gr.Image("logo.png", show_label=False, height=120, container=False)
133
+ with gr.Column(scale=4):
134
+ gr.Markdown("# SETHU AI - Career Intelligence Hub")
135
+ gr.Markdown("### From Resume to Career Readiness | Powered by SASTRA DEEMED UNIVERSITY")
136
+
 
 
 
 
137
  gr.Markdown("---")
138
+
139
  with gr.Row():
140
  with gr.Column(scale=1):
141
+ gr.Markdown("### 📥 1. Upload Requirements")
142
+ resume_input = gr.File(label="Upload Resume (PDF/DOCX)")
143
+ jd_input = gr.Textbox(label="Job Description", lines=12, placeholder="Paste the job requirements here...")
144
+ run_btn = gr.Button("🔍 Run AI Analysis", variant="primary")
145
 
146
  with gr.Column(scale=1):
147
+ gr.Markdown("### 📊 2. Match Intelligence")
148
+ gauge_plot = gr.Plot()
149
+ match_display = gr.Textbox(label="Identified Matching Skills", interactive=False)
150
+ gap_display = gr.Textbox(label="Identified Skill Gaps", interactive=False)
151
+
152
+ with gr.Row(visible=False) as roadmap_container:
153
+ with gr.Column():
 
 
154
  gr.Markdown("---")
155
+ roadmap_btn = gr.Button("🚀 Generate Knowledge Upgrade Roadmap", variant="secondary")
156
+ roadmap_output = gr.Markdown()
157
 
158
+ # Shared State
159
+ gap_state = gr.State([])
160
 
161
+ # Event Mapping
162
+ run_btn.click(
163
+ fn=main_process,
164
+ inputs=[resume_input, jd_input],
165
+ outputs=[match_display, gap_display, gauge_plot, gap_state, roadmap_container]
 
 
 
 
 
 
 
 
 
 
 
 
 
166
  )
167
+
168
  roadmap_btn.click(
169
+ fn=generate_roadmap,
170
+ inputs=[gap_state],
171
  outputs=[roadmap_output]
172
  )
173