ZainabFatimaa committed on
Commit d1a1c63 · verified
1 Parent(s): 47f0895

Update src/app.py

Files changed (1)
  1. src/app.py +343 -330
src/app.py CHANGED
@@ -58,8 +58,6 @@ from reportlab.lib.pagesizes import letter
  from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
  from reportlab.lib.styles import getSampleStyleSheet
  from reportlab.lib.units import inch
- from reportlab.graphics.charts.barcharts import VerticalBarChart
- from reportlab.graphics.shapes import Drawing
 
  # Claude Chatbot Class
  class ClaudeChatbot:
@@ -80,7 +78,7 @@ class ClaudeChatbot:
  headers = {
  "Authorization": f"Bearer {self.api_key}",
  "Content-Type": "application/json",
- "HTTP-Referer": "https://your-app-url.com", # Replace with your actual URL
+ "HTTP-Referer": "https://your-app-url.com",
  "X-Title": "AI Resume Analyzer"
  }
 
@@ -125,7 +123,7 @@ def download_nltk_data():
  nltk.download('punkt', quiet=True)
  nltk.download('stopwords', quiet=True)
  nltk.download('wordnet', quiet=True)
- nltk.download('punkt_tab', quiet=True) # For newer NLTK versions
+ nltk.download('punkt_tab', quiet=True)
 
  # Initialize tools with better error handling
  @st.cache_resource
@@ -139,7 +137,6 @@ def init_tools():
  st.success("✅ spaCy model loaded successfully")
  except OSError:
  try:
- # Try to download the model automatically
  import subprocess
  import sys
  with st.spinner("Downloading spaCy model..."):
@@ -163,17 +160,14 @@ def init_tools():
 
  return nlp, grammar_tool
 
- # Fallback functions for when dependencies are missing
  def simple_fuzzy_match(keyword, text):
  """Simple fuzzy matching fallback when fuzzywuzzy is not available"""
  keyword_lower = keyword.lower()
  text_lower = text.lower()
 
- # Exact match
  if keyword_lower in text_lower:
  return 100
 
- # Check for partial matches with some tolerance
  keyword_words = keyword_lower.split()
  matches = sum(1 for word in keyword_words if word in text_lower)
  return (matches / len(keyword_words)) * 100 if keyword_words else 0
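For context, the fallback scorer kept above returns 100 on an exact substring hit and otherwise the percentage of the keyword's words found in the text. A small self-contained demo (function copied from the diff; sample strings are made up):

```python
def simple_fuzzy_match(keyword, text):
    """Score a keyword against text: 100 for an exact substring,
    otherwise the share of the keyword's words present in the text."""
    keyword_lower = keyword.lower()
    text_lower = text.lower()
    if keyword_lower in text_lower:
        return 100
    keyword_words = keyword_lower.split()
    matches = sum(1 for word in keyword_words if word in text_lower)
    return (matches / len(keyword_words)) * 100 if keyword_words else 0

print(simple_fuzzy_match("sql", "Knows SQL and Python"))                 # 100: exact substring
print(simple_fuzzy_match("machine learning", "deep learning engineer"))  # 50.0: 1 of 2 words found
```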
@@ -182,15 +176,12 @@ def basic_grammar_check(text):
  """Basic grammar check when language_tool_python is not available"""
  issues = []
 
- # Check for common issues
  sentences = sent_tokenize(text)
 
  for i, sentence in enumerate(sentences):
- # Check for sentences that are too long
  if len(sentence.split()) > 30:
  issues.append(f"Sentence {i+1} might be too long ({len(sentence.split())} words)")
 
- # Check for repeated words
  words = sentence.lower().split()
  for j in range(len(words) - 1):
  if words[j] == words[j + 1] and len(words[j]) > 3:
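The fallback flags only two things: overly long sentences and doubled words. A dependency-free sketch of the same heuristics (a naive period split stands in for `nltk.sent_tokenize`):

```python
def basic_checks(text):
    issues = []
    # naive split on periods; the app uses nltk.sent_tokenize instead
    sentences = [s.strip() for s in text.split('.') if s.strip()]
    for i, sentence in enumerate(sentences):
        words = sentence.lower().split()
        if len(words) > 30:
            issues.append(f"Sentence {i+1} might be too long ({len(words)} words)")
        for j in range(len(words) - 1):
            if words[j] == words[j + 1] and len(words[j]) > 3:
                issues.append(f"Sentence {i+1}: repeated word '{words[j]}'")
    return issues

print(basic_checks("I managed managed a team. Short sentence."))
# ["Sentence 1: repeated word 'managed'"]
```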
@@ -206,12 +197,11 @@ class ResumeAnalyzer:
  try:
  self.stop_words = set(stopwords.words('english'))
  except LookupError:
- # Fallback stop words
  self.stop_words = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'should', 'could', 'can', 'may', 'might', 'must'}
 
  self.lemmatizer = WordNetLemmatizer()
 
- # Expanded job role keywords dictionary (15 total roles)
+ # Job role keywords dictionary
  self.job_keywords = {
  "Data Scientist": ["python", "machine learning", "statistics", "pandas", "numpy", "scikit-learn",
  "tensorflow", "pytorch", "sql", "data analysis", "visualization", "jupyter", "r", "statistics", "deep learning"],
@@ -233,16 +223,6 @@ class ResumeAnalyzer:
  "business process", "gap analysis", "user stories", "workflow", "project management"],
  "Full Stack Developer": ["html", "css", "javascript", "react", "angular", "vue", "node.js", "express",
  "mongodb", "postgresql", "rest api", "graphql", "version control", "responsive design"],
- "Machine Learning Engineer": ["tensorflow", "pytorch", "keras", "scikit-learn", "mlops", "model deployment",
- "feature engineering", "model optimization", "docker", "kubernetes", "python", "deep learning"],
- "Cloud Architect": ["aws", "azure", "gcp", "cloud migration", "serverless", "microservices", "containerization",
- "infrastructure as code", "cost optimization", "scalability", "security"],
- "Sales Manager": ["sales", "crm", "lead generation", "client relationship", "negotiation", "revenue growth",
- "pipeline management", "forecasting", "team leadership", "quota attainment"],
- "Project Manager": ["project management", "pmp", "agile", "scrum", "kanban", "risk management",
- "stakeholder communication", "budget management", "timeline", "resource allocation"],
- "Quality Assurance Engineer": ["testing", "automation", "selenium", "junit", "test cases", "bug tracking",
- "regression testing", "performance testing", "api testing", "quality standards"]
  }
 
  # Common skills database
@@ -265,14 +245,12 @@ class ResumeAnalyzer:
  def extract_text_from_pdf(self, file):
  """Extract text from PDF file"""
  try:
- # Try pdfplumber first
  with pdfplumber.open(file) as pdf:
  text = ""
  for page in pdf.pages:
  text += page.extract_text() or ""
  return text
  except:
- # Fallback to PyPDF2
  try:
  pdf_reader = PyPDF2.PdfReader(file)
  text = ""
@@ -302,23 +280,18 @@ class ResumeAnalyzer:
 
  def preprocess_text(self, text):
  """Clean and preprocess text"""
- # Remove special characters and digits
  text = re.sub(r'[^a-zA-Z\s]', '', text)
- # Convert to lowercase
  text = text.lower()
- # Tokenize
+
  try:
  tokens = word_tokenize(text)
  except LookupError:
- # Fallback tokenization
  tokens = text.split()
 
- # Remove stopwords and lemmatize
  try:
  tokens = [self.lemmatizer.lemmatize(token) for token in tokens
  if token not in self.stop_words and len(token) > 2]
  except LookupError:
- # Fallback without lemmatization
  tokens = [token for token in tokens
  if token not in self.stop_words and len(token) > 2]
 
@@ -328,7 +301,6 @@ class ResumeAnalyzer:
  """Extract different sections from resume"""
  sections = {}
 
- # Define section patterns
  section_patterns = {
  'education': r'(education|academic|qualification|degree|university|college)',
  'experience': r'(experience|employment|work|career|professional|job|position)',
@@ -351,7 +323,6 @@ class ResumeAnalyzer:
  continue
 
  if capturing:
- # Stop if we hit another section
  if any(re.search(p, line.lower()) for p in section_patterns.values() if p != pattern):
  break
  if line.strip():
@@ -373,7 +344,6 @@ class ResumeAnalyzer:
  found_technical.append(skill)
 
  for skill in self.soft_skills:
- # Use more flexible matching for soft skills
  skill_words = skill.lower().split()
  if all(word in text_lower for word in skill_words):
  found_soft.append(skill)
@@ -391,11 +361,9 @@ class ResumeAnalyzer:
  found_keywords = []
  for keyword in keywords:
  if FUZZYWUZZY_AVAILABLE:
- # Use fuzzy matching
  if fuzz.partial_ratio(keyword, text_lower) > 80:
  found_keywords.append(keyword)
  else:
- # Use simple matching
  if simple_fuzzy_match(keyword, text_lower) > 80:
  found_keywords.append(keyword)
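Both branches apply the same >80 cutoff, but they measure differently: `fuzz.partial_ratio` scores the best-aligned substring, so near-miss spellings still pass, while the fallback needs whole-word hits. A quick comparison with made-up text:

```python
from fuzzywuzzy import fuzz

resume_text = "built pipelines with scikit learn and kubernetes"
print(fuzz.partial_ratio("kubernetes", resume_text))    # 100: exact substring
print(fuzz.partial_ratio("scikit-learn", resume_text))  # ~92: hyphen vs. space still clears 80
```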
 
@@ -406,7 +374,7 @@
  """Check grammar and language quality"""
  if self.grammar_tool and GRAMMAR_TOOL_AVAILABLE:
  try:
- matches = self.grammar_tool.check(text[:5000]) # Limit text length
+ matches = self.grammar_tool.check(text[:5000])
  return matches
  except:
  return basic_grammar_check(text)
@@ -455,12 +423,10 @@
  experience = sections.get('experience', '')
  skills = sections.get('skills', '')
 
- # Extract key information
  degree_match = re.search(r'(bachelor|master|phd|degree|engineering|science|business)',
  education.lower())
  experience_years = len(re.findall(r'\b\d{4}\b', experience))
 
- # Create summary template
  summary_parts = []
 
  if degree_match:
@@ -472,7 +438,6 @@
  if experience_years > 0:
  summary_parts.append(f"with {experience_years}+ years of experience")
 
- # Add skills context
  tech_skills, soft_skills = self.extract_skills(text)
  if tech_skills:
  main_skills = ', '.join(tech_skills[:3])
@@ -603,7 +568,11 @@ def main():
  st.error("❌ Claude API Not Available")
 
  # Initialize analyzer
- analyzer = ResumeAnalyzer()
+ try:
+ analyzer = ResumeAnalyzer()
+ except Exception as e:
+ st.error(f"Error initializing analyzer: {str(e)}")
+ return
 
  # Sidebar for job role selection
  st.sidebar.header("Analysis Settings")
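The pattern this commit applies throughout `main()` is: wrap a step in try/except, report via `st.error`, and stop instead of surfacing a raw traceback. A hypothetical helper showing the same idea in isolation (the helper name and its usage line are illustrative, not part of the commit):

```python
import streamlit as st

def guarded(step_name, fn, *args):
    """Run one pipeline step; on failure, show the error and halt the script run."""
    try:
        return fn(*args)
    except Exception as e:
        st.error(f"Error during {step_name}: {e}")
        st.stop()  # halts this rerun, much like the `return` used in main()

# hypothetical usage:
# text = guarded("text extraction", analyzer.extract_text_from_pdf, uploaded_file)
```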
@@ -629,321 +598,365 @@
  file_type = uploaded_file.type
 
  with st.spinner("Extracting text from resume..."):
- if file_type == "application/pdf":
- text = analyzer.extract_text_from_pdf(uploaded_file)
- elif file_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
- text = analyzer.extract_text_from_docx(uploaded_file)
- else: # txt
- text = analyzer.extract_text_from_txt(uploaded_file)
-
- if "Error" not in text:
  # Process the resume
  st.success("✅ Resume uploaded and processed successfully!")
 
  # Store resume context for chatbot
  st.session_state.resume_context = text
 
- # Extract data for analysis
- sections = analyzer.extract_sections(text)
- tech_skills, soft_skills = analyzer.extract_skills(text)
- found_keywords, match_percentage = analyzer.keyword_matching(text, selected_role)
- ats_score = analyzer.calculate_ats_score(text, sections)
-
- # Create tabs for different analyses
- tab1, tab2, tab3, tab4, tab5, tab6 = st.tabs([
- "📊 Overview", "🎯 Skills Analysis", "📝 Section Breakdown",
- "🔍 ATS Analysis", "📋 Report & Suggestions", "🤖 AI Assistant"
- ])
-
- with tab1:
- st.header("Resume Overview")
-
- col1, col2 = st.columns(2)
-
- with col1:
- # Basic stats
- word_count = len(text.split())
- char_count = len(text)
 
- st.metric("Word Count", word_count)
- st.metric("Character Count", char_count)
- st.metric("Sections Found", len([s for s in sections.values() if s]))
-
- with col2:
- # Generate persona summary
- persona_summary = analyzer.generate_persona_summary(text, sections)
- st.subheader("🎭 AI Persona Summary")
- st.info(persona_summary)
-
- # Word cloud
- st.subheader("☁️ Word Cloud")
- preprocessed_tokens = analyzer.preprocess_text(text)
- if preprocessed_tokens:
- wordcloud_text = ' '.join(preprocessed_tokens)
- try:
- wordcloud = WordCloud(width=800, height=400, background_color='white').generate(wordcloud_text)
 
- fig, ax = plt.subplots(figsize=(12, 6))
- ax.imshow(wordcloud, interpolation='bilinear')
- ax.axis('off')
- st.pyplot(fig)
- except Exception as e:
- st.warning("Could not generate word cloud. Showing top words instead.")
- word_freq = Counter(preprocessed_tokens)
- top_words = word_freq.most_common(20)
- words_df = pd.DataFrame(top_words, columns=['Word', 'Frequency'])
- fig = px.bar(words_df, x='Word', y='Frequency', title='Top 20 Words')
- st.plotly_chart(fig)
-
- with tab2:
- st.header("Skills Analysis")
-
- col1, col2 = st.columns(2)
 
- with col1:
- st.subheader("🔧 Technical Skills")
- if tech_skills:
- # Create a nice display for skills
- skills_text = " • ".join(tech_skills)
- st.success(f"Found {len(tech_skills)} technical skills:")
- st.write(skills_text)
-
- # Skills distribution chart
- if len(tech_skills) > 5:
- skills_df = pd.DataFrame({
- 'Skill': tech_skills[:10],
- 'Count': [1] * len(tech_skills[:10])
- })
- fig = px.pie(skills_df, values='Count', names='Skill',
- title='Technical Skills Distribution')
- st.plotly_chart(fig, use_container_width=True)
  else:
- # Simple bar chart for fewer skills
- skills_df = pd.DataFrame({
- 'Skill': tech_skills,
- 'Count': [1] * len(tech_skills)
- })
- fig = px.bar(skills_df, x='Skill', y='Count',
- title='Technical Skills Found')
- fig.update_xaxis(tickangle=45)
- st.plotly_chart(fig, use_container_width=True)
- else:
- st.warning("No technical skills detected")
- st.info("💡 Consider adding technical skills relevant to your field")
 
- with col2:
- st.subheader("🤝 Soft Skills")
- if soft_skills:
- skills_text = " • ".join(soft_skills)
- st.success(f"Found {len(soft_skills)} soft skills:")
- st.write(skills_text)
 
- # Soft skills chart
- if len(soft_skills) > 3:
- soft_df = pd.DataFrame({
- 'Skill': soft_skills[:8],
- 'Count': [1] * len(soft_skills[:8])
- })
- fig = px.bar(soft_df, x='Skill', y='Count',
- title='Soft Skills Found',
- color='Skill')
- fig.update_xaxis(tickangle=45)
- st.plotly_chart(fig, use_container_width=True)
  else:
- # Display as simple list for fewer skills
- for skill in soft_skills:
- st.write(f"✅ {skill}")
  else:
- st.warning("No soft skills detected")
- st.info("💡 Consider highlighting leadership, communication, and teamwork skills")
-
- # Skills comparison section
- st.subheader("📊 Skills Overview")
-
- # Create metrics row
- col1, col2, col3, col4 = st.columns(4)
-
- with col1:
- st.metric("Technical Skills", len(tech_skills))
-
- with col2:
- st.metric("Soft Skills", len(soft_skills))
-
- with col3:
- total_skills = len(tech_skills) + len(soft_skills)
- st.metric("Total Skills", total_skills)
 
- with col4:
- # Calculate skills balance
- if total_skills > 0:
- tech_ratio = len(tech_skills) / total_skills * 100
- st.metric("Tech/Soft Ratio", f"{tech_ratio:.0f}%/{100-tech_ratio:.0f}%")
  else:
- st.metric("Tech/Soft Ratio", "0%/0%")
-
- # Skills recommendations
- st.subheader("💡 Skills Recommendations")
-
- recommendations = []
-
- # Technical skills recommendations
- if len(tech_skills) < 5:
- recommendations.append("📈 Add more technical skills relevant to your field")
-
- # Soft skills recommendations
- if len(soft_skills) < 3:
- recommendations.append("🤝 Highlight more soft skills like leadership and communication")
-
- # Balance recommendations
- if len(tech_skills) > 0 and len(soft_skills) == 0:
- recommendations.append("⚖️ Balance technical skills with soft skills")
- elif len(soft_skills) > 0 and len(tech_skills) == 0:
- recommendations.append("⚖️ Add technical skills to complement your soft skills")
-
- if recommendations:
  for rec in recommendations:
- st.info(rec)
- else:
- st.success("✅ Good balance of technical and soft skills!")
-
- # Role-specific keyword analysis
- st.subheader(f"🎯 {selected_role} Keywords Analysis")
-
- col1, col2 = st.columns(2)
-
- with col1:
- # Match percentage visualization
- fig = go.Figure(go.Indicator(
- mode = "gauge+number",
- value = match_percentage,
- domain = {'x': [0, 1], 'y': [0, 1]},
- title = {'text': f"{selected_role} Match"},
- gauge = {
- 'axis': {'range': [None, 100]},
- 'bar': {'color': "darkgreen"},
- 'steps': [
- {'range': [0, 40], 'color': "lightcoral"},
- {'range': [40, 70], 'color': "yellow"},
- {'range': [70, 100], 'color': "lightgreen"}
- ],
- 'threshold': {
- 'line': {'color': "red", 'width': 4},
- 'thickness': 0.75,
- 'value': 80
- }
- }
- ))
- fig.update_layout(height=300)
- st.plotly_chart(fig, use_container_width=True)
-
- with col2:
- st.metric("Keywords Found", len(found_keywords))
- st.metric("Match Percentage", f"{match_percentage:.1f}%")
 
- # Match level indicator
- if match_percentage >= 80:
- st.success("🎉 Excellent match!")
- elif match_percentage >= 60:
- st.warning("👍 Good match")
- elif match_percentage >= 40:
- st.warning("⚠️ Fair match")
- else:
- st.error("❌ Poor match")
-
- # Keywords found section
- if found_keywords:
- st.subheader("✅ Keywords Found")
- # Display found keywords in a nice format
- keyword_cols = st.columns(3)
- for i, keyword in enumerate(found_keywords):
- with keyword_cols[i % 3]:
- st.success(f"✓ {keyword}")
- else:
- st.warning("❌ No role-specific keywords found")
-
- # Missing keywords section
- all_keywords = analyzer.job_keywords[selected_role]
- missing_keywords = [kw for kw in all_keywords if kw not in found_keywords]
-
- if missing_keywords:
- st.subheader("🔍 Suggested Keywords to Add")
- st.info(f"Consider adding these {selected_role}-specific keywords to improve your match score:")
 
- # Show missing keywords in expandable sections
- with st.expander(f"View all {len(missing_keywords)} missing keywords", expanded=len(missing_keywords) <= 10):
- missing_cols = st.columns(3)
- for i, keyword in enumerate(missing_keywords):
- with missing_cols[i % 3]:
- st.write(f"📝 {keyword}")
-
- # Skills gap analysis
- st.subheader("📋 Skills Gap Analysis")
-
- # Calculate skills coverage for the role
- role_technical_skills = [skill for skill in analyzer.technical_skills
- if skill in analyzer.job_keywords[selected_role]]
-
- found_role_skills = [skill for skill in tech_skills if skill in role_technical_skills]
- missing_role_skills = [skill for skill in role_technical_skills if skill not in tech_skills]
-
- if role_technical_skills:
- coverage_percentage = (len(found_role_skills) / len(role_technical_skills)) * 100
  col1, col2 = st.columns(2)
 
  with col1:
- st.metric("Role Skills Coverage", f"{coverage_percentage:.1f}%")
-
- if coverage_percentage >= 80:
- st.success("🎯 Excellent coverage of role-specific skills!")
- elif coverage_percentage >= 60:
- st.warning("👍 Good coverage, consider adding a few more")
- else:
- st.error("⚠️ Low coverage, focus on adding role-specific skills")
 
  with col2:
- if missing_role_skills:
- st.write("**Priority skills to add:**")
- for skill in missing_role_skills[:5]:
- st.write(f"🎯 {skill}")
- else:
- st.success("✅ All key role skills covered!")
-
- # Skills trend analysis (if we had historical data)
- st.subheader("📈 Skills Insights")
-
- insights = []
-
- # Programming languages analysis
- programming_langs = ['python', 'java', 'javascript', 'c++', 'c#', 'php', 'ruby', 'go']
- found_langs = [lang for lang in programming_langs if lang in [s.lower() for s in tech_skills]]
-
- if len(found_langs) >= 3:
- insights.append(f"💻 Strong programming portfolio with {len(found_langs)} languages")
- elif len(found_langs) >= 1:
- insights.append(f"💻 Programming experience in {', '.join(found_langs)}")
-
- # Cloud skills analysis
- cloud_skills = ['aws', 'azure', 'gcp', 'docker', 'kubernetes']
- found_cloud = [skill for skill in cloud_skills if skill in [s.lower() for s in tech_skills]]
-
- if found_cloud:
- insights.append(f"☁️ Cloud-ready with {', '.join(found_cloud)} experience")
-
- # Data skills analysis
- data_skills = ['sql', 'python', 'tableau', 'power bi', 'excel', 'pandas', 'numpy']
- found_data = [skill for skill in data_skills if skill in [s.lower() for s in tech_skills]]
-
- if len(found_data) >= 3:
- insights.append(f"📊 Strong data analysis capabilities")
-
- if insights:
- for insight in insights:
- st.info(insight)
- else:
- st.info("💡 Add more technical skills to unlock insights about your profile")
 
  if __name__ == "__main__":
- main()
  file_type = uploaded_file.type
 
  with st.spinner("Extracting text from resume..."):
+ try:
+ if file_type == "application/pdf":
+ text = analyzer.extract_text_from_pdf(uploaded_file)
+ elif file_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
+ text = analyzer.extract_text_from_docx(uploaded_file)
+ else: # txt
+ text = analyzer.extract_text_from_txt(uploaded_file)
+ except Exception as e:
+ st.error(f"Error extracting text: {str(e)}")
+ return
+
+ if "Error" not in text and text.strip():
  # Process the resume
  st.success("✅ Resume uploaded and processed successfully!")
 
  # Store resume context for chatbot
  st.session_state.resume_context = text
 
+ try:
+ # Extract data for analysis
+ sections = analyzer.extract_sections(text)
+ tech_skills, soft_skills = analyzer.extract_skills(text)
+ found_keywords, match_percentage = analyzer.keyword_matching(text, selected_role)
+ ats_score = analyzer.calculate_ats_score(text, sections)
+
+ # Create tabs for different analyses
+ tab1, tab2, tab3, tab4, tab5, tab6 = st.tabs([
+ "📊 Overview", "🎯 Skills Analysis", "📝 Section Breakdown",
+ "🔍 ATS Analysis", "📋 Report & Suggestions", "🤖 AI Assistant"
+ ])
+
+ with tab1:
+ st.header("Resume Overview")
+ col1, col2 = st.columns(2)
+
+ with col1:
+ # Basic stats
+ word_count = len(text.split())
+ char_count = len(text)
+ st.metric("Word Count", word_count)
+ st.metric("Character Count", char_count)
+ st.metric("Sections Found", len([s for s in sections.values() if s]))
+
+ with col2:
+ # Generate persona summary
+ persona_summary = analyzer.generate_persona_summary(text, sections)
+ st.subheader("🎭 AI Persona Summary")
+ st.info(persona_summary)
+
+ # Word cloud
+ st.subheader("☁️ Word Cloud")
+ preprocessed_tokens = analyzer.preprocess_text(text)
+ if preprocessed_tokens:
+ wordcloud_text = ' '.join(preprocessed_tokens)
+ try:
+ wordcloud = WordCloud(width=800, height=400, background_color='white').generate(wordcloud_text)
+
+ fig, ax = plt.subplots(figsize=(12, 6))
+ ax.imshow(wordcloud, interpolation='bilinear')
+ ax.axis('off')
+ st.pyplot(fig)
+ except Exception as e:
+ st.warning("Could not generate word cloud. Showing top words instead.")
+ word_freq = Counter(preprocessed_tokens)
+ top_words = word_freq.most_common(20)
+
+ words_df = pd.DataFrame(top_words, columns=['Word', 'Frequency'])
+ fig = px.bar(words_df, x='Word', y='Frequency', title='Top 20 Words')
+ st.plotly_chart(fig)
+
+ with tab2:
+ st.header("Skills Analysis")
+
+ col1, col2 = st.columns(2)
+
+ with col1:
+ st.subheader("🔧 Technical Skills")
+ if tech_skills:
+ skills_text = " • ".join(tech_skills)
+ st.success(f"Found {len(tech_skills)} technical skills:")
+ st.write(skills_text)
+
+ if len(tech_skills) > 5:
+ skills_df = pd.DataFrame({
+ 'Skill': tech_skills[:10],
+ 'Count': [1] * len(tech_skills[:10])
+ })
+ fig = px.pie(skills_df, values='Count', names='Skill',
+ title='Technical Skills Distribution')
+ st.plotly_chart(fig, use_container_width=True)
+ else:
+ skills_df = pd.DataFrame({
+ 'Skill': tech_skills,
+ 'Count': [1] * len(tech_skills)
+ })
+ fig = px.bar(skills_df, x='Skill', y='Count',
+ title='Technical Skills Found')
+ fig.update_xaxes(tickangle=45)
+ st.plotly_chart(fig, use_container_width=True)
+ else:
+ st.warning("No technical skills detected")
+ st.info("💡 Consider adding technical skills relevant to your field")
+
+ # Role-specific keyword analysis
+ st.subheader(f"🎯 {selected_role} Keywords")
+ if found_keywords:
+ st.success(f"Found {len(found_keywords)} relevant keywords for {selected_role}:")
+ keywords_text = " • ".join(found_keywords)
+ st.write(keywords_text)
+ st.info(f"Match Percentage: {match_percentage:.1f}%")
 
+ # Progress bar for match percentage
+ st.progress(match_percentage / 100)
+ else:
+ st.warning(f"No {selected_role}-specific keywords found")
+ missing_keywords = [kw for kw in analyzer.job_keywords[selected_role] if kw not in text.lower()]
+ if missing_keywords:
+ st.info(f"💡 Consider adding these keywords: {', '.join(missing_keywords[:5])}")
 
+ with tab3:
+ st.header("Section Breakdown")
+
+ for section_name, section_content in sections.items():
+ if section_content:
+ with st.expander(f"📑 {section_name.title()} Section"):
+ st.text_area(
+ f"{section_name.title()} Content",
+ section_content,
+ height=200,
+ key=f"section_{section_name}"
+ )
+
+ # Section-specific analysis
+ word_count = len(section_content.split())
+ st.metric(f"{section_name.title()} Word Count", word_count)
+
+ if section_name == "experience":
+ # Analyze experience section
+ years_mentioned = len(re.findall(r'\b(19|20)\d{2}\b', section_content))
+ companies_mentioned = len(re.findall(r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\b', section_content))
+ st.metric("Years/Dates Mentioned", years_mentioned)
+ st.metric("Potential Companies", companies_mentioned)
+
+ elif section_name == "education":
+ # Analyze education section
+ degrees = re.findall(r'\b(bachelor|master|phd|degree|diploma|certificate)\b', section_content.lower())
+ st.metric("Degrees/Certificates Found", len(degrees))
  else:
+ st.warning(f"⚠️ {section_name.title()} section not found or empty")
+ with tab4:
+ st.header("ATS Analysis")
+
+ col1, col2 = st.columns(2)
+
+ with col1:
+ st.subheader("📊 ATS Score Breakdown")
+ st.metric("Overall ATS Score", f"{ats_score}/100")
 
+ # ATS score visualization
+ fig = go.Figure(go.Indicator(
+ mode = "gauge+number+delta",
+ value = ats_score,
+ domain = {'x': [0, 1], 'y': [0, 1]},
+ title = {'text': "ATS Score"},
+ delta = {'reference': 70},
+ gauge = {
+ 'axis': {'range': [None, 100]},
+ 'bar': {'color': "darkblue"},
+ 'steps': [
+ {'range': [0, 50], 'color': "lightgray"},
+ {'range': [50, 70], 'color': "yellow"},
+ {'range': [70, 100], 'color': "green"}
+ ],
+ 'threshold': {
+ 'line': {'color': "red", 'width': 4},
+ 'thickness': 0.75,
+ 'value': 90
+ }
+ }
+ ))
+ st.plotly_chart(fig, use_container_width=True)
+
+ with col2:
+ st.subheader("🎯 Role Match Analysis")
+ st.metric("Role Match Score", f"{match_percentage:.1f}%")
+
+ # Combined score
+ combined_score = (ats_score + match_percentage) / 2
+ st.metric("Combined Score", f"{combined_score:.1f}/100")
+
+ # Score interpretation
+ if combined_score >= 80:
+ st.success("🎉 Excellent! Your resume is well-optimized")
+ elif combined_score >= 60:
+ st.warning("👍 Good, but room for improvement")
  else:
+ st.error("⚠️ Needs significant improvement")
+
+ # Grammar check
+ st.subheader("📝 Grammar & Language Quality")
+ with st.spinner("Checking grammar..."):
+ grammar_issues = analyzer.grammar_check(text)
+
+ if grammar_issues:
+ st.warning(f"Found {len(grammar_issues)} potential grammar issues:")
+ for i, issue in enumerate(grammar_issues[:10]): # Show first 10 issues
+ if hasattr(issue, 'message'):
+ st.write(f"• {issue.message}")
+ else:
+ st.write(f"• {str(issue)}")
+
+ if len(grammar_issues) > 10:
+ st.info(f"... and {len(grammar_issues) - 10} more issues")
  else:
+ st.success("✅ No major grammar issues detected!")
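The ATS gauge added above is a plotly Indicator. Reduced to a standalone sketch with illustrative values (62 against the reference of 70; the score is a stand-in, not app output):

```python
import plotly.graph_objects as go

fig = go.Figure(go.Indicator(
    mode="gauge+number+delta",
    value=62,                     # stand-in for the computed ats_score
    delta={'reference': 70},      # delta arrow relative to the 70 mark
    title={'text': "ATS Score"},
    gauge={
        'axis': {'range': [None, 100]},
        'bar': {'color': "darkblue"},
        'steps': [
            {'range': [0, 50], 'color': "lightgray"},
            {'range': [50, 70], 'color': "yellow"},
            {'range': [70, 100], 'color': "green"},
        ],
        'threshold': {'line': {'color': "red", 'width': 4},
                      'thickness': 0.75, 'value': 90},
    },
))
fig.show()  # the app renders it with st.plotly_chart(fig, use_container_width=True)
```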
+ with tab5:
+ st.header("Report & Suggestions")
+
+ # Get AI analysis from Claude
+ if os.getenv('OPENROUTER_API_KEY'):
+ st.subheader("🤖 AI-Powered Analysis")
+ with st.spinner("Getting AI analysis from Claude..."):
+ claude_analysis = analyzer.get_claude_analysis(
+ text, sections, selected_role, ats_score, match_percentage
+ )
+ st.markdown(claude_analysis)
  else:
+ st.info("Claude API not available. Showing basic recommendations.")
+
+ # Basic recommendations
+ st.subheader("📋 Quick Recommendations")
+ recommendations = []
+
+ if ats_score < 70:
+ recommendations.append("🔹 Improve ATS compatibility by adding more bullet points and clear section headers")
+
+ if match_percentage < 60:
+ recommendations.append(f"🔹 Add more {selected_role}-specific keywords to improve role match")
+
+ if len(tech_skills) < 5:
+ recommendations.append("🔹 Include more technical skills relevant to your field")
+
+ if not sections.get('projects'):
+ recommendations.append("🔹 Consider adding a projects section to showcase your work")
+
+ if len(text.split()) < 300:
+ recommendations.append("🔹 Expand your resume content - it seems too brief")
+ elif len(text.split()) > 800:
+ recommendations.append("🔹 Consider condensing your resume - it might be too lengthy")
+
  for rec in recommendations:
+ st.markdown(rec)
+ # PDF Report Generation
+ st.subheader("📄 Download Report")
+ if st.button("Generate PDF Report"):
+ try:
+ pdf_buffer = analyzer.create_pdf_report(
+ text, sections, ats_score, match_percentage,
+ selected_role, tech_skills, soft_skills, found_keywords
+ )
+
+ st.download_button(
+ label="📥 Download PDF Report",
+ data=pdf_buffer,
+ file_name=f"resume_analysis_{datetime.now().strftime('%Y%m%d_%H%M%S')}.pdf",
+ mime="application/pdf"
+ )
+ except Exception as e:
+ st.error(f"Error generating PDF: {str(e)}")
+
+ with tab6:
+ st.header("🤖 AI Assistant Chat")
 
+ if not os.getenv('OPENROUTER_API_KEY'):
+ st.error("Claude API key not configured. Please set OPENROUTER_API_KEY environment variable.")
+ return
+ # Display chat history
+ for chat in st.session_state.chat_history:
+ with st.chat_message(chat["role"]):
+ st.markdown(chat["content"])
+ # Chat input
+ if prompt := st.chat_input("Ask me anything about your resume..."):
+ # Add user message to chat history
+ st.session_state.chat_history.append({"role": "user", "content": prompt})
+
+ # Display user message
+ with st.chat_message("user"):
+ st.markdown(prompt)
+
+ # Generate AI response
+ with st.chat_message("assistant"):
+ with st.spinner("Thinking..."):
+ response = analyzer.chatbot.generate_response(
+ prompt,
+ st.session_state.resume_context
+ )
+ st.markdown(response)
+
+ # Add assistant response to chat history
+ st.session_state.chat_history.append({"role": "assistant", "content": response})
+
+ # Suggested questions
+ st.subheader("💡 Suggested Questions")
  col1, col2 = st.columns(2)
 
  with col1:
+ if st.button("How can I improve my resume?"):
+ st.session_state.chat_history.append({
+ "role": "user",
+ "content": "How can I improve my resume?"
+ })
+ st.experimental_rerun()
  with col2:
+ if st.button("What skills should I add?"):
+ st.session_state.chat_history.append({
+ "role": "user",
+ "content": f"What skills should I add for a {selected_role} position?"
+ })
+ st.experimental_rerun()
+
+ # Clear chat button
+ if st.button("🗑️ Clear Chat History"):
+ st.session_state.chat_history = []
+ st.experimental_rerun()
+
+ except Exception as e:
+ st.error(f"Error during analysis: {str(e)}")
+ else:
+ st.error("Could not extract text from the uploaded file. Please try a different file.")
+
+ # Footer
+ st.markdown("---")
+ st.markdown("### 📝 Tips for Better Resume Analysis")
+ st.markdown("""
+ - **Upload clear, well-formatted documents** for better text extraction
+ - **Select the appropriate job role** to get relevant keyword matching
+ - **Use the AI Assistant** to get personalized advice
+ - **Download the PDF report** for offline reference
+ - **Check multiple job roles** to see how your resume performs across different positions
+ """)
+
+ # Instructions for API setup
+ if not os.getenv('OPENROUTER_API_KEY'):
+ with st.expander("🔧 Setup Instructions for Claude AI"):
+ st.markdown("""
+ To enable the AI Assistant feature:
+ 1. Get an API key from [OpenRouter](https://openrouter.ai/)
+ 2. Set the environment variable: `OPENROUTER_API_KEY=your_key_here`
+ 3. Restart the application
+ """)
 
  if __name__ == "__main__":
+ main()
+
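For reference, the new chat tab follows the standard Streamlit chat pattern: history lives in `st.session_state`, each turn renders inside `st.chat_message`, and `st.chat_input` drives the loop. A minimal runnable sketch, with a stub in place of the app's `ClaudeChatbot.generate_response` call:

```python
import streamlit as st

def get_reply(prompt: str) -> str:
    # stub standing in for ClaudeChatbot.generate_response via OpenRouter
    return f"You asked: {prompt}"

if "chat_history" not in st.session_state:
    st.session_state.chat_history = []

for chat in st.session_state.chat_history:  # replay prior turns
    with st.chat_message(chat["role"]):
        st.markdown(chat["content"])

if prompt := st.chat_input("Ask me anything about your resume..."):
    st.session_state.chat_history.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)
    response = get_reply(prompt)
    with st.chat_message("assistant"):
        st.markdown(response)
    st.session_state.chat_history.append({"role": "assistant", "content": response})
```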