Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import torch | |
| from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline | |
| import PyPDF2 | |
| import docx | |
| import io | |
| import re | |
| import numpy as np | |
| from sklearn.feature_extraction.text import TfidfVectorizer | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| import nltk | |
| from collections import Counter | |
| import warnings | |
| import time | |
| import json | |
| warnings.filterwarnings("ignore") | |
# Download required NLTK data
def _ensure_nltk_resource(resource_path, package, fallback_package=None):
    """Make sure an NLTK resource is installed, downloading it when missing.

    Args:
        resource_path: Path handed to ``nltk.data.find`` to probe for the
            resource (e.g. ``'corpora/stopwords'``).
        package: Package name handed to ``nltk.download`` when the probe
            raises ``LookupError``.
        fallback_package: Optional package to download instead when the
            download of ``package`` raises.

    Raises:
        Exception: whatever ``nltk.download`` raises when no fallback is
            given, or when the fallback download itself raises.
    """
    try:
        nltk.data.find(resource_path)
    except LookupError:
        try:
            nltk.download(package)
        except Exception:
            # Catch Exception rather than everything so Ctrl-C / SystemExit
            # still abort the start-up instead of triggering the fallback.
            if fallback_package is None:
                raise
            nltk.download(fallback_package)


_ensure_nltk_resource('tokenizers/punkt', 'punkt', fallback_package='punkt_tab')
_ensure_nltk_resource('tokenizers/punkt_tab', 'punkt_tab')
_ensure_nltk_resource('corpora/stopwords', 'stopwords')
| from nltk.corpus import stopwords | |
| from nltk.tokenize import word_tokenize, sent_tokenize | |
| class ModernATSAnalyzer: | |
def __init__(self):
    """Load the models and text helpers used by the analysis methods.

    Both model loads are best-effort: when the sentence-embedding model
    cannot be loaded ``self.embedding_model`` stays ``None``, and when no
    text-generation pipeline can be built ``self.llm_pipeline`` stays
    ``None``. The analysis methods check for ``None`` before using them.

    ``self.progress_callback`` is ``None`` for the whole constructor, so the
    ``update_progress`` calls below do not reach any callback.
    """
    self.progress_callback = None
    self.llm_pipeline = None
    self.embedding_model = None
    self.update_progress("π Initializing AI models...", 5)

    # Initialize embedding model for semantic analysis
    try:
        # Imported lazily so a missing package only disables semantic scoring.
        from sentence_transformers import SentenceTransformer
        self.embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
        self.update_progress("β Embedding model loaded", 15)
    except Exception as e:
        self.update_progress(f"β Embedding model failed: {str(e)}", 15)

    # Initialize LLM for intelligent analysis
    try:
        # Candidates are tried in list order and the first one that loads wins.
        # NOTE(review): the entry commented "Fallback option" is listed first,
        # so it is attempted before the other two - confirm the intended order.
        model_options = [
            "microsoft/DialoGPT-small",  # Fallback option
            "HuggingFaceTB/SmolLM2-135M",  # 2025 efficient model
            "Qwen/Qwen2.5-0.5B"  # 2025 small but powerful
        ]
        for model_name in model_options:
            try:
                # NOTE(review): pad_token_id=50256 is hard-coded for every
                # candidate - confirm it is the right pad token for each model.
                self.llm_pipeline = pipeline(
                    "text-generation",
                    model=model_name,
                    tokenizer=model_name,
                    device=-1,  # CPU
                    max_length=512,
                    do_sample=True,
                    temperature=0.7,
                    pad_token_id=50256
                )
            except Exception:
                # This candidate could not be loaded; try the next one.
                # (Exception, not a bare except, so Ctrl-C still interrupts.)
                continue
            self.update_progress(f"β LLM loaded: {model_name}", 25)
            break
        if not self.llm_pipeline:
            self.update_progress("β οΈ Using rule-based analysis (LLM unavailable)", 25)
    except Exception:
        self.update_progress("β οΈ LLM initialization failed, using backup methods", 25)

    self.stop_words = set(stopwords.words('english'))
    self.tfidf_vectorizer = TfidfVectorizer(stop_words='english', max_features=1000)
    self.update_progress("π― System ready for analysis!", 30)
def set_progress_callback(self, callback):
    """Store the callable that ``update_progress`` will invoke.

    Args:
        callback: Called as ``callback(message, progress)``; a falsy value
            disables progress reporting.
    """
    self.progress_callback = callback
def update_progress(self, message, progress):
    """Forward a progress update to the registered callback, if any.

    Args:
        message: Status text passed through to the callback.
        progress: Progress value passed through to the callback (callers in
            this file pass numbers between 5 and 100).

    Returns immediately, without pausing, when no callback is registered.
    """
    callback = self.progress_callback
    if not callback:
        return
    callback(message, progress)
    # Short pause after each delivered update (presumably to let the UI
    # repaint - TODO confirm); pointless when nothing is listening.
    time.sleep(0.05)
def extract_text_from_pdf(self, file_path):
    """Extract text from PDF file.

    Args:
        file_path: Path of the PDF to open.

    Returns:
        The text of every page, each followed by a newline. On any failure
        a string beginning with ``"Error reading PDF: "`` is returned
        instead of raising; callers detect failures by that prefix.
    """
    try:
        with open(file_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            # A page that yields no text (None or "") contributes an empty
            # line instead of making the whole document fail.
            return "".join(
                (page.extract_text() or "") + "\n"
                for page in pdf_reader.pages
            )
    except Exception as e:
        return f"Error reading PDF: {str(e)}"
def extract_text_from_docx(self, file_path):
    """Return the paragraph text of a DOCX file, one paragraph per line.

    Args:
        file_path: Path of the DOCX document to open.

    Returns:
        Every paragraph's text followed by a newline, concatenated in
        document order. On any failure a string beginning with
        ``"Error reading DOCX: "`` is returned instead of raising.
    """
    try:
        document = docx.Document(file_path)
        lines = [para.text + "\n" for para in document.paragraphs]
        return "".join(lines)
    except Exception as exc:
        return f"Error reading DOCX: {exc}"
def clean_text(self, text):
    """Clean and normalize text.

    Every character that is not a word character, whitespace, or one of
    ``. , ( ) -`` is replaced by a space; then each run of whitespace is
    collapsed to a single space and the ends are trimmed.

    Args:
        text: The string to normalize.

    Returns:
        The normalized string.
    """
    # Replace punctuation first: the replacement itself introduces spaces,
    # so collapsing whitespace must come afterwards to leave single spaces.
    text = re.sub(r'[^\w\s.,()-]', ' ', text)
    text = re.sub(r'\s+', ' ', text)
    return text.strip()
def extract_dynamic_keywords(self, text, top_n=30):
    """Rank the most frequent words and repeated phrases in ``text``.

    Args:
        text: Raw text to analyze.
        top_n: Overall budget; half of it (``top_n // 2``) is spent on
            single words and half on phrases.

    Returns:
        A list of ``(term, frequency, kind)`` tuples: first the most common
        words (``kind == 'word'``), then those of the most common phrases
        that occur at least twice (``kind == 'phrase'``).
    """
    budget = top_n // 2

    # Single words: alphabetic, longer than two characters, not a stop word.
    normalized = self.clean_text(text.lower())
    word_freq = Counter(
        token
        for token in word_tokenize(normalized)
        if token.isalpha() and len(token) > 2 and token not in self.stop_words
    )

    # Phrases: adjacent alphabetic tokens within one sentence. Stop words are
    # not filtered here, only very short phrases are skipped.
    phrase_freq = Counter()
    for sentence in sent_tokenize(text):
        tokens = [tok for tok in word_tokenize(sentence.lower()) if tok.isalpha()]
        bigrams = (" ".join(pair) for pair in zip(tokens, tokens[1:]))
        phrase_freq.update(bg for bg in bigrams if len(bg) > 6)
        trigrams = (
            " ".join(triple)
            for triple in zip(tokens, tokens[1:], tokens[2:])
        )
        phrase_freq.update(tg for tg in trigrams if len(tg) > 10)

    keywords = [
        (word, count, 'word') for word, count in word_freq.most_common(budget)
    ]
    keywords.extend(
        (phrase, count, 'phrase')
        for phrase, count in phrase_freq.most_common(budget)
        if count >= 2
    )
    return keywords
def analyze_with_llm(self, resume_text, job_text):
    """Use LLM for intelligent analysis.

    Prompts the text-generation pipeline with the first 500 characters of
    the job description and of the resume, then reads a ``Score: <number>``
    line out of the generated continuation.

    Args:
        resume_text: Extracted resume text.
        job_text: Job description text.

    Returns:
        ``{'overall_score': int in 0..100, 'analysis_text': str,
        'method': 'LLM'}`` when a score could be parsed. Otherwise the
        result of ``fallback_analysis``: this happens when no pipeline is
        loaded, when generation raises, or when the generated text contains
        no parseable score (previously that case reported a constant 50).
    """
    if not self.llm_pipeline:
        return self.fallback_analysis(resume_text, job_text)

    prompt = "\n".join([
        "Analyze this resume against the job description and provide a compatibility score out of 100.",
        "Job Description:",
        f"{job_text[:500]}...",
        "Resume:",
        f"{resume_text[:500]}...",
        "Provide analysis in this format:",
        "Score: [0-100]",
        "Skills Match: [description]",
        "Experience Match: [description]",
        "Key Gaps: [description]",
        "",
    ])

    try:
        response = self.llm_pipeline(prompt, max_new_tokens=200, num_return_sequences=1)
        # The generated text echoes the prompt; keep only what follows it.
        analysis_text = response[0]['generated_text'].split(prompt)[-1].strip()
    except Exception:
        return self.fallback_analysis(resume_text, job_text)

    # Parse the response
    score_match = re.search(r'Score:\s*(\d+)', analysis_text)
    if score_match is None:
        # No usable score in the generation: a made-up default would be
        # presented as a real result, so use the rule-based analysis instead.
        return self.fallback_analysis(resume_text, job_text)

    score = int(score_match.group(1))
    return {
        'overall_score': min(100, max(0, score)),
        'analysis_text': analysis_text,
        'method': 'LLM'
    }
def fallback_analysis(self, resume_text, job_text):
    """Sophisticated rule-based analysis as fallback.

    Combines four component scores into one weighted score:
    keyword overlap (30%), TF-IDF cosine similarity (25%), embedding cosine
    similarity (25%) and the resume structure score (20%).

    Args:
        resume_text: Extracted resume text.
        job_text: Job description text.

    Returns:
        A dict with ``overall_score`` (clamped to 0..100), the four
        component scores, the first ten extracted keywords of each text,
        up to ten common keywords (alphabetically sorted) and
        ``'method': 'Advanced Rule-based'``.
    """
    # Extract keywords from both texts
    resume_keywords = self.extract_dynamic_keywords(resume_text)
    job_keywords = self.extract_dynamic_keywords(job_text)

    # Create keyword sets for comparison
    resume_terms = {kw[0] for kw in resume_keywords}
    job_terms = {kw[0] for kw in job_keywords}

    # 1. Keyword overlap: share of the job's terms also found in the resume.
    # Sorted so the reported sample of matches is the same on every run.
    common_terms = sorted(resume_terms & job_terms)
    keyword_score = (len(common_terms) / len(job_terms)) * 100 if job_terms else 0

    # 2. TF-IDF Similarity
    try:
        tfidf_matrix = self.tfidf_vectorizer.fit_transform([resume_text, job_text])
        tfidf_similarity = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0] * 100
    except Exception:
        # Best-effort component: score it 0 rather than abort the analysis.
        tfidf_similarity = 0

    # 3. Semantic similarity using embeddings
    semantic_score = 0
    if self.embedding_model:
        try:
            # Only the first 512 characters of each text are embedded.
            resume_embedding = self.embedding_model.encode(resume_text[:512])
            job_embedding = self.embedding_model.encode(job_text[:512])
            semantic_score = cosine_similarity([resume_embedding], [job_embedding])[0][0] * 100
        except Exception:
            semantic_score = 0

    # 4. Structure and length analysis
    structure_score = self.analyze_resume_structure(resume_text)

    # Weighted combination
    overall_score = (
        keyword_score * 0.3 +
        tfidf_similarity * 0.25 +
        semantic_score * 0.25 +
        structure_score * 0.2
    )

    return {
        'overall_score': min(100, max(0, overall_score)),
        'keyword_score': keyword_score,
        'tfidf_score': tfidf_similarity,
        'semantic_score': semantic_score,
        'structure_score': structure_score,
        'resume_keywords': resume_keywords[:10],
        'job_keywords': job_keywords[:10],
        'common_keywords': common_terms[:10],
        'method': 'Advanced Rule-based'
    }
def analyze_resume_structure(self, resume_text):
    """Score a resume's structure on a 0-100 scale.

    Starts from 100, subtracts 15 for each of the four expected sections
    (contact, experience, education, skills) whose keyword pattern is not
    found, subtracts 30 for fewer than 150 words or 10 for more than 1200,
    adds 5 when a bullet-like marker is present and 10 when a 19xx/20xx
    year is present, then clamps the result to 0..100.

    Args:
        resume_text: Extracted resume text.

    Returns:
        The clamped integer score.
    """
    section_patterns = {
        'contact': r'(email|phone|@|linkedin|github)',
        'experience': r'(experience|work|employment|career|job)',
        'education': r'(education|degree|university|college|school)',
        'skills': r'(skills|technical|technologies|competencies|tools)'
    }
    found = sum(
        1
        for pattern in section_patterns.values()
        if re.search(pattern, resume_text, re.IGNORECASE)
    )
    # Penalty per missing section.
    score = 100 - (len(section_patterns) - found) * 15

    # Length check on whitespace-separated words.
    n_words = len(resume_text.split())
    if n_words < 150:
        score -= 30
    elif n_words > 1200:
        score -= 10

    # Bonus for bullet points or similar list markers.
    if any(marker in resume_text for marker in ('β’', '-', '*')):
        score += 5

    # Bonus for years/dates (experience indicators).
    if re.search(r'(20\d{2}|19\d{2})', resume_text):
        score += 10

    return max(0, min(100, score))
def generate_intelligent_suggestions(self, analysis_result):
    """Turn an analysis result into a list of Markdown suggestion strings.

    Args:
        analysis_result: Dict with at least ``'method'`` and
            ``'overall_score'``; ``'analysis_text'``, ``'keyword_score'``,
            ``'structure_score'`` and ``'semantic_score'`` are used when
            present.

    Returns:
        Suggestions in this order: the LLM's "Key Gaps" text (LLM results
        only), one headline chosen by the overall score, one tip per
        component score below its threshold, and a closing ATS tip.
    """
    suggestions = []

    # For LLM results, surface whatever the model wrote after "Key Gaps:".
    if analysis_result['method'] == 'LLM' and 'analysis_text' in analysis_result:
        llm_text = analysis_result['analysis_text']
        if 'Key Gaps:' in llm_text:
            gaps = llm_text.split('Key Gaps:')[-1].strip()
            suggestions.append(f"π― **Key Areas to Improve**: {gaps}")

    # Headline: the first tier whose upper bound exceeds the score.
    score = analysis_result['overall_score']
    headline_tiers = (
        (40, "π¨ **Critical**: Your resume needs major optimization. Consider professional resume writing services."),
        (60, "β οΈ **Moderate Compatibility**: Your resume shows potential but needs significant keyword optimization."),
        (80, "π **Good Foundation**: You're on the right track. Focus on fine-tuning keywords and formatting."),
    )
    top_headline = "β **Excellent**: Your resume shows strong compatibility with this job!"
    suggestions.append(
        next((text for bound, text in headline_tiers if score < bound), top_headline)
    )

    # Component-specific tips, only for components present in the result.
    component_tips = (
        ('keyword_score', 40, "π **Keywords**: Incorporate more relevant keywords from the job description naturally into your resume content."),
        ('structure_score', 70, "π **Structure**: Improve resume formatting with clear sections: Contact, Experience, Education, Skills."),
        ('semantic_score', 50, "π¨ **Content Alignment**: Rewrite your experience descriptions to better match the job's language and requirements."),
    )
    for key, threshold, tip in component_tips:
        if key in analysis_result and analysis_result[key] < threshold:
            suggestions.append(tip)

    # Closing advice that is always included.
    suggestions.append("π‘ **ATS Tips**: Use standard fonts, avoid images/graphics, save as PDF, and use keywords in context rather than just listing them.")
    return suggestions
def process_resume_analysis(self, resume_file, job_description, progress=gr.Progress()):
    """Main analysis function.

    Validates the inputs, extracts the resume text, runs the analysis and
    formats the result for the four Markdown outputs of the UI.

    Args:
        resume_file: Path of the uploaded resume (``.pdf`` or ``.docx``),
            or ``None`` when nothing was uploaded.
        job_description: Job description text; must be at least 50
            characters after stripping.
        progress: Progress tracker; called as
            ``progress(fraction, desc=message)``.

    Returns:
        A 4-tuple of strings ``(score, details, suggestions, keywords)``.
        On a validation failure or any exception the first element carries
        the message and the other three are empty strings.
    """
    try:
        def update_progress_ui(message, prog):
            # Internal progress values are percentages; convert to 0..1.
            progress(prog / 100, desc=message)

        self.set_progress_callback(update_progress_ui)

        # Validation
        if resume_file is None:
            return "β Please upload a resume file.", "", "", ""
        if not job_description or len(job_description.strip()) < 50:
            return "β Please provide a detailed job description (at least 50 characters).", "", "", ""

        self.update_progress("π Extracting text from resume...", 35)

        # Extract resume text
        filename = str(resume_file).lower()
        if filename.endswith('.pdf'):
            resume_text = self.extract_text_from_pdf(resume_file)
        elif filename.endswith('.docx'):
            resume_text = self.extract_text_from_docx(resume_file)
        else:
            return "β Unsupported file format. Please upload PDF or DOCX files.", "", "", ""

        # The extractors report failure with a string that *starts* with
        # "Error reading ..."; a prefix check keeps a resume that merely
        # contains that phrase from being treated as a failure.
        if resume_text.startswith("Error reading"):
            return resume_text, "", "", ""
        if len(resume_text.strip()) < 100:
            return "β Resume text is too short or couldn't be extracted. Please ensure your file contains readable text.", "", "", ""

        self.update_progress("π§ Analyzing with AI...", 50)
        # Perform AI analysis
        analysis_result = self.analyze_with_llm(resume_text, job_description)

        self.update_progress("π‘ Generating suggestions...", 80)
        # Generate suggestions
        suggestions = self.generate_intelligent_suggestions(analysis_result)

        self.update_progress("β Analysis complete!", 100)

        # Format results: the first tier whose floor the score reaches.
        score = analysis_result['overall_score']
        status_tiers = (
            (85, "π’", "Excellent Match"),
            (70, "π‘", "Good Compatibility"),
            (50, "π ", "Moderate Match"),
        )
        emoji, status = next(
            ((icon, label) for floor, icon, label in status_tiers if score >= floor),
            ("π΄", "Needs Improvement"),
        )
        score_text = f"# π― ATS Compatibility Score: {score:.0f}/100\n\n{emoji} **{status}**"

        # Detailed breakdown. Entries are joined with blank lines so each
        # one is rendered as its own Markdown paragraph.
        detail_lines = [
            "## π Analysis Breakdown",
            f"**Analysis Method**: {analysis_result['method']}",
            f"**Overall Score**: {score:.1f}/100",
        ]
        if 'keyword_score' in analysis_result:
            detail_lines += [
                f"**Keyword Match**: {analysis_result['keyword_score']:.1f}/100",
                f"**Content Similarity**: {analysis_result.get('tfidf_score', 0):.1f}/100",
                f"**Semantic Match**: {analysis_result.get('semantic_score', 0):.1f}/100",
                f"**Structure Quality**: {analysis_result.get('structure_score', 0):.1f}/100",
            ]
        details = "\n\n".join(detail_lines) + "\n"

        suggestions_text = "## π‘ Improvement Recommendations\n\n" + "\n\n".join(suggestions)

        # Keywords analysis (keyword lists exist only in rule-based results)
        keywords_text = "## π Keyword Analysis\n\n"
        if 'resume_keywords' in analysis_result:
            resume_kw = [kw[0] for kw in analysis_result['resume_keywords']]
            job_kw = [kw[0] for kw in analysis_result['job_keywords']]
            common_kw = analysis_result.get('common_keywords', [])
            keywords_text += "\n\n".join([
                f"**Resume Keywords**: {', '.join(resume_kw)}",
                f"**Job Keywords**: {', '.join(job_kw)}",
                f"**Matching Keywords**: {', '.join(common_kw) if common_kw else 'Limited overlap detected'}",
                "**Recommendation**: Focus on incorporating more job-specific keywords naturally into your resume content.",
            ]) + "\n"
        else:
            keywords_text += "**Dynamic keyword extraction completed.** The analysis considered context and semantic meaning rather than simple keyword matching."

        return score_text, details, suggestions_text, keywords_text
    except Exception as e:
        # UI boundary: report the failure in the first output instead of raising.
        return f"β Analysis error: {str(e)}\n\nPlease try again or contact support.", "", "", ""
# Initialize analyzer
# Module-level instance whose process_resume_analysis method is wired to the
# UI button in create_interface. Constructing it runs
# ModernATSAnalyzer.__init__, so model loading happens when this module is
# imported or run.
analyzer = ModernATSAnalyzer()
def create_interface():
    """Build the Gradio Blocks UI and return it without launching it.

    Layout: a header banner, then a two-column row (left: resume upload,
    job description, analyze button and an info box; right: four Markdown
    outputs), then a footer. The button calls
    ``analyzer.process_resume_analysis`` with the uploaded file and the job
    description and fills the four outputs.

    Returns:
        The configured ``gr.Blocks`` instance.
    """
    # Static HTML fragments, kept apart from the layout code below.
    header_html = """
<div style='text-align: center; padding: 20px; background: linear-gradient(90deg, #667eea 0%, #764ba2 100%); color: white; border-radius: 10px; margin-bottom: 20px;'>
<h1>π€ Modern ATS Resume Analyzer 2025</h1>
<p style='font-size: 16px; margin: 10px 0;'>Powered by Latest AI Models | Dynamic Keyword Extraction | Intelligent Analysis</p>
<p style='font-size: 14px; opacity: 0.9;'>No predefined keywords - Real ATS-like analysis using 2025 AI technology</p>
</div>
"""
    info_box_html = """
<div style='margin-top: 15px; padding: 15px; background: #f0f8ff; border-radius: 8px; border-left: 4px solid #4CAF50;'>
<h4 style='margin: 0 0 10px 0; color: #2E7D32;'>π― What makes this different:</h4>
<ul style='margin: 0; padding-left: 20px; color: #424242;'>
<li><strong>No predefined keywords</strong> - Dynamically extracts relevant terms</li>
<li><strong>2025 AI models</strong> - Uses latest language understanding</li>
<li><strong>Context-aware</strong> - Understands meaning, not just word matching</li>
<li><strong>Real ATS simulation</strong> - Mimics actual hiring systems</li>
</ul>
</div>
"""
    footer_html = """
<div style='text-align: center; padding: 20px; margin-top: 30px; border-top: 2px solid #e0e0e0; background: #fafafa; border-radius: 8px;'>
<h4 style='color: #333; margin-bottom: 15px;'>π§ AI-Powered Analysis Engine</h4>
<div style='display: flex; justify-content: space-around; flex-wrap: wrap;'>
<div style='margin: 10px; text-align: center;'>
<strong style='color: #1976D2;'>π― Dynamic Keywords</strong><br>
<span style='font-size: 12px; color: #666;'>Extracts context-relevant terms</span>
</div>
<div style='margin: 10px; text-align: center;'>
<strong style='color: #388E3C;'>π§ Semantic Analysis</strong><br>
<span style='font-size: 12px; color: #666;'>Understands meaning & context</span>
</div>
<div style='margin: 10px; text-align: center;'>
<strong style='color: #F57C00;'>π Multi-metric Scoring</strong><br>
<span style='font-size: 12px; color: #666;'>Comprehensive compatibility analysis</span>
</div>
<div style='margin: 10px; text-align: center;'>
<strong style='color: #7B1FA2;'>π‘ AI Suggestions</strong><br>
<span style='font-size: 12px; color: #666;'>Personalized improvement tips</span>
</div>
</div>
<p style='margin-top: 15px; font-size: 13px; color: #777;'>
<em>Optimized for CPU inference β’ 2025 Model Architecture β’ Enterprise-grade Analysis</em>
</p>
</div>
"""

    soft_theme = gr.themes.Soft()
    with gr.Blocks(title="Modern ATS Analyzer 2025", theme=soft_theme) as interface:
        gr.HTML(header_html)

        with gr.Row():
            # Left column: inputs.
            with gr.Column(scale=1):
                gr.HTML("<h3>π Upload Resume</h3>")
                resume_file = gr.File(
                    label="Upload Resume (PDF/DOCX)",
                    file_types=[".pdf", ".docx"],
                    type="filepath"
                )
                gr.HTML("<h3>π Job Description</h3>")
                job_description = gr.Textbox(
                    label="Paste Complete Job Description",
                    placeholder="Paste the full job posting including requirements, responsibilities, qualifications, and company information...",
                    lines=15,
                    max_lines=25
                )
                analyze_btn = gr.Button("π Analyze with Modern AI", variant="primary", size="lg")
                gr.HTML(info_box_html)

            # Right column: the four result panes, in output order.
            with gr.Column(scale=1):
                score_output = gr.Markdown(label="π― Compatibility Score")
                details_output = gr.Markdown(label="π Detailed Analysis")
                suggestions_output = gr.Markdown(label="π‘ AI Recommendations")
                keywords_output = gr.Markdown(label="π Keyword Intelligence")

        result_panes = [score_output, details_output, suggestions_output, keywords_output]
        analyze_btn.click(
            fn=analyzer.process_resume_analysis,
            inputs=[resume_file, job_description],
            outputs=result_panes
        )

        gr.HTML(footer_html)

    return interface
if __name__ == "__main__":
    # Script entry point: build the UI and serve it on all interfaces,
    # port 7860, with Gradio's share option enabled.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": True,
    }
    app = create_interface()
    app.launch(**launch_options)