import gradio as gr import spacy import pdfplumber from docx import Document from sentence_transformers import SentenceTransformer, util import re import plotly.graph_objects as go import sys import os import torch from transformers import pipeline # Initialize Models once at startup print("π [1/4] Initializing Semantic Matcher (all-MiniLM-L6-v2)...") try: model = SentenceTransformer('all-MiniLM-L6-v2', device='cpu') except Exception as e: print(f"β Semantic Matcher failed: {e}") model = None print("π [2/4] Initializing Reasoning LLM (FLAN-T5-Base)...") try: # Use CPU by default for stability on Windows unless explicitly requested llm_reasoner = pipeline("text2text-generation", model="google/flan-t5-base", device=-1) except Exception as e: print(f"β οΈ Reasoning LLM failed: {e}") llm_reasoner = None print("π [3/4] Initializing Interview Coach (LaMini-GPT)...") try: llm_coach = pipeline("text-generation", model="MBZUAI/LaMini-GPT-124M", device=-1) except Exception as e: print(f"β οΈ Coach LLM failed: {e}") llm_coach = None print("π [4/4] Loading NLP Entities (spaCy)...") try: nlp = spacy.load("en_core_web_sm") except: print("π₯ Downloading spaCy model...") os.system(f"{sys.executable} -m spacy download en_core_web_sm") try: nlp = spacy.load("en_core_web_sm") except: nlp = None print("β NLP Load failed completely.") print("β¨ Intelligence Engine Ready.") TECH_SKILLS = [ "python", "javascript", "react", "fastapi", "aws", "docker", "kubernetes", "sql", "git", "ml", "nlp", "tensorflow", "pytorch", "java", "golang", "postgresql", "mongodb", "redis", "devops", "rest api", "graphql", "scikit-learn", "pandas", "numpy", "django", "flask", "typescript", "angular", "vue", "html", "css", "node.js", "express", "azure", "gcp", "linux", "terraform", "ansible", "jenkins", "prompt engineering", "openai", "llm", "bert", "tableau", "powerbi", "excel", "c#", "c++", "rust", "r", "spark", "hadoop", "kafka", "mysql", "oracle", "snowflake" ] ROADMAP_DB = { "python": "π [Master Python](https://realpython.com/) - Focus on Backend Automation & Data Science.", "react": "βοΈ [React.dev](https://react.dev/) - Master Hooks & State Management.", "aws": "βοΈ [AWS Builder](https://explore.skillbuilder.aws/) - Get Certified (Solutions Architect).", "docker": "π³ [Docker Guide](https://docs.docker.com/) - Learn Container Architecture.", "kubernetes": "βΈοΈ [K8s Certification](https://kubernetes.io/docs/tutorials/) - Master Orchestration.", "ml": "π€ [ML Specialization](https://www.coursera.org/specializations/machine-learning-introduction) - Focus on Scikit-Learn.", "nlp": "βοΈ [HF NLP Course](https://huggingface.co/learn/nlp-course/) - Master Transformers.", "sql": "πΎ [SQL Practice](https://sqlzoo.net/) - Master Joins & Query Optmization.", "javascript": "π [JS.info](https://javascript.info/) - Master ES6+ Features.", "devops": "βοΈ [Roadmap.sh/devops](https://roadmap.sh/devops/) - Learn CI/CD & Infrastructure as Code." } def extract_text(file_obj): """Robust text extraction for PDF and DOCX.""" if file_obj is None: return "" file_path = file_obj.name if hasattr(file_obj, 'name') else str(file_obj) try: if file_path.lower().endswith('.pdf'): with pdfplumber.open(file_path) as pdf: return "".join([page.extract_text() or "" for page in pdf.pages]) elif file_path.lower().endswith('.docx'): doc = Document(file_path) return "\n".join([p.text for p in doc.paragraphs]) else: # Try reading as plain text with open(file_path, 'r', encoding='utf-8') as f: return f.read() except Exception as e: print(f"Extraction error on {file_path}: {e}") return "" def discover_skills(text): if not text: return set() found = set() text_l = text.lower() for skill in TECH_SKILLS: if re.search(r'\b' + re.escape(skill) + r'\b', text_l): found.add(skill) return found def create_score_gauges(match_score, content_score, search_score, ats_score): def make_gauge(val, title, color): return go.Indicator( mode="gauge+number", value=val, title={'text': title, 'font': {'size': 14, 'color': "white"}}, domain={'x': [0, 1], 'y': [0, 1]}, gauge={ 'axis': {'range': [0, 100], 'tickwidth': 1, 'tickcolor': "white"}, 'bar': {'color': color}, 'bgcolor': "rgba(0,0,0,0)", 'borderwidth': 2, 'bordercolor': "gray", 'steps': [ {'range': [0, 40], 'color': 'rgba(255, 0, 0, 0.1)'}, {'range': [40, 70], 'color': 'rgba(255, 255, 0, 0.1)'}, {'range': [70, 100], 'color': 'rgba(0, 255, 0, 0.1)'} ], } ) fig = go.Figure() fig.add_trace(make_gauge(match_score, "Match Score", "#00dfd8")) fig.update_layout( paper_bgcolor='rgba(0,0,0,0)', plot_bgcolor='rgba(0,0,0,0)', font={'color': "white", 'family': "Arial"}, height=300, margin=dict(l=40, r=40, t=80, b=40) ) return fig def create_radar_chart(skills, exp, edu, readiness, search): categories = ['Skills', 'Experience', 'Education', 'Readiness', 'Searchability'] fig = go.Figure() fig.add_trace(go.Scatterpolar( r=[skills, exp, edu, readiness, search], theta=categories, fill='toself', name='Competency Profile', line_color='#00dfd8', fillcolor='rgba(0, 223, 216, 0.3)' )) fig.update_layout( polar=dict( radialaxis=dict(visible=True, range=[0, 100], color="white", gridcolor="gray"), angularaxis=dict(color="white", gridcolor="gray"), bgcolor='rgba(0,0,0,0)' ), showlegend=False, paper_bgcolor='rgba(0,0,0,0)', plot_bgcolor='rgba(0,0,0,0)', height=350, margin=dict(l=60, r=60, t=20, b=20) ) return fig def estimate_salary(score, skills): base = 75 multiplier = 1 + (score / 100) skill_bonus = len(skills) * 2.5 low = round(base * multiplier + skill_bonus) high = round(low * 1.45) return f"${low}k - ${high}k" def main_process(resume_file, jd_text, progress=gr.Progress()): print("\n" + "="*40) print("π INITIALIZING NEURAL INTELLIGENCE CYCLE") print("="*40) try: if not resume_file or not jd_text.strip(): print("β οΈ Incomplete inputs detected.") return [ "β οΈ Missing Inputs", "", None, None, "Incomplete data sequence. Please upload a resume and paste the JD.", "N/A", [], gr.update(visible=False) ] progress(0.1, desc="β‘ Extracting Neural Content...") print("Stage 1: Text Extraction...") resume_text = extract_text(resume_file) if not resume_text.strip(): print("β Extraction failure: text is empty.") return [ "β Extract Error", "", None, None, "Neural bypass failed. Ensure the resume is a readable PDF or DOCX (not just an image).", "N/A", [], gr.update(visible=False) ] progress(0.2, desc="π Discovering Skill Entities...") print("Stage 2: Skill Discovery...") r_skills = discover_skills(resume_text) j_skills = discover_skills(jd_text) match_skills = sorted(list(r_skills.intersection(j_skills))) gap_skills = sorted(list(j_skills - r_skills)) print(f" - Identified {len(match_skills)} matches and {len(gap_skills)} gaps.") progress(0.4, desc="π Computing Semantic Distance...") print("Stage 3: Embedding Computation...") if not model: print("β Error: Semantic model not loaded.") return ["Model Load Error", "N/A", None, None, "The semantic matching engine failed to initialize. Try restarting.", "N/A", [], gr.update(visible=False)] emb1 = model.encode(resume_text, convert_to_tensor=True) emb2 = model.encode(jd_text, convert_to_tensor=True) score = round(util.pytorch_cos_sim(emb1, emb2).item() * 100, 1) print(f" - Semantic Match Score: {score}%") progress(0.7, desc="π€ Syncing Neural Consensus...") print("Stage 4: LLM Ensemble Reasoning...") ensemble_insight = [] # Model 1: Reasoner (FLAN-T5) if llm_reasoner: try: print(" - Querying Reasoner (FLAN-T5)...") prompt_t5 = f"Analyze resume relevance. Score: {score}%. Gaps: {', '.join(gap_skills)}. Summarize fit." t5_out = llm_reasoner(prompt_t5, max_length=100)[0]['generated_text'] ensemble_insight.append(f"**Reasoner**: {t5_out}") except Exception as e: print(f" - T5 Inference Error: {e}") else: print(" - Reasoner LLM not available. Skipping.") ensemble_insight.append("**Reasoner**: Neural reasoning offline. Using heuristic fallback.") # Model 2: Coach (LaMini) if llm_coach: try: print(" - Querying Coach (LaMini)...") prompt_lamini = f"Career coach advice for {score}% match." lamini_out = llm_coach(prompt_lamini, max_length=100, truncation=True)[0]['generated_text'] # Clean up if output contains prompt clean_lamini = lamini_out.split("match.")[-1].strip() if "match." in lamini_out else lamini_out ensemble_insight.append(f"**Coach**: {clean_lamini}") except Exception as e: print(f" - LaMini Inference Error: {e}") else: print(" - Coach LLM not available. Skipping.") ensemble_insight.append("**Coach**: Coaching strategy offline.") ai_analysis = "\n\n".join(ensemble_insight) if ensemble_insight else f"Neural consensus reached alignment at {score}%." # Heuristic metrics print("Stage 5: Calculating Dashboard Metrics...") content_score = min(100, len(resume_text.split()) / 4) search_score = min(100, len(r_skills) * 12) progress(0.9, desc="π¨ Rendering Compass Interface...") print("Stage 6: Finalizing UI Components...") gauge_plot = create_score_gauges(score, content_score, search_score, score*0.9) radar_plot = create_radar_chart(len(match_skills)*12, 80, 85, score, search_score) salary_range = estimate_salary(score, match_skills) present_str = ", ".join([s.upper() for s in match_skills]) if match_skills else "No direct matches." gap_str = ", ".join([s.upper() for s in gap_skills]) if gap_skills else "No critical gaps!" print(f"β CYCLE COMPLETE | Score: {score}%") # Return exactly what the UI elements expect in the correct order return [ present_str, gap_str, gauge_plot, radar_plot, ai_analysis, salary_range, gap_skills, gr.update(visible=True) ] except Exception as e: print(f"π₯ CRITICAL SYSTEM FAULT: {str(e)}") return [ "β οΈ System Error", "Analysis Failed", None, None, f"An unexpected error occurred: {str(e)}", "N/A", [], gr.update(visible=False) ] def generate_roadmap(gap_skills): if not gap_skills: return """
Both Reasoner & Coach models agree: Your profile is industry-ready.
{res}
Unified Neural Ensemble Dashboard