# srcdaksh / app.py — Career Compass AI (Gradio app)
# Uploaded by mkshari — "Update app.py" (commit 889a5d3, verified)
import gradio as gr
import spacy
import pdfplumber
from docx import Document
from sentence_transformers import SentenceTransformer, util
import re
import plotly.graph_objects as go
import sys
import os
import torch
from transformers import pipeline
# ---------------------------------------------------------------------------
# Model initialization — performed once at import time so every request
# reuses the same loaded models. Each loader is wrapped so a single failure
# degrades that feature to None instead of crashing the whole app; downstream
# code checks for None before using a model.
# ---------------------------------------------------------------------------
print("πŸš€ [1/4] Initializing Semantic Matcher (all-MiniLM-L6-v2)...")
try:
    # Pin to CPU for deterministic startup regardless of host GPU state.
    model = SentenceTransformer('all-MiniLM-L6-v2', device='cpu')
except Exception as e:
    print(f"❌ Semantic Matcher failed: {e}")
    model = None

print("πŸš€ [2/4] Initializing Reasoning LLM (FLAN-T5-Base)...")
try:
    # Use CPU by default for stability on Windows unless explicitly requested
    # (device=-1 is the transformers convention for CPU).
    llm_reasoner = pipeline("text2text-generation", model="google/flan-t5-base", device=-1)
except Exception as e:
    print(f"⚠️ Reasoning LLM failed: {e}")
    llm_reasoner = None

print("πŸš€ [3/4] Initializing Interview Coach (LaMini-GPT)...")
try:
    llm_coach = pipeline("text-generation", model="MBZUAI/LaMini-GPT-124M", device=-1)
except Exception as e:
    print(f"⚠️ Coach LLM failed: {e}")
    llm_coach = None

print("πŸš€ [4/4] Loading NLP Entities (spaCy)...")
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # spacy.load raises OSError when the model package is not installed:
    # download it once via the current interpreter, then retry.
    print("πŸ“₯ Downloading spaCy model...")
    os.system(f"{sys.executable} -m spacy download en_core_web_sm")
    try:
        nlp = spacy.load("en_core_web_sm")
    except Exception:
        nlp = None
        print("❌ NLP Load failed completely.")
print("✨ Intelligence Engine Ready.")
# Flat vocabulary of recognizable skill keywords (all lower-case); matched
# against resume/JD text by discover_skills(). Multi-word and symbol-bearing
# entries ("rest api", "c++", "node.js") are matched as whole phrases.
TECH_SKILLS = [
    "python", "javascript", "react", "fastapi", "aws", "docker", "kubernetes", "sql",
    "git", "ml", "nlp", "tensorflow", "pytorch", "java", "golang", "postgresql",
    "mongodb", "redis", "devops", "rest api", "graphql", "scikit-learn", "pandas",
    "numpy", "django", "flask", "typescript", "angular", "vue", "html", "css",
    "node.js", "express", "azure", "gcp", "linux", "terraform", "ansible", "jenkins",
    "prompt engineering", "openai", "llm", "bert", "tableau", "powerbi", "excel",
    "c#", "c++", "rust", "r", "spark", "hadoop", "kafka", "mysql", "oracle", "snowflake"
]

# Curated learning resources keyed by (lower-case) skill name. Values are
# Markdown snippets rendered inside the roadmap cards by generate_roadmap();
# skills without an entry fall back to a generic "Master X" tip there.
ROADMAP_DB = {
    "python": "🐍 [Master Python](https://realpython.com/) - Focus on Backend Automation & Data Science.",
    "react": "βš›οΈ [React.dev](https://react.dev/) - Master Hooks & State Management.",
    "aws": "☁️ [AWS Builder](https://explore.skillbuilder.aws/) - Get Certified (Solutions Architect).",
    "docker": "🐳 [Docker Guide](https://docs.docker.com/) - Learn Container Architecture.",
    "kubernetes": "☸️ [K8s Certification](https://kubernetes.io/docs/tutorials/) - Master Orchestration.",
    "ml": "πŸ€– [ML Specialization](https://www.coursera.org/specializations/machine-learning-introduction) - Focus on Scikit-Learn.",
    "nlp": "✍️ [HF NLP Course](https://huggingface.co/learn/nlp-course/) - Master Transformers.",
    "sql": "πŸ’Ύ [SQL Practice](https://sqlzoo.net/) - Master Joins & Query Optmization.",
    "javascript": "πŸ“œ [JS.info](https://javascript.info/) - Master ES6+ Features.",
    "devops": "βš™οΈ [Roadmap.sh/devops](https://roadmap.sh/devops/) - Learn CI/CD & Infrastructure as Code."
}
def extract_text(file_obj):
    """Extract raw text from an uploaded resume (PDF, DOCX, or plain text).

    Accepts either a Gradio file object (uses its .name path) or a plain
    path-like value. Returns "" for None input or on any extraction error
    (best-effort: failures are logged, never raised).
    """
    if file_obj is None:
        return ""
    # Gradio file objects expose the temp path via .name; otherwise treat
    # the argument itself as a path.
    path = getattr(file_obj, 'name', str(file_obj))
    lowered = path.lower()
    try:
        if lowered.endswith('.pdf'):
            with pdfplumber.open(path) as pdf:
                # Pages with no extractable text (e.g. scans) contribute "".
                return "".join(page.extract_text() or "" for page in pdf.pages)
        if lowered.endswith('.docx'):
            return "\n".join(par.text for par in Document(path).paragraphs)
        # Anything else: fall back to reading as UTF-8 plain text.
        with open(path, 'r', encoding='utf-8') as handle:
            return handle.read()
    except Exception as exc:
        print(f"Extraction error on {path}: {exc}")
        return ""
def discover_skills(text, skills=None):
    """Return the set of known skills mentioned in *text* (case-insensitive).

    Args:
        text: Free text to scan (resume or job description); falsy -> set().
        skills: Optional iterable of lower-case skill strings to search for.
            Defaults to the module-level TECH_SKILLS vocabulary, so existing
            single-argument callers are unaffected.

    Returns:
        Set of matched skill strings (as they appear in the vocabulary).

    Fix: the previous pattern used re's word-boundary anchor on both sides,
    which never matches skills ending in a non-word character — "c++" and
    "c#" could not be detected because a trailing boundary after '+'/'#'
    requires a following word character. Explicit lookarounds behave like
    word boundaries but also work at those edges.
    """
    if not text:
        return set()
    if skills is None:
        skills = TECH_SKILLS
    found = set()
    text_l = text.lower()
    for skill in skills:
        # (?<!\w) / (?!\w): boundary checks that hold even when the skill
        # starts or ends with a symbol such as '+', '#', or '.'.
        if re.search(r'(?<!\w)' + re.escape(skill) + r'(?!\w)', text_l):
            found.add(skill)
    return found
def create_score_gauges(match_score, content_score, search_score, ats_score):
    """Build the "Alignment Core" dashboard: four gauges in a 2x2 grid.

    Fix: the previous version accepted all four scores but rendered only the
    match gauge — content_score, search_score, and ats_score were silently
    ignored. Each score now gets its own gauge, positioned with a per-trace
    `domain` rectangle inside the figure's unit square.

    Args:
        match_score: Semantic resume-vs-JD similarity (0-100).
        content_score: Heuristic resume content depth (0-100).
        search_score: Heuristic keyword searchability (0-100).
        ats_score: Estimated ATS readiness (0-100).

    Returns:
        A plotly Figure with four gauge+number indicators on a dark theme.
    """
    def make_gauge(val, title, color, domain):
        # One gauge trace; shared red/yellow/green bands for quick reading.
        return go.Indicator(
            mode="gauge+number",
            value=val,
            title={'text': title, 'font': {'size': 14, 'color': "white"}},
            domain=domain,
            gauge={
                'axis': {'range': [0, 100], 'tickwidth': 1, 'tickcolor': "white"},
                'bar': {'color': color},
                'bgcolor': "rgba(0,0,0,0)",
                'borderwidth': 2,
                'bordercolor': "gray",
                'steps': [
                    {'range': [0, 40], 'color': 'rgba(255, 0, 0, 0.1)'},
                    {'range': [40, 70], 'color': 'rgba(255, 255, 0, 0.1)'},
                    {'range': [70, 100], 'color': 'rgba(0, 255, 0, 0.1)'}
                ],
            }
        )
    fig = go.Figure()
    fig.add_trace(make_gauge(match_score, "Match Score", "#00dfd8", {'x': [0.0, 0.45], 'y': [0.55, 1.0]}))
    fig.add_trace(make_gauge(content_score, "Content Depth", "#7ee787", {'x': [0.55, 1.0], 'y': [0.55, 1.0]}))
    fig.add_trace(make_gauge(search_score, "Searchability", "#d2a8ff", {'x': [0.0, 0.45], 'y': [0.0, 0.45]}))
    fig.add_trace(make_gauge(ats_score, "ATS Readiness", "#ffa657", {'x': [0.55, 1.0], 'y': [0.0, 0.45]}))
    fig.update_layout(
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(0,0,0,0)',
        font={'color': "white", 'family': "Arial"},
        height=420,  # taller than the old single-gauge 300px so 2x2 stays legible
        margin=dict(l=40, r=40, t=60, b=40)
    )
    return fig
def create_radar_chart(skills, exp, edu, readiness, search):
    """Render the five-axis competency radar (polar) chart.

    Each argument is a 0-100 score plotted on its own axis; the filled
    polygon gives an at-a-glance profile shape.
    """
    axis_labels = ['Skills', 'Experience', 'Education', 'Readiness', 'Searchability']
    profile = go.Scatterpolar(
        r=[skills, exp, edu, readiness, search],
        theta=axis_labels,
        fill='toself',
        name='Competency Profile',
        line_color='#00dfd8',
        fillcolor='rgba(0, 223, 216, 0.3)'
    )
    fig = go.Figure(data=[profile])
    # Transparent backgrounds + white axes to blend with the dark UI theme.
    fig.update_layout(
        polar=dict(
            radialaxis=dict(visible=True, range=[0, 100], color="white", gridcolor="gray"),
            angularaxis=dict(color="white", gridcolor="gray"),
            bgcolor='rgba(0,0,0,0)'
        ),
        showlegend=False,
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(0,0,0,0)',
        height=350,
        margin=dict(l=60, r=60, t=20, b=20)
    )
    return fig
def estimate_salary(score, skills):
    """Project a salary band string (in $k) from match score and skills.

    Heuristic: a $75k base scaled by (1 + score/100), plus $2.5k per
    matched skill; the upper bound sits 45% above the lower bound.
    """
    scaled_base = 75 * (1 + score / 100)
    low = round(scaled_base + 2.5 * len(skills))
    high = round(low * 1.45)
    return f"${low}k - ${high}k"
def main_process(resume_file, jd_text, progress=gr.Progress()):
    """End-to-end analysis pipeline behind the "Run Intelligent Cycle" button.

    Stages: extract resume text -> keyword skill matching -> sentence-embedding
    similarity -> LLM ensemble commentary -> heuristic dashboard metrics.

    Args:
        resume_file: gr.File upload (PDF/DOCX/plain text) or None.
        jd_text: Job-description text pasted into the textbox.
        progress: Gradio progress tracker. NOTE(review): the gr.Progress()
            default is evaluated once at definition time — this appears to be
            gradio's documented idiom for enabling progress reporting rather
            than a mutable-default bug; confirm against the gradio version used.

    Returns:
        A list of exactly 8 values matching the run_btn.click() outputs order:
        [match_display str, gap_display str, gauge Figure|None, radar
         Figure|None, analysis markdown str, salary str, gap_skills list
         (feeds gap_state), gr.update toggling roadmap_container visibility].
        Every early-exit path returns the same 8-slot shape.
    """
    print("\n" + "="*40)
    print("πŸš€ INITIALIZING NEURAL INTELLIGENCE CYCLE")
    print("="*40)
    try:
        # Guard: both inputs are required before any heavy work starts.
        if not resume_file or not jd_text.strip():
            print("⚠️ Incomplete inputs detected.")
            return [
                "⚠️ Missing Inputs", "", None, None,
                "Incomplete data sequence. Please upload a resume and paste the JD.", "N/A", [], gr.update(visible=False)
            ]
        progress(0.1, desc="⚑ Extracting Neural Content...")
        print("Stage 1: Text Extraction...")
        resume_text = extract_text(resume_file)
        if not resume_text.strip():
            # Typically a scanned/image-only PDF: extraction yields no text.
            print("❌ Extraction failure: text is empty.")
            return [
                "❌ Extract Error", "", None, None,
                "Neural bypass failed. Ensure the resume is a readable PDF or DOCX (not just an image).", "N/A", [], gr.update(visible=False)
            ]
        progress(0.2, desc="πŸ” Discovering Skill Entities...")
        print("Stage 2: Skill Discovery...")
        # Keyword-level matching against the TECH_SKILLS vocabulary, on both sides.
        r_skills = discover_skills(resume_text)
        j_skills = discover_skills(jd_text)
        match_skills = sorted(list(r_skills.intersection(j_skills)))
        gap_skills = sorted(list(j_skills - r_skills))  # in JD but not in resume
        print(f" - Identified {len(match_skills)} matches and {len(gap_skills)} gaps.")
        progress(0.4, desc="πŸ“ Computing Semantic Distance...")
        print("Stage 3: Embedding Computation...")
        if not model:
            # Semantic model failed to load at startup; bail out gracefully.
            print("❌ Error: Semantic model not loaded.")
            return ["Model Load Error", "N/A", None, None, "The semantic matching engine failed to initialize. Try restarting.", "N/A", [], gr.update(visible=False)]
        # Whole-document cosine similarity, scaled to a 0-100 score (1 decimal).
        emb1 = model.encode(resume_text, convert_to_tensor=True)
        emb2 = model.encode(jd_text, convert_to_tensor=True)
        score = round(util.pytorch_cos_sim(emb1, emb2).item() * 100, 1)
        print(f" - Semantic Match Score: {score}%")
        progress(0.7, desc="πŸ€– Syncing Neural Consensus...")
        print("Stage 4: LLM Ensemble Reasoning...")
        ensemble_insight = []
        # Model 1: Reasoner (FLAN-T5) — summarizes fit; a static offline note
        # is appended instead when the model never loaded. (An inference
        # error mid-run is logged but contributes no insight line.)
        if llm_reasoner:
            try:
                print(" - Querying Reasoner (FLAN-T5)...")
                prompt_t5 = f"Analyze resume relevance. Score: {score}%. Gaps: {', '.join(gap_skills)}. Summarize fit."
                t5_out = llm_reasoner(prompt_t5, max_length=100)[0]['generated_text']
                ensemble_insight.append(f"**Reasoner**: {t5_out}")
            except Exception as e:
                print(f" - T5 Inference Error: {e}")
        else:
            print(" - Reasoner LLM not available. Skipping.")
            ensemble_insight.append("**Reasoner**: Neural reasoning offline. Using heuristic fallback.")
        # Model 2: Coach (LaMini) — causal LM tends to echo the prompt, so the
        # output is split on the prompt's trailing "match." to strip the echo.
        if llm_coach:
            try:
                print(" - Querying Coach (LaMini)...")
                prompt_lamini = f"Career coach advice for {score}% match."
                lamini_out = llm_coach(prompt_lamini, max_length=100, truncation=True)[0]['generated_text']
                # Clean up if output contains prompt
                clean_lamini = lamini_out.split("match.")[-1].strip() if "match." in lamini_out else lamini_out
                ensemble_insight.append(f"**Coach**: {clean_lamini}")
            except Exception as e:
                print(f" - LaMini Inference Error: {e}")
        else:
            print(" - Coach LLM not available. Skipping.")
            ensemble_insight.append("**Coach**: Coaching strategy offline.")
        ai_analysis = "\n\n".join(ensemble_insight) if ensemble_insight else f"Neural consensus reached alignment at {score}%."
        # Heuristic metrics
        print("Stage 5: Calculating Dashboard Metrics...")
        # Rough proxies, capped at 100: word count / 4 for content depth,
        # 12 points per skill found in the resume for searchability.
        content_score = min(100, len(resume_text.split()) / 4)
        search_score = min(100, len(r_skills) * 12)
        progress(0.9, desc="🎨 Rendering Compass Interface...")
        print("Stage 6: Finalizing UI Components...")
        # ATS readiness is approximated as 90% of the semantic score.
        gauge_plot = create_score_gauges(score, content_score, search_score, score*0.9)
        radar_plot = create_radar_chart(len(match_skills)*12, 80, 85, score, search_score)
        salary_range = estimate_salary(score, match_skills)
        present_str = ", ".join([s.upper() for s in match_skills]) if match_skills else "No direct matches."
        gap_str = ", ".join([s.upper() for s in gap_skills]) if gap_skills else "No critical gaps!"
        print(f"βœ… CYCLE COMPLETE | Score: {score}%")
        # Return exactly what the UI elements expect in the correct order
        return [
            present_str, gap_str, gauge_plot, radar_plot,
            ai_analysis, salary_range, gap_skills, gr.update(visible=True)
        ]
    except Exception as e:
        # Last-resort guard so the UI always receives a well-formed 8-slot list.
        print(f"πŸ”₯ CRITICAL SYSTEM FAULT: {str(e)}")
        return [
            "⚠️ System Error", "Analysis Failed", None, None,
            f"An unexpected error occurred: {str(e)}", "N/A", [], gr.update(visible=False)
        ]
def generate_roadmap(gap_skills):
    """Render the "Knowledge Upgrade Path" panel as an HTML string.

    Args:
        gap_skills: Skills found in the JD but missing from the resume
            (list of lower-case strings, wired in from gap_state).

    Returns:
        HTML: a "100% ready" banner when there are no gaps, otherwise a grid
        of one upgrade card per gap, preceded by an LLM-generated insight
        line when the coach model is available.

    Fix: the LLM call previously used a bare `except:`, which also swallows
    KeyboardInterrupt/SystemExit; narrowed to Exception and the failure is
    now logged before falling back to the static tip.
    """
    if not gap_skills:
        return """
        <div style='text-align: center; padding: 40px; background: rgba(0, 223, 216, 0.05); border-radius: 15px; border: 1px dashed #00dfd8;'>
        <h2 style='color: #00dfd8; margin: 0;'>🌟 Neural Consensus: 100% Ready</h2>
        <p style='color: #8b949e;'>Both Reasoner & Coach models agree: Your profile is industry-ready.</p>
        </div>
        """
    # Using LLMs to generate more specific roadmap tips if available.
    llm_tips = ""
    if llm_coach:
        try:
            prompt = f"Provide a one-sentence learning goal for each skill: {', '.join(gap_skills)}."
            llm_tips = llm_coach(prompt, max_length=150)[0]['generated_text']
        except Exception as e:
            print(f" - Roadmap tip inference failed: {e}")
            llm_tips = "Focus on documentation and hands-on builds."
    cards_html = f"<div style='margin-bottom: 20px; color: #8b949e; font-size: 11px; font-style: italic;'>AI Insight: {llm_tips}</div>"
    cards_html += "<div style='display: grid; grid-template-columns: repeat(auto-fill, minmax(250px, 1fr)); gap: 20px;'>"
    for s in gap_skills:
        # Curated resource when available, generic prompt otherwise.
        res = ROADMAP_DB.get(s.lower(), f"Master {s.upper()} via specialized projects.")
        cards_html += f"""
        <div style='background: rgba(255, 255, 255, 0.03); border: 1px solid rgba(0, 223, 216, 0.2); border-radius: 12px; padding: 15px;'>
        <div style='display: flex; justify-content: space-between;'>
        <span style='color: #00dfd8; font-size: 10px;'>NEURAL UPGRADE</span>
        <span style='color: #8b949e; font-size: 10px;'>LVL: EXPERT</span>
        </div>
        <h4 style='color: #fff; margin: 10px 0 5px 0;'>{s.upper()}</h4>
        <p style='color: #8b949e; font-size: 11px; margin-bottom: 12px;'>{res}</p>
        </div>
        """
    cards_html += "</div>"
    return f"<div>{cards_html}</div>"
def generate_interview_questions(gaps):
    """Produce Markdown interview-prep guidance from both LLMs.

    Args:
        gaps: List of missing skills; only the first three seed the prompts.
            An empty list falls back to a generic "strategic leadership" theme.

    Returns:
        Markdown with a technical-question section (FLAN-T5) and a career
        tip (LaMini). Static fallback text is used when a model is offline
        or its inference raises.

    Fix: both inference calls previously used bare `except: pass`, which
    also swallows KeyboardInterrupt/SystemExit and hides the cause; narrowed
    to Exception with the error logged (static fallbacks still apply).
    """
    print("πŸ€– Neural Ensemble Coaching active...")
    context = ", ".join(gaps[:3]) if gaps else "strategic leadership"
    # Model 1: Reasoner (FLAN-T5) generates formal questions.
    q1 = "1. Walk us through a scenario where you bridged technical gaps."
    if llm_reasoner:
        try:
            prompt_t5 = f"List 2 technical questions for {context}."
            q1 = llm_reasoner(prompt_t5, max_length=100)[0]['generated_text']
        except Exception as e:
            print(f" - Reasoner question generation failed: {e}")
    # Model 2: Coach (LaMini) generates behavioral tips.
    tips = "Focus on your adaptability."
    if llm_coach:
        try:
            prompt_lamini = f"Give a career coaching tip for someone learning {context}."
            tips = llm_coach(prompt_lamini, max_length=100)[0]['generated_text']
        except Exception as e:
            print(f" - Coach tip generation failed: {e}")
    return f"### 🎀 Neural Ensemble Coaching\n**Technical Focus**: {q1}\n\n**Career Strategy**: {tips}\n\n*This insight was cross-verified by T5 and LaMini models.*"
# Premium CSS for Glassmorphism.
# Injected via gr.Blocks(css=STYLE): dark backdrop, translucent "glass"
# panels (.glass-panel), and the pill-shaped status tags (.hub-tag /
# .active-tag) referenced by the header HTML.
STYLE = """
.gradio-container { background-color: #0b0e14 !important; color: white !important; font-family: 'Inter', sans-serif !important; }
.glass-panel { background: rgba(255, 255, 255, 0.03) !important; border: 1px solid rgba(255, 255, 255, 0.08) !important; border-radius: 12px !important; padding: 18px !important; }
.hub-tag { background: #1a1e26; border: 1px solid #30363d; border-radius: 6px; padding: 4px 10px; font-size: 11px; color: #8b949e; display: inline-block; margin-right: 5px; }
.active-tag { background: rgba(0, 223, 216, 0.1); border-color: #00dfd8; color: #00dfd8; }
"""
# ---------------------------------------------------------------------------
# UI definition: header, input panel + dashboard, and a roadmap section that
# stays hidden until main_process reveals it via gr.update(visible=True).
# NOTE(review): source indentation was mangled; the widget nesting below is
# reconstructed from the layout semantics — confirm against the original file.
# ---------------------------------------------------------------------------
with gr.Blocks(theme=gr.themes.Soft(), css=STYLE, title="Career Compass AI") as demo:
    # Header row: branding (left) + model-status tags (right).
    with gr.Row():
        with gr.Column(scale=3):
            gr.HTML("""
                <div style="padding: 10px;">
                <h1 style="color: #00dfd8; margin: 0; font-size: 2.2em;">🧭 Career Compass</h1>
                <p style="color: #8b949e; margin-top: 5px;">Unified Neural Ensemble Dashboard</p>
                </div>
            """)
        with gr.Column(scale=2):
            gr.HTML(f"""
                <div style="text-align: right; padding-top: 20px;">
                <span class="hub-tag active-tag">Ensemble Mode: ACTIVE</span>
                <span class="hub-tag">T5-Base</span>
                <span class="hub-tag">LaMini-GPT</span>
                </div>
            """)
    # Main row: inputs on the left, results dashboard on the right.
    with gr.Row():
        with gr.Column(scale=1, variant="panel"):
            gr.Markdown("### πŸ“₯ Neural Feed")
            resume_input = gr.File(label="Target Resume")
            jd_input = gr.Textbox(label="Requirement Set", lines=8, placeholder="Paste JD sequence...")
            run_btn = gr.Button("⚑ Run Intelligent Cycle", variant="primary")
        with gr.Column(scale=2):
            with gr.Row():
                with gr.Column():
                    gr.Markdown("### πŸ“Š Alignment Core")
                    gauge_plot = gr.Plot(label="Match Quality")
                with gr.Column():
                    gr.Markdown("### πŸ•ΈοΈ Competency Radar")
                    radar_plot = gr.Plot(label="Skills Radar")
            with gr.Row():
                with gr.Column(elem_classes=["glass-panel"]):
                    gr.Markdown("### πŸ’‘ Ensemble Insight")
                    analysis_text = gr.Markdown("Waiting for neural consensus...")
                with gr.Column(elem_classes=["glass-panel"]):
                    gr.Markdown("### πŸ’° Market Value")
                    salary_display = gr.Textbox(label="Projected Range", interactive=False)
            with gr.Row():
                with gr.Column():
                    match_display = gr.Textbox(label="Neural Matches", interactive=False)
                with gr.Column():
                    gap_display = gr.Textbox(label="Identified Faults", interactive=False)
    # Roadmap section: hidden until a successful analysis run reveals it.
    with gr.Column(visible=False) as roadmap_container:
        gr.Markdown("---")
        with gr.Row(elem_classes=["glass-panel"]):
            with gr.Column(scale=2):
                gr.Markdown("### πŸš€ Knowledge Upgrade Path")
                roadmap_btn = gr.Button("Initialize Pathfinder", variant="secondary")
                roadmap_output = gr.HTML()
            with gr.Column(scale=1):
                gr.Markdown("### 🎀 Interview Coaching")
                interview_prep = gr.Markdown("Pathfinder inactive.")
    # Hidden state carrying the last run's gap list into the roadmap handlers.
    gap_state = gr.State(value=[])
    # Wire the main button; output order must match main_process's return list.
    run_btn.click(
        fn=main_process,
        inputs=[resume_input, jd_input],
        outputs=[
            match_display, gap_display, gauge_plot, radar_plot,
            analysis_text, salary_display, gap_state, roadmap_container
        ]
    )
    # Pathfinder button feeds gap_state into both roadmap and coaching panels.
    roadmap_btn.click(
        fn=lambda gaps: [generate_roadmap(gaps), generate_interview_questions(gaps)],
        inputs=[gap_state],
        outputs=[roadmap_output, interview_prep]
    )
if __name__ == "__main__":
    # Launch the Gradio server (blocking) when run as a script.
    demo.launch()