Spaces:

mkshari
/

srcdaksh

Sleeping

App Files Files Community

mkshari committed on Mar 10

Commit

a25590b

verified ·

1 Parent(s): ed1f07a

Update app.py

Browse files

Files changed (1) hide show

app.py +133 -219

app.py CHANGED Viewed

@@ -3,257 +3,171 @@ import spacy
 import pdfplumber
 from docx import Document
 from sentence_transformers import SentenceTransformer, util
-import pandas as pd
 import re
-# Load models
-print("Loading models...")
 try:
     nlp = spacy.load("en_core_web_sm")
-    print("spaCy model loaded successfully.")
-except Exception as e:
-    print(f"spaCy load error: {e}. Trying direct import...")
-    try:
-        import en_core_web_sm
-        nlp = en_core_web_sm.load()
-    except:
-        print("Model not found. Using fallback keyword matching only.")
-        nlp = None
 model = SentenceTransformer('all-MiniLM-L6-v2')
-print("Sentence Transformer loaded.")
-# Common Skill Dictionary (Expanded)
-SKILLS_DB = [
     "python", "javascript", "react", "fastapi", "aws", "docker", "kubernetes", "sql",
-    "git", "machine learning", "nlp", "tensorflow", "pytorch", "java", "c++", "golang",
-    "postgresql", "mongodb", "redis", "cloud computing", "devops", "rest api", "graphql",
-    "scikit-learn", "pandas", "numpy", "django", "flask", "typescript", "angular", "vue",
-    "html", "css", "node.js", "express", "azure", "gcp", "linux", "bash", "jenkins",
-    "terraform", "ansible", "prompt engineering", "openai", "llm", "bert", "transformer",
-    "tableau", "powerbi", "excel", "dynamic programming", "data structures", "algorithms"
 ]
 ROADMAP_DB = {
-    # (previous content kept, adding more)
-    "python": "Master Python: [Real Python](https://realpython.com/) | [Programming with Mosh](https://www.youtube.com/user/programmingwithmosh)",
-    "react": "Build UI with React: [Official Docs](https://react.dev/) | [FreeCodeCamp React Course](https://www.freecodecamp.org/news/free-react-course-2024/)",
-    "aws": "Cloud Mastery: [AWS Skill Builder](https://explore.skillbuilder.aws/) | [Cloud Guru](https://www.pluralsight.com/cloud-computing/aws)",
-    "docker": "Containerization: [Docker Get Started](https://docs.docker.com/get-started/) | [Docker Tutorial for Beginners](https://www.youtube.com/watch?v=pg19Z8LL06w)",
-    "kubernetes": "Orchestration: [K8s Basics](https://kubernetes.io/docs/tutorials/kubernetes-basics/) | [Nana's K8s Course](https://www.youtube.com/c/TechWorldwithNana)",
-    "fastapi": "Modern APIs: [FastAPI Docs](https://fastapi.tiangolo.com/) | [TestDriven.io FastAPI](https://testdriven.io/blog/fastapi-crud/)",
-    "nlp": "Language Processing: [Hugging Face NLP Course](https://huggingface.co/learn/nlp-course/) | [Stanford CS224N](https://web.stanford.edu/class/cs224n/)",
-    "machine learning": "AI Fundamentals: [ML Specialization by Andrew Ng](https://www.coursera.org/specializations/machine-learning-introduction)",
-    "sql": "Database Management: [SQLZoo](https://sqlzoo.net/) | [Mode SQL Tutorial](https://mode.com/sql-tutorial/)",
-    "git": "Version Control: [Git Immersion](https://gitimmersion.com/) | [GitHub Learning Path](https://skills.github.com/)",
-    "javascript": "JS Deep Dive: [MDN Web Docs](https://developer.mozilla.org/en-US/docs/Web/JavaScript) | [JavaScript.info](https://javascript.info/)",
-    "typescript": "Strict Typing: [TypeScript Handbook](https://www.typescriptlang.org/docs/handbook/intro.html)",
-    "postgresql": "Advanced Data: [Postgres Tutorial](https://www.postgresqltutorial.com/)",
-    "rest api": "API Design: [RESTful API Guide](https://restfulapi.net/)",
-    "html": "Web Basics: [W3Schools HTML](https://www.w3schools.com/html/)",
-    "css": "Styling: [CSS-Tricks](https://css-tricks.com/)",
-    "node.js": "Backend JS: [Node.js Guide](https://nodejs.dev/en/learn/)"
 }
-def extract_text_from_pdf(pdf_file):
-    try:
-        with pdfplumber.open(pdf_file) as pdf:
-            text = ""
-            for page in pdf.pages:
-                text += page.extract_text() or ""
-        return text
-    except Exception as e:
-        print(f"PDF Extraction Error: {e}")
         return ""
-def extract_text_from_docx(docx_file):
     try:
-        doc = Document(docx_file)
-        text = ""
-        for para in doc.paragraphs:
-            text += para.text + "\n"
-        return text
     except Exception as e:
-        print(f"DOCX Extraction Error: {e}")
-        return ""
-def clean_text(text):
-    text = re.sub(r'[^a-zA-Z0-9\s#\.\+]', ' ', text) # Preserve # for C#, . for Node.js, + for C++
-    return text.lower().strip()
-def get_skills(text):
-    clean_t = clean_text(text)
-    found_skills = set()
-    for skill in SKILLS_DB:
-        # Improved regex to handle skills with dots or pluses
-        pattern = r'\b' + re.escape(skill) + r'\b'
-        if re.search(pattern, clean_t):
-            found_skills.add(skill)
-    return found_skills
-def analyze_resume(resume_file, jd_text):
-    print("Analysis started...")
-    if resume_file is None or not jd_text.strip():
-        return "Please upload a resume and provide a job description.", "", "", 0, []
-    # Step 1: Extract text
-    resume_path = resume_file.name
-    print(f"Extracting text from: {resume_path}")
-    if resume_path.lower().endswith('.pdf'):
-        resume_text = extract_text_from_pdf(resume_path)
-    elif resume_path.lower().endswith('.docx'):
-        resume_text = extract_text_from_docx(resume_path)
-    else:
-        return "Unsupported file format. Please upload PDF or DOCX.", "", "", 0, []
     if not resume_text.strip():
-        return "Could not extract text from the file. Please check the file content.", "", "", 0, []
-    # Step 2: Skill Extraction
-    resume_skills = get_skills(resume_text)
-    jd_skills = get_skills(jd_text)
-    print(f"Resume Skills: {resume_skills}")
-    print(f"JD Skills: {jd_skills}")
-    present_skills = list(resume_skills.intersection(jd_skills))
-    missing_skills = list(jd_skills - resume_skills)
-    # Step 3: Similarity Score
-    embeddings1 = model.encode(resume_text, convert_to_tensor=True)
-    embeddings2 = model.encode(jd_text, convert_to_tensor=True)
-    cosine_score = util.pytorch_cos_sim(embeddings1, embeddings2)
-    match_percentage = round(cosine_score.item() * 100, 2)
-    # Adjust score if no skills overlap but similarity is high
-    if not present_skills and match_percentage > 50:
-        match_percentage -= 20 # Penalize for lack of keyword match
-    present_str = ", ".join([s.capitalize() for s in present_skills]) if present_skills else "No matching skills found."
-    missing_str = ", ".join([s.capitalize() for s in missing_skills]) if missing_skills else "All JD skills found in resume!"
-    print(f"Match: {match_percentage}%")
-    return f"{match_percentage}%", present_str, missing_str, match_percentage, missing_skills
-def get_roadmap(missing_skills):
-    if not missing_skills:
-        return "### 🎉 Perfect Match!\nYou already possess all the key skills mentioned in the job description. Tip: ensure you've highlighted these clearly in your experience sections."
-    roadmap_items = []
-    for skill in missing_skills:
-        resource = ROADMAP_DB.get(skill.lower(), f"Suggested resource for {skill}: Check out specialized courses on Coursera, Udemy, or YouTube.")
-        roadmap_items.append(f"#### 📖 {skill.capitalize()}\n{resource}")
-    return "\n\n".join(roadmap_items)
-# Custom CSS for Premium Look
-custom_css = """
-#logo-img {
-    margin: auto;
-    display: block;
-    max-width: 150px;
-}
-.gradio-container {
-    background-color: #f0f2f5;
-    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
-}
-.main-header {
-    text-align: center;
-    color: #003366;
-    margin-bottom: 5px;
-}
-.sub-header {
-    text-align: center;
-    color: #b8860b;
-    margin-top: 0;
-    font-style: italic;
-}
-.sastra-text {
-    text-align: center;
-    font-size: 1em;
-    color: #444;
-    font-weight: bold;
-}
-#analyze-btn {
-    background: linear-gradient(135deg, #003366 0%, #00509d 100%) !important;
-    border: none !important;
-    color: white !important;
-    height: 50px;
-    font-size: 1.1em;
-}
-#roadmap-btn {
-    background: linear-gradient(135deg, #b8860b 0%, #daa520 100%) !important;
-    border: none !important;
-    color: white !important;
-    height: 45px;
-}
-.output-label {
-    text-align: center;
-    font-size: 2em;
-}
-"""
-with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo"), css=custom_css) as demo:
     with gr.Row():
         with gr.Column(scale=1):
-            try:
-                gr.Image("logo.png", show_label=False, height=130, container=False, elem_id="logo-img")
-            except:
-                gr.Markdown("### [LOGO MISSING]")
-        with gr.Column(scale=3):
-            gr.Markdown("# SETHU AI", elem_classes=["main-header"])
-            gr.Markdown("### From Resume to Career Readiness", elem_classes=["sub-header"])
-            gr.Markdown("SASTRA DEEMED UNIVERSITY", elem_classes=["sastra-text"])
     gr.Markdown("---")
     with gr.Row():
         with gr.Column(scale=1):
-            gr.Markdown("### 📥 Step 1: Upload & Paste")
-            resume_input = gr.File(label="Upload Resume (PDF/DOCX)", file_types=[".pdf", ".docx"], type="filepath")
-            jd_input = gr.Textbox(label="Job Description", placeholder="Paste the job requirements here...", lines=10)
-            analyze_btn = gr.Button("Analyze Match", variant="primary", elem_id="analyze-btn")
         with gr.Column(scale=1):
-            gr.Markdown("### 📊 Step 2: Evaluation")
-            match_score_output = gr.Label(label="Match Quality", elem_classes=["output-label"])
-            with gr.Tabs():
-                with gr.TabItem("✅ Skills Matched"):
-                    present_skills_output = gr.Textbox(label="", interactive=False, lines=4)
-                with gr.TabItem("❌ Missing Skills"):
-                    missing_skills_output = gr.Textbox(label="", interactive=False, lines=4)
             gr.Markdown("---")
-            roadmap_btn = gr.Button("🚀 Generate Learning Roadmap", interactive=True, elem_id="roadmap-btn")
-    roadmap_output = gr.Markdown(visible=False)
-    missing_skills_state = gr.State([])
-    def on_analyze(resume, jd):
-        score_str, present, missing, score_val, missing_list = analyze_resume(resume, jd)
-        return {
-            match_score_output: score_str,
-            present_skills_output: present,
-            missing_skills_output: missing,
-            missing_skills_state: missing_list,
-            roadmap_output: gr.update(visible=False)
-        }
-    def on_roadmap(missing_list):
-        content = get_roadmap(missing_list)
-        return gr.update(value=content, visible=True)
-    analyze_btn.click(
-        on_analyze,
-        inputs=[resume_input, jd_input],
-        outputs=[match_score_output, present_skills_output, missing_skills_output, missing_skills_state, roadmap_output]
     )
     roadmap_btn.click(
-        on_roadmap,
-        inputs=[missing_skills_state],
         outputs=[roadmap_output]
     )

 import pdfplumber
 from docx import Document
 from sentence_transformers import SentenceTransformer, util
 import re
+import plotly.graph_objects as go
+# Initialize Models once at startup
+print("🚀 Initializing SETHU AI Engine...")
 try:
     nlp = spacy.load("en_core_web_sm")
+except:
+    import os
+    os.system("python -m spacy download en_core_web_sm")
+    nlp = spacy.load("en_core_web_sm")
 model = SentenceTransformer('all-MiniLM-L6-v2')
+# Skill keywords (all lowercase) that discover_skills() looks for in the
+# resume text and in the job-description text.
+TECH_SKILLS = [
     "python", "javascript", "react", "fastapi", "aws", "docker", "kubernetes", "sql",
+    "git", "ml", "nlp", "tensorflow", "pytorch", "java", "golang", "postgresql",
+    "mongodb", "redis", "devops", "rest api", "graphql", "scikit-learn", "pandas",
+    "numpy", "django", "flask", "typescript", "angular", "vue", "html", "css",
+    "node.js", "express", "azure", "gcp", "linux", "terraform", "ansible", "jenkins",
+    "prompt engineering", "openai", "llm", "bert", "tableau", "powerbi", "excel",
+    "c#", "c++", "rust", "r", "spark", "hadoop", "kafka", "mysql", "oracle", "snowflake"
 ]
 ROADMAP_DB = {
+    "python": "🐍 [Master Python](https://realpython.com/) - Focus on Backend Automation & Data Science.",
+    "react": "⚛️ [React.dev](https://react.dev/) - Master Hooks & State Management.",
+    "aws": "☁️ [AWS Builder](https://explore.skillbuilder.aws/) - Get Certified (Solutions Architect).",
+    "docker": "🐳 [Docker Guide](https://docs.docker.com/) - Learn Container Architecture.",
+    "kubernetes": "☸️ [K8s Certification](https://kubernetes.io/docs/tutorials/) - Master Orchestration.",
+    "ml": "🤖 [ML Specialization](https://www.coursera.org/specializations/machine-learning-introduction) - Focus on Scikit-Learn.",
+    "nlp": "✍️ [HF NLP Course](https://huggingface.co/learn/nlp-course/) - Master Transformers.",
+    "sql": "💾 [SQL Practice](https://sqlzoo.net/) - Master Joins & Query Optmization.",
+    "javascript": "📜 [JS.info](https://javascript.info/) - Master ES6+ Features.",
+    "devops": "⚙️ [Roadmap.sh/devops](https://roadmap.sh/devops/) - Learn CI/CD & Infrastructure as Code."
 }
+def extract_text(file_obj):
+    """Robust text extraction for PDF and DOCX."""
+    if file_obj is None:
         return ""
+    # Gradio might pass a file-like object or a string path
+    file_path = file_obj.name if hasattr(file_obj, 'name') else str(file_obj)
     try:
+        if file_path.lower().endswith('.pdf'):
+            with pdfplumber.open(file_path) as pdf:
+                return "".join([page.extract_text() or "" for page in pdf.pages])
+        elif file_path.lower().endswith('.docx'):
+            doc = Document(file_path)
+            return "\n".join([p.text for p in doc.paragraphs])
     except Exception as e:
+        print(f"Extraction error on {file_path}: {e}")
+    return ""
+def discover_skills(text):
+    if not text: return set()
+    found = set()
+    text_l = text.lower()
+    for skill in TECH_SKILLS:
+        if re.search(r'\b' + re.escape(skill) + r'\b', text_l):
+            found.add(skill)
+    return found
+def create_gauge(score):
+    fig = go.Figure(go.Indicator(
+        mode = "gauge+number",
+        value = score,
+        domain = {'x': [0, 1], 'y': [0, 1]},
+        gauge = {
+            'axis': {'range': [0, 100]},
+            'bar': {'color': "#003366"},
+            'steps': [
+                {'range': [0, 50], 'color': "#ffcccc"},
+                {'range': [50, 80], 'color': "#fff3cd"},
+                {'range': [80, 100], 'color': "#d4edda"}
+            ],
+        }
+    ))
+    fig.update_layout(height=250, margin=dict(l=30, r=30, t=30, b=30), paper_bgcolor="rgba(0,0,0,0)")
+    return fig
+def main_process(resume_file, jd_text):
+    print("--- New Analysis Request ---")
+    if not resume_file or not jd_text.strip():
+        return "⚠️ Error: Please upload a resume and paste the JD.", "", None, [], gr.update(visible=False)
+    # 1. Extraction
+    resume_text = extract_text(resume_file)
     if not resume_text.strip():
+        return "⚠️ Error: Failed to extract text from resume. Ensure it's not and image-only PDF.", "", None, [], gr.update(visible=False)
+    # 2. Skill Matching
+    r_skills = discover_skills(resume_text)
+    j_skills = discover_skills(jd_text)
+    match_skills = sorted(list(r_skills.intersection(j_skills)))
+    gap_skills = sorted(list(j_skills - r_skills))
+    # 3. AI scoring
+    emb1 = model.encode(resume_text, convert_to_tensor=True)
+    emb2 = model.encode(jd_text, convert_to_tensor=True)
+    score = round(util.pytorch_cos_sim(emb1, emb2).item() * 100, 1)
+    # 4. Results Formatting
+    present_str = ", ".join([s.upper() for s in match_skills]) if match_skills else "No direct skill matches found."
+    gap_str = ", ".join([s.upper() for s in gap_skills]) if gap_skills else "No major skill gaps detected!"
+    plot = create_gauge(score)
+    print(f"Analysis Complete. Score: {score}")
+    return present_str, gap_str, plot, gap_skills, gr.update(visible=True)
+def generate_roadmap(gap_skills):
+    if not gap_skills:
+        return "### 🌟 Career Ready!\nYour profile is an excellent match for this role. Focus on practicing behavioral interview questions."
+    roadmap = "### 🛤️ Personalized Readiness Roadmap\n\n"
+    for s in gap_skills:
+        res = ROADMAP_DB.get(s.lower(), f"Learn **{s.upper()}** through hands-on projects and documentation.")
+        roadmap += f"- **{s.upper()}**: {res}\n"
+    return roadmap
+# UI layout: header row, then an input column beside a results column, then a
+# roadmap section, followed by the shared state and the button event wiring.
+with gr.Blocks(theme=gr.themes.Soft(), title="SETHU AI") as demo:
     with gr.Row():
         with gr.Column(scale=1):
+            gr.Image("logo.png", show_label=False, height=120, container=False)
+        with gr.Column(scale=4):
+            gr.Markdown("# SETHU AI - Career Intelligence Hub")
+            gr.Markdown("### From Resume to Career Readiness | Powered by SASTRA DEEMED UNIVERSITY")
     gr.Markdown("---")
     with gr.Row():
         with gr.Column(scale=1):
+            gr.Markdown("### 📥 1. Upload Requirements")
+            resume_input = gr.File(label="Upload Resume (PDF/DOCX)")
+            jd_input = gr.Textbox(label="Job Description", lines=12, placeholder="Paste the job requirements here...")
+            run_btn = gr.Button("🔍 Run AI Analysis", variant="primary")
         with gr.Column(scale=1):
+            gr.Markdown("### 📊 2. Match Intelligence")
+            gauge_plot = gr.Plot()
+            match_display = gr.Textbox(label="Identified Matching Skills", interactive=False)
+            gap_display = gr.Textbox(label="Identified Skill Gaps", interactive=False)
+    # The roadmap section starts hidden; main_process returns the gr.update
+    # that shows or hides it through the roadmap_container output.
+    with gr.Row(visible=False) as roadmap_container:
+        with gr.Column():
             gr.Markdown("---")
+            roadmap_btn = gr.Button("🚀 Generate Knowledge Upgrade Roadmap", variant="secondary")
+            roadmap_output = gr.Markdown()
+    # Shared state: carries the gap-skill list written by main_process to
+    # generate_roadmap when the roadmap button is clicked.
+    gap_state = gr.State([])
+    # Event mapping: the order of `outputs` must match the order of the values
+    # returned by the handler function.
+    run_btn.click(
+        fn=main_process,
+        inputs=[resume_input, jd_input],
+        outputs=[match_display, gap_display, gauge_plot, gap_state, roadmap_container]
     )
     roadmap_btn.click(
+        fn=generate_roadmap,
+        inputs=[gap_state],
         outputs=[roadmap_output]
     )