Spaces:

indhupamula
/

ML_PRO

Runtime error

File size: 9,056 Bytes

import logging
import re
import subprocess
import sys

import gradio as gr
import numpy as np
import pandas as pd
import spacy
from docx import Document
from fpdf import FPDF
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# ---------------------------
# Load SpaCy model (runtime download if needed)
# ---------------------------
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # The model package is not installed. Download it with the interpreter
    # that is running this app (a bare "python" on PATH may belong to a
    # different environment) and fail loudly if the download itself fails,
    # instead of falling through to a second, less informative load error.
    subprocess.run(
        [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
        check=True,
    )
    nlp = spacy.load("en_core_web_sm")

# Load sentence-transformers model (used by compute_scores for the
# resume/JD embedding similarity).
model = SentenceTransformer('all-MiniLM-L6-v2')

# ---------------------------
# Resume Parsing Functions
# ---------------------------
def extract_text_from_pdf(file):
    """Return the concatenated text of every page of a PDF.

    Args:
        file: Whatever ``PdfReader`` accepts as its source (the caller in
            this file passes the uploaded resume).

    Returns:
        The page texts joined in page order, or ``""`` when the document
        cannot be read. Pages whose ``extract_text()`` result is falsy
        contribute nothing.
    """
    try:
        reader = PdfReader(file)
        return "".join(page.extract_text() or "" for page in reader.pages)
    except Exception:
        # Best-effort: an unreadable upload degrades to empty text, but the
        # cause is logged instead of discarded. Catching Exception rather
        # than using a bare except lets KeyboardInterrupt/SystemExit through.
        logging.getLogger(__name__).exception("Could not extract text from PDF")
        return ""

def extract_text_from_docx(file):
    """Return the paragraph text of a DOCX document, one paragraph per line.

    Args:
        file: Whatever ``Document`` accepts as its source (the caller in
            this file passes the uploaded resume).

    Returns:
        The text of ``doc.paragraphs`` joined with newlines, or ``""`` when
        the document cannot be read.
    """
    try:
        doc = Document(file)
        return "\n".join(paragraph.text for paragraph in doc.paragraphs)
    except Exception:
        # Best-effort: an unreadable upload degrades to empty text, but the
        # cause is logged instead of discarded. Catching Exception rather
        # than using a bare except lets KeyboardInterrupt/SystemExit through.
        logging.getLogger(__name__).exception("Could not extract text from DOCX")
        return ""

def extract_skills(jd_text):
    """Split a job description into candidate skill phrases.

    The text is cut at every comma, semicolon and newline; each fragment is
    stripped of surrounding whitespace and empty fragments are discarded.
    Order and duplicates are kept as they appear in the input.
    """
    phrases = []
    for fragment in re.split(r"[,\n;]", jd_text):
        trimmed = fragment.strip()
        if trimmed:
            phrases.append(trimmed)
    return phrases

def split_sections(resume_text):
    """Split resume text into its Education, Experience and Skills sections.

    A section starts at the first occurrence of its heading (capitalised or
    upper-case spelling, matched anywhere in the text) and runs until the
    next heading of either other section, or the end of the text. Sections
    are therefore found regardless of the order they appear in.

    Args:
        resume_text: Full resume text. ``None``, non-string and empty values
            yield three empty sections.

    Returns:
        Dict with keys ``"Education"``, ``"Experience"`` and ``"Skills"``
        mapping to the stripped section body (``""`` when the heading is
        absent).
    """
    sections = {"Education": "", "Experience": "", "Skills": ""}
    # Explicit guard instead of a catch-all except around the regex calls.
    if not isinstance(resume_text, str) or not resume_text:
        return sections

    headings = {
        "Education": "Education|EDUCATION",
        "Experience": "Experience|EXPERIENCE",
        "Skills": "Skills|SKILLS",
    }
    for name, heading in headings.items():
        # Every *other* heading terminates this section, so a resume that
        # lists Skills first no longer has the rest of the document counted
        # as its Skills section.
        terminators = "|".join(
            pattern for other, pattern in headings.items() if other != name
        )
        match = re.search(
            rf"({heading})(.*?)({terminators}|$)", resume_text, re.DOTALL
        )
        if match:
            sections[name] = match.group(2).strip()
    return sections

def compute_scores(resume_text, jd_text, required_skills):
    """Score a resume against one job description.

    Args:
        resume_text: Full resume text.
        jd_text: Job-description text.
        required_skills: Skill phrases to look for; matching is a
            case-insensitive substring test.

    Returns:
        Tuple ``(final_score, keyword_score, semantic_score, section_scores,
        tips)``:

        * ``keyword_score`` - fraction of ``required_skills`` found in the
          resume (denominator is at least 1).
        * ``semantic_score`` - cosine similarity between the ``model``
          embeddings of the resume and the job description.
        * ``final_score`` - ``0.6 * keyword_score + 0.4 * semantic_score``.
        * ``section_scores`` - per-section fraction of ``required_skills``
          found in each section returned by ``split_sections``.
        * ``tips`` - one suggestion string per skill missing from the resume.

        If anything fails, the failure is logged and the all-zero result
        ``(0, 0, 0, {"Education": 0, "Experience": 0, "Skills": 0}, [])`` is
        returned.
    """
    try:
        # Lower-case the resume once instead of once per skill.
        resume_lower = resume_text.lower()
        total = max(len(required_skills), 1)

        # One pass yields both the matched skills and the missing ones.
        present_skills, missing_skills = [], []
        for skill in required_skills:
            bucket = present_skills if skill.lower() in resume_lower else missing_skills
            bucket.append(skill)
        keyword_score = len(present_skills) / total

        res_vec = model.encode(resume_text)
        jd_vec = model.encode(jd_text)
        # Convert to a plain float so every score in the result has the same type.
        semantic_score = float(cosine_similarity([res_vec], [jd_vec])[0][0])

        section_scores = {}
        for sec, text in split_sections(resume_text).items():
            text_lower = text.lower()
            hits = sum(1 for kw in required_skills if kw.lower() in text_lower)
            section_scores[sec] = hits / total

        final_score = 0.6 * keyword_score + 0.4 * semantic_score
        tips = [f"⚠️ Add '{skill}' to improve ATS match" for skill in missing_skills]
        return final_score, keyword_score, semantic_score, section_scores, tips
    except Exception:
        # Keep the best-effort fallback, but record why scoring failed so an
        # all-zero report is not mistaken for a genuine result.
        logging.getLogger(__name__).exception("Scoring failed; returning zero scores")
        return 0, 0, 0, {"Education": 0, "Experience": 0, "Skills": 0}, []

# ---------------------------
# CSV & PDF Export
# ---------------------------
def export_csv(df, filename="ats_report.csv"):
    """Write *df* to *filename* as CSV without the index column.

    Writing is best-effort: a failure is logged and does not raise.

    Args:
        df: Object exposing ``to_csv`` (a pandas DataFrame in this file).
        filename: Destination path.

    Returns:
        ``filename``, whether or not the write succeeded.
    """
    try:
        df.to_csv(filename, index=False)
    except Exception:
        # Log instead of silently passing, so a missing report file can be
        # traced; Exception (not a bare except) leaves KeyboardInterrupt and
        # SystemExit alone.
        logging.getLogger(__name__).exception("Could not write CSV report to %s", filename)
    return filename

def _latin1_safe(value):
    """Return *value* as text limited to Latin-1, replacing other characters with '?'."""
    return str(value).encode("latin-1", "replace").decode("latin-1")


def export_pdf(df, filename="ats_report.pdf"):
    """Render the screening results as a simple PDF report.

    Each row of *df* is printed as a block containing its ``JD``,
    ``Final Score``, ``Keyword Score``, ``Semantic Score``,
    ``Section Scores`` and ``Tips`` columns. Writing is best-effort: a
    failure is logged and does not raise.

    Args:
        df: DataFrame with the columns listed above.
        filename: Destination path.

    Returns:
        ``filename``, whether or not the write succeeded.
    """
    try:
        pdf = FPDF()
        pdf.add_page()
        pdf.set_font("Arial", size=12)
        pdf.cell(200, 10, txt="ATS Resume Screening Report", ln=True, align="C")
        pdf.ln(10)
        # Number rows by position so the label does not depend on the index.
        for number, (_, row) in enumerate(df.iterrows(), start=1):
            # The built-in Arial font only covers Latin-1. The tips contain
            # a warning emoji and the JD text is arbitrary user input, so
            # every dynamic string is sanitised first; otherwise an encoding
            # error would abort the report and no PDF would be written.
            pdf.cell(200, 10, txt=_latin1_safe(f"JD {number}: {row['JD']}"), ln=True)
            pdf.cell(200, 10, txt=_latin1_safe(f"Final Score: {row['Final Score']}"), ln=True)
            pdf.cell(200, 10, txt=_latin1_safe(f"Keyword Score: {row['Keyword Score']}"), ln=True)
            pdf.cell(200, 10, txt=_latin1_safe(f"Semantic Score: {row['Semantic Score']}"), ln=True)
            pdf.cell(200, 10, txt="Section Scores:", ln=True)
            pdf.multi_cell(0, 10, _latin1_safe(row["Section Scores"]))
            pdf.cell(200, 10, txt="Tips:", ln=True)
            pdf.multi_cell(0, 10, _latin1_safe(row["Tips"]))
            pdf.ln(5)
        pdf.output(filename)
    except Exception:
        # Log instead of silently passing, so a missing report file can be
        # traced; Exception (not a bare except) leaves KeyboardInterrupt and
        # SystemExit alone.
        logging.getLogger(__name__).exception("Could not write PDF report to %s", filename)
    return filename

# ---------------------------
# AI Resume Rewriter & Feedback
# ---------------------------
def ai_resume_rewriter(resume_text, jd_text):
    """Return the resume text with a list of missing JD skills appended.

    Skills come from ``extract_skills(jd_text)``; a skill counts as missing
    when it does not occur in the resume as a case-insensitive substring.
    When nothing is missing the resume text is returned unchanged.
    """
    wanted = extract_skills(jd_text)
    absent = []
    for skill in wanted:
        if skill.lower() not in resume_text.lower():
            absent.append(skill)

    if not absent:
        return resume_text

    bullet_lines = "\n".join(f"- {s}" for s in absent)
    return resume_text + "\n\n### Suggested Skills to Add:\n" + bullet_lines

# Learning resources suggested for a skill, keyed by skill name.
skill_course_mapping = {
    "Python": [
        "Complete 'Python for Everybody' on Coursera",
        "Try Python projects on GitHub",
    ],
    "Machine Learning": [
        "Take 'Machine Learning' by Andrew Ng on Coursera",
        "Kaggle ML competitions",
    ],
    "Deep Learning": [
        "DeepLearning.AI TensorFlow Developer Course",
        "Build neural network projects",
    ],
    "SQL": [
        "SQL for Data Science - Coursera",
        "Practice on LeetCode SQL problems",
    ],
    "AWS": [
        "AWS Certified Solutions Architect - Associate",
        "AWS Free Tier practice",
    ],
    "TensorFlow": [
        "TensorFlow in Practice Specialization - Coursera",
        "Hands-on DL projects",
    ],
}

# Certification suggested for a skill, keyed by skill name.
certification_mapping = {
    "AWS": "AWS Certified Solutions Architect",
    "ML": "Machine Learning by Andrew Ng",
    "Python": "PCAP: Python Certified Associate Programmer",
    "TensorFlow": "TensorFlow Developer Certificate",
}

def generate_feedback(resume_text, jd_text):
    """Build a Markdown-style feedback report for a resume against a JD.

    The report has four parts: skills from ``extract_skills(jd_text)`` that
    are missing from the resume (case-insensitive substring test), course
    suggestions from ``skill_course_mapping``, certification suggestions
    from ``certification_mapping``, and structural tips about absent
    sections or a short resume (fewer than 200 whitespace-separated words).

    Args:
        resume_text: Full resume text.
        jd_text: Job-description text.

    Returns:
        The feedback report as a single string.
    """
    required_skills = extract_skills(jd_text)
    resume_lower = resume_text.lower()
    missing_skills = [skill for skill in required_skills if skill.lower() not in resume_lower]

    # Missing skills are detected case-insensitively, so look them up the
    # same way; otherwise a JD that says "python" gets no suggestion.
    courses_by_skill = {name.lower(): courses for name, courses in skill_course_mapping.items()}
    certs_by_skill = {name.lower(): cert for name, cert in certification_mapping.items()}
    skill_suggestions = [
        f"{s}: {', '.join(courses_by_skill[s.lower()])}"
        for s in missing_skills
        if s.lower() in courses_by_skill
    ]
    cert_suggestions = [
        f"Consider certification: {certs_by_skill[s.lower()]}"
        for s in missing_skills
        if s.lower() in certs_by_skill
    ]

    # Accept the upper-case heading spellings that split_sections also
    # recognises, so "EDUCATION" is not reported as a missing section.
    resume_tips = []
    if "Education" not in resume_text and "EDUCATION" not in resume_text:
        resume_tips.append("Include an Education section if missing.")
    if "Experience" not in resume_text and "EXPERIENCE" not in resume_text:
        resume_tips.append("Include an Experience section with quantified achievements.")
    if "Skills" not in resume_text and "SKILLS" not in resume_text:
        resume_tips.append("Add a Skills section highlighting relevant skills.")
    if len(resume_text.split()) < 200:
        resume_tips.append("Consider adding more details to increase resume length and content richness.")

    feedback_text = "### Missing Skills:\n" + ("\n".join(missing_skills) if missing_skills else "None")
    feedback_text += "\n\n### Suggested Courses:\n" + ("\n".join(skill_suggestions) if skill_suggestions else "No suggestions")
    feedback_text += "\n\n### Suggested Certifications:\n" + ("\n".join(cert_suggestions) if cert_suggestions else "No suggestions")
    feedback_text += "\n\n### Resume Optimization Tips:\n" + ("\n".join(resume_tips) if resume_tips else "Your resume looks well-structured.")
    return feedback_text

# ---------------------------
# Multi-JD Analysis
# ---------------------------
def analyze_multi_jd(resume_file, jd_texts):
    """Score one uploaded resume against one or more job descriptions.

    Args:
        resume_file: The uploaded resume as delivered by the ``gr.File``
            input: an object with a ``name`` path attribute, a plain path
            string, or ``None`` when nothing was uploaded. Only ``.pdf`` and
            ``.docx`` are parsed; any other extension is treated as an empty
            resume.
        jd_texts: Job descriptions separated by blank lines.

    Returns:
        Tuple ``(csv_path, pdf_path, feedback, rewritten_resume)`` matching
        the four interface outputs. When no file was uploaded the two paths
        are ``None`` and ``feedback`` asks for an upload.
    """
    if resume_file is None:
        # Nothing uploaded: return a message instead of failing on `.name`.
        return None, None, "Please upload a resume (PDF or DOCX) before running the analysis.", ""
    jd_texts = jd_texts or ""

    # Work from the file path: it serves both the extension check and the
    # parsers, whether the upload arrived as an object with a `.name`
    # attribute or as a plain path string.
    resume_path = getattr(resume_file, "name", resume_file)
    file_ext = str(resume_path).split('.')[-1].lower()
    if file_ext == "pdf":
        resume_text = extract_text_from_pdf(resume_path)
    elif file_ext == "docx":
        resume_text = extract_text_from_docx(resume_path)
    else:
        resume_text = ""

    # A "blank line" may contain spaces or a carriage return, so split on a
    # newline, optional whitespace, then another newline.
    jd_list = [jd.strip() for jd in re.split(r"\n\s*\n", jd_texts) if jd.strip()]
    results = []
    for jd in jd_list:
        required_skills = extract_skills(jd)
        final_score, keyword_score, semantic_score, section_scores, tips = compute_scores(resume_text, jd, required_skills)
        section_scores_str = "\n".join([f"{k}: {v:.2%}" for k, v in section_scores.items()])
        tips_str = "\n".join(tips) if tips else "No suggestions"
        results.append({
            "JD": jd[:50]+"..." if len(jd) > 50 else jd,
            "Final Score": f"{final_score:.2%}",
            "Keyword Score": f"{keyword_score:.2%}",
            "Semantic Score": f"{semantic_score:.2%}",
            "Section Scores": section_scores_str,
            "Tips": tips_str
        })
    df = pd.DataFrame(results)
    # Return the paths the exporters report rather than repeating the
    # default file names here.
    csv_path = export_csv(df)
    pdf_path = export_pdf(df)
    feedback = generate_feedback(resume_text, jd_texts)
    rewritten_resume = ai_resume_rewriter(resume_text, jd_texts)
    return csv_path, pdf_path, feedback, rewritten_resume

# ---------------------------
# Gradio Interface
# ---------------------------
# Input widgets, in the order analyze_multi_jd receives them.
_resume_input = gr.File(label="Upload Resume (PDF/DOCX)")
_jd_input = gr.Textbox(
    label="Paste Job Description(s) (Separate multiple JDs with double line breaks)",
    lines=10,
)

# Output widgets, in the order analyze_multi_jd returns its values.
_csv_output = gr.File(label="Download CSV Report")
_pdf_output = gr.File(label="Download PDF Report")
_feedback_output = gr.Textbox(label="Personalized Feedback", lines=15)
_rewrite_output = gr.Textbox(label="AI Suggested Resume Revisions", lines=15)

iface = gr.Interface(
    fn=analyze_multi_jd,
    inputs=[_resume_input, _jd_input],
    outputs=[_csv_output, _pdf_output, _feedback_output, _rewrite_output],
    title="AI-Powered Resume Screening System",
    description="Upload your resume, paste job descriptions, and get ATS scoring, personalized feedback, and AI suggestions.",
)

# Start the web app when this file is executed.
iface.launch()