"""TALENTPATCH - Multi-JD AI Skill Gap Checker (Gradio app).

Compares one resume PDF against one or more job-description PDFs:
extracts a fixed skill vocabulary from each document, computes per-JD
skill-match and embedding-similarity scores, asks a Groq-hosted LLM for
a narrative gap report, and offers the combined report as a downloadable
PDF plus learning-resource and certification suggestions.
"""

import os
import tempfile
from collections import Counter
from urllib.parse import quote_plus

import gradio as gr
import numpy as np  # noqa: F401  (kept from the original import list)
from dotenv import load_dotenv
from fpdf import FPDF
from langchain_community.document_loaders.pdf import PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_groq import ChatGroq
from sklearn.metrics.pairwise import cosine_similarity

load_dotenv()
groq_api_key = os.getenv("GROQ_API_KEY")
# BUG FIX: the original unconditionally did os.environ[...] = groq_api_key,
# which raises TypeError when the variable is missing (getenv returns None).
if groq_api_key:
    os.environ["GROQ_API_KEY"] = groq_api_key

# Closed vocabulary scanned for in both resume and JD text
# (case-insensitive substring match — see extract_skills).
SKILLS_LIST = [
    "Python", "SQL", "Machine Learning", "Deep Learning", "NLP",
    "Data Visualization", "Cloud", "TensorFlow", "PyTorch", "Statistics",
    "Java", "C++", "HTML", "CSS", "JavaScript",
]


def extract_text_from_pdf(pdf_file):
    """Return the concatenated page text of *pdf_file* (raw PDF bytes).

    PyPDFLoader needs a filesystem path, so the bytes are spilled to a
    temporary file which is always removed afterwards.
    """
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp:
        temp.write(pdf_file)
        temp.flush()
        path = temp.name
    try:
        pages = PyPDFLoader(path).load_and_split()
        return " ".join(page.page_content for page in pages)
    finally:
        # BUG FIX: the original never deleted the delete=False temp file,
        # leaking one file per uploaded PDF.
        os.remove(path)


def extract_skills(text):
    """Return every skill from SKILLS_LIST present (case-insensitively) in *text*."""
    lowered = text.lower()  # hoisted: lower the document once, not per skill
    return [skill for skill in SKILLS_LIST if skill.lower() in lowered]


def generate_learning_resources(missing_skills):
    """Build a markdown block of Google/YouTube search links, one per skill."""
    suggestions = []
    for skill in missing_skills:
        # BUG FIX: skills such as "C++" or "Machine Learning" must be
        # URL-encoded or the generated query links are malformed.
        q = quote_plus(skill)
        search_link = f"https://www.google.com/search?q={q}+online+course"
        youtube_link = f"https://www.youtube.com/results?search_query={q}+tutorial"
        suggestions.append(
            f"šŸ”Ž [{skill} Courses on Google]({search_link})\n"
            f"ā–¶ļø [YouTube Tutorials]({youtube_link})\n"
        )
    return "\n\n".join(suggestions)


def suggest_certifications(missing_skills):
    """Map missing skills to well-known certifications; fallback text if none match."""
    cert_mapping = {
        "Python": "Python for Everybody (Coursera)",
        "Machine Learning": "Machine Learning by Andrew Ng (Coursera)",
        "Cloud": "AWS Certified Solutions Architect",
        "SQL": "Google Data Analytics Certificate",
        "TensorFlow": "TensorFlow Developer Certificate",
        "NLP": "Natural Language Processing Specialization (DeepLearning.AI)",
        "Java": "Oracle Certified Java Programmer",
        "C++": "C++ Nanodegree (Udacity)",
    }
    suggestions = [
        f"{skill}: {cert_mapping[skill]}"
        for skill in missing_skills
        if skill in cert_mapping
    ]
    return "\n".join(suggestions) if suggestions else "No specific certifications recommended."


def generate_circular_progress(percentage):
    """Return an HTML snippet rendering *percentage* as a circular progress ring.

    NOTE(review): the original inline markup was garbled in the source file;
    this is a reconstruction using a CSS conic-gradient ring with the
    percentage label centred inside it — confirm against the intended design.
    """
    html_code = f"""
    <div style="width:140px;height:140px;border-radius:50%;margin:auto;
                background:conic-gradient(#4caf50 {percentage}%, #e0e0e0 0);
                display:flex;align-items:center;justify-content:center;">
        <div style="width:110px;height:110px;border-radius:50%;background:#fff;
                    display:flex;align-items:center;justify-content:center;
                    font-size:1.4em;font-weight:bold;">
            {percentage}%
        </div>
    </div>
    """
    return html_code


def generate_skill_gap_report(user_skills, job_skills, missing_skills, match_percent):
    """Ask the Groq LLM for a short narrative skill-gap report.

    All list arguments are joined into comma-separated strings before being
    interpolated into the prompt template.
    """
    llm = ChatGroq(model="llama3-8b-8192", temperature=0.2)
    template = """
    User Skills: {user_skills}
    Job Requirements: {job_skills}
    Missing Skills: {missing_skills}
    Match Percentage: {match_percent}%

    Generate a short, friendly skill gap report.
    Suggest next steps for the user to improve their chances.
    """
    prompt = PromptTemplate.from_template(template)
    chain = prompt | llm | StrOutputParser()
    return chain.invoke({
        "user_skills": ", ".join(user_skills),
        "job_skills": ", ".join(job_skills),
        "missing_skills": ", ".join(missing_skills),
        "match_percent": match_percent,
    })


def create_pdf(full_report_text):
    """Write *full_report_text* to a PDF in the working directory; return its path.

    FPDF's built-in Arial font is Latin-1 only, so the caller is expected to
    pass ASCII-safe text (see process_skill_gap).
    """
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    pdf.multi_cell(0, 10, full_report_text)
    output_path = "multi_jd_skill_gap_report.pdf"
    pdf.output(output_path)
    return output_path


def process_skill_gap(resume_pdf, jd_pdfs):
    """Run the full analysis for one resume (bytes) against many JD PDFs.

    Returns the 9-tuple bound to the Gradio outputs:
    (progress HTML, status, all missing skills, comparison status,
     full report text, PDF path, top-missing-skills text,
     learning-resources markdown, certifications text).
    """
    # Guard also covers an empty jd_pdfs list (the original only checked
    # None, which would later divide by zero in the overall-match formula).
    if resume_pdf is None or not jd_pdfs:
        return "", "", "", "", "", None, "", "", ""

    resume_text = extract_text_from_pdf(resume_pdf)
    user_skills = extract_skills(resume_text)

    # PERF FIX: the embedding model was re-instantiated inside the per-JD
    # loop in the original; loading it once is equivalent and far cheaper.
    embed_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

    all_missing_skills = []
    total_common = 0  # accumulated here so overall_match needs no second pass
    report_parts = []

    for idx, jd_pdf in enumerate(jd_pdfs, start=1):
        jd_text = extract_text_from_pdf(jd_pdf)
        job_skills = extract_skills(jd_text)

        common = set(user_skills) & set(job_skills)
        total_common += len(common)
        match_percent = (len(common) / len(job_skills)) * 100 if job_skills else 0
        missing_skills = list(set(job_skills) - set(user_skills))
        all_missing_skills.extend(missing_skills)

        vectors = embed_model.embed_documents([resume_text, jd_text])
        similarity_score = cosine_similarity([vectors[0]], [vectors[1]])[0][0]
        similarity_percent = round(similarity_score * 100, 2)

        ai_report = generate_skill_gap_report(
            user_skills, job_skills, missing_skills, match_percent
        )
        report_parts.append(
            f"\nJD {idx}:\n"
            f"Skill Match: {match_percent}%\n"
            f"Missing Skills: {', '.join(missing_skills) if missing_skills else 'None'}\n"
            f"Similarity Score: {similarity_percent}%\n"
            f"AI Report:\n{ai_report}\n"
            "-------------------------\n"
        )
    full_report = "".join(report_parts)  # join once instead of += in the loop

    resources = generate_learning_resources(list(set(all_missing_skills)))
    certifications = suggest_certifications(all_missing_skills)

    most_common_skills = Counter(all_missing_skills).most_common(3)
    top_missing_skills_text = (
        "Top Missing Skills Across JDs: "
        + ", ".join(f"{skill} ({count} times)" for skill, count in most_common_skills)
        if most_common_skills
        else "No missing skills detected."
    )

    # Average fraction of the user's skills matched per JD, as a percentage.
    # BUG FIX: the original re-extracted and re-parsed every JD PDF a second
    # time here; the intersections were already computed in the loop above.
    overall_match = (
        round((total_common / (len(user_skills) * len(jd_pdfs))) * 100, 2)
        if user_skills
        else 0
    )

    # Strip non-ASCII so FPDF's Latin-1-only core font cannot fail.
    full_report_clean = full_report.encode("ascii", "ignore").decode("ascii")
    pdf_path = create_pdf(full_report_clean)
    progress_display = generate_circular_progress(overall_match)

    return (
        progress_display,
        "āœ… Analysis done across all JDs",
        ", ".join(set(all_missing_skills)),
        "Multi-JD Comparison Completed",
        full_report,
        pdf_path,
        top_missing_skills_text,
        resources,
        certifications,
    )


with gr.Blocks() as demo:
    # NOTE(review): the original gr.HTML style block was lost when the source
    # was mangled; this is a minimal placeholder — restore the real CSS.
    gr.HTML("""
    <style>
        .gradio-container { max-width: 1100px; margin: auto; }
    </style>
    """)
    gr.Markdown("# 🧠 TALENTPATCH - Multi-JD AI Skill Gap Checker")

    resume_file = gr.File(label="šŸ“„ Upload Resume (PDF)", type="binary")
    jd_files = gr.File(
        label="šŸ“„ Upload Multiple Job Descriptions (PDFs)",
        type="binary",
        file_types=[".pdf"],
        file_count="multiple",
    )

    match_progress = gr.HTML(label="Skill Match Progress")
    skill_match_text = gr.Textbox(label="Status", interactive=False)
    missing_skills_text = gr.Textbox(label="All Missing Skills", interactive=False)
    similarity_text = gr.Textbox(label="Status Message", interactive=False)
    report_output = gr.Textbox(
        label="AI-Generated Multi-JD Skill Gap Report", lines=20, interactive=False
    )
    download_pdf = gr.File(label="šŸ“„ Download Full Report as PDF")
    top_skills_output = gr.Textbox(
        label="Top Missing Skills Across JDs", interactive=False
    )
    learning_resources = gr.Markdown(label="šŸ“š AI Learning Resource Recommendations")
    certification_output = gr.Textbox(
        label="šŸŽ“ Recommended Certifications", interactive=False
    )

    submit_btn = gr.Button("šŸš€ Analyze Skill Gap")
    submit_btn.click(
        fn=process_skill_gap,
        inputs=[resume_file, jd_files],
        outputs=[
            match_progress,
            skill_match_text,
            missing_skills_text,
            similarity_text,
            report_output,
            download_pdf,
            top_skills_output,
            learning_resources,
            certification_output,
        ],
    )

# Guard the launch so importing this module does not start a server.
if __name__ == "__main__":
    demo.launch()