import os
import re
import tempfile
from collections import Counter
from urllib.parse import quote_plus

import gradio as gr
import numpy as np
from dotenv import load_dotenv
from fpdf import FPDF
from langchain_community.document_loaders.pdf import PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_groq import ChatGroq
from sklearn.metrics.pairwise import cosine_similarity
# Pull settings from a .env file (python-dotenv) before reading the API key.
load_dotenv()
groq_api_key = os.getenv("GROQ_API_KEY")
# os.environ only accepts strings: assigning None would raise TypeError at
# import time when the key is not configured. Re-export only a real value so
# the module still imports and the missing key surfaces where it is used.
if groq_api_key:
    os.environ["GROQ_API_KEY"] = groq_api_key
def extract_text_from_pdf(pdf_file):
    """Return the text of a PDF that is supplied as raw bytes.

    The bytes are spooled to a temporary ``.pdf`` file because ``PyPDFLoader``
    is constructed from a file path. The temporary file is removed once the
    text has been read.

    Args:
        pdf_file: Raw PDF content; written verbatim to the temporary file.

    Returns:
        The ``page_content`` of every document returned by
        ``load_and_split()``, joined with single spaces.
    """
    # delete=False lets us close the handle before the loader reopens the file
    # by name; the cleanup below takes over the deletion.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp:
        temp.write(pdf_file)
        temp_path = temp.name
    try:
        pages = PyPDFLoader(temp_path).load_and_split()
    finally:
        # Previously the file was never removed, leaking one file per upload.
        try:
            os.remove(temp_path)
        except OSError:
            pass  # best-effort cleanup; never mask a loader error
    return " ".join(page.page_content for page in pages)
def extract_skills(text):
    """Return the known skills mentioned in *text*, in catalogue order.

    Matching is case-insensitive. A skill is only counted when it is not
    immediately followed by another letter, so "JavaScript" no longer
    registers as "Java". Prefixes and trailing digits are still accepted
    ("MySQL" counts as SQL, "HTML5" as HTML), as before.

    Args:
        text: Free text such as a resume or job description.

    Returns:
        A list of skill names (original capitalisation) found in the text.
    """
    skills_list = ["Python", "SQL", "Machine Learning", "Deep Learning", "NLP", "Data Visualization", "Cloud", "TensorFlow", "PyTorch", "Statistics", "Java", "C++", "HTML", "CSS", "JavaScript"]
    # Lower-case once instead of once per skill.
    haystack = text.lower()
    found = []
    for skill in skills_list:
        # re.escape keeps the "+" in "C++" literal; the lookahead rejects a
        # match that continues into a longer word.
        pattern = re.escape(skill.lower()) + r"(?![a-z])"
        if re.search(pattern, haystack):
            found.append(skill)
    return found
def generate_learning_resources(missing_skills):
    """Build Markdown search links (Google courses, YouTube tutorials) per skill.

    Args:
        missing_skills: Iterable of skill names.

    Returns:
        One Markdown entry per skill, entries separated by blank lines; an
        empty string when no skills are given.
    """
    suggestions = []
    for skill in missing_skills:
        # URL-encode the skill: a raw "C++" is decoded by the server as
        # "C  " (plus means space), and a raw space in "Machine Learning"
        # terminates the Markdown link target early.
        query = quote_plus(skill)
        search_link = f"https://www.google.com/search?q={query}+online+course"
        youtube_link = f"https://www.youtube.com/results?search_query={query}+tutorial"
        suggestions.append(f"š [{skill} Courses on Google]({search_link})\nā¶ļø [YouTube Tutorials]({youtube_link})\n")
    return "\n\n".join(suggestions)
def suggest_certifications(missing_skills):
    """Map missing skills to recommended certifications.

    Each skill is reported at most once, even if it appears several times in
    *missing_skills* (for example when the same skill is missing for several
    job descriptions). Skills without a known certification are skipped.

    Args:
        missing_skills: Iterable of skill names.

    Returns:
        Newline-separated ``"<skill>: <certification>"`` lines in first-seen
        order, or a fallback sentence when nothing matches.
    """
    cert_mapping = {
        "Python": "Python for Everybody (Coursera)",
        "Machine Learning": "Machine Learning by Andrew Ng (Coursera)",
        "Cloud": "AWS Certified Solutions Architect",
        "SQL": "Google Data Analytics Certificate",
        "TensorFlow": "TensorFlow Developer Certificate",
        "NLP": "Natural Language Processing Specialization (DeepLearning.AI)",
        "Java": "Oracle Certified Java Programmer",
        "C++": "C++ Nanodegree (Udacity)",
    }
    # dict.fromkeys removes duplicates while keeping first-seen order.
    suggestions = [
        f"{skill}: {cert_mapping[skill]}"
        for skill in dict.fromkeys(missing_skills)
        if skill in cert_mapping
    ]
    return "\n".join(suggestions) if suggestions else "No specific certifications recommended."
def generate_circular_progress(percentage):
    """Render *percentage* as the snippet shown in the progress widget.

    Returns:
        The value followed by a percent sign, on its own line (wrapped in a
        leading and a trailing newline).
    """
    return f"\n{percentage}%\n"
def generate_skill_gap_report(user_skills, job_skills, missing_skills, match_percent):
    """Ask the chat model for a short, friendly skill gap report.

    Args:
        user_skills: Skills found in the resume.
        job_skills: Skills found in the job description.
        missing_skills: Job skills absent from the resume.
        match_percent: Skill match value inserted before the "%" sign.

    Returns:
        The model's reply as a plain string.
    """
    chat_model = ChatGroq(model="llama3-8b-8192", temperature=0.2)
    prompt_text = (
        "\n"
        "User Skills: {user_skills}\n"
        "Job Requirements: {job_skills}\n"
        "Missing Skills: {missing_skills}\n"
        "Match Percentage: {match_percent}%\n"
        "Generate a short, friendly skill gap report. Suggest next steps for the user to improve their chances.\n"
    )
    # prompt -> model -> plain-string parser
    pipeline = PromptTemplate.from_template(prompt_text) | chat_model | StrOutputParser()
    # Skill lists are flattened to comma-separated text for the prompt.
    prompt_values = {
        "user_skills": ", ".join(user_skills),
        "job_skills": ", ".join(job_skills),
        "missing_skills": ", ".join(missing_skills),
        "match_percent": match_percent,
    }
    return pipeline.invoke(prompt_values)
def create_pdf(full_report_text):
    """Write the report text to a PDF file and return its path.

    Args:
        full_report_text: Report body. Characters outside Latin-1 are
            replaced with "?" because the core "Arial" font cannot encode
            them.

    Returns:
        The path of the written file, ``"multi_jd_skill_gap_report.pdf"``
        (relative to the current working directory).
    """
    # The built-in fonts are Latin-1 only; sanitise here so the function does
    # not depend on every caller stripping the text first.
    printable_text = full_report_text.encode("latin-1", "replace").decode("latin-1")
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    pdf.multi_cell(0, 10, printable_text)
    # NOTE(review): fixed file name, so each call overwrites the previous report.
    output_path = "multi_jd_skill_gap_report.pdf"
    pdf.output(output_path)
    return output_path
def process_skill_gap(resume_pdf, jd_pdfs):
    """Compare one resume against several job descriptions.

    For every job description this computes the keyword skill match, the
    missing skills, the embedding cosine similarity with the resume and an
    AI-written report section. The combined report is also written to a PDF.

    Args:
        resume_pdf: Raw bytes of the resume PDF, or None if nothing was
            uploaded.
        jd_pdfs: Sequence of raw PDF bytes, one per job description; None or
            empty if nothing was uploaded.

    Returns:
        A 9-tuple matching the UI outputs, in order: progress snippet, status
        line, comma-separated missing skills, completion message, full report
        text, PDF path, top-missing-skills summary, learning resources
        (Markdown) and certification suggestions. When an input is missing,
        every entry is an empty string except the PDF path, which is None.
    """
    # "not jd_pdfs" also covers an empty list, which used to reach the overall
    # score and divide by zero.
    if resume_pdf is None or not jd_pdfs:
        return "", "", "", "", "", None, "", "", ""

    resume_text = extract_text_from_pdf(resume_pdf)
    user_skills = extract_skills(resume_text)
    user_skill_set = set(user_skills)

    # Parse each job description exactly once and reuse the text below.
    jd_texts = [extract_text_from_pdf(jd_pdf) for jd_pdf in jd_pdfs]

    # Load the embedding model once and embed the resume once, instead of
    # repeating both for every job description.
    embed_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    vectors = embed_model.embed_documents([resume_text, *jd_texts])
    similarities = cosine_similarity([vectors[0]], vectors[1:])[0]

    all_missing_skills = []
    match_percents = []
    report_sections = []
    for idx, (jd_text, similarity_score) in enumerate(zip(jd_texts, similarities), start=1):
        job_skills = extract_skills(jd_text)
        common = user_skill_set.intersection(job_skills)
        match_percent = round(len(common) / len(job_skills) * 100, 2) if job_skills else 0
        # A list comprehension keeps the catalogue order, so the output is
        # deterministic (set difference order is not).
        missing_skills = [skill for skill in job_skills if skill not in user_skill_set]
        all_missing_skills.extend(missing_skills)
        match_percents.append(match_percent)

        similarity_percent = round(float(similarity_score) * 100, 2)
        ai_report = generate_skill_gap_report(user_skills, job_skills, missing_skills, match_percent)
        report_sections.append(
            f"\nJD {idx}:\nSkill Match: {match_percent}%\nMissing Skills: {', '.join(missing_skills) if missing_skills else 'None'}\nSimilarity Score: {similarity_percent}%\nAI Report:\n{ai_report}\n-------------------------\n"
        )
    full_report = "".join(report_sections)

    # De-duplicate (first-seen order) for display and recommendations; the
    # raw list with repeats is still needed for the frequency count.
    unique_missing_skills = list(dict.fromkeys(all_missing_skills))
    resources = generate_learning_resources(unique_missing_skills)
    certifications = suggest_certifications(unique_missing_skills)

    most_common_skills = Counter(all_missing_skills).most_common(3)
    top_missing_skills_text = "Top Missing Skills Across JDs: " + ", ".join(
        [f"{skill} ({count} times)" for skill, count in most_common_skills]
    ) if most_common_skills else "No missing skills detected."

    # Overall score = mean of the per-JD skill match percentages, so it uses
    # the same definition of "match" (share of required skills covered) as
    # the per-JD figures instead of dividing by the resume's skill count.
    overall_match = round(sum(match_percents) / len(match_percents), 2)

    # The PDF writer's core font is not Unicode-capable; drop non-ASCII text.
    full_report_clean = full_report.encode('ascii', 'ignore').decode('ascii')
    pdf_path = create_pdf(full_report_clean)
    progress_display = generate_circular_progress(overall_match)

    return (
        progress_display,
        # U+2705 (check mark): the literal was garbled and split across two
        # lines, which made the module unparseable.
        "\u2705 Analysis done across all JDs",
        ", ".join(unique_missing_skills),
        "Multi-JD Comparison Completed",
        full_report,
        pdf_path,
        top_missing_skills_text,
        resources,
        certifications,
    )
# Gradio UI: two upload widgets, nine result widgets and one button that runs
# process_skill_gap. Components render in the order they are created.
with gr.Blocks() as demo:
    gr.HTML("\n")
    gr.Markdown("# š§ TALENTPATCH - Multi-JD AI Skill Gap Checker")

    # Inputs (files are delivered to the callback as raw bytes).
    resume_file = gr.File(label="š Upload Resume (PDF)", type="binary")
    jd_files = gr.File(
        label="š Upload Multiple Job Descriptions (PDFs)",
        type="binary",
        file_types=[".pdf"],
        file_count="multiple",
    )

    # Result widgets.
    match_progress = gr.HTML(label="Skill Match Progress")
    skill_match_text = gr.Textbox(label="Status", interactive=False)
    missing_skills_text = gr.Textbox(label="All Missing Skills", interactive=False)
    similarity_text = gr.Textbox(label="Status Message", interactive=False)
    report_output = gr.Textbox(
        label="AI-Generated Multi-JD Skill Gap Report",
        lines=20,
        interactive=False,
    )
    download_pdf = gr.File(label="š„ Download Full Report as PDF")
    top_skills_output = gr.Textbox(label="Top Missing Skills Across JDs", interactive=False)
    learning_resources = gr.Markdown(label="š AI Learning Resource Recommendations")
    certification_output = gr.Textbox(label="š Recommended Certifications", interactive=False)

    submit_btn = gr.Button("š Analyze Skill Gap")

    # The order here must match the tuple returned by process_skill_gap.
    analysis_outputs = [
        match_progress,
        skill_match_text,
        missing_skills_text,
        similarity_text,
        report_output,
        download_pdf,
        top_skills_output,
        learning_resources,
        certification_output,
    ]
    submit_btn.click(
        fn=process_skill_gap,
        inputs=[resume_file, jd_files],
        outputs=analysis_outputs,
    )

demo.launch()