# Jobconnection / app.py
# sivan26 - "Update app.py" (commit c76eee7, verified)
import gradio as gr
import pandas as pd
import random
from typing import List, Dict
from sentence_transformers import SentenceTransformer, util
# -----------------------------
# Job Database Generation
# -----------------------------
class JobDatabase:
    """Synthetic job catalogue with precomputed skill embeddings.

    On construction the class generates a list of job dicts, loads them into
    a DataFrame, joins each job's requirements into a ``skills_text`` string,
    and encodes those strings once with a SentenceTransformer model so that
    later queries only need to encode the user's input.

    Attributes:
        jobs: list of generated job dicts.
        df_jobs: DataFrame built from ``jobs`` plus a ``skills_text`` column.
        model: the loaded SentenceTransformer instance.
        job_embeddings: tensor of embeddings, one row per row of ``df_jobs``.
    """

    # Minimal example data; replace with your full database.
    _JOB_TEMPLATES = {
        "Technology": [
            {"title": "Software Engineer", "desc": "Design and develop software applications",
             "skills": ["Python", "Java", "JavaScript", "Git", "Agile", "Problem Solving"]},
            {"title": "Data Scientist", "desc": "Analyze complex data to extract business insights",
             "skills": ["Python", "R", "Machine Learning", "SQL", "Statistics", "Pandas"]},
        ],
        "Finance": [
            {"title": "Financial Analyst", "desc": "Analyze financial data and market trends",
             "skills": ["Financial Modeling", "Excel", "Data Analysis", "Financial Reporting", "Market Research"]},
        ],
    }
    _EXPERIENCE_LEVELS = ["Entry-level", "Mid-level", "Senior", "Lead/Principal"]
    _SALARY_RANGES = {
        "Entry-level": ["$35k-$50k", "$40k-$55k"],
        "Mid-level": ["$55k-$75k", "$60k-$80k"],
        "Senior": ["$80k-$110k", "$90k-$120k"],
        "Lead/Principal": ["$120k-$150k", "$130k-$160k"],
    }
    _ADDITIONAL_SKILLS = {
        "Technology": ["Debugging", "Code Review", "System Design"],
        "Finance": ["Financial Regulations", "Risk Management", "Excel Advanced"],
    }
    _LOCATIONS = [
        "Remote", "New York, NY", "San Francisco, CA", "Chicago, IL",
        "Austin, TX", "Seattle, WA", "Boston, MA", "Los Angeles, CA",
        "Denver, CO", "Atlanta, GA", "Miami, FL", "Portland, OR",
    ]
    # Title variants are cycled through by position within a category.
    _TITLE_PATTERNS = ["{}", "Senior {}", "Junior {}", "Lead {}", "{} Specialist"]
    _MAX_SKILLS_PER_JOB = 8
    _MAX_EXTRA_SKILLS = 3

    def __init__(self, num_jobs: int = 1000, seed: "int | None" = None,
                 model_name: str = 'all-MiniLM-L6-v2'):
        """Generate the jobs and precompute their embeddings.

        Args:
            num_jobs: total number of jobs to generate (default 1000).
            seed: optional seed for reproducible generation; ``None`` uses
                the global ``random`` module state.
            model_name: SentenceTransformer model to load.

        Raises:
            ValueError: if ``num_jobs`` is less than 1.
        """
        self.jobs = self._generate_job_database(num_jobs, seed)
        self.df_jobs = pd.DataFrame(self.jobs)
        # Prepare a textual representation of skills for embeddings
        self.df_jobs['skills_text'] = self.df_jobs['requirements'].apply(", ".join)
        # Load lightweight embedding model
        self.model = SentenceTransformer(model_name)
        # Encode all job skills in advance
        self.job_embeddings = self.model.encode(
            self.df_jobs['skills_text'].tolist(), convert_to_tensor=True
        )

    def _generate_job_database(self, num_jobs: int = 1000,
                               seed: "int | None" = None) -> List[Dict]:
        """Generate a minimal example database; replace with your full database.

        Jobs are split as evenly as possible across the template categories
        (earlier categories absorb the remainder). Within a category the
        templates and title variants are cycled by position, while experience
        level, salary, extra skills and location are drawn at random.

        Args:
            num_jobs: total number of jobs to produce.
            seed: optional seed; when given, a private ``random.Random`` is
                used so the output is reproducible and the global random
                state is left untouched.

        Returns:
            A list of job dicts with keys ``id``, ``title``, ``description``,
            ``requirements``, ``experience_level``, ``salary_range``,
            ``category`` and ``location``. Ids start at 1 and are sequential.

        Raises:
            ValueError: if ``num_jobs`` is less than 1.
        """
        if num_jobs < 1:
            raise ValueError("num_jobs must be a positive integer")

        # The random module and a Random instance share the methods used here.
        rng = random if seed is None else random.Random(seed)

        categories = list(self._JOB_TEMPLATES)
        per_category, leftover = divmod(num_jobs, len(categories))

        jobs = []
        for cat_index, category in enumerate(categories):
            templates = self._JOB_TEMPLATES[category]
            extras_pool = self._ADDITIONAL_SKILLS[category]
            max_extras = min(self._MAX_EXTRA_SKILLS, len(extras_pool))
            quota = per_category + (1 if cat_index < leftover else 0)

            for j in range(quota):
                template = templates[j % len(templates)]
                pattern = self._TITLE_PATTERNS[j % len(self._TITLE_PATTERNS)]

                exp_level = rng.choice(self._EXPERIENCE_LEVELS)
                salary = rng.choice(self._SALARY_RANGES[exp_level])

                # An empty pool would make randint(1, 0) raise; skip extras then.
                extra_count = rng.randint(1, max_extras) if max_extras else 0
                extra_skills = rng.sample(extras_pool, extra_count)

                # dict.fromkeys de-duplicates while preserving order.
                skills = list(dict.fromkeys(template["skills"] + extra_skills))

                jobs.append({
                    "id": len(jobs) + 1,
                    "title": pattern.format(template["title"]),
                    "description": template["desc"],
                    "requirements": skills[:self._MAX_SKILLS_PER_JOB],
                    "experience_level": exp_level,
                    "salary_range": salary,
                    "category": category,
                    "location": rng.choice(self._LOCATIONS),
                })
        return jobs
# -----------------------------
# Job Matching Function
# -----------------------------
def match_jobs_embeddings(user_skills: List[str], db: "JobDatabase", top_n=5):
    """Return the jobs whose skill text is most similar to the user's skills.

    The user's skills are joined into one comma-separated string, encoded
    with ``db.model``, and compared by cosine similarity against the
    precomputed ``db.job_embeddings``.

    Args:
        user_skills: skill names; blank or ``None`` entries are ignored and
            ``None`` is treated as an empty list.
        db: job database providing ``model``, ``job_embeddings`` and
            ``df_jobs``. It is not accessed when no usable skill is given.
        top_n: maximum number of matches to return. Values larger than the
            number of jobs are clamped; zero or negative yields no rows.

    Returns:
        A DataFrame with the columns title, description, requirements,
        experience_level, salary_range and location. When no usable skill is
        supplied, a single placeholder row titled "No skills entered".
    """
    columns = ['title', 'description', 'requirements',
               'experience_level', 'salary_range', 'location']

    # Convert user input into single string
    skills_text = ", ".join(
        s.strip() for s in (user_skills or []) if s and s.strip()
    )
    if not skills_text:
        placeholder = dict.fromkeys(columns, "")
        placeholder["title"] = "No skills entered"
        return pd.DataFrame([placeholder])

    # topk raises if asked for more entries than exist, so clamp the request.
    k = min(top_n, len(db.df_jobs))
    if k <= 0:
        return db.df_jobs.iloc[0:0][columns]

    # Encode user skills
    user_embedding = db.model.encode(skills_text, convert_to_tensor=True)
    # Compute cosine similarity
    cos_scores = util.cos_sim(user_embedding, db.job_embeddings)[0]
    # Get top N matches
    indices = cos_scores.topk(k).indices.tolist()
    return db.df_jobs.iloc[indices][columns]
# -----------------------------
# Gradio Interface
# -----------------------------
# Build the job database once at import time: constructing it loads the
# embedding model and encodes every job, and the UI callback below reuses
# this single instance for each query.
db = JobDatabase()
def find_jobs_ui(user_skills_text):
    """Gradio callback: turn comma-separated text into a table of matches.

    Splits the textbox contents on commas, trims surrounding whitespace from
    each piece, and hands the resulting list to ``match_jobs_embeddings``
    together with the module-level ``db``.
    """
    pieces = user_skills_text.split(",")
    trimmed = list(map(str.strip, pieces))
    return match_jobs_embeddings(trimmed, db)
# Input widget: free-text box where skills are typed as a comma-separated list.
skills_box = gr.Textbox(
    lines=2,
    placeholder="Enter skills separated by commas, e.g. Python, SQL, Excel",
)

# Output widget: table showing one row per matched job.
results_table = gr.Dataframe(
    headers=["Title", "Description", "Requirements", "Experience Level", "Salary", "Location"],
)

# Wire the callback and both widgets into a single-page Gradio app.
iface = gr.Interface(
    fn=find_jobs_ui,
    inputs=skills_box,
    outputs=results_table,
    title="Job Finder with AI Embeddings",
    description="Enter your skills and get top matching jobs using AI embeddings.",
)

# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()