# Spaces: Build error
# File size: 5,940 Bytes
import gradio as gr
import pandas as pd
import random
from typing import List, Dict
from sentence_transformers import SentenceTransformer, util
# -----------------------------
# Job Database Generation
# -----------------------------
class JobDatabase:
    """Synthetic job listings plus precomputed embeddings of their skill lists.

    Attributes set by ``__init__``:
        jobs: list of job dicts produced by ``_generate_job_database``.
        df_jobs: the same jobs as a DataFrame, with an added ``skills_text``
            column (the ``requirements`` list joined with ", ").
        model: the SentenceTransformer used to embed both jobs and queries.
        job_embeddings: encoded ``skills_text`` values, in ``df_jobs`` row order.
    """

    def __init__(self, num_jobs: int = 1000, model_name: str = 'all-MiniLM-L6-v2', seed=None):
        """Generate the job table, load the embedding model and encode every job.

        Args:
            num_jobs: how many synthetic jobs to generate (must be >= 1).
            model_name: SentenceTransformer model identifier to load.
            seed: optional seed for reproducible job generation; ``None``
                draws from the global ``random`` module.

        Raises:
            ValueError: if ``num_jobs`` is smaller than 1.
        """
        if num_jobs < 1:
            # An empty table has no 'requirements' column to embed.
            raise ValueError("num_jobs must be at least 1")
        self.jobs = self._generate_job_database(num_jobs=num_jobs, seed=seed)
        self.df_jobs = pd.DataFrame(self.jobs)
        # Prepare a textual representation of skills for embeddings
        self.df_jobs['skills_text'] = self.df_jobs['requirements'].apply(", ".join)
        # Load lightweight embedding model
        self.model = SentenceTransformer(model_name)
        # Encode all job skills in advance so a query only has to embed the user's input
        self.job_embeddings = self.model.encode(self.df_jobs['skills_text'].tolist(), convert_to_tensor=True)

    def _generate_job_database(self, num_jobs: int = 1000, seed=None) -> List[Dict]:
        """Generate a minimal example database; replace with your full database.

        Jobs are split as evenly as possible across the categories (earlier
        categories receive the remainder). Within a category, templates and
        title variations are cycled by index; experience level, salary,
        extra skills and location are drawn at random.

        Args:
            num_jobs: total number of jobs to generate (must be >= 0).
            seed: optional seed; when given, a private ``random.Random(seed)``
                is used so the output is reproducible. When ``None`` the
                global ``random`` module is used.

        Returns:
            A list of dicts with keys ``id``, ``title``, ``description``,
            ``requirements``, ``experience_level``, ``salary_range``,
            ``category`` and ``location``. Ids start at 1.

        Raises:
            ValueError: if ``num_jobs`` is negative.
        """
        if num_jobs < 0:
            raise ValueError("num_jobs must be non-negative")
        # The module itself exposes choice/sample/randint, so it can stand in for an RNG instance.
        rng = random if seed is None else random.Random(seed)

        job_templates = {
            "Technology": [
                {"title": "Software Engineer", "desc": "Design and develop software applications",
                 "skills": ["Python", "Java", "JavaScript", "Git", "Agile", "Problem Solving"]},
                {"title": "Data Scientist", "desc": "Analyze complex data to extract business insights",
                 "skills": ["Python", "R", "Machine Learning", "SQL", "Statistics", "Pandas"]}
            ],
            "Finance": [
                {"title": "Financial Analyst", "desc": "Analyze financial data and market trends",
                 "skills": ["Financial Modeling", "Excel", "Data Analysis", "Financial Reporting", "Market Research"]}
            ]
        }
        experience_levels = ["Entry-level", "Mid-level", "Senior", "Lead/Principal"]
        salary_ranges = {
            "Entry-level": ["$35k-$50k", "$40k-$55k"],
            "Mid-level": ["$55k-$75k", "$60k-$80k"],
            "Senior": ["$80k-$110k", "$90k-$120k"],
            "Lead/Principal": ["$120k-$150k", "$130k-$160k"]
        }
        additional_skills = {
            "Technology": ["Debugging", "Code Review", "System Design"],
            "Finance": ["Financial Regulations", "Risk Management", "Excel Advanced"]
        }
        # Loop-invariant, so built once rather than per job.
        locations = [
            "Remote", "New York, NY", "San Francisco, CA", "Chicago, IL",
            "Austin, TX", "Seattle, WA", "Boston, MA", "Los Angeles, CA",
            "Denver, CO", "Atlanta, GA", "Miami, FL", "Portland, OR"
        ]

        jobs = []
        job_id = 1
        categories = list(job_templates.keys())
        jobs_per_category, remaining_jobs = divmod(num_jobs, len(categories))
        for i, category in enumerate(categories):
            templates = job_templates[category]
            extras_pool = additional_skills[category]
            # The first `remaining_jobs` categories absorb the division remainder.
            jobs_for_this_category = jobs_per_category + (1 if i < remaining_jobs else 0)
            for j in range(jobs_for_this_category):
                template = templates[j % len(templates)]
                title_variations = [
                    template["title"],
                    f"Senior {template['title']}",
                    f"Junior {template['title']}",
                    f"Lead {template['title']}",
                    f"{template['title']} Specialist"
                ]
                title = title_variations[j % len(title_variations)]
                # NOTE(review): the level is drawn independently of the title prefix,
                # so e.g. a "Junior ..." title can carry a "Senior" experience level.
                exp_level = rng.choice(experience_levels)
                salary = rng.choice(salary_ranges[exp_level])
                extra_skills = rng.sample(extras_pool, rng.randint(1, min(3, len(extras_pool))))
                # dict.fromkeys de-duplicates while keeping order; cap the list at 8 skills.
                unique_skills = list(dict.fromkeys(template["skills"] + extra_skills))[:8]
                jobs.append({
                    "id": job_id,
                    "title": title,
                    "description": template["desc"],
                    "requirements": unique_skills,
                    "experience_level": exp_level,
                    "salary_range": salary,
                    "category": category,
                    "location": rng.choice(locations)
                })
                job_id += 1
        return jobs
# -----------------------------
# Job Matching Function
# -----------------------------
def match_jobs_embeddings(user_skills: List[str], db: "JobDatabase", top_n=5):
    """Return the jobs whose skill text is most similar to the user's skills.

    Args:
        user_skills: skill names; entries that are empty after stripping
            whitespace are ignored.
        db: job database providing ``model``, ``job_embeddings`` and ``df_jobs``.
        top_n: maximum number of jobs to return. Values larger than the
            number of jobs are clamped; values <= 0 yield an empty result.

    Returns:
        A DataFrame with the columns title, description, requirements,
        experience_level, salary_range and location. If no usable skill was
        supplied, a single placeholder row titled "No skills entered".
    """
    columns = ['title', 'description', 'requirements', 'experience_level', 'salary_range', 'location']

    # Convert user input into single string
    skills_text = ", ".join(s.strip() for s in user_skills if s.strip())
    if not skills_text:
        return pd.DataFrame([{"title":"No skills entered","description":"","requirements":"","experience_level":"","salary_range":"","location":""}])

    # topk raises when k exceeds the number of scores, so never ask for more jobs than exist.
    k = min(top_n, len(db.df_jobs))
    if k <= 0:
        return db.df_jobs.iloc[0:0][columns]

    # Encode user skills
    user_embedding = db.model.encode(skills_text, convert_to_tensor=True)
    # Compute cosine similarity of the query against every precomputed job embedding
    cos_scores = util.cos_sim(user_embedding, db.job_embeddings)[0]
    # Get top k matches; indices are positions into db.df_jobs
    indices = cos_scores.topk(k).indices.tolist()
    return db.df_jobs.iloc[indices][columns]
# -----------------------------
# Gradio Interface
# -----------------------------
# Built once at import time and shared by every request: constructing it
# generates the job table, loads the embedding model and encodes all jobs.
db = JobDatabase()
def find_jobs_ui(user_skills_text):
    """Split the comma-separated skills text and return the matching jobs table."""
    pieces = user_skills_text.split(",")
    user_skills = list(map(str.strip, pieces))
    return match_jobs_embeddings(user_skills, db)
# Text box in, table of matching jobs out.
iface = gr.Interface(
    title="Job Finder with AI Embeddings",
    description="Enter your skills and get top matching jobs using AI embeddings.",
    fn=find_jobs_ui,
    inputs=gr.Textbox(
        lines=2,
        placeholder="Enter skills separated by commas, e.g. Python, SQL, Excel",
    ),
    outputs=gr.Dataframe(
        headers=["Title", "Description", "Requirements", "Experience Level", "Salary", "Location"],
    ),
)

# Only start the web server when run as a script, not when imported.
if __name__ == "__main__":
    iface.launch()
|