import random
from typing import Dict, List, Optional

import gradio as gr
import pandas as pd
from sentence_transformers import SentenceTransformer, util


# -----------------------------
# Job Database Generation
# -----------------------------
class JobDatabase:
    """Synthetic job catalogue with precomputed skill embeddings.

    Attributes:
        jobs: list of job dicts produced by ``_generate_job_database``.
        df_jobs: ``pandas.DataFrame`` view of ``jobs`` plus a ``skills_text``
            column (each job's requirements joined with ``", "``).
        model: the ``SentenceTransformer`` used to encode skills.
        job_embeddings: result of encoding every ``skills_text`` value,
            in the same row order as ``df_jobs``.
    """

    def __init__(
        self,
        num_jobs: int = 1000,
        seed: Optional[int] = None,
        model_name: str = 'all-MiniLM-L6-v2',
    ):
        """Generate the jobs, load the embedding model and encode all jobs.

        Args:
            num_jobs: how many job postings to generate (must be >= 1).
            seed: optional seed for reproducible job generation; when
                ``None`` the module-level ``random`` generator is used.
            model_name: name passed to ``SentenceTransformer``.

        Raises:
            ValueError: if ``num_jobs`` is less than 1.
        """
        self.jobs = self._generate_job_database(num_jobs=num_jobs, seed=seed)
        self.df_jobs = pd.DataFrame(self.jobs)

        # Prepare a textual representation of skills for embeddings
        self.df_jobs['skills_text'] = self.df_jobs['requirements'].apply(", ".join)

        # Load lightweight embedding model
        self.model = SentenceTransformer(model_name)

        # Encode all job skills in advance so each query only encodes the user input
        self.job_embeddings = self.model.encode(
            self.df_jobs['skills_text'].tolist(),
            convert_to_tensor=True,
        )

    def _generate_job_database(
        self,
        num_jobs: int = 1000,
        seed: Optional[int] = None,
    ) -> List[Dict]:
        """Generate a minimal example database; replace with your full database.

        Jobs are split as evenly as possible across the template categories
        (earlier categories absorb the remainder). Within a category the
        templates and title variations are cycled by position, while the
        experience level, salary range, extra skills and location are drawn
        at random.

        Args:
            num_jobs: total number of jobs to generate (must be >= 1).
            seed: optional seed; when given, a private ``random.Random(seed)``
                is used so the output is reproducible. When ``None`` the
                module-level ``random`` generator is used.

        Returns:
            A list of dicts with keys ``id``, ``title``, ``description``,
            ``requirements``, ``experience_level``, ``salary_range``,
            ``category`` and ``location``.

        Raises:
            ValueError: if ``num_jobs`` is less than 1.
        """
        if num_jobs < 1:
            raise ValueError(f"num_jobs must be at least 1, got {num_jobs}")

        # ``random`` (the module) and ``random.Random`` expose the same
        # choice/sample/randint API, so either can serve as the generator.
        rng = random if seed is None else random.Random(seed)

        job_templates = {
            "Technology": [
                {
                    "title": "Software Engineer",
                    "desc": "Design and develop software applications",
                    "skills": ["Python", "Java", "JavaScript", "Git", "Agile", "Problem Solving"],
                },
                {
                    "title": "Data Scientist",
                    "desc": "Analyze complex data to extract business insights",
                    "skills": ["Python", "R", "Machine Learning", "SQL", "Statistics", "Pandas"],
                },
            ],
            "Finance": [
                {
                    "title": "Financial Analyst",
                    "desc": "Analyze financial data and market trends",
                    "skills": [
                        "Financial Modeling", "Excel", "Data Analysis",
                        "Financial Reporting", "Market Research",
                    ],
                },
            ],
        }

        experience_levels = ["Entry-level", "Mid-level", "Senior", "Lead/Principal"]

        salary_ranges = {
            "Entry-level": ["$35k-$50k", "$40k-$55k"],
            "Mid-level": ["$55k-$75k", "$60k-$80k"],
            "Senior": ["$80k-$110k", "$90k-$120k"],
            "Lead/Principal": ["$120k-$150k", "$130k-$160k"],
        }

        additional_skills = {
            "Technology": ["Debugging", "Code Review", "System Design"],
            "Finance": ["Financial Regulations", "Risk Management", "Excel Advanced"],
        }

        # Loop-invariant data, built once instead of once per generated job.
        locations = [
            "Remote", "New York, NY", "San Francisco, CA", "Chicago, IL",
            "Austin, TX", "Seattle, WA", "Boston, MA", "Los Angeles, CA",
            "Denver, CO", "Atlanta, GA", "Miami, FL", "Portland, OR",
        ]
        title_patterns = ["{}", "Senior {}", "Junior {}", "Lead {}", "{} Specialist"]

        jobs = []
        job_id = 1
        categories = list(job_templates)
        jobs_per_category, remaining_jobs = divmod(num_jobs, len(categories))

        for i, category in enumerate(categories):
            templates = job_templates[category]
            extras_pool = additional_skills[category]
            # The first ``remaining_jobs`` categories take one extra job each.
            jobs_for_this_category = jobs_per_category + (1 if i < remaining_jobs else 0)

            for j in range(jobs_for_this_category):
                template = templates[j % len(templates)]
                title = title_patterns[j % len(title_patterns)].format(template["title"])

                exp_level = rng.choice(experience_levels)
                salary = rng.choice(salary_ranges[exp_level])

                extra_count = rng.randint(1, min(3, len(extras_pool)))
                extra_skills = rng.sample(extras_pool, extra_count)
                # dict.fromkeys de-duplicates while keeping order; cap at 8 skills.
                unique_skills = list(dict.fromkeys(template["skills"] + extra_skills))[:8]

                jobs.append({
                    "id": job_id,
                    "title": title,
                    "description": template["desc"],
                    "requirements": unique_skills,
                    "experience_level": exp_level,
                    "salary_range": salary,
                    "category": category,
                    "location": rng.choice(locations),
                })
                job_id += 1

        return jobs


# -----------------------------
# Job Matching Function
# -----------------------------
def match_jobs_embeddings(user_skills: List[str], db: JobDatabase, top_n=5):
    """Return the jobs whose skill embeddings are most similar to the user's skills.

    Args:
        user_skills: skill strings; blank entries are ignored and ``None``
            is treated as an empty list.
        db: the ``JobDatabase`` providing the model, embeddings and job table.
        top_n: maximum number of jobs to return; capped at the number of
            jobs in ``db`` so ``topk`` is never asked for more rows than exist.

    Returns:
        A ``pandas.DataFrame`` with the columns ``title``, ``description``,
        ``requirements``, ``experience_level``, ``salary_range`` and
        ``location``, ordered as returned by ``topk`` on the cosine scores.
        If no non-blank skill was supplied, a single placeholder row titled
        "No skills entered" is returned instead.
    """
    # Convert user input into single string
    skills_text = ", ".join(s.strip() for s in (user_skills or []) if s.strip())
    if not skills_text:
        return pd.DataFrame([{
            "title": "No skills entered",
            "description": "",
            "requirements": "",
            "experience_level": "",
            "salary_range": "",
            "location": "",
        }])

    # Encode user skills
    user_embedding = db.model.encode(skills_text, convert_to_tensor=True)

    # Compute cosine similarity against every precomputed job embedding
    cos_scores = util.cos_sim(user_embedding, db.job_embeddings)[0]

    # Get top N matches, never requesting more than the catalogue holds
    k = min(top_n, len(db.df_jobs))
    indices = cos_scores.topk(k).indices.tolist()

    matched_jobs = db.df_jobs.iloc[indices]
    return matched_jobs[[
        'title', 'description', 'requirements',
        'experience_level', 'salary_range', 'location',
    ]]


# -----------------------------
# Gradio Interface
# -----------------------------
# Internal result column -> header shown in the UI table. Used both for the
# component's headers and for renaming the returned DataFrame's columns.
_DISPLAY_HEADERS = {
    "title": "Title",
    "description": "Description",
    "requirements": "Requirements",
    "experience_level": "Experience Level",
    "salary_range": "Salary",
    "location": "Location",
}

db = JobDatabase()


def find_jobs_ui(user_skills_text):
    """Gradio callback: parse comma-separated skills and return a display table.

    Args:
        user_skills_text: raw textbox content; ``None`` is treated as empty.

    Returns:
        The ``match_jobs_embeddings`` result with list-valued requirements
        joined into a single string and the columns renamed to the display
        headers.
    """
    user_skills = [skill.strip() for skill in (user_skills_text or "").split(",")]
    results = match_jobs_embeddings(user_skills, db).copy()

    # Requirements are lists for real matches but a plain string in the
    # placeholder row; render both as text so every cell has the same type.
    results["requirements"] = results["requirements"].apply(
        lambda req: ", ".join(req) if isinstance(req, (list, tuple)) else req
    )

    # The returned DataFrame carries its own column names, so rename them to
    # the display headers to keep the table consistent with the component.
    return results.rename(columns=_DISPLAY_HEADERS)


iface = gr.Interface(
    fn=find_jobs_ui,
    inputs=gr.Textbox(
        lines=2,
        placeholder="Enter skills separated by commas, e.g. Python, SQL, Excel",
    ),
    outputs=gr.Dataframe(headers=list(_DISPLAY_HEADERS.values())),
    title="Job Finder with AI Embeddings",
    description="Enter your skills and get top matching jobs using AI embeddings.",
)

if __name__ == "__main__":
    iface.launch()