Spaces:
Build error
Build error
| import gradio as gr | |
| import pandas as pd | |
| import random | |
| from typing import List, Dict | |
| from sentence_transformers import SentenceTransformer, util | |
| # ----------------------------- | |
| # Job Database Generation | |
| # ----------------------------- | |
class JobDatabase:
    """Synthetic job catalogue with precomputed sentence embeddings of job skills.

    The catalogue is generated from the small template tables below (replace
    them with a fuller database as needed), loaded into a DataFrame, and each
    job's skill list is encoded once so lookups only have to encode the query.
    """

    # Seed postings per category; each job is derived from one of these.
    JOB_TEMPLATES = {
        "Technology": [
            {"title": "Software Engineer", "desc": "Design and develop software applications",
             "skills": ["Python", "Java", "JavaScript", "Git", "Agile", "Problem Solving"]},
            {"title": "Data Scientist", "desc": "Analyze complex data to extract business insights",
             "skills": ["Python", "R", "Machine Learning", "SQL", "Statistics", "Pandas"]},
        ],
        "Finance": [
            {"title": "Financial Analyst", "desc": "Analyze financial data and market trends",
             "skills": ["Financial Modeling", "Excel", "Data Analysis", "Financial Reporting",
                        "Market Research"]},
        ],
    }
    EXPERIENCE_LEVELS = ["Entry-level", "Mid-level", "Senior", "Lead/Principal"]
    # Candidate salary bands for each experience level.
    SALARY_RANGES = {
        "Entry-level": ["$35k-$50k", "$40k-$55k"],
        "Mid-level": ["$55k-$75k", "$60k-$80k"],
        "Senior": ["$80k-$110k", "$90k-$120k"],
        "Lead/Principal": ["$120k-$150k", "$130k-$160k"],
    }
    # Per-category pool from which 1-3 extra skills are drawn for every job.
    ADDITIONAL_SKILLS = {
        "Technology": ["Debugging", "Code Review", "System Design"],
        "Finance": ["Financial Regulations", "Risk Management", "Excel Advanced"],
    }
    LOCATIONS = [
        "Remote", "New York, NY", "San Francisco, CA", "Chicago, IL",
        "Austin, TX", "Seattle, WA", "Boston, MA", "Los Angeles, CA",
        "Denver, CO", "Atlanta, GA", "Miami, FL", "Portland, OR",
    ]
    # Upper bound on the number of skills stored in a job's "requirements".
    MAX_SKILLS_PER_JOB = 8

    def __init__(self, total_jobs: int = 1000, model_name: str = 'all-MiniLM-L6-v2', seed=None):
        """Generate the jobs and precompute their skill embeddings.

        Args:
            total_jobs: Number of synthetic jobs to generate (at least 1).
            model_name: Name passed to ``SentenceTransformer``.
            seed: Optional seed for reproducible generation; ``None`` draws
                from the global ``random`` module.

        Raises:
            ValueError: If ``total_jobs`` is smaller than 1.
        """
        # List of job dicts, also kept as a DataFrame for positional lookups.
        self.jobs = self._generate_job_database(total_jobs=total_jobs, seed=seed)
        self.df_jobs = pd.DataFrame(self.jobs)
        # Comma-joined skills: the text that is actually embedded for each job.
        self.df_jobs['skills_text'] = self.df_jobs['requirements'].apply(", ".join)
        self.model = SentenceTransformer(model_name)
        # Encode every job once up front so queries only encode the user input.
        self.job_embeddings = self.model.encode(
            self.df_jobs['skills_text'].tolist(), convert_to_tensor=True
        )

    def _generate_job_database(self, total_jobs: int = 1000, seed=None) -> List[Dict]:
        """Build ``total_jobs`` synthetic job dicts spread evenly over the categories.

        Jobs are split as evenly as possible between categories (earlier
        categories absorb the remainder). Within a category the templates and
        the title variations are cycled by position, while experience level,
        salary band, extra skills and location are drawn at random.

        Args:
            total_jobs: Number of jobs to generate (at least 1).
            seed: Optional seed for a private random generator; ``None`` uses
                the global ``random`` module.

        Returns:
            A list of job dicts with keys ``id``, ``title``, ``description``,
            ``requirements``, ``experience_level``, ``salary_range``,
            ``category`` and ``location``. Ids run from 1 upward.

        Raises:
            ValueError: If ``total_jobs`` is smaller than 1.
        """
        if total_jobs < 1:
            raise ValueError("total_jobs must be a positive integer")

        # The module itself exposes choice/sample/randint, so it can stand in
        # for a Random instance when no seed is requested.
        rng = random if seed is None else random.Random(seed)

        categories = list(self.JOB_TEMPLATES)
        base_count, leftover = divmod(total_jobs, len(categories))

        jobs = []
        for i, category in enumerate(categories):
            templates = self.JOB_TEMPLATES[category]
            extras_pool = self.ADDITIONAL_SKILLS[category]
            quota = base_count + (1 if i < leftover else 0)
            for j in range(quota):
                template = templates[j % len(templates)]
                base_title = template["title"]
                title_variations = [
                    base_title,
                    f"Senior {base_title}",
                    f"Junior {base_title}",
                    f"Lead {base_title}",
                    f"{base_title} Specialist",
                ]
                exp_level = rng.choice(self.EXPERIENCE_LEVELS)
                salary = rng.choice(self.SALARY_RANGES[exp_level])
                extra_count = rng.randint(1, min(3, len(extras_pool)))
                extra_skills = rng.sample(extras_pool, extra_count)
                # dict.fromkeys de-duplicates while keeping first-seen order.
                combined = list(dict.fromkeys(template["skills"] + extra_skills))
                jobs.append({
                    "id": len(jobs) + 1,
                    "title": title_variations[j % len(title_variations)],
                    "description": template["desc"],
                    "requirements": combined[:self.MAX_SKILLS_PER_JOB],
                    "experience_level": exp_level,
                    "salary_range": salary,
                    "category": category,
                    "location": rng.choice(self.LOCATIONS),
                })
        return jobs
| # ----------------------------- | |
| # Job Matching Function | |
| # ----------------------------- | |
def match_jobs_embeddings(user_skills: List[str], db: "JobDatabase", top_n=5):
    """Return the jobs whose skill embeddings are most similar to the user's skills.

    Args:
        user_skills: Skill names; blank entries are ignored. ``None`` is
            treated like an empty list.
        db: Job database providing ``model``, ``job_embeddings`` and ``df_jobs``.
        top_n: Maximum number of jobs to return. Values larger than the number
            of jobs are clamped; values of zero or less yield an empty frame.

    Returns:
        A DataFrame with the columns title, description, requirements,
        experience_level, salary_range and location, in the order returned by
        ``topk`` on the cosine scores. When no usable skill is supplied, a
        single placeholder row titled "No skills entered" is returned instead.
    """
    columns = ['title', 'description', 'requirements', 'experience_level', 'salary_range', 'location']

    # Collapse the skills into the same comma-joined form used for the jobs.
    skills_text = ", ".join(s.strip() for s in (user_skills or []) if s.strip())
    if not skills_text:
        placeholder = dict.fromkeys(columns, "")
        placeholder["title"] = "No skills entered"
        return pd.DataFrame([placeholder])

    if top_n <= 0:
        # Nothing requested: skip the encoding work and return an empty table.
        return pd.DataFrame(columns=columns)

    user_embedding = db.model.encode(skills_text, convert_to_tensor=True)
    # cos_sim returns a (1, n_jobs) matrix; row 0 holds the per-job scores.
    cos_scores = util.cos_sim(user_embedding, db.job_embeddings)[0]

    # topk raises if k exceeds the number of scores, so clamp it first.
    k = min(top_n, len(cos_scores))
    indices = cos_scores.topk(k).indices.tolist()
    return db.df_jobs.iloc[indices][columns]
| # ----------------------------- | |
| # Gradio Interface | |
| # ----------------------------- | |
# Build the database once at import time: constructing it loads the embedding
# model and encodes every job, so all UI requests share this single instance.
db = JobDatabase()
def find_jobs_ui(user_skills_text):
    """Gradio callback: split comma-separated skills and return the matching jobs.

    Args:
        user_skills_text: Raw textbox content such as "Python, SQL, Excel".
            ``None`` is treated as an empty string.

    Returns:
        The DataFrame produced by ``match_jobs_embeddings`` for the module-level
        ``db``.
    """
    # Guard against a missing value so .split() cannot fail on None.
    raw_text = user_skills_text or ""
    user_skills = [skill.strip() for skill in raw_text.split(",")]
    return match_jobs_embeddings(user_skills, db)
# Two-line free-text input for the comma-separated skills.
_skills_box = gr.Textbox(
    lines=2,
    placeholder="Enter skills separated by commas, e.g. Python, SQL, Excel",
)

# Table that displays the jobs returned by find_jobs_ui.
_results_table = gr.Dataframe(
    headers=["Title", "Description", "Requirements", "Experience Level", "Salary", "Location"],
)

iface = gr.Interface(
    fn=find_jobs_ui,
    inputs=_skills_box,
    outputs=_results_table,
    title="Job Finder with AI Embeddings",
    description="Enter your skills and get top matching jobs using AI embeddings.",
)

# Only start the web server when the file is run as a script.
if __name__ == "__main__":
    iface.launch()