File size: 5,940 Bytes
ee4cdc1
 
c76eee7
c4600bb
c76eee7
ee4cdc1
c76eee7
 
 
 
 
 
 
 
 
 
 
 
 
c4600bb
c76eee7
 
 
 
 
 
 
 
 
 
 
 
 
 
ee4cdc1
c76eee7
 
 
 
 
 
 
 
 
 
 
ee4cdc1
c76eee7
 
 
 
 
c4600bb
c76eee7
 
 
ee4cdc1
c76eee7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ee4cdc1
c76eee7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ee4cdc1
c76eee7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ee4cdc1
c76eee7
 
 
 
ee4cdc1
c76eee7
 
 
ee4cdc1
c76eee7
 
 
 
 
 
 
ee4cdc1
 
c76eee7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import gradio as gr
import pandas as pd
import random
from typing import List, Dict
from sentence_transformers import SentenceTransformer, util

# -----------------------------
# Job Database Generation
# -----------------------------
class JobDatabase:
    """Synthetic job catalogue with a precomputed embedding per job's skill list.

    Construction generates the postings, mirrors them into a DataFrame, loads
    a sentence-embedding model and encodes every posting's skills once so
    that later queries only need to embed the user's input.
    """

    def __init__(self, num_jobs: int = 1000, model_name: str = 'all-MiniLM-L6-v2', seed=None):
        """Build the job table and encode all skill lists.

        Args:
            num_jobs: How many synthetic postings to generate (at least 1).
            model_name: Model identifier handed to ``SentenceTransformer``.
            seed: Optional seed for reproducible generation. ``None`` draws
                from the module-level ``random`` state.

        Raises:
            ValueError: If ``num_jobs`` is smaller than 1.
        """
        # List of job dicts, as produced by _generate_job_database().
        self.jobs = self._generate_job_database(num_jobs=num_jobs, seed=seed)
        # Same data as a DataFrame; row position lines up with job_embeddings.
        self.df_jobs = pd.DataFrame(self.jobs)
        # Prepare a textual representation of skills for embeddings
        self.df_jobs['skills_text'] = self.df_jobs['requirements'].apply(", ".join)
        # Load lightweight embedding model
        self.model = SentenceTransformer(model_name)
        # Encode all job skills in advance
        self.job_embeddings = self.model.encode(self.df_jobs['skills_text'].tolist(), convert_to_tensor=True)

    def _generate_job_database(self, num_jobs: int = 1000, seed=None) -> List[Dict]:
        """Generate a minimal example database; replace with your full database.

        Postings are split as evenly as possible across the categories, with
        earlier categories receiving the remainder. Within a category the
        templates and title variations are cycled by position, while
        experience level, salary band, extra skills and location are random.

        Args:
            num_jobs: Total number of postings to create (at least 1).
            seed: Optional seed. When given, a private ``random.Random`` is
                used so the result is reproducible and the global random
                state is left untouched.

        Returns:
            A list of dicts with keys ``id``, ``title``, ``description``,
            ``requirements``, ``experience_level``, ``salary_range``,
            ``category`` and ``location``. Ids run from 1 to ``num_jobs``.

        Raises:
            ValueError: If ``num_jobs`` is smaller than 1.
        """
        if num_jobs < 1:
            raise ValueError("num_jobs must be at least 1")

        # Keep using the shared module-level generator when no seed is given so
        # that callers relying on a prior random.seed() see the same draws.
        rng = random if seed is None else random.Random(seed)

        job_templates = {
            "Technology": [
                {"title": "Software Engineer", "desc": "Design and develop software applications",
                 "skills": ["Python", "Java", "JavaScript", "Git", "Agile", "Problem Solving"]},
                {"title": "Data Scientist", "desc": "Analyze complex data to extract business insights",
                 "skills": ["Python", "R", "Machine Learning", "SQL", "Statistics", "Pandas"]}
            ],
            "Finance": [
                {"title": "Financial Analyst", "desc": "Analyze financial data and market trends",
                 "skills": ["Financial Modeling", "Excel", "Data Analysis", "Financial Reporting", "Market Research"]}
            ]
        }

        experience_levels = ["Entry-level", "Mid-level", "Senior", "Lead/Principal"]
        salary_ranges = {
            "Entry-level": ["$35k-$50k", "$40k-$55k"],
            "Mid-level": ["$55k-$75k", "$60k-$80k"],
            "Senior": ["$80k-$110k", "$90k-$120k"],
            "Lead/Principal": ["$120k-$150k", "$130k-$160k"]
        }
        additional_skills = {
            "Technology": ["Debugging", "Code Review", "System Design"],
            "Finance": ["Financial Regulations", "Risk Management", "Excel Advanced"]
        }
        locations = [
            "Remote", "New York, NY", "San Francisco, CA", "Chicago, IL",
            "Austin, TX", "Seattle, WA", "Boston, MA", "Los Angeles, CA",
            "Denver, CO", "Atlanta, GA", "Miami, FL", "Portland, OR"
        ]

        jobs = []
        categories = list(job_templates)
        jobs_per_category, remaining_jobs = divmod(num_jobs, len(categories))

        for i, category in enumerate(categories):
            templates = job_templates[category]
            extras_pool = additional_skills[category]
            max_extras = min(3, len(extras_pool))
            # The first `remaining_jobs` categories absorb the division remainder.
            jobs_for_this_category = jobs_per_category + (1 if i < remaining_jobs else 0)

            for j in range(jobs_for_this_category):
                template = templates[j % len(templates)]
                base_title = template["title"]
                title_variations = [
                    base_title,
                    f"Senior {base_title}",
                    f"Junior {base_title}",
                    f"Lead {base_title}",
                    f"{base_title} Specialist"
                ]
                # NOTE: the seniority word in the title is picked by position and
                # is independent of the randomly drawn experience level below.
                title = title_variations[j % len(title_variations)]

                # Draw order (level, salary, extra count, extras, location) is
                # kept stable so seeded runs stay reproducible.
                exp_level = rng.choice(experience_levels)
                salary = rng.choice(salary_ranges[exp_level])
                extra_count = rng.randint(1, max_extras)
                extra_skills = rng.sample(extras_pool, extra_count)
                # De-duplicate while preserving order, then cap at 8 skills.
                unique_skills = list(dict.fromkeys(template["skills"] + extra_skills))[:8]

                jobs.append({
                    "id": len(jobs) + 1,
                    "title": title,
                    "description": template["desc"],
                    "requirements": unique_skills,
                    "experience_level": exp_level,
                    "salary_range": salary,
                    "category": category,
                    "location": rng.choice(locations)
                })
        return jobs

# -----------------------------
# Job Matching Function
# -----------------------------
def match_jobs_embeddings(user_skills: List[str], db: "JobDatabase", top_n=5):
    """Return the jobs whose skill embeddings are closest to the user's skills.

    Args:
        user_skills: Skill names entered by the user. Blank entries are
            ignored; ``None`` is treated like an empty list.
        db: Job database providing ``model``, ``job_embeddings`` and
            ``df_jobs``.
        top_n: Maximum number of matches to return. Values larger than the
            number of jobs are clamped; values below zero yield no rows.

    Returns:
        A DataFrame with the columns ``title``, ``description``,
        ``requirements``, ``experience_level``, ``salary_range`` and
        ``location``. When no usable skill was supplied, a single
        placeholder row titled "No skills entered" is returned instead.
    """
    columns = ['title', 'description', 'requirements', 'experience_level', 'salary_range', 'location']

    # Convert user input into single string
    cleaned = [s.strip() for s in (user_skills or []) if s.strip()]
    skills_text = ", ".join(cleaned)
    if not skills_text:
        placeholder = dict.fromkeys(columns, "")
        placeholder["title"] = "No skills entered"
        return pd.DataFrame([placeholder])

    # Encode user skills
    user_embedding = db.model.encode(skills_text, convert_to_tensor=True)
    # Compute cosine similarity; row 0 holds the scores for the single query
    cos_scores = util.cos_sim(user_embedding, db.job_embeddings)[0]
    # topk raises if k exceeds the number of scores (or is negative), so clamp
    k = max(0, min(top_n, cos_scores.shape[0]))
    indices = cos_scores.topk(k).indices.tolist()

    # Embeddings were encoded in DataFrame row order, so positions line up.
    return db.df_jobs.iloc[indices][columns]

# -----------------------------
# Gradio Interface
# -----------------------------
# Build the shared job database once at import time: constructing it generates
# the postings, loads the embedding model and encodes every job's skill text,
# so each UI request only has to embed the user's query.
db = JobDatabase()

def find_jobs_ui(user_skills_text):
    """Gradio callback: parse a comma-separated skill string and return matches.

    Args:
        user_skills_text: Raw textbox content, e.g. ``"Python, SQL, Excel"``.
            ``None`` is treated as an empty string so a missing textbox
            value does not crash the callback.

    Returns:
        The DataFrame produced by ``match_jobs_embeddings`` for the
        module-level ``db``.
    """
    raw_text = user_skills_text or ""
    user_skills = [skill.strip() for skill in raw_text.split(",")]
    return match_jobs_embeddings(user_skills, db)

# Input: free-text box where the user lists skills separated by commas.
_skills_box = gr.Textbox(
    lines=2,
    placeholder="Enter skills separated by commas, e.g. Python, SQL, Excel",
)

# Output: table showing the matched jobs.
_results_table = gr.Dataframe(
    headers=["Title", "Description", "Requirements", "Experience Level", "Salary", "Location"],
)

# Wire the callback to its input and output components.
iface = gr.Interface(
    fn=find_jobs_ui,
    inputs=_skills_box,
    outputs=_results_table,
    title="Job Finder with AI Embeddings",
    description="Enter your skills and get top matching jobs using AI embeddings.",
)

# Only start the web server when executed as a script, not on import.
if __name__ == "__main__":
    iface.launch()