Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| from sklearn.feature_extraction.text import TfidfVectorizer | |
| from sklearn.metrics.pairwise import cosine_similarity | |
class SkillMapper:
    """Match a list of skills to job titles using TF-IDF cosine similarity.

    The reference data is a CSV that must provide at least the columns
    ``job_title`` and ``skills_required``. Each row's skills text is
    vectorised once at construction time; queries are projected into the
    same TF-IDF space and ranked by cosine similarity.
    """

    def __init__(self, csv_path="job_titles_classification_extended.csv"):
        """Load the reference CSV and fit the TF-IDF model.

        Args:
            csv_path: Location of the job-title/skills CSV. Defaults to the
                file name the class has always used, so existing callers
                that pass no argument are unaffected.
        """
        # Rows with any missing value are discarded before fitting.
        self.df = pd.read_csv(csv_path).dropna()

        # Normalise case so stored skills and queries share one vocabulary.
        self.df["skills_required"] = self.df["skills_required"].str.lower()

        # Unigrams and bigrams, so multi-word skills can match as a unit.
        self.vectorizer = TfidfVectorizer(ngram_range=(1, 2))
        self.skill_matrix = self.vectorizer.fit_transform(
            self.df["skills_required"]
        )

    def predict_top_roles(self, skills_list, top_n: int = 3) -> list:
        """Return up to ``top_n`` best-matching job titles with their scores.

        Args:
            skills_list: Iterable of skill strings. A single string is
                treated as one skill rather than being split into
                characters. ``None`` entries are ignored.
            top_n: Maximum number of results. Values ``<= 0`` yield ``[]``.

        Returns:
            A list of ``(job_title, score)`` tuples ordered from best to
            worst match, with ``score`` rounded to three decimals. Rows
            whose similarity is zero are not matches and are omitted, so
            the list may be shorter than ``top_n`` (or empty).
        """
        # Guard top_n explicitly: the slice [-0:] would select *every* row.
        if not skills_list or top_n <= 0:
            return []

        # A bare string would otherwise be joined character by character.
        if isinstance(skills_list, str):
            skills_list = [skills_list]

        skills_text = " ".join(
            str(skill) for skill in skills_list if skill is not None
        ).lower().strip()
        if not skills_text:
            return []

        user_vec = self.vectorizer.transform([skills_text])
        similarities = cosine_similarity(user_vec, self.skill_matrix)[0]

        # argsort is ascending: take the tail, then flip to best-first.
        ranked = similarities.argsort()[-top_n:][::-1]

        titles = self.df["job_title"]
        results = []
        for idx in ranked:
            score = float(similarities[idx])
            if score <= 0.0:
                # Ranked best-first, so everything after this is zero too.
                break
            results.append((titles.iloc[idx], round(score, 3)))
        return results

    def predict_role(self, skills_list) -> tuple:
        """Return the single best ``(job_title, score)`` match.

        Falls back to ``("Unknown Job Title", 0.0)`` when no job title has
        any similarity to the supplied skills (including empty input).
        """
        top_roles = self.predict_top_roles(skills_list, top_n=1)
        return top_roles[0] if top_roles else ("Unknown Job Title", 0.0)