# project/models/skill_mapper.py
# venkataashok's picture
# Upload 3 files
# 8f04bd0 verified
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
class SkillMapper:
    """Rank job titles by how closely their required skills match a skill list.

    The job table is read from a CSV file. Its ``skills_required`` column is
    lower-cased and vectorised with TF-IDF over unigrams and bigrams; queries
    are transformed with the same vectoriser and ranked by cosine similarity.
    """

    # File name used when no explicit path is given (relative to the CWD).
    DEFAULT_CSV_PATH = "job_titles_classification_extended.csv"
    # Fallback returned by ``predict_role`` when nothing matches.
    UNKNOWN_ROLE = ("Unknown Job Title", 0.0)

    def __init__(self, csv_path=DEFAULT_CSV_PATH):
        """Load the job table and fit the TF-IDF model on its skills column.

        Args:
            csv_path: Path of the CSV file to load. It must provide the
                ``skills_required`` and ``job_title`` columns. Defaults to
                ``DEFAULT_CSV_PATH``.
        """
        # Rows with a missing value in any column are discarded.
        self.df = pd.read_csv(csv_path).dropna()

        # Normalise case so the stored text matches the lower-cased queries.
        self.df["skills_required"] = self.df["skills_required"].str.lower()

        # Unigrams + bigrams let multi-word skills contribute as a unit.
        self.vectorizer = TfidfVectorizer(ngram_range=(1, 2))
        self.skill_matrix = self.vectorizer.fit_transform(
            self.df["skills_required"]
        )

    def predict_top_roles(self, skills_list, top_n: int = 3) -> list:
        """Return the ``top_n`` most similar job titles with their scores.

        Args:
            skills_list: Iterable of skills. A single string is treated as
                one skill rather than being split into characters.
            top_n: Maximum number of results. Values <= 0 yield ``[]``.

        Returns:
            A list of ``(job_title, score)`` tuples ordered from most to
            least similar, where ``score`` is the cosine similarity rounded
            to three decimals. Empty when ``skills_list`` is empty or
            ``top_n`` is not positive.
        """
        # ``[-0:]`` would select every row, so reject non-positive top_n
        # explicitly instead of relying on the slice.
        if not skills_list or top_n <= 0:
            return []

        # Joining a bare string would interleave spaces between characters.
        if isinstance(skills_list, str):
            skills_list = [skills_list]

        # ``str()`` keeps non-string entries from raising inside ``join``.
        skills_text = " ".join(str(skill) for skill in skills_list).lower()
        user_vec = self.vectorizer.transform([skills_text])
        similarities = cosine_similarity(user_vec, self.skill_matrix)[0]

        # argsort is ascending: take the last ``top_n`` and reverse them.
        top_indices = similarities.argsort()[-top_n:][::-1]

        # Positional lookup: ``dropna`` leaves gaps in the index labels,
        # while the similarity vector is aligned by row position.
        job_titles = self.df["job_title"]
        return [
            (job_titles.iloc[idx], round(float(similarities[idx]), 3))
            for idx in top_indices
        ]

    def predict_role(self, skills_list) -> tuple:
        """Return the single best ``(job_title, score)`` match.

        Returns ``("Unknown Job Title", 0.0)`` when ``skills_list`` is empty
        or when the best score is zero, i.e. the skills share no term with
        any row and the "best" row would be an arbitrary pick.
        """
        top_roles = self.predict_top_roles(skills_list, top_n=1)
        if top_roles and top_roles[0][1] > 0:
            return top_roles[0]
        return self.UNKNOWN_ROLE