# harshlaghave's picture
# Update app.py
# e32d554 verified
import os
import re
os.environ["WANDB_DISABLED"] = "true" # Disable online logging
import gradio as gr
from sentence_transformers import SentenceTransformer, util
import pandas as pd
import numpy as np
# ------------------------
# Load CSV and pre-fine-tuned model
# ------------------------
# Both assets are resolved relative to this file so that loading them does not
# depend on the working directory the app happens to be launched from.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
MODEL_PATH = os.path.join(BASE_DIR, "fine_tuned_internship_model")
CSV_PATH = os.path.join(BASE_DIR, "converted (1).csv")

# Sentence-embedding model used for every similarity computation below.
model = SentenceTransformer(MODEL_PATH)
# Internship listings; later code reads the Required_Skills, Student_Education,
# Student_Interest, Location, Company and Position columns.
df = pd.read_csv(CSV_PATH)
# ------------------------
# Normalization functions
# ------------------------
# Abbreviation -> canonical term. Keys are matched against lower-cased text
# from which "." and "," have already been removed.
SKILL_MAP = {
    "js": "javascript", "reactjs": "react", "nodejs": "node.js",
    "cpp": "c++", "btech": "bachelor of technology",
    "cs": "computer science", "ece": "electronics and communication",
    "ml": "machine learning", "ai": "artificial intelligence",
    "it": "information technology",
}

# A single alternation of every abbreviation, anchored on word boundaries so
# that only whole tokens are expanded ("ml" matches, the "ml" in "html" does
# not). Longest keys come first so "reactjs" is preferred over "js".
_SKILL_TOKEN_RE = re.compile(
    r"\b(?:"
    + "|".join(re.escape(key) for key in sorted(SKILL_MAP, key=len, reverse=True))
    + r")\b"
)


def normalize_text(text):
    """Return *text* lower-cased, stripped of "." and ",", with abbreviations expanded.

    Abbreviations listed in ``SKILL_MAP`` are replaced only when they appear
    as whole words, and all replacements happen in one pass so an expansion
    is never re-scanned for further abbreviations.

    Args:
        text: The raw string. Falsy values and float NaN (what pandas yields
            for an empty CSV cell) are treated as missing; other non-string
            values are converted with ``str``.

    Returns:
        The normalized string, or ``""`` when the input is missing.
    """
    if not text:
        return ""
    if not isinstance(text, str):
        if text != text:  # NaN is the only value that is unequal to itself
            return ""
        text = str(text)
    cleaned = text.lower().replace(".", "").replace(",", "")
    expanded = _SKILL_TOKEN_RE.sub(lambda match: SKILL_MAP[match.group(0)], cleaned)
    return expanded.strip()
def normalize_skills(skills_str):
    """Split a comma-separated skills string into normalized skill names.

    Each comma-separated piece is trimmed and passed through
    ``normalize_text``; pieces that are blank before or after normalization
    are dropped.

    Args:
        skills_str: Comma-separated skills. Any non-string value (``None``,
            or the float NaN pandas yields for an empty cell) is treated as
            "no skills".

    Returns:
        A list of non-empty normalized skill strings, in input order.
    """
    if not isinstance(skills_str, str):
        return []
    normalized = []
    for piece in skills_str.split(","):
        piece = piece.strip()
        if not piece:
            continue
        skill = normalize_text(piece)
        if skill:
            normalized.append(skill)
    return normalized
def location_similarity(candidate_loc, internship_loc):
    """Score how well two location strings match, on a 0-100 scale.

    Comparison is case-insensitive and ignores surrounding whitespace.

    Args:
        candidate_loc: Location entered by the candidate.
        internship_loc: Location of the internship listing.

    Returns:
        100 when the locations are identical, 70 when only their first
        whitespace-separated word matches, otherwise 0. A missing location on
        either side (non-string value, empty or whitespace-only string)
        scores 0 instead of raising.
    """
    if not isinstance(candidate_loc, str) or not isinstance(internship_loc, str):
        return 0
    candidate_loc = candidate_loc.lower().strip()
    internship_loc = internship_loc.lower().strip()
    # Guard before split()[0]: an empty string splits to [] and has no word 0.
    if not candidate_loc or not internship_loc:
        return 0
    if candidate_loc == internship_loc:
        return 100
    if candidate_loc.split()[0] == internship_loc.split()[0]:
        return 70
    return 0
# ------------------------
# Cache embeddings for all internships
# ------------------------
# Every listing is embedded once at import time so that a request only has to
# embed the candidate's own inputs.
cached_internships = []
for _, listing in df.iterrows():
    required_skills = normalize_skills(listing['Required_Skills'])
    entry = {
        "row": listing,
        # One embedding per required skill (not one for the whole skill list).
        "skill_embs": [
            model.encode(skill, convert_to_tensor=True) for skill in required_skills
        ],
        "edu_emb": model.encode(
            normalize_text(listing['Student_Education']), convert_to_tensor=True
        ),
        "interest_emb": model.encode(
            normalize_text(listing['Student_Interest']), convert_to_tensor=True
        ),
    }
    cached_internships.append(entry)

# Linear scoring model: overall = weights . [skills, education, interest, location] + intercept
weights = np.array([0.4, 0.3, 0.2, 0.1])  # Skills, Education, Interest, Location
intercept = 0
# ------------------------
# Matching function
# ------------------------
def match_internship(skills, education, interest, location):
    """Rank the cached internships against one candidate and return the best five.

    Args:
        skills: Comma-separated skills string.
        education: Free-text education description.
        interest: Free-text interest / field description.
        location: Candidate location, passed as-is to ``location_similarity``.

    Returns:
        A list of at most five dicts, ordered by descending ``Overall_Match``,
        each holding ``Company``, ``Position`` and the four component scores
        (``Skills_Match``, ``Education_Match``, ``Interest_Match``,
        ``Location_Match``) alongside ``Overall_Match``.
    """
    skill_terms = normalize_skills(skills)
    education_text = normalize_text(education)
    interest_text = normalize_text(interest)

    # Embed the candidate once; these vectors are reused for every listing.
    skill_vecs = [model.encode(term, convert_to_tensor=True) for term in skill_terms]
    education_vec = model.encode(education_text, convert_to_tensor=True)
    interest_vec = model.encode(interest_text, convert_to_tensor=True)

    scored = []
    for entry in cached_internships:
        listing = entry["row"]

        # Skills: for each candidate skill take its best match among the
        # listing's skills (0 if the listing has none), then average.
        best_per_skill = [
            max(
                [util.cos_sim(vec, ref).item() for ref in entry["skill_embs"]],
                default=0,
            )
            for vec in skill_vecs
        ]
        if best_per_skill:
            skills_score = np.mean(best_per_skill) * 100
        else:
            skills_score = 0

        education_score = util.cos_sim(education_vec, entry["edu_emb"]).item() * 100
        interest_score = util.cos_sim(interest_vec, entry["interest_emb"]).item() * 100
        location_score = location_similarity(location, listing['Location'])

        # Weighted sum of the four component scores.
        component_scores = [skills_score, education_score, interest_score, location_score]
        total = np.dot(weights, component_scores) + intercept

        scored.append({
            "Company": listing['Company'],
            "Position": listing['Position'],
            "Skills_Match": skills_score,
            "Education_Match": education_score,
            "Interest_Match": interest_score,
            "Location_Match": location_score,
            "Overall_Match": total
        })

    # Highest overall score first; keep only the top five.
    ranked = sorted(scored, key=lambda item: item['Overall_Match'], reverse=True)
    return ranked[:5]
# ------------------------
# Gradio interface
# ------------------------
# One free-text box per match_internship argument, in call order.
inputs = [
    gr.Textbox(label=caption)
    for caption in (
        "Your Skills (comma-separated)",
        "Your Education",
        "Your Interest / Field",
        "Your Location",
    )
]
# The ranked matches are shown as raw JSON.
outputs = gr.JSON(label="Top 5 Internship Matches")
demo = gr.Interface(
    fn=match_internship,
    inputs=inputs,
    outputs=outputs,
    title="Super-Intelligent Internship Matcher",
    description="Enter your skills, education, interest, and location to get top 5 internship matches.",
)

# Start the web UI only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()