# Hugging Face Spaces (status: Sleeping)
import os
import re

# Must be set before the ML libraries are imported so they see the flag.
os.environ["WANDB_DISABLED"] = "true"  # Disable online logging

import gradio as gr
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer, util
| # ------------------------ | |
| # Load CSV and pre-fine-tuned model | |
| # ------------------------ | |
# Resolve both assets from the directory that contains this script, so the
# app loads the same model and CSV no matter which working directory it is
# launched from. The absolute path also guarantees the model argument is
# always a filesystem path.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
MODEL_PATH = os.path.join(BASE_DIR, "fine_tuned_internship_model")
CSV_PATH = os.path.join(BASE_DIR, "converted (1).csv")

model = SentenceTransformer(MODEL_PATH)

# Internship table. The rest of this file reads the columns Required_Skills,
# Student_Education, Student_Interest, Location, Company and Position.
df = pd.read_csv(CSV_PATH)
| # ------------------------ | |
| # Normalization functions | |
| # ------------------------ | |
# Abbreviation -> canonical spelling used before text is embedded.
SKILL_MAP = {
    "js": "javascript",
    "reactjs": "react",
    "nodejs": "node.js",
    "cpp": "c++",
    "btech": "bachelor of technology",
    "cs": "computer science",
    "ece": "electronics and communication",
    "ml": "machine learning",
    "ai": "artificial intelligence",
    "it": "information technology",
}

# Matches a SKILL_MAP key only when it stands alone as a word. Plain substring
# replacement would corrupt unrelated words ("git" -> "ginformation technology",
# "html" -> "htmachine learning") and would rewrite "reactjs" through the
# shorter key "js" before the "reactjs" entry could ever apply.
# Built once from SKILL_MAP as defined above; rebuild it if the map changes.
_SKILL_PATTERN = re.compile(
    r"(?<!\w)(?:"
    + "|".join(re.escape(key) for key in sorted(SKILL_MAP, key=len, reverse=True))
    + r")(?!\w)"
)


def normalize_text(text):
    """Lower-case *text*, drop '.' and ',', and expand known abbreviations.

    Abbreviations listed in SKILL_MAP are expanded only when they appear as
    whole words, in a single pass, so an expansion is never re-scanned for
    further abbreviations.

    Args:
        text: The raw string. Any non-string or empty value (None, NaN, "")
            is treated as missing.

    Returns:
        The normalized string with surrounding whitespace removed, or "" when
        the input is missing.
    """
    if not isinstance(text, str) or not text:
        return ""
    # Dots are removed first so spellings like "Node.js" or "B.Tech" collapse
    # to the dot-free keys used in SKILL_MAP.
    cleaned = text.lower().replace(".", "").replace(",", "")
    expanded = _SKILL_PATTERN.sub(lambda match: SKILL_MAP[match.group(0)], cleaned)
    return expanded.strip()


def normalize_skills(skills_str):
    """Split a comma-separated skills string into normalized skill names.

    Args:
        skills_str: Comma-separated skills. A non-string value (None, NaN)
            is treated as "no skills".

    Returns:
        A list of normalized, non-empty skill strings in input order.
    """
    if not isinstance(skills_str, str):
        return []
    normalized = (normalize_text(part) for part in skills_str.split(","))
    return [skill for skill in normalized if skill]
def location_similarity(candidate_loc, internship_loc):
    """Score how well a candidate's location matches an internship's location.

    Comparison is case-insensitive and ignores surrounding whitespace.

    Args:
        candidate_loc: Location entered by the candidate.
        internship_loc: Location listed for the internship.

    Returns:
        100 when the two locations are identical, 70 when only their first
        words match, and 0 otherwise. A missing location on either side
        (empty, whitespace-only, or not a string such as None/NaN) scores 0.
    """
    candidate = candidate_loc.lower().strip() if isinstance(candidate_loc, str) else ""
    internship = internship_loc.lower().strip() if isinstance(internship_loc, str) else ""

    # Without this guard an empty location would raise IndexError on the
    # first-word lookup below (''.split() is an empty list).
    if not candidate or not internship:
        return 0
    if candidate == internship:
        return 100
    if candidate.split()[0] == internship.split()[0]:
        return 70
    return 0
| # ------------------------ | |
| # Cache embeddings for all internships | |
| # ------------------------ | |
# Embed every internship's skills, education and interest once, while the
# module loads, so a query only has to embed the candidate's own inputs.
# Each entry keeps the original DataFrame row next to its embeddings.
cached_internships = []
for _, internship_row in df.iterrows():
    required_skills = normalize_skills(internship_row['Required_Skills'])
    education_text = normalize_text(internship_row['Student_Education'])
    interest_text = normalize_text(internship_row['Student_Interest'])

    cache_entry = {
        "row": internship_row,
        # One embedding per required skill, in the order listed in the CSV.
        "skill_embs": [
            model.encode(skill_name, convert_to_tensor=True)
            for skill_name in required_skills
        ],
        "edu_emb": model.encode(education_text, convert_to_tensor=True),
        "interest_emb": model.encode(interest_text, convert_to_tensor=True),
    }
    cached_internships.append(cache_entry)
# Relative importance of each component score, in the order:
# Skills, Education, Interest, Location
weights = np.array([0.4, 0.3, 0.2, 0.1])
intercept = 0

# ------------------------
# Matching function
# ------------------------
def match_internship(skills, education, interest, location):
    """Rank the cached internships for one candidate and return the top five.

    Args:
        skills: Comma-separated skills entered by the candidate.
        education: Free-text education description.
        interest: Free-text interest / field description.
        location: Candidate location, passed unchanged to location_similarity.

    Returns:
        A list of at most five dicts, sorted by descending "Overall_Match".
        Each dict has the keys "Company", "Position", "Skills_Match",
        "Education_Match", "Interest_Match", "Location_Match" and
        "Overall_Match". Cosine similarities are multiplied by 100, and the
        overall score is the weighted sum of the four components plus
        ``intercept``.
    """
    def embed(text):
        return model.encode(text, convert_to_tensor=True)

    skill_vectors = [embed(skill) for skill in normalize_skills(skills)]
    education_vector = embed(normalize_text(education))
    interest_vector = embed(normalize_text(interest))

    scored = []
    for cached in cached_internships:
        posting = cached["row"]

        # For every candidate skill keep its best cosine similarity against
        # any skill required by this posting, then average those best scores.
        best_per_skill = []
        for skill_vector in skill_vectors:
            pair_scores = [
                util.cos_sim(skill_vector, required_vector).item()
                for required_vector in cached["skill_embs"]
            ]
            best_per_skill.append(max(pair_scores, default=0))
        if best_per_skill:
            skills_sim = np.mean(best_per_skill) * 100
        else:
            skills_sim = 0

        edu_sim = util.cos_sim(education_vector, cached["edu_emb"]).item() * 100
        interest_sim = util.cos_sim(interest_vector, cached["interest_emb"]).item() * 100
        loc_sim = location_similarity(location, posting['Location'])

        # Component order must match the order of ``weights``.
        component_scores = [skills_sim, edu_sim, interest_sim, loc_sim]
        overall = np.dot(weights, component_scores) + intercept

        scored.append({
            "Company": posting['Company'],
            "Position": posting['Position'],
            "Skills_Match": skills_sim,
            "Education_Match": edu_sim,
            "Interest_Match": interest_sim,
            "Location_Match": loc_sim,
            "Overall_Match": overall,
        })

    ranked = sorted(scored, key=lambda match: match['Overall_Match'], reverse=True)
    # Only the five best matches are returned.
    return ranked[:5]
| # ------------------------ | |
| # Gradio interface | |
| # ------------------------ | |
# One text box per match_internship argument, in positional order:
# skills, education, interest, location.
_INPUT_LABELS = (
    "Your Skills (comma-separated)",
    "Your Education",
    "Your Interest / Field",
    "Your Location",
)
inputs = [gr.Textbox(label=input_label) for input_label in _INPUT_LABELS]

# The list of match dicts returned by match_internship is shown as JSON.
outputs = gr.JSON(label="Top 5 Internship Matches")

demo = gr.Interface(
    fn=match_internship,
    inputs=inputs,
    outputs=outputs,
    title="Super-Intelligent Internship Matcher",
    description="Enter your skills, education, interest, and location to get top 5 internship matches.",
)

# Start the web UI only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()