import os

# Must be set before any wandb-aware library gets imported.
os.environ["WANDB_DISABLED"] = "true"  # Disable online logging

import re

import gradio as gr
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer, util

# ------------------------
# Load CSV and pre-fine-tuned model
# ------------------------
MODEL_PATH = os.path.join(os.path.dirname(__file__), "fine_tuned_internship_model")
model = SentenceTransformer(MODEL_PATH)
df = pd.read_csv("converted (1).csv")

# ------------------------
# Normalization helpers
# ------------------------
# Common abbreviations mapped to the canonical form used for embedding.
SKILL_MAP = {
    "js": "javascript",
    "reactjs": "react",
    "nodejs": "node.js",
    "cpp": "c++",
    "btech": "bachelor of technology",
    "cs": "computer science",
    "ece": "electronics and communication",
    "ml": "machine learning",
    "ai": "artificial intelligence",
    "it": "information technology",
}


def normalize_text(text):
    """Lowercase *text*, strip '.'/',' and expand known abbreviations.

    Abbreviations are expanded only as whole words: plain substring
    replacement turned e.g. "physics" into "physi<computer science>" via
    the "cs" key, and rewrote the "js" suffix of "reactjs" before the
    "reactjs" key could ever match.
    """
    # Treat None / NaN / empty as "no text" (CSV cells may be missing).
    if text is None or (isinstance(text, float) and pd.isna(text)):
        return ""
    text = str(text)
    if not text:
        return ""
    text = text.lower().replace(".", "").replace(",", "")
    for abbr, full in SKILL_MAP.items():
        # \b anchors keep the mapping from firing inside longer words.
        text = re.sub(r"\b" + re.escape(abbr) + r"\b", full, text)
    return text.strip()


def normalize_skills(skills_str):
    """Split a comma-separated skill string into normalized, non-empty tokens."""
    if not isinstance(skills_str, str):
        return []  # guard: NaN / missing CSV cell has no .split
    tokens = (normalize_text(tok.strip()) for tok in skills_str.split(","))
    return [tok for tok in tokens if tok]


def location_similarity(candidate_loc, internship_loc):
    """Crude location score: 100 exact match, 70 same first word, else 0."""
    cand = str(candidate_loc).lower().strip()
    intern = str(internship_loc).lower().strip()
    if cand == intern:
        return 100
    # Guard both strings: .split()[0] raised IndexError when exactly one
    # of the two locations was empty (the both-empty case hits == above).
    if cand and intern and cand.split()[0] == intern.split()[0]:
        return 70
    return 0


def _encode_many(texts):
    """Batch-encode a list of strings into one 2-D tensor; None when empty.

    One encode() call replaces the previous per-string Python loop.
    """
    return model.encode(texts, convert_to_tensor=True) if texts else None


# ------------------------
# Cache embeddings for all internships (computed once at startup)
# ------------------------
cached_internships = []
for _, row in df.iterrows():
    cached_internships.append(
        {
            "row": row,
            # 2-D tensor of per-skill embeddings, or None if no skills listed.
            "skill_embs": _encode_many(normalize_skills(row["Required_Skills"])),
            "edu_emb": model.encode(
                normalize_text(row["Student_Education"]), convert_to_tensor=True
            ),
            "interest_emb": model.encode(
                normalize_text(row["Student_Interest"]), convert_to_tensor=True
            ),
        }
    )

# Linear blend of the four component scores: skills, education, interest, location.
weights = np.array([0.4, 0.3, 0.2, 0.1])
intercept = 0


# ------------------------
# Matching function
# ------------------------
def match_internship(skills, education, interest, location):
    """Return the top-5 internship matches for a candidate profile.

    Parameters are the raw user strings from the Gradio form (skills are
    comma-separated). Returns a list of up to five dicts with per-component
    percentages and an "Overall_Match" weighted score, best first.
    """
    cand_skill_embs = _encode_many(normalize_skills(skills))
    cand_edu_emb = model.encode(normalize_text(education), convert_to_tensor=True)
    cand_interest_emb = model.encode(normalize_text(interest), convert_to_tensor=True)

    results = []
    for internship in cached_internships:
        row = internship["row"]

        # Skills: for each candidate skill take its best internship-skill
        # cosine similarity, then average — one cos_sim matrix call instead
        # of a nested Python loop. Either side empty => 0, as before.
        if cand_skill_embs is not None and internship["skill_embs"] is not None:
            sim_matrix = util.cos_sim(cand_skill_embs, internship["skill_embs"])
            skills_sim = sim_matrix.max(dim=1).values.mean().item() * 100
        else:
            skills_sim = 0

        edu_sim = util.cos_sim(cand_edu_emb, internship["edu_emb"]).item() * 100
        interest_sim = util.cos_sim(cand_interest_emb, internship["interest_emb"]).item() * 100
        loc_sim = location_similarity(location, row["Location"])

        overall = np.dot(weights, [skills_sim, edu_sim, interest_sim, loc_sim]) + intercept

        results.append(
            {
                "Company": row["Company"],
                "Position": row["Position"],
                "Skills_Match": skills_sim,
                "Education_Match": edu_sim,
                "Interest_Match": interest_sim,
                "Location_Match": loc_sim,
                "Overall_Match": overall,
            }
        )

    results.sort(key=lambda r: r["Overall_Match"], reverse=True)
    # Top 5 as a list of dicts (rendered by the gr.JSON output).
    return results[:5]


# ------------------------
# Gradio interface
# ------------------------
inputs = [
    gr.Textbox(label="Your Skills (comma-separated)"),
    gr.Textbox(label="Your Education"),
    gr.Textbox(label="Your Interest / Field"),
    gr.Textbox(label="Your Location"),
]
outputs = gr.JSON(label="Top 5 Internship Matches")

demo = gr.Interface(
    fn=match_internship,
    inputs=inputs,
    outputs=outputs,
    title="Super-Intelligent Internship Matcher",
    description="Enter your skills, education, interest, and location to get top 5 internship matches.",
)

if __name__ == "__main__":
    demo.launch()