from sklearn.metrics.pairwise import cosine_similarity
from transformers import BertTokenizer, BertModel
import torch
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

import pandas as pd
import numpy as np

# Pretrained checkpoint shared by the tokenizer and the encoder so the two
# always agree on vocabulary.
_BERT_CHECKPOINT = 'bert-base-uncased'

# Both objects are created once at import time and reused by every call that
# needs an embedding.
tokenizer = BertTokenizer.from_pretrained(_BERT_CHECKPOINT)
model = BertModel.from_pretrained(_BERT_CHECKPOINT)

# Lazily filled cache of the NLTK objects used by preprocess_text, so the
# corpora are loaded (and, if necessary, downloaded) at most once per process.
_NLTK_RESOURCES = {}


def _nltk_resources():
    """Return the cached ``(stop_words, lemmatizer)`` pair.

    The corpora are downloaded only when NLTK reports them missing
    (``LookupError``), instead of contacting the download server on every
    call.
    """
    if not _NLTK_RESOURCES:
        try:
            english_stop_words = stopwords.words('english')
        except LookupError:
            nltk.download('stopwords')
            english_stop_words = stopwords.words('english')

        lemmatizer = WordNetLemmatizer()
        try:
            # Probe call: forces the lazily loaded WordNet corpus to resolve.
            lemmatizer.lemmatize('probe')
        except LookupError:
            nltk.download('wordnet')
            lemmatizer.lemmatize('probe')

        _NLTK_RESOURCES['stop_words'] = frozenset(english_stop_words)
        _NLTK_RESOURCES['lemmatizer'] = lemmatizer
    return _NLTK_RESOURCES['stop_words'], _NLTK_RESOURCES['lemmatizer']


def preprocess_text(text: str) -> str:
    """Lower-case, drop English stop words from, and lemmatize *text*.

    The text is split on whitespace; each remaining word is lower-cased and
    passed through ``WordNetLemmatizer.lemmatize``. Punctuation is not
    stripped.

    Args:
        text: Raw input text.

    Returns:
        The processed words joined by single spaces.
    """
    stop_words, lemmatizer = _nltk_resources()
    lowered_words = (word.lower() for word in text.split())
    return ' '.join(
        lemmatizer.lemmatize(word)
        for word in lowered_words
        if word not in stop_words
    )

def get_bert_embedding(text: str):
    """Embed *text* as the mean of BERT's last-layer token vectors.

    Only real tokens contribute to the mean: positions the tokenizer marks
    as padding (attention mask 0) are excluded, so the embedding does not
    depend on how much padding was added.

    Args:
        text: Text to embed. Inputs longer than 512 tokens are truncated.

    Returns:
        A NumPy array with one pooled vector per input sequence, i.e. the
        sequence dimension (``dim=1``) of ``last_hidden_state`` is reduced.
    """
    # Pad only up to the longest sequence in the call; for a single string
    # this adds no padding at all, unlike padding to the full 512 tokens.
    inputs = tokenizer(
        text,
        return_tensors='pt',
        max_length=512,
        truncation=True,
        padding=True,
    )

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        hidden_states = model(**inputs).last_hidden_state
        token_mask = inputs['attention_mask'].unsqueeze(-1).to(hidden_states.dtype)
        summed = (hidden_states * token_mask).sum(dim=1)
        # clamp guards against a division by zero on an all-masked row.
        token_counts = token_mask.sum(dim=1).clamp(min=1)
        pooled = summed / token_counts

    return pooled.numpy()


def _resume_field_to_text(value) -> str:
    """Flatten one resume field (a string or an iterable of items) to a string."""
    if value is None:
        return ""
    if isinstance(value, float) and value != value:  # NaN marks a missing cell
        return ""
    if isinstance(value, str):
        # Joining a plain string would put a space between every character.
        return value
    return " ".join(str(item) for item in value)


def recommend_resume(
    resume_df: pd.DataFrame,
    job_desc: str,
    matching_threshold: float = 0.0,
    min_experience: float = 0,
) -> pd.DataFrame:
    """Return the resumes from *resume_df* that match the job description.

    Each resume's ``skills`` and ``about_section`` values are combined,
    run through ``preprocess_text`` and embedded with
    ``get_bert_embedding``; the cosine similarity to the embedding of
    *job_desc* is the resume's score. A resume is recommended when its
    ``past_company_experience`` is at least *min_experience* and its score
    is at least *matching_threshold*.

    Args:
        resume_df: Frame with ``skills``, ``about_section`` and
            ``past_company_experience`` columns.
        job_desc: Job description text.
        matching_threshold: Minimum cosine similarity for a match.
        min_experience: Minimum ``past_company_experience`` for a match.

    Returns:
        A copy of the matching rows of *resume_df* (original order and index
        preserved) with an added ``similarity_score`` column. Empty when
        nothing matches.
    """
    # NOTE(review): the job description is embedded as-is, while resume text
    # goes through preprocess_text first — confirm this asymmetry is intended.
    job_description_embedding = get_bert_embedding(job_desc)

    matched_positions = []
    matched_scores = []
    resume_fields = zip(
        resume_df['skills'],
        resume_df['about_section'],
        resume_df['past_company_experience'],
    )
    for row, (skills, about_section, experience) in enumerate(resume_fields):
        print('This is row', row)
        print("This is row skill: ", skills)

        resume_text = preprocess_text(
            _resume_field_to_text(skills) + " " + _resume_field_to_text(about_section)
        )
        resume_embedding = get_bert_embedding(resume_text)
        similarity_score = cosine_similarity(
            resume_embedding, job_description_embedding
        ).flatten()[0]
        experience_match = bool(experience >= min_experience)
        print('similarity_score: ', similarity_score, 'experience_match: ', experience_match)

        if not experience_match:
            print("Resume does not match the job description.")
        elif similarity_score < matching_threshold:
            print("similarity score is out of threshold")
            continue
        else:
            # Record position and score together so they stay aligned even
            # when some rows are filtered out.
            matched_positions.append(row)
            matched_scores.append(similarity_score)
            print("Resume matches the job description.")
        print('one complete')

    # One slice at the end instead of a pd.concat per matching row.
    recommended_resumes = resume_df.iloc[matched_positions].copy()
    recommended_resumes['similarity_score'] = matched_scores
    return recommended_resumes