import streamlit as st
import numpy as np
import re
import emoji
import string
import os

# Must be set BEFORE transformers/TensorFlow are imported so TF falls back
# to legacy Keras (required by the saved models loaded below).
os.environ['TF_USE_LEGACY_KERAS'] = '1'

from transformers import TFAutoModel, AutoTokenizer
import tensorflow as tf


def clear_emoji(text):
    """Replace every emoji in *text* with a single space."""
    return emoji.replace_emoji(text, ' ')


def casefold(text):
    """Lower-case *text*."""
    return text.lower()


# Built once at import time: maps every ASCII punctuation char to a space.
_PUNCT_TO_SPACE = str.maketrans({c: ' ' for c in string.punctuation})


def replace_punctuations(text):
    """Replace each punctuation character with a space.

    Uses str.translate (one C-level pass) instead of chained .replace calls.
    """
    return text.translate(_PUNCT_TO_SPACE)


def tear_Is(text):
    """Expand "<word>'s" into "<word> is" for common pronoun/determiner words.

    NOTE: for these words the trailing 's is deliberately read as a
    contraction of "is", never as a possessive.
    """
    words = ['that', 'this', 'there', 'he', 'she', 'it', 'what', 'who',
             'when', 'where', 'how', 'everyone']
    for word in words:
        # str.replace is a no-op when the substring is absent, so the
        # original `if ... in text` pre-check was redundant.
        text = text.replace(word + "'s", word + ' is')
    return text


def first_clean(text):
    """First cleaning pass: emojis, case, apostrophes, links, tags, numbers."""
    text = clear_emoji(text)
    text = casefold(text)
    # Normalize apostrophe look-alikes before any contraction handling.
    text = text.replace("’", "'")
    text = text.replace("´", "'")
    # Strip links/hashtags/mentions BEFORE digit removal so URLs containing
    # digits are removed whole.  (Bug fix: the original removed digits first,
    # leaving URL fragments behind, and used the greedy pattern r"www.+",
    # which deleted everything from "www" to the end of the text.)
    text = re.sub(r"http\S+", ' ', text)        # remove links with http(s)
    text = re.sub(r"www\.\S+", ' ', text)       # remove links with www.
    text = re.sub(r'#[a-zA-Z0-9]+', ' ', text)  # remove hashtags
    text = re.sub(r'@[a-zA-Z0-9]+', ' ', text)  # remove mentions
    # "12K" -> "thousand" must run before plain digit removal.
    text = re.sub(r'\b\d+K\b', ' thousand ', text, flags=re.IGNORECASE)
    text = re.sub(r'[0-9]+', ' ', text)         # remove remaining numbers
    text = text.replace("'d", " had")
    text = text.replace("-", " ")
    text = text.replace('\n', ' ')              # replace newlines with spaces
    return text


# Ordered (old, new) pairs for slang / contraction / obfuscation expansion.
# ORDER MATTERS: longer obfuscated forms (e.g. "f*ckn*", "sh*te") must come
# before their shorter substrings ("f*ck", "sh*t"), and "can't" before the
# generic "n't".  The original file had several rules placed after their
# substrings, making them unreachable; they are reordered (or dropped when
# provably dead) here.  Replacements that merged adjacent words by omitting
# surrounding spaces (" im " -> "i am", " dont " -> "do not", etc.) are fixed.
_SECOND_CLEAN_RULES = [
    (" the f ", ' the fuck '),
    (" *s* ", ' ass '),
    ("f*ed", "fucked"),
    ("f**ed", "fucked"),
    ("f*ckn*", 'fucking'),      # moved before "f*ck" (was unreachable)
    ("f*ck* ", 'fuck '),        # moved before "f*ck"; trailing space fixed
    ("f*ck", 'fuck'),
    ("f*n", 'fucking'),
    ("f**in*", 'fucking'),
    ("sh*te", "shit"),          # moved before "sh*t" (was unreachable)
    ("sh*t", "shit"),
    ("s**t", "shit"),
    ("lol", "laugh out loud"),
    ("wuz", "was"),
    (" wanna ", " want to "),
    (" won't ", " will not "),
    (" wont ", " will not "),
    (" isn't ", ' is not '),    # trailing space fixed (word-merge bug)
    (" ii ", ' two '),
    ("yall", 'you all'),
    ("y'all", 'you all'),
    ("let's", 'let us'),
    ("thats", "that is"),
    ("lets", 'let us'),
    ("ain't", 'not'),
    ("aint", 'not'),
    ("can't", 'can not'),       # must precede the generic "n't" rule
    ("n't", ' not'),
    ("i'm", 'i am'),
    (" dont ", ' do not '),     # surrounding spaces fixed (word-merge bug)
    ("didnt", 'did not'),
    ("doesnt", 'does not'),
    (" isnt ", ' is not '),
    (" cant ", ' can not '),
    (" im ", ' i am '),         # surrounding spaces fixed (word-merge bug)
    ("'re", ' are'),
    ("'ll", ' will'),
    ("'ve", ' have'),
    (" da ", " the "),
    (" imo ", ' in my opinion '),
    (" og ", ' original '),
    (" ya ", ' you '),
    (" ppl ", " people "),
    (" nota ", " not a "),
    (" cuz ", " cause "),
    (" wth ", " what the heck "),
    ("f*k", "fuck"),
    ("f k", "fuck"),
    ("d*k", "dick"),
    (" i m ", " i am "),
    (" gg ", " glory glory "),
    (" btw ", " by the way "),
    (" ill ", " i will "),
    (" af ", " as fuck "),
    (" idk ", " i do not know "),
    ("ffs", "for fuck sake"),
    (" tho ", " though "),
    (" tf ", " the fuck "),
    (" bs ", " bullshit "),
    (" smh ", " shaking my head "),
    (" dei ", " diversity, equity, and inclusion "),
    ("tha f", 'the fuck'),
]
# Dropped as provably dead (earlier rules always rewrite their input first):
# (" i'm", ...) after ("i'm", ...); ("f*cked"/"f*ked", ...) after
# ("f*ck"/"f*k", ...).


def second_clean(text):
    """Second cleaning pass: expand contractions, slang and obfuscated swearing."""
    text = tear_Is(text)
    text = text.strip()
    for old, new in _SECOND_CLEAN_RULES:
        text = text.replace(old, new)
    return text


def third_clean(text):
    """Final cleaning pass: strip punctuation, fix leftovers, squeeze spaces."""
    text = replace_punctuations(text)  # remove all punctuation
    text = text.replace(" rip ", ' rest in peace ')
    text = text.replace(" im ", " i am ")
    text = text.replace(" don t ", " do not ")
    text = text.replace(" iwill ", " i will ")
    text = text.replace(" st ", " first ")
    text = text.replace(" u ", " you ")
    text = text.replace(" the f ", ' the fuck ')
    text = text.replace(" f ck ", ' fuck ')   # duplicate rule removed
    text = text.replace(" f k ", ' fuck ')
    text = text.replace(" f king", ' fucking ')
    text = text.replace(" f it ", ' fuck it ')
    text = text.strip()                       # trim leading/trailing space
    text = re.sub(r'[^a-zA-Z0-9\s]', ' ', text)  # remove special characters
    text = ' '.join(text.split())             # collapse runs of whitespace
    return text


def complete_clean(text):
    """Run all three cleaning passes over *text* and return the result."""
    text = first_clean(text)
    text = second_clean(text)
    text = third_clean(text)
    return text.strip()


def tokenize(texts, tokenizer, max_length=512):
    """Tokenize an iterable of strings into padded/truncated TF tensors."""
    return tokenizer(
        list(texts),
        padding=True,
        truncation=True,
        max_length=max_length,
        return_tensors="tf",
    )


def predict(sentence, model, tokenizer, distilbert=False):
    """Classify *sentence* and return (label, confidence-in-percent).

    DistilBERT tokenizers produce no token_type_ids, so the DistilBERT model
    is called with only input_ids and attention_mask.
    """
    labels = ['Negative', 'Neutral', 'Positive']
    cleaned = complete_clean(sentence)
    tokenized = tokenize(texts=np.array([cleaned]), tokenizer=tokenizer)
    if distilbert:
        predictions = model([
            tokenized['input_ids'],
            tokenized['attention_mask'],
        ])
    else:
        predictions = model([
            tokenized['input_ids'],
            tokenized['token_type_ids'],
            tokenized['attention_mask'],
        ])
    scores = np.array(predictions[0])  # scores for the single input sentence
    label = labels[np.argmax(scores)]
    confidence = np.max(scores) * 100
    return label, confidence


# ---------------------------------------------------------------------------
# Streamlit UI
# ---------------------------------------------------------------------------
st.title("Sentiment Analysis with HuggingFace Spaces")

option = st.selectbox(
    'Choose a Model:',
    ['ALBERT-Base', 'DistilBERT-Base', 'BERT-Base']
)

# Per-model [tokenizer_dir, saved_model_dir] paths.
directory_dict = {
    "ALBERT-Base": ["src/models/albert_base/albert_tokenizer",
                    "src/models/albert_base/albert_sentiment_model"],
    "DistilBERT-Base": ["src/models/distilbert/distilbert_tokenizer",
                        "src/models/distilbert/distilbert_sentiment_model"],
    "BERT-Base": ["src/models/bert_base/bert_base_tokenizer",
                  "src/models/bert_base/bert_base_sentiment_model"],
}

chosen_model = directory_dict[option]
distilbert = option == 'DistilBERT-Base'


@st.cache_resource
def _load_model_and_tokenizer(tokenizer_dir, model_dir):
    """Load tokenizer and saved model once per path pair.

    Cached so Streamlit does not reload the model from disk on every rerun.
    """
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_dir, local_files_only=True)
    model = tf.saved_model.load(model_dir)
    return tokenizer, model


loaded_tokenizer, loaded_model = _load_model_and_tokenizer(
    chosen_model[0], chosen_model[1]
)

st.write("Enter a sentence to analyze its sentiment:")
user_input = st.text_input("")

if user_input:
    result, confidence = predict(
        user_input,
        model=loaded_model,
        tokenizer=loaded_tokenizer,
        distilbert=distilbert,
    )
    st.write(f"Model Chosen : {option}")
    st.write(f"Sentiment : {result}")
    st.write(f"Confidence : {confidence:.2f}%")