# prediction_compile.py
"""Streamlit page: sentiment & topic prediction for Singapore Airlines (SQ)
customer reviews.

Pipeline:
    1. Preprocess the raw review text (lowercase, strip punctuation,
       tokenize, normalize common variants, remove stopwords, stem).
    2. Predict sentiment with a Keras model (binary sigmoid or 2-class
       softmax output are both handled).
    3. Route the *raw* review text to a FASTopic model fitted on negative
       or positive reviews, and report the most probable topic.

NOTE(review): this file was recovered from a whitespace-mangled copy in
which the HTML fragments passed to ``st.markdown`` had been stripped.
The ``<div>`` wrappers below are reconstructions — confirm the exact
styling against the deployed app.
"""

# Import Libraries
import streamlit as st
import re
import pickle
import joblib
import nltk
import os
import numpy as np
import pandas as pd
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow import keras
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer

# --- Setup NLTK ---
# Download corpora into /tmp so the app also works on read-only /
# ephemeral deployments (e.g. Streamlit Cloud containers).
nltk_data_path = os.path.join("/tmp", "nltk_data")
os.makedirs(nltk_data_path, exist_ok=True)
nltk.data.path.append(nltk_data_path)
nltk.download("stopwords", download_dir=nltk_data_path)
nltk.download("punkt", download_dir=nltk_data_path)

# --- Loading Info ---
st.markdown(
    '<div style="text-align: center;">'
    'Loading models and resources from local storage... '
    'Please be patient and DO NOT refresh the page :)'
    '</div>',
    unsafe_allow_html=True
)


# --- Cached Loading Functions ---
@st.cache_resource
def load_sentiment_model():
    """Load the Keras sentiment model once per session (cached)."""
    path = "./src/best_model.keras"
    return keras.models.load_model(path)


@st.cache_resource
def load_tokenizer_params():
    """Load the fitted Keras tokenizer and its preprocessing params.

    Returns:
        tuple: (tokenizer, params) where ``params`` is a dict containing
        at least ``"max_len"`` (padding length used at training time).
    """
    tokenizer_path = "./src/tokenizer.pkl"
    params_path = "./src/params.pkl"
    with open(tokenizer_path, "rb") as f:
        tokenizer = pickle.load(f)
    with open(params_path, "rb") as f:
        params = pickle.load(f)
    return tokenizer, params


@st.cache_resource
def load_topic_models():
    """Load the FASTopic models for negative and positive reviews.

    Returns:
        tuple: (negative_model, positive_model).
    """
    neg_path = "./src/fastopic_negative_model.pkl"
    pos_path = "./src/fastopic_positive_model.pkl"
    neg_model = joblib.load(neg_path)
    pos_model = joblib.load(pos_path)
    return neg_model, pos_model


# --- Load all resources once ---
sentiment_model = load_sentiment_model()
tokenizer, params = load_tokenizer_params()
topic_model_neg, topic_model_pos = load_topic_models()
max_len = params["max_len"]

# --- Preprocessing Function ---
# Keep negations: "not good" and "good" must stay distinguishable.
negations = {"not", "no", "never"}
stpwrds_en = set(stopwords.words("english")) - negations
stemmer = PorterStemmer()
# Domain-specific token normalization applied before stemming.
replacements = {
    "sia": "sq",
    "flown": "fly",
    "flew": "fly",
    "alway": "always",
    "boarding": "board",
    "told": "tell",
    "said": "say",
    "booked": "book",
    "paid": "pay",
    "well": "good",
    "aircraft": "plane",
}


def text_preprocessing(text):
    """Normalize a raw review into a stemmed, stopword-free token string.

    Args:
        text: Raw review text.

    Returns:
        str: Space-joined stemmed tokens, or the sentinel ``"emptytext"``
        when nothing survives preprocessing (keeps the tokenizer input
        non-empty).
    """
    text = text.lower()
    # BUG FIX: original used r"\\n", which matches a literal
    # backslash-n pair, not a newline; r"\n" collapses real newlines.
    text = re.sub(r"\n", " ", text)
    text = text.strip()
    text = re.sub(r'[^a-z0-9\s]', ' ', text)
    tokens = word_tokenize(text)
    tokens = [replacements.get(word, word) for word in tokens]
    tokens = [word for word in tokens if word not in stpwrds_en]
    tokens = [stemmer.stem(word) for word in tokens]
    return "emptytext" if len(tokens) == 0 else ' '.join(tokens)


# --- Topic Labels ---
# Keys are 1-based: topic_id = argmax(probs) + 1 below.
topic_labels_neg = {
    1: "meal and entertainment service",
    2: "refund, cancellation, and booking tickets policy",
    3: "business class/premium facility",
    4: "baggage limits and price",
    5: "hidden charges",
}
topic_labels_pos = {
    1: "good food and crew service",
    2: "excellent economy seat",
    3: "refund and cancellation policy",
    4: "meals quality",
    5: "accommodation and assistance",
}


# --- Streamlit App ---
def run():
    """Render the prediction form and, on submit, show sentiment + topic."""
    st.subheader("Sentiment & Topic Prediction for SQ Customer Reviews")
    st.markdown(
        """
        Enter a customer review below to predict sentiment and topic.
        """
    )
    with st.form(key='SQ-sentiment-analysis'):
        text = st.text_input('Customer Review', value='--customer review--')
        submitted = st.form_submit_button('Predict')

    if submitted:
        # Preprocess
        processed = text_preprocessing(text)
        seq = tokenizer.texts_to_sequences([processed])
        padded = pad_sequences(seq, maxlen=max_len,
                               padding="post", truncating="post")

        # Sentiment Prediction — support both output-head shapes.
        pred_probs = sentiment_model.predict(padded)
        if pred_probs.shape[1] == 1:
            # Binary sigmoid: single probability of the positive class.
            p_pos = float(pred_probs[0][0])
            p_neg = 1 - p_pos
            sentiment_label = "Positive" if p_pos >= 0.5 else "Negative"
            confidence = max(p_pos, p_neg)
        else:
            # Softmax: two-class output, index 0 = Negative, 1 = Positive.
            pred_class = np.argmax(pred_probs, axis=1)[0]
            label_map = {0: "Negative", 1: "Positive"}
            sentiment_label = label_map[pred_class]
            confidence = float(pred_probs[0][pred_class])

        color = "green" if sentiment_label == "Positive" else "red"
        st.markdown(
            f'<div style="color: {color}; font-weight: bold;">'
            f"Predicted Sentiment: {sentiment_label} "
            f"(Confidence: {confidence:.2f})</div>",
            unsafe_allow_html=True
        )

        # Topic Prediction — note: topic models consume the RAW text,
        # not the preprocessed string (each model has its own pipeline).
        st.write("### Topic Modeling")
        if sentiment_label == "Negative":
            probs = topic_model_neg.transform([text])[0]
            topic_id = int(np.argmax(probs)) + 1
            topic_name = topic_labels_neg.get(topic_id, "Unknown Topic")
            st.write("**Using Negative Model**")
        else:
            probs = topic_model_pos.transform([text])[0]
            topic_id = int(np.argmax(probs)) + 1
            topic_name = topic_labels_pos.get(topic_id, "Unknown Topic")
            st.write("**Using Positive Model**")

        # Output
        st.markdown(
            f'<div style="font-weight: bold;">'
            f"Topic {topic_id}: {topic_name}</div>",
            unsafe_allow_html=True
        )
        st.write("**Probabilities:**", probs.tolist())


if __name__ == "__main__":
    run()