Sentiment_Analysis_And_Topic_Modelling

Sleeping

App Files Files Community

Sentiment_Analysis_And_Topic_Modelling / src /prediction_compile.py

hanantonio

Upload 3 files

1dffecd verified 6 months ago

raw

history blame

5.7 kB

	# prediction_compile.py
	# Import Libraries
	import streamlit as st
	import re
	import pickle
	import joblib
	import nltk
	import os
	import numpy as np
	import pandas as pd
	from tensorflow.keras.preprocessing.sequence import pad_sequences
	from tensorflow import keras
	from nltk.corpus import stopwords
	from nltk.tokenize import word_tokenize
	from nltk.stem import PorterStemmer

	# --- Setup NLTK ---
	# Corpora are stored under /tmp and that directory is registered on NLTK's
	# search path so the lookups below (stopwords, word_tokenize) can find them.
	nltk_data_path = os.path.join("/tmp", "nltk_data")
	os.makedirs(nltk_data_path, exist_ok=True)
	if nltk_data_path not in nltk.data.path:
	    # Avoid stacking duplicate entries if this module is executed again.
	    nltk.data.path.append(nltk_data_path)

	# "punkt_tab" is what word_tokenize loads on recent NLTK releases (3.9+);
	# "punkt" is kept for older releases that still read the pickled models.
	# nltk.download() reports a failure by returning False rather than raising,
	# so requesting all three is safe regardless of the installed version.
	for _nltk_resource in ("stopwords", "punkt", "punkt_tab"):
	    nltk.download(_nltk_resource, download_dir=nltk_data_path)

	# --- Loading Info ---
	# Rendered when this module is executed, before the model loaders further
	# down are called.
	_loading_notice = (
	    '<p style="color:gray; font-size:14px; font-style:italic;">'
	    'Loading models and resources from local storage... '
	    'Please be patient and DO NOT refresh the page :)'
	    '</p>'
	)
	st.markdown(_loading_notice, unsafe_allow_html=True)

	# --- Cached Loading Functions ---
	@st.cache_resource
	def load_sentiment_model():
	    """Load and return the Keras sentiment model stored at ./src/best_model.keras.

	    The path is relative to the process working directory. The function is
	    wrapped in ``st.cache_resource``.
	    """
	    return keras.models.load_model("./src/best_model.keras")

	@st.cache_resource
	def load_tokenizer_params():
	    """Unpickle the tokenizer and the parameter object shipped under ./src.

	    Returns:
	        A ``(tokenizer, params)`` tuple, read from ``./src/tokenizer.pkl``
	        and ``./src/params.pkl`` in that order.
	    """
	    # NOTE: pickle.load executes arbitrary code from the file; these two
	    # files must only ever come from a trusted source.
	    loaded = []
	    for pickle_path in ("./src/tokenizer.pkl", "./src/params.pkl"):
	        with open(pickle_path, "rb") as handle:
	            loaded.append(pickle.load(handle))
	    tokenizer, params = loaded
	    return tokenizer, params

	@st.cache_resource
	def load_topic_models():
	    """Load the two topic models stored under ./src with joblib.

	    Returns:
	        A ``(neg_model, pos_model)`` tuple, read from
	        ``./src/fastopic_negative_model.pkl`` and
	        ``./src/fastopic_positive_model.pkl`` in that order.
	    """
	    # The tuple elements are evaluated left to right: negative model first.
	    return (
	        joblib.load("./src/fastopic_negative_model.pkl"),
	        joblib.load("./src/fastopic_positive_model.pkl"),
	    )

	# --- Load all resources once ---
	# These calls run when the module is executed. Each loader is decorated with
	# st.cache_resource where it is defined earlier in this file.
	sentiment_model = load_sentiment_model()
	tokenizer, params = load_tokenizer_params()
	topic_model_neg, topic_model_pos = load_topic_models()

	# Sequence length later passed to pad_sequences(maxlen=...) in run().
	max_len = params["max_len"]

	# --- Preprocessing Function ---
	# The negation words are removed from the English stop-word set, so
	# text_preprocessing() keeps them in its output (presumably because they
	# matter for sentiment).
	negations = {"not", "no", "never"}
	stpwrds_en = set(stopwords.words("english")).difference(negations)
	stemmer = PorterStemmer()

	# Token-level substitutions; text_preprocessing() applies them to each
	# token before stop-word removal and stemming.
	replacements = dict(
	    sia="sq",
	    flown="fly",
	    flew="fly",
	    alway="always",
	    boarding="board",
	    told="tell",
	    said="say",
	    booked="book",
	    paid="pay",
	    well="good",
	    aircraft="plane",
	)

	def text_preprocessing(text):
	    """Normalise a raw review into a space-joined string of stemmed tokens.

	    Steps, in order: lowercase; replace each literal backslash-``n`` pair
	    with a space; strip surrounding whitespace; replace every character
	    other than a-z, 0-9 and whitespace with a space; tokenize; apply the
	    ``replacements`` map; drop tokens found in ``stpwrds_en``; stem.

	    Args:
	        text: The review text (a ``str``).

	    Returns:
	        The processed tokens joined by single spaces, or the sentinel
	        string ``"emptytext"`` when no token survives.
	    """
	    cleaned = re.sub(r"\\n", " ", text.lower()).strip()
	    cleaned = re.sub(r'[^a-z0-9\s]', ' ', cleaned)

	    stemmed = []
	    for raw_token in word_tokenize(cleaned):
	        token = replacements.get(raw_token, raw_token)
	        if token in stpwrds_en:
	            continue
	        stemmed.append(stemmer.stem(token))

	    if not stemmed:
	        return "emptytext"
	    return ' '.join(stemmed)

	# --- Topic Labels ---
	# Topic ids are 1-based: run() turns a model's argmax index into an id by
	# adding 1 before looking it up in these maps.
	topic_labels_neg = dict(enumerate(
	    [
	        "meal and entertainment service",
	        "refund, cancellation, and booking tickets policy",
	        "business class/premium facility",
	        "baggage limits and price",
	        "hidden charges",
	    ],
	    start=1,
	))

	topic_labels_pos = dict(enumerate(
	    [
	        "good food and crew service",
	        "excellent economy seat",
	        "refund and cancellation policy",
	        "meals quality",
	        "accommodation and assistance",
	    ],
	    start=1,
	))

	# --- Streamlit App ---
	def _predict_sentiment(text):
	    """Classify one review with the module-level sentiment model.

	    Args:
	        text: Raw review text as typed by the user.

	    Returns:
	        A ``(sentiment_label, confidence)`` tuple where the label is
	        ``"Positive"`` or ``"Negative"`` and confidence is the probability
	        the model assigned to that label.
	    """
	    processed = text_preprocessing(text)
	    seq = tokenizer.texts_to_sequences([processed])
	    padded = pad_sequences(seq, maxlen=max_len, padding="post", truncating="post")
	    pred_probs = sentiment_model.predict(padded)

	    if pred_probs.shape[1] == 1:
	        # Single output unit: treated as the probability of "Positive".
	        p_pos = float(pred_probs[0][0])
	        p_neg = 1 - p_pos
	        sentiment_label = "Positive" if p_pos >= 0.5 else "Negative"
	        return sentiment_label, max(p_pos, p_neg)

	    # One output per class: index 0 is "Negative", index 1 is "Positive".
	    label_map = {0: "Negative", 1: "Positive"}
	    pred_class = int(np.argmax(pred_probs, axis=1)[0])
	    return label_map[pred_class], float(pred_probs[0][pred_class])


	def _predict_topic(text, sentiment_label):
	    """Pick the topic model matching the sentiment and score the review.

	    Args:
	        text: Raw review text.
	        sentiment_label: ``"Negative"`` selects the negative topic model;
	            any other value selects the positive one.

	    Returns:
	        ``(model_name, topic_id, topic_name, probs)`` where ``topic_id`` is
	        the 1-based index of the highest-scoring topic.
	    """
	    if sentiment_label == "Negative":
	        model, labels, model_name = topic_model_neg, topic_labels_neg, "Negative"
	    else:
	        model, labels, model_name = topic_model_pos, topic_labels_pos, "Positive"

	    # NOTE(review): the topic model receives the raw text, not the output of
	    # text_preprocessing() -- confirm this matches how the models were fit.
	    probs = model.transform([text])[0]
	    topic_id = int(np.argmax(probs)) + 1
	    topic_name = labels.get(topic_id, "Unknown Topic")
	    return model_name, topic_id, topic_name, probs


	def run():
	    """Render the review form and, on submit, the predicted sentiment and topic.

	    Nothing is predicted until the form is submitted with non-blank text;
	    a blank submission only shows a warning.
	    """
	    st.subheader("Sentiment & Topic Prediction for SQ Customer Reviews")

	    st.markdown("Enter a customer review below to predict sentiment and topic.")

	    with st.form(key='SQ-sentiment-analysis'):
	        # The hint is a placeholder rather than a default value, so an
	        # untouched field is empty instead of being classified as a review.
	        text = st.text_input('Customer Review', placeholder='--customer review--')
	        submitted = st.form_submit_button('Predict')

	    if not submitted:
	        return
	    if not text.strip():
	        st.warning("Please enter a customer review before predicting.")
	        return

	    # Sentiment Prediction
	    sentiment_label, confidence = _predict_sentiment(text)
	    color = "green" if sentiment_label == "Positive" else "red"
	    st.markdown(
	        f"<p style='font-size:22px; font-weight:bold; color:{color};'>"
	        f"Predicted Sentiment: {sentiment_label} "
	        f"(Confidence: {confidence:.2f})</p>",
	        unsafe_allow_html=True
	    )

	    # Topic Prediction
	    st.write("### Topic Modeling")
	    model_name, topic_id, topic_name, probs = _predict_topic(text, sentiment_label)
	    st.write(f"Using {model_name} Model")

	    # Output
	    st.markdown(
	        f"<p style='font-size:20px; font-weight:bold; color:{color};'>"
	        f"Topic {topic_id}: {topic_name}</p>",
	        unsafe_allow_html=True
	    )
	    st.write("Probabilities:", probs.tolist())

	# Render the page when this file is executed as a script rather than imported.
	if __name__ == "__main__":
	    run()