# Source: Hugging Face Space file upload (rev bd862d2, 4.59 kB) — web-page
# chrome from the original scrape removed so the module is valid Python.
import streamlit as st
import os
import re
import pickle
import joblib
import nltk
import numpy as np
import pandas as pd
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow import keras
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer
from huggingface_hub import hf_hub_download
# =============================================
# Setup NLTK with container-safe path
# =============================================
# Download into /tmp so the app works in read-only / containerized deploys.
NLTK_DATA_DIR = os.path.join("/tmp", "nltk_data")
os.makedirs(NLTK_DATA_DIR, exist_ok=True)
nltk.data.path.append(NLTK_DATA_DIR)
for corpus in ("stopwords", "punkt"):
    nltk.download(corpus, download_dir=NLTK_DATA_DIR)
# =============================================
# HF Hub repo
# =============================================
repo_id = "BesottenJenny/acre-sentiment-models"
# =============================================
# Cached loading functions
# =============================================
@st.cache_resource
def load_sentiment_model():
    """Download the Keras sentiment classifier from the HF Hub and load it.

    Cached by Streamlit so the download/load happens once per process.
    """
    model_file = hf_hub_download(repo_id=repo_id, filename="best_model.keras")
    return keras.models.load_model(model_file)
@st.cache_resource
def load_tokenizer_params():
    """Download and unpickle the fitted tokenizer and its training params.

    Returns (tokenizer, params); cached by Streamlit across reruns.
    NOTE(review): pickle.load on remotely-hosted artifacts executes arbitrary
    code on deserialization — safe only while the HF repo stays trusted.
    """
    artifact_names = ("tokenizer.pkl", "params.pkl")
    local_paths = {
        name: hf_hub_download(repo_id=repo_id, filename=name)
        for name in artifact_names
    }
    loaded = {}
    for name, local_path in local_paths.items():
        with open(local_path, "rb") as fh:
            loaded[name] = pickle.load(fh)
    return loaded["tokenizer.pkl"], loaded["params.pkl"]
@st.cache_resource
def load_topic_models():
    """Download and deserialize the per-sentiment FASTopic models.

    Returns (negative_model, positive_model); cached by Streamlit.
    joblib.load unpickles — same trusted-repo assumption as the tokenizer.
    """
    negative_path = hf_hub_download(repo_id=repo_id, filename="fastopic_negative_model.pkl")
    positive_path = hf_hub_download(repo_id=repo_id, filename="fastopic_positive_model.pkl")
    return joblib.load(negative_path), joblib.load(positive_path)
# Load models
# Eagerly loaded at import time; the @st.cache_resource decorators make the
# downloads/deserialization a one-time cost per process.
sentiment_model = load_sentiment_model()
tokenizer, params = load_tokenizer_params()
topic_model_neg, topic_model_pos = load_topic_models()
# Padding length the sentiment model expects, taken from the pickled
# training params so inference matches training.
max_len = params["max_len"]
# =============================================
# Text preprocessing
# =============================================
# Negation words are removed from the stopword set: they invert sentiment
# and must survive stopword filtering.
negations = {"not", "no", "never"}
stpwrds_en = set(stopwords.words("english")) - negations
stemmer = PorterStemmer()
# Domain-specific normalization map applied token-by-token before stemming
# (airline vocabulary plus common inflections the stemmer misses).
replacements = {
    "sia": "sq",
    "flown": "fly",
    "flew": "fly",
    "alway": "always",
    "boarding": "board",
    "told": "tell",
    "said": "say",
    "booked": "book",
    "paid": "pay",
    "well": "good",
    "aircraft": "plane"
}
def text_preprocessing(text):
    """Normalize a raw review into a stemmed, space-joined token string.

    Pipeline: lowercase → collapse newlines → strip non-alphanumerics →
    tokenize → apply domain replacements → drop stopwords (negations kept)
    → stem. Returns the sentinel "emptytext" if no tokens survive.
    """
    cleaned = re.sub(r"\n", " ", text.lower()).strip()
    cleaned = re.sub(r'[^a-z0-9\s]', ' ', cleaned)
    stems = [
        stemmer.stem(tok)
        for tok in (replacements.get(raw, raw) for raw in word_tokenize(cleaned))
        if tok not in stpwrds_en
    ]
    return ' '.join(stems) if stems else "emptytext"
# =============================================
# Streamlit App
# =============================================
def run():
    """Streamlit entry point: collect a review, show sentiment + topic predictions."""
    st.title("ACRE - Automated Customer Review Analysis")
    st.subheader("Sentiment & Topic Prediction for SQ Customer Reviews")
    st.markdown(
        "Enter a customer review below to predict sentiment and topic."
    )
    with st.form(key='review_form'):
        text = st.text_area("Customer Review", value="--customer review--")
        submitted = st.form_submit_button("Predict")
    if submitted:
        # Clean and vectorize exactly as at training time.
        cleaned = text_preprocessing(text)
        sequence = tokenizer.texts_to_sequences([cleaned])
        model_input = pad_sequences(sequence, maxlen=max_len, padding="post", truncating="post")
        # --- Sentiment ---
        probabilities = sentiment_model.predict(model_input)
        predicted_index = int(np.argmax(probabilities, axis=1)[0])
        confidence = float(np.max(probabilities))
        sentiment_label = {0: "Negative", 1: "Positive"}[predicted_index]
        st.write(f"**Sentiment:** {sentiment_label} (Confidence: {confidence:.2f})")
        # --- Topic Modeling ---
        # NOTE(review): the topic model receives the RAW review text, not the
        # preprocessed form — presumably intentional (FASTopic handles its own
        # preprocessing); confirm against the training pipeline.
        topic_model = topic_model_neg if sentiment_label == "Negative" else topic_model_pos
        topics, probs = topic_model.transform([text])
        st.write(f"**Topic ID(s):** {topics}")
        st.write(f"**Probabilities:** {probs.tolist()}")


if __name__ == "__main__":
    run()