Spaces:

shukdev3
/

random

Running

App Files Files Community

random / app.py

shukdev3

Update app.py

0647b9e verified 21 days ago

Raw

History Blame Contribute Delete

3.63 kB

	import pickle
	import re
	import string
	import contractions
	import gradio as gr
	import nltk

	from bs4 import BeautifulSoup
	from nltk.tokenize import word_tokenize
	from nltk.corpus import stopwords

	from tensorflow.keras.models import load_model
	from tensorflow.keras.preprocessing.sequence import pad_sequences

	# --------------------------------------------------
	# NLTK Downloads
	# --------------------------------------------------
	# Fetch every NLTK resource this script uses (stopword list and the punkt
	# tokenizer data); quiet=True suppresses the downloader's console output.
	for _nltk_resource in ("stopwords", "punkt", "punkt_tab"):
	    nltk.download(_nltk_resource, quiet=True)

	# --------------------------------------------------
	# Load Model and Tokenizer
	# --------------------------------------------------
	# Both artifact paths are relative, so they resolve against the process's
	# current working directory.
	MODEL_PATH = "bilstm_sentiment_model.keras"
	TOKENIZER_PATH = "BiLSTM_tokenizer.pkl"

	# Restore the saved Keras model once at start-up so every request reuses it.
	loaded_model = load_model(MODEL_PATH)

	# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
	# only ship a tokenizer pickle that comes from a trusted source.
	with open(TOKENIZER_PATH, "rb") as tokenizer_file:
	    loaded_tokenizer = pickle.load(tokenizer_file)

	print("✅ Model and Tokenizer loaded successfully")

	# --------------------------------------------------
	# Constants
	# --------------------------------------------------
	# Length every tokenized review is padded or truncated to before prediction.
	# NOTE(review): presumably must equal the sequence length used in training - confirm.
	MAX_LEN = 200

	# English stopwords held in a set so per-token membership checks are O(1).
	STOP_WORDS = {*stopwords.words("english")}

	# --------------------------------------------------
	# Text Preprocessing
	# --------------------------------------------------
	def preprocess_text(text: str) -> str:
	    """Clean a raw review and return it as space-joined, stopword-free tokens.

	    Steps, in order: strip HTML markup, drop URLs, map typographic quotes and
	    dashes to ASCII and discard any remaining non-ASCII characters, expand
	    contractions, lowercase, remove punctuation and standalone numbers,
	    collapse whitespace, tokenize, and filter out ``STOP_WORDS``.

	    NOTE(review): these steps presumably have to mirror the preprocessing used
	    when the model was trained - confirm before changing any of them.

	    Args:
	        text: Raw review text; may contain HTML and URLs.

	    Returns:
	        The cleaned text, or an empty string when ``text`` is empty or ``None``.
	    """
	    # Nothing to clean; this also keeps a None payload away from BeautifulSoup.
	    if not text:
	        return ""

	    # Remove HTML
	    text = BeautifulSoup(text, "html.parser").get_text()

	    # Remove URLs. The alternation bar must stay unescaped: `\|` would match a
	    # literal pipe character and leave every URL in place.
	    text = re.sub(r"http\S+|www\.\S+", "", text)

	    # Normalize typographic quotes and dashes in one pass so they survive the
	    # ASCII-only filter below as their plain equivalents.
	    typographic_to_ascii = str.maketrans({
	        "\u2019": "'",
	        "\u2018": "'",
	        "\u201c": '"',
	        "\u201d": '"',
	        "\u2013": "-",
	        "\u2014": "-",
	    })
	    text = text.translate(typographic_to_ascii)
	    text = text.encode("ascii", errors="ignore").decode("ascii")

	    # Expand contractions
	    text = contractions.fix(text)

	    # Lowercase
	    text = text.lower()

	    # Remove punctuation
	    text = text.translate(str.maketrans("", "", string.punctuation))

	    # Remove numbers (digit runs delimited by word boundaries)
	    text = re.sub(r"\b\d+\b", "", text)

	    # Remove extra spaces
	    text = re.sub(r"\s+", " ", text).strip()

	    # Tokenize and remove stopwords
	    tokens = [word for word in word_tokenize(text) if word not in STOP_WORDS]

	    return " ".join(tokens)

	# --------------------------------------------------
	# Prediction Function
	# --------------------------------------------------
	def predict_sentiment(review_text):
	    """Classify one review and return the values shown in the UI.

	    The review is cleaned with ``preprocess_text``, converted to an integer
	    sequence by ``loaded_tokenizer``, padded/truncated at the end to
	    ``MAX_LEN`` and scored by ``loaded_model``.

	    NOTE(review): the model output is treated as the probability of the
	    positive class - confirm against the training setup.

	    Args:
	        review_text: Raw review text entered by the user.

	    Returns:
	        A 4-tuple ``(cleaned_text, sentiment_label, confidence, raw_score)``:
	        the preprocessed text, ``"Positive 😊"`` when the score is at least
	        0.5 and ``"Negative 😞"`` otherwise, the confidence in the chosen
	        label formatted as a percentage with two decimals, and the score
	        rounded to four decimal places.
	    """
	    cleaned = preprocess_text(review_text)

	    # The tokenizer works on batches, so wrap the single review in a list.
	    sequences = loaded_tokenizer.texts_to_sequences([cleaned])
	    model_input = pad_sequences(
	        sequences,
	        maxlen=MAX_LEN,
	        padding="post",
	        truncating="post",
	    )

	    # First value of the first (only) row of the prediction output.
	    probability = float(loaded_model.predict(model_input, verbose=0)[0][0])

	    is_positive = probability >= 0.5
	    label = "Positive 😊" if is_positive else "Negative 😞"
	    # Confidence is always expressed for the label that was chosen.
	    confidence_pct = probability * 100 if is_positive else (1 - probability) * 100

	    return cleaned, label, f"{confidence_pct:.2f}%", round(probability, 4)

	# --------------------------------------------------
	# Gradio UI
	# --------------------------------------------------
	# Multi-line text box that receives the review to classify.
	_review_input = gr.Textbox(
	    lines=5,
	    placeholder="Enter a movie review...",
	    label="Movie Review",
	)

	# One widget per element of the tuple returned by predict_sentiment, in order.
	_result_outputs = [
	    gr.Textbox(label="Cleaned Text"),
	    gr.Textbox(label="Predicted Sentiment"),
	    gr.Textbox(label="Confidence"),
	    gr.Number(label="Raw Score"),
	]

	app = gr.Interface(
	    fn=predict_sentiment,
	    inputs=_review_input,
	    outputs=_result_outputs,
	    title="🎬 BiLSTM Movie Review Sentiment Analyzer",
	    description="Enter a movie review and the trained BiLSTM model will predict whether the sentiment is positive or negative.",
	)

	# Launch the interface as soon as this script runs.
	app.launch()