"""Gradio app that classifies movie-review sentiment with a saved BiLSTM model.

At import time the script downloads the required NLTK resources, loads the
Keras model and the pickled tokenizer from the working directory, and builds
the Gradio interface. The web server is started only when the file is run as
a script.
"""

import pickle
import re
import string
from typing import Optional, Tuple

import contractions
import gradio as gr
import nltk
from bs4 import BeautifulSoup
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences

# --------------------------------------------------
# NLTK Downloads
# --------------------------------------------------
nltk.download('stopwords', quiet=True)
nltk.download('punkt', quiet=True)
nltk.download('punkt_tab', quiet=True)

# --------------------------------------------------
# Load Model and Tokenizer
# --------------------------------------------------
MODEL_PATH = "bilstm_sentiment_model.keras"
TOKENIZER_PATH = "BiLSTM_tokenizer.pkl"

loaded_model = load_model(MODEL_PATH)

# SECURITY NOTE: unpickling executes arbitrary code embedded in the file.
# Only ever point TOKENIZER_PATH at a tokenizer file you produced or trust.
with open(TOKENIZER_PATH, "rb") as f:
    loaded_tokenizer = pickle.load(f)

print("✅ Model and Tokenizer loaded successfully")

# --------------------------------------------------
# Constants
# --------------------------------------------------
MAX_LEN = 200
STOP_WORDS = set(stopwords.words("english"))

# Precomputed once instead of on every request.
_URL_RE = re.compile(r"http\S+|www\.\S+")
_NUMBER_RE = re.compile(r"\b\d+\b")
_WHITESPACE_RE = re.compile(r"\s+")
_PUNCTUATION_TABLE = str.maketrans("", "", string.punctuation)
# Typographic quotes/dashes -> ASCII equivalents. This must run before the
# ASCII-only filter below, which would otherwise drop these characters.
_TYPOGRAPHY_TABLE = str.maketrans({
    "\u2019": "'",
    "\u2018": "'",
    "\u201c": '"',
    "\u201d": '"',
    "\u2013": "-",
    "\u2014": "-",
})

# Placeholders shown when a review contains nothing the model can score.
_NO_TEXT_SENTIMENT = "N/A (no analyzable text)"
_NO_TEXT_CONFIDENCE = "N/A"


# --------------------------------------------------
# Text Preprocessing
# --------------------------------------------------
def preprocess_text(text: str) -> str:
    """Normalize a raw review into a space-joined string of kept tokens.

    Steps, in order: strip HTML, remove URLs, map typographic quotes and
    dashes to ASCII, drop remaining non-ASCII characters, expand
    contractions, lowercase, remove punctuation, remove standalone numbers,
    collapse whitespace, tokenize, and drop English stopwords.

    Args:
        text: Raw review text. ``None`` or an empty string is accepted.

    Returns:
        The cleaned text, or ``""`` when the input is empty/``None`` or
        nothing survives cleaning.
    """
    # Guard: BeautifulSoup raises TypeError on None, and there is nothing
    # to clean in an empty string.
    if not text:
        return ""

    # Remove HTML
    text = BeautifulSoup(text, "html.parser").get_text()

    # Remove URLs
    text = _URL_RE.sub("", text)

    # Normalize special characters
    text = text.translate(_TYPOGRAPHY_TABLE)
    text = text.encode("ascii", errors="ignore").decode("ascii")

    # Expand contractions (needs the apostrophes, so it runs before
    # punctuation removal)
    text = contractions.fix(text)

    # Lowercase
    text = text.lower()

    # Remove punctuation
    text = text.translate(_PUNCTUATION_TABLE)

    # Remove numbers
    text = _NUMBER_RE.sub("", text)

    # Remove extra spaces
    text = _WHITESPACE_RE.sub(" ", text).strip()

    # Tokenize and remove stopwords
    tokens = [word for word in word_tokenize(text) if word not in STOP_WORDS]

    return " ".join(tokens)


# --------------------------------------------------
# Prediction Function
# --------------------------------------------------
def predict_sentiment(review_text: str) -> Tuple[str, str, str, Optional[float]]:
    """Classify a review as positive or negative with the loaded model.

    Args:
        review_text: Raw review text as entered by the user.

    Returns:
        A 4-tuple ``(clean_text, sentiment, confidence, raw_score)``:
        the preprocessed text, a sentiment label, the confidence formatted
        as a percentage string, and the model score rounded to 4 decimals.
        When preprocessing leaves no tokens, the label and confidence are
        "N/A" placeholders and ``raw_score`` is ``None``.
    """
    clean_text = preprocess_text(review_text)

    # With no tokens the padded input would be all zeros, and the model
    # would still emit a score that says nothing about the review. Report
    # "N/A" instead of a misleading sentiment/confidence.
    if not clean_text:
        return (clean_text, _NO_TEXT_SENTIMENT, _NO_TEXT_CONFIDENCE, None)

    seq = loaded_tokenizer.texts_to_sequences([clean_text])
    padded = pad_sequences(
        seq,
        maxlen=MAX_LEN,
        padding="post",
        truncating="post",
    )

    # Reads the first value of the first prediction row; presumably the
    # model has a single sigmoid output unit -- confirm against training.
    score = float(loaded_model.predict(padded, verbose=0)[0][0])

    if score >= 0.5:
        sentiment = "Positive 😊"
        confidence = score * 100
    else:
        sentiment = "Negative 😞"
        confidence = (1 - score) * 100

    return (
        clean_text,
        sentiment,
        f"{confidence:.2f}%",
        round(score, 4),
    )


# --------------------------------------------------
# Gradio UI
# --------------------------------------------------
app = gr.Interface(
    fn=predict_sentiment,
    inputs=gr.Textbox(
        lines=5,
        placeholder="Enter a movie review...",
        label="Movie Review",
    ),
    outputs=[
        gr.Textbox(label="Cleaned Text"),
        gr.Textbox(label="Predicted Sentiment"),
        gr.Textbox(label="Confidence"),
        gr.Number(label="Raw Score"),
    ],
    title="🎬 BiLSTM Movie Review Sentiment Analyzer",
    description="Enter a movie review and the trained BiLSTM model will predict whether the sentiment is positive or negative.",
)

# Start the server only when executed as a script, so importing this module
# (e.g. to reuse preprocess_text) does not block on a running web server.
if __name__ == "__main__":
    app.launch()