| import pickle |
| import re |
| import string |
| import contractions |
| import gradio as gr |
| import nltk |
|
|
| from bs4 import BeautifulSoup |
| from nltk.tokenize import word_tokenize |
| from nltk.corpus import stopwords |
|
|
| from tensorflow.keras.models import load_model |
| from tensorflow.keras.preprocessing.sequence import pad_sequences |
|
|
| |
| |
| |
# ---------------------------------------------------------------------------
# Module setup: NLTK resources, trained artifacts, preprocessing constants.
# Everything here runs once, at import time.
# ---------------------------------------------------------------------------

# NLTK data used further down: the stop-word corpus (stopwords.words) and the
# tokenizer data that word_tokenize presumably relies on. quiet=True keeps the
# download chatter out of the console.
nltk.download('stopwords', quiet=True)
nltk.download('punkt', quiet=True)
nltk.download('punkt_tab', quiet=True)

# Paths are relative to the current working directory.
MODEL_PATH = "bilstm_sentiment_model.keras"
TOKENIZER_PATH = "BiLSTM_tokenizer.pkl"

loaded_model = load_model(MODEL_PATH)

# SECURITY: pickle.load can execute arbitrary code embedded in the file.
# Only ship/load a tokenizer pickle that comes from a trusted source.
with open(TOKENIZER_PATH, "rb") as f:
    loaded_tokenizer = pickle.load(f)

# The literal used to be split across two lines with a mis-decoded check-mark
# emoji; it is written as an escape so it survives encoding round-trips.
print("\u2705 Model and Tokenizer loaded successfully")

# Sequence length handed to pad_sequences in predict_sentiment.
# NOTE(review): presumably this must equal the length used in training - confirm.
MAX_LEN = 200

# English stop words removed from every review by preprocess_text.
STOP_WORDS = set(stopwords.words("english"))
|
|
| |
| |
| |
def preprocess_text(text: str) -> str:
    """Normalize a raw review into a space-joined string of tokens.

    The steps run in this order: strip HTML markup, drop URLs, map curly
    quotes and en/em dashes to ASCII, discard any remaining non-ASCII
    characters, expand contractions, lowercase, delete ASCII punctuation,
    remove standalone numbers, collapse whitespace, tokenize, and filter out
    the entries of ``STOP_WORDS``.

    Args:
        text: Raw review text, possibly containing HTML.

    Returns:
        The surviving tokens joined by single spaces.
    """
    # HTML -> plain text, then remove links.
    plain = BeautifulSoup(text, "html.parser").get_text()
    plain = re.sub(r"http\S+|www\.\S+", "", plain)

    # Typographic quotes and dashes get ASCII stand-ins first, so they are not
    # thrown away with the rest of the non-ASCII characters on the next line.
    ascii_stand_ins = str.maketrans({
        "\u2019": "'",
        "\u2018": "'",
        "\u201c": '"',
        "\u201d": '"',
        "\u2013": "-",
        "\u2014": "-",
    })
    plain = plain.translate(ascii_stand_ins)
    plain = plain.encode("ascii", errors="ignore").decode("ascii")

    # "don't" -> "do not" etc., then case-fold.
    expanded = contractions.fix(plain).lower()

    # Punctuation is deleted (not replaced by a space), then bare numbers go.
    without_punct = expanded.translate(str.maketrans("", "", string.punctuation))
    without_numbers = re.sub(r"\b\d+\b", "", without_punct)

    # Collapse whitespace runs left behind by the removals above.
    squeezed = re.sub(r"\s+", " ", without_numbers).strip()

    kept = (token for token in word_tokenize(squeezed) if token not in STOP_WORDS)
    return " ".join(kept)
|
|
| |
| |
| |
def predict_sentiment(review_text):
    """Classify a movie review as positive or negative.

    The review is cleaned with ``preprocess_text``, converted to an integer
    sequence by ``loaded_tokenizer``, padded/truncated to ``MAX_LEN`` and
    scored by ``loaded_model``. A score of 0.5 or more is reported as
    positive.

    Args:
        review_text: Raw review text entered by the user. May be ``None`` or
            blank.

    Returns:
        A 4-tuple ``(clean_text, sentiment, confidence, raw_score)``:
        the preprocessed text, the label, the confidence formatted as a
        percentage with two decimals, and the model score rounded to four
        decimals.

        When there is nothing to analyze (``None``, blank input, or a review
        from which preprocessing removes every token) the model is not called
        and ``("", "No text to analyze", "N/A", None)`` is returned.
    """
    nothing_to_analyze = ("", "No text to analyze", "N/A", None)

    # Guard before touching the model: with no tokens the padded input is pure
    # padding, yet the model would still emit a score that the UI would show
    # as a confident prediction.
    if not review_text or not review_text.strip():
        return nothing_to_analyze

    clean_text = preprocess_text(review_text)
    if not clean_text:
        return nothing_to_analyze

    seq = loaded_tokenizer.texts_to_sequences([clean_text])
    padded = pad_sequences(
        seq,
        maxlen=MAX_LEN,
        padding="post",
        truncating="post",
    )

    # The model output is indexed as [sample][unit]; a single sample is scored.
    score = float(loaded_model.predict(padded, verbose=0)[0][0])

    # NOTE(review): the emoji in these labels arrived mis-decoded (rendered as
    # a lone Greek pi). They are restored as "smiling face" / "disappointed
    # face" and written as escapes to survive encoding round-trips - confirm
    # these are the intended glyphs.
    if score >= 0.5:
        sentiment = "Positive \U0001F60A"
        confidence = score * 100
    else:
        sentiment = "Negative \U0001F61E"
        # Confidence in the negative class is the complement of the score.
        confidence = (1 - score) * 100

    return (
        clean_text,
        sentiment,
        f"{confidence:.2f}%",
        round(score, 4),
    )
|
|
| |
| |
| |
# Gradio UI: one multi-line text input; the four outputs line up, in order,
# with the 4-tuple returned by predict_sentiment.
app = gr.Interface(
    fn=predict_sentiment,
    inputs=gr.Textbox(
        lines=5,
        placeholder="Enter a movie review...",
        label="Movie Review",
    ),
    outputs=[
        gr.Textbox(label="Cleaned Text"),
        gr.Textbox(label="Predicted Sentiment"),
        gr.Textbox(label="Confidence"),
        gr.Number(label="Raw Score"),
    ],
    # The clapper-board emoji arrived mis-decoded; it is written as an escape
    # so the title survives encoding round-trips.
    title="\U0001F3AC BiLSTM Movie Review Sentiment Analyzer",
    description="Enter a movie review and the trained BiLSTM model will predict whether the sentiment is positive or negative.",
)

# Starts the web server as soon as the module is executed.
app.launch()