"""Streamlit demo: classify a row of a CSV text dataset with a trained RNN.

The app rebuilds the vocabulary from ``alldata_1_for_kaggle.csv``, encodes and
left-pads every text, loads weights from
``rnn_classification_model_weights.pth`` and shows the predicted class for the
row the user selects.
"""

import html
import os
from collections import Counter

import numpy as np
import pandas as pd
import streamlit as st
import torch
import torch.nn as nn
import torch.nn.functional as F
from PIL import Image
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tokenizers import Tokenizer
from tokenizers.models import WordLevel
from tokenizers.pre_tokenizers import Whitespace
from torch.nn.utils.rnn import pad_sequence

st.set_page_config(layout="centered")

# Add custom CSS for background image and styling.
# NOTE(review): the CSS payload of this call is empty in the file as received
# (it appears to have been stripped); restore the <style> block if one existed.
st.markdown(""" """, unsafe_allow_html=True)


# Custom title styling functions
def _styled_html(tag, text, color):
    """Return ``text`` wrapped in ``tag`` with an inline CSS ``color``.

    Both values are HTML-escaped because the result is rendered with
    ``unsafe_allow_html=True`` and ``text`` may contain values read from the
    CSV file.
    """
    safe_color = html.escape(str(color), quote=True)
    safe_text = html.escape(str(text))
    return f"<{tag} style='color: {safe_color};'>{safe_text}</{tag}>"


# NOTE(review): the HTML tags inside the three helpers below were missing from
# the file as received, which left ``color`` unused and the string literals
# broken across lines. The tag choices (h1 / h3 / span) are a reconstruction;
# confirm against the intended design.
def colored_title(text, color):
    """Render ``text`` as a page title in the given CSS ``color``."""
    st.markdown(_styled_html("h1", text, color), unsafe_allow_html=True)


def colored_subheader(text, color):
    """Render ``text`` as a sub-header in the given CSS ``color``."""
    st.markdown(_styled_html("h3", text, color), unsafe_allow_html=True)


def colored_text(text, color):
    """Render ``text`` inline in the given CSS ``color``."""
    st.markdown(_styled_html("span", text, color), unsafe_allow_html=True)


# Model hyper-parameters; they determine the parameter shapes, so they have to
# agree with the checkpoint that load_model() reads.
embedding_dim = 128
hidden_units = 128
num_classes = 3


class RNNModel(nn.Module):
    """Embedding -> single-layer RNN -> linear classifier with softmax output."""

    def __init__(self, vocab_size, embedding_dim, hidden_units, num_classes):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        # No ``dropout`` argument: nn.RNN applies dropout only between stacked
        # layers, so on a one-layer RNN it has no effect and only triggers a
        # warning. Parameter names and shapes are unchanged.
        self.rnn = nn.RNN(embedding_dim, hidden_units, batch_first=True)
        self.fc = nn.Linear(hidden_units, num_classes)

    def forward(self, x):
        """Return class probabilities for a batch of token-id sequences.

        ``x`` is indexed as (batch, time) because the RNN is built with
        ``batch_first=True``.
        """
        x = self.embedding(x)
        output, _ = self.rnn(x)
        x = output[:, -1, :]  # Use last timestep output
        x = self.fc(x)
        return F.softmax(x, dim=1)


@st.cache_resource
def _load_model_cached(vocab_size):
    """Build the model, load its weights on CPU and switch it to eval mode.

    Raises whatever ``torch.load`` / ``load_state_dict`` raise, so that a
    failed attempt does not leave a cached result behind.
    """
    model = RNNModel(vocab_size, embedding_dim, hidden_units, num_classes)
    # NOTE(review): torch.load unpickles the file; only load checkpoints from
    # a trusted source (consider ``weights_only=True`` where the installed
    # torch version supports it).
    state_dict = torch.load(
        'rnn_classification_model_weights.pth',
        map_location=torch.device('cpu'),
    )
    model.load_state_dict(state_dict)
    model.eval()
    return model


def load_model(vocab_size):
    """Return the ready-to-use model, or ``None`` if it could not be loaded.

    The error is shown in the UI. The try/except lives outside the cached
    function on purpose: a ``None`` returned from inside the cached function
    would be cached like any other return value and mask a later fix (for
    example the weights file being added) until the cache is cleared.
    """
    try:
        return _load_model_cached(vocab_size)
    except Exception as e:  # UI boundary: report instead of crashing the app
        st.error(f"Error loading model: {str(e)}")
        return None


@st.cache_data
def load_data():
    """Load the dataset and encode every text as a left-padded id sequence.

    Returns:
        A tuple ``(X_train_pad, texts, class_mapping, vocab_size)``:
        the padded id tensor (one row per text), the raw texts, a dict from
        class index to label string, and the tokenizer vocabulary size.
    """
    df = pd.read_csv("alldata_1_for_kaggle.csv", encoding='latin1')
    df = df.rename(columns={'0': 'labels', 'a': 'text'})
    texts = df['text'].values

    # NOTE(review): ``unique()`` yields labels in order of first appearance.
    # The index -> label mapping is only right if training encoded the labels
    # in that same order; confirm against the training script.
    unique_classes = df['labels'].unique()
    class_mapping = {i: f"{label}" for i, label in enumerate(unique_classes)}

    # Build vocabulary based on word frequency (most common words get the
    # lowest indices), using basic lower-cased whitespace tokenization.
    word_counts = Counter()
    for text in texts:
        word_counts.update(text.lower().split())
    sorted_words = [word for word, _ in word_counts.most_common()]

    # Ids 0 and 1 are reserved for the padding and unknown-word tokens. The
    # two keys must be distinct: with identical keys the dict collapses to a
    # single entry, and the largest word id then falls outside the embedding
    # table sized from ``vocab_size``.
    vocab = {"<PAD>": 0, "<UNK>": 1}
    vocab.update({word: idx + 2 for idx, word in enumerate(sorted_words)})

    # Initialize tokenizer
    tokenizer = Tokenizer(WordLevel(vocab, unk_token="<UNK>"))
    tokenizer.pre_tokenizer = Whitespace()

    # Convert texts to id sequences. The explicit dtype keeps an empty
    # sequence an integer tensor (torch.tensor([]) would default to float).
    X_train_seq = [
        torch.tensor(tokenizer.encode(text.lower()).ids, dtype=torch.long)
        for text in texts
    ]

    # pad_sequence pads on the right; reversing each sequence before padding
    # and flipping the result back puts the padding on the left, so the last
    # timestep read by the model is always a real token.
    X_train_seq_reversed = [seq.flip(0) for seq in X_train_seq]
    X_train_pad_reversed = pad_sequence(
        X_train_seq_reversed, batch_first=True, padding_value=0
    )
    X_train_pad = X_train_pad_reversed.flip(1)

    vocab_size = len(tokenizer.get_vocab())
    return X_train_pad, texts, class_mapping, vocab_size


def main():
    """Render the page: row selector, text preview and prediction result."""
    colored_title("Text Classification using RNN", "black")

    # Load data
    X_test, texts, class_mapping, vocab_size = load_data()

    # Let the user pick which row of the dataset to classify
    colored_subheader("Select a Row for Prediction:", "black")
    selected_index = st.selectbox(
        "Select a row", options=range(len(texts)), index=0
    )

    colored_text("Selected Text:", "black")
    st.text_area(
        "Text Content:",
        value=texts[selected_index],
        height=150,
        disabled=True,
    )

    # Predict button
    if st.button("Predict"):
        model = load_model(vocab_size)
        if model is not None:
            with torch.no_grad():
                # Index with a list so the row keeps its batch dimension.
                output = model(X_test[[selected_index]])
                predicted_class = torch.argmax(output, dim=1).item()

            # Display prediction result
            colored_subheader("Prediction Results:", "green")
            colored_text(
                f"Predicted Class: {class_mapping[predicted_class]}", "green"
            )


if __name__ == "__main__":
    main()