| import streamlit as st |
| import pandas as pd |
| import torch |
| import torch.nn as nn |
| import torch.nn.functional as F |
| import numpy as np |
| import os |
| from PIL import Image |
| from sklearn.preprocessing import StandardScaler, LabelEncoder |
| from tokenizers import Tokenizer |
| from tokenizers.models import WordLevel |
| from tokenizers.pre_tokenizers import Whitespace |
| from collections import Counter |
| import torch |
| from torch.nn.utils.rnn import pad_sequence |
|
|
|
|
|
|
# Page configuration; this is the first Streamlit call made by the script.
st.set_page_config(layout="centered")

# Global stylesheet for the app, injected as raw HTML.
# Disabled declarations are wrapped in real CSS comments (/* ... */):
# "#" does not start a comment in CSS, so a "# prop: value" line is a
# malformed declaration rather than a comment.
st.markdown("""
<style>
.stApp {
    background-image: url("");
    background-size: cover;
    background-position: center;
    background-repeat: no-repeat;
    height: auto; /* Allows the page to expand for scrolling */
    overflow: auto; /* Enables scrolling if the page content overflows */
    /* position: relative; */
}

/* Adjust opacity of overlay to make content more visible */
.stApp::before {
    content: "";
    position: absolute;
    top: 0;
    left: 0;
    width: 100%;
    height: 100%;
    background-color: rgba(255, 255, 255, 0.8); /* Slightly higher opacity */
    z-index: -1;
}

/* Ensure content appears above the overlay */
.stApp > * {
    position: relative;
    z-index: 2;
}

/* Ensure the dataframe is visible */
.dataframe {
    background-color: rgba(255, 255, 255, 0.9) !important;
    z-index: 3;
}

/* Style text elements for better visibility */
h1, h3, span, div {
    text-shadow: 1px 1px 2px rgba(255, 255, 255, 0.2);
}

/* Custom CSS for select box heading */
div.stSelectbox > label {
    color: #000000 !important; /* Change to your desired color */
    /* background-color: black !important; (background color of the dropdown) */
    font-size: 24px !important; /* Change font size */
    font-weight: bold !important; /* Make text bold */
}

/* Custom CSS for image caption */
.custom-caption {
    color: #000000 !important; /* Change to your desired color */
    font-size: 24px !important; /* Optional: Change font size */
    text-align: center; /* Center-align the caption */
}

.stMainBlockContainer {
    background-color: white !important; /* Background color of the dropdown */
}

.stTextArea {
    color: #000000 !important;
}

</style>
""", unsafe_allow_html=True)
|
|
|
|
| |
def colored_title(text, color):
    """Render ``text`` as an ``<h1>`` heading styled with the CSS ``color``.

    Both values are interpolated into the markup as-is (no escaping) and the
    result is rendered through ``st.markdown`` with raw HTML enabled.
    """
    heading_html = f"<h1 style='color: {color};'>{text}</h1>"
    st.markdown(heading_html, unsafe_allow_html=True)
|
|
def colored_subheader(text, color):
    """Render ``text`` as an ``<h3>`` sub-heading styled with the CSS ``color``.

    Both values are interpolated into the markup as-is (no escaping) and the
    result is rendered through ``st.markdown`` with raw HTML enabled.
    """
    subheader_html = f"<h3 style='color: {color};'>{text}</h3>"
    st.markdown(subheader_html, unsafe_allow_html=True)
|
|
def colored_text(text, color):
    """Render ``text`` inside a ``<span>`` styled with the CSS ``color``.

    Both values are interpolated into the markup as-is (no escaping) and the
    result is rendered through ``st.markdown`` with raw HTML enabled.
    """
    span_html = f"<span style='color: {color};'>{text}</span>"
    st.markdown(span_html, unsafe_allow_html=True)
|
|
|
|
# Network hyper-parameters handed to RNNModel when the model is built.
embedding_dim: int = 128  # size of each token embedding vector
hidden_units: int = 128   # size of the RNN hidden state
num_classes: int = 3      # width of the final linear (output) layer
|
|
class RNNModel(nn.Module):
    """Text classifier: embedding -> single-layer RNN -> linear -> softmax.

    Args:
        vocab_size: Number of rows in the embedding table.
        embedding_dim: Size of each token embedding.
        hidden_units: Size of the RNN hidden state.
        num_classes: Number of output classes.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_units, num_classes):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        # No ``dropout`` argument: nn.RNN only applies dropout between
        # stacked layers, so on this single-layer RNN it had no effect and
        # merely triggered a UserWarning. Dropout owns no parameters, so the
        # state-dict keys (and saved checkpoints) are unaffected.
        self.rnn = nn.RNN(embedding_dim, hidden_units, batch_first=True)
        self.fc = nn.Linear(hidden_units, num_classes)

    def forward(self, x):
        """Return class probabilities for a batch of token-id sequences.

        Args:
            x: Integer tensor of token ids, shape ``(batch, seq_len)``.

        Returns:
            Tensor of shape ``(batch, num_classes)``; each row is a softmax
            distribution over the classes.
        """
        embedded = self.embedding(x)
        output, _ = self.rnn(embedded)
        # Classify from the RNN output at the final time step.
        last_step = output[:, -1, :]
        logits = self.fc(last_step)
        return F.softmax(logits, dim=1)
|
|
@st.cache_resource
def _load_model_cached(vocab_size):
    """Build the RNN, load its saved weights on CPU and switch to eval mode.

    Any failure propagates as an exception, so only a successfully loaded
    model is ever stored in the resource cache.
    """
    model = RNNModel(vocab_size, embedding_dim, hidden_units, num_classes)
    # NOTE(review): torch.load unpickles the file; only load weight files
    # from a trusted source.
    state_dict = torch.load(
        'rnn_classification_model_weights.pth',
        map_location=torch.device('cpu'),
    )
    model.load_state_dict(state_dict)
    model.eval()
    return model


def load_model(vocab_size):
    """Return the trained model for ``vocab_size``, or ``None`` on failure.

    The error handling lives outside the cached helper on purpose: when the
    ``try/except`` sat inside the cached function, a failed load cached the
    ``None`` result and the failure stuck for that ``vocab_size`` until the
    cache was cleared. Here a failure is reported with ``st.error`` and the
    next call simply retries.

    Args:
        vocab_size: Vocabulary size used to build the embedding layer; must
            match the saved weights.

    Returns:
        The model in eval mode, or ``None`` if building or loading failed.
    """
    try:
        return _load_model_cached(vocab_size)
    except Exception as e:  # UI boundary: report the problem instead of crashing
        st.error(f"Error loading model: {str(e)}")
        return None
|
|
@st.cache_data
def load_data():
    """Load the CSV, rebuild the vocabulary and encode every text row.

    Returns:
        A 4-tuple ``(X_train_pad, texts, class_mapping, vocab_size)``:
        ``X_train_pad`` is an integer tensor of token ids, one row per text,
        left-padded with 0 to the longest sequence; ``texts`` is the raw text
        column as an array; ``class_mapping`` maps a class index to its label
        string; ``vocab_size`` is the tokenizer's vocabulary size.
    """
    df = pd.read_csv("alldata_1_for_kaggle.csv", encoding='latin1')
    df = df.rename(columns={'0': 'labels', 'a': 'text'})
    texts = df['text'].values

    # NOTE(review): indices follow the order in which labels first appear in
    # the CSV; presumably this matches the label encoding used at training
    # time -- confirm.
    class_mapping = {
        index: f"{label}" for index, label in enumerate(df['labels'].unique())
    }

    # Word frequencies over lower-cased, whitespace-split text.
    # NOTE(review): the vocabulary is built with str.split while encoding
    # below goes through the Whitespace pre-tokenizer, which may split
    # punctuation differently. Presumably this mirrors the training pipeline;
    # verify before changing, since vocab_size must match the saved weights.
    word_counts = Counter()
    for text in texts:
        word_counts.update(text.lower().split())

    # Ids 0 and 1 are reserved; remaining words get ids by descending frequency.
    vocab = {"<pad>": 0, "<unk>": 1}
    vocab.update(
        {
            word: idx
            for idx, (word, _) in enumerate(word_counts.most_common(), start=2)
        }
    )

    tokenizer = Tokenizer(WordLevel(vocab, unk_token="<unk>"))
    tokenizer.pre_tokenizer = Whitespace()

    # Explicit integer dtype: torch.tensor([]) would otherwise default to
    # float32 for a text that yields no tokens.
    sequences = [
        torch.tensor(tokenizer.encode(text.lower()).ids, dtype=torch.long)
        for text in texts
    ]

    # pad_sequence pads on the right; reversing each sequence first and
    # flipping the padded batch back yields left-padded rows, so the final
    # time step of every row is a real token.
    padded_reversed = pad_sequence(
        [seq.flip(0) for seq in sequences],
        batch_first=True,
        padding_value=0,
    )
    X_train_pad = padded_reversed.flip(1)
    vocab_size = len(tokenizer.get_vocab())

    return X_train_pad, texts, class_mapping, vocab_size
|
|
|
|
def main():
    """Run the app: pick a dataset row, show its text, predict on demand."""
    colored_title("Text Classification using RNN", "black")

    padded_inputs, texts, class_mapping, vocab_size = load_data()

    # Row picker followed by a read-only preview of the chosen text.
    colored_subheader("Select a Row for Prediction:", "black")
    row_choices = range(len(texts))
    selected_index = st.selectbox("Select a row", options=row_choices, index=0)

    colored_text("Selected Text:", "black")
    st.text_area(
        "Text Content:",
        value=texts[selected_index],
        height=150,
        disabled=True,
    )

    # Nothing more to do until the user asks for a prediction.
    if not st.button("Predict"):
        return

    model = load_model(vocab_size)
    if model is None:
        return

    with torch.no_grad():
        # List indexing keeps the batch dimension: shape (1, seq_len).
        probabilities = model(padded_inputs[[selected_index]])
        predicted_class = torch.argmax(probabilities, dim=1).item()

    colored_subheader("Prediction Results:", "green")
    colored_text(f"Predicted Class: {class_mapping[predicted_class]}", "green")


if __name__ == "__main__":
    main()