|
|
import streamlit as st |
|
|
import pandas as pd |
|
|
import torch |
|
|
import torch.nn as nn |
|
|
import torch.nn.functional as F |
|
|
import numpy as np |
|
|
import os |
|
|
from PIL import Image |
|
|
from sklearn.preprocessing import StandardScaler, LabelEncoder |
|
|
from tokenizers import Tokenizer |
|
|
from tokenizers.models import WordLevel |
|
|
from tokenizers.pre_tokenizers import Whitespace |
|
|
from collections import Counter |
|
|
import torch |
|
|
from torch.nn.utils.rnn import pad_sequence |
|
|
|
|
|
|
|
|
|
|
|
# Streamlit page setup: single centered content column.
st.set_page_config(layout="centered")

# Inject global CSS via raw HTML. unsafe_allow_html is required for <style>
# tags. NOTE(review): the background-image URL is empty, so only the white
# overlay below is visible — confirm whether a background image was intended.
st.markdown("""
<style>
.stApp {
background-image: url("");
background-size: cover;
background-position: center;
background-repeat: no-repeat;
height: auto; /* Allows the page to expand for scrolling */
overflow: auto; /* Enables scrolling if the page content overflows */
# position : relative
}

/* Adjust opacity of overlay to make content more visible */
.stApp::before {
content: "";
position: absolute;
top: 0;
left: 0;
width: 100%;
height: 100%;
background-color: rgba(255, 255, 255, 0.8); /* Slightly higher opacity */
z-index: -1;
}

/* Ensure content appears above the overlay */
.stApp > * {
position: relative;
z-index: 2;
}

/* Ensure the dataframe is visible */
.dataframe {
background-color: rgba(255, 255, 255, 0.9) !important;
z-index: 3;
}

/* Style text elements for better visibility */
h1, h3, span, div {
text-shadow: 1px 1px 2px rgba(255, 255, 255, 0.2);
}

/* Custom CSS for select box heading */
div.stSelectbox > label {
color: #000000 !important; /* Change to your desired color */
# background-color: black !important; /* Background color of the dropdown */
font-size: 24px !important; /* Change font size */
font-weight: bold !important; /* Make text bold */
}

/* Custom CSS for image caption */
.custom-caption {
color: #000000 !important; /* Change to your desired color */
font-size: 24px !important; /* Optional: Change font size */
text-align: center; /* Center-align the caption */
}

.stMainBlockContainer {
background-color: white !important; /* Background color of the dropdown */
}

.stTextArea{
color: #000000 !important
}

</style>
""", unsafe_allow_html=True)
|
|
|
|
|
|
|
|
|
|
|
def colored_title(text, color):
    """Render *text* as an <h1> heading in the given CSS color."""
    heading = f"<h1 style='color: {color};'>{text}</h1>"
    st.markdown(heading, unsafe_allow_html=True)
|
|
|
|
|
def colored_subheader(text, color):
    """Render *text* as an <h3> subheading in the given CSS color."""
    subheading = f"<h3 style='color: {color};'>{text}</h3>"
    st.markdown(subheading, unsafe_allow_html=True)
|
|
|
|
|
def colored_text(text, color):
    """Render *text* as an inline <span> in the given CSS color."""
    snippet = f"<span style='color: {color};'>{text}</span>"
    st.markdown(snippet, unsafe_allow_html=True)
|
|
|
|
|
|
|
|
# Model hyperparameters — must match the architecture used to train the
# weights in 'rnn_classification_model_weights.pth', or load_state_dict fails.
embedding_dim = 128   # size of each token embedding vector
hidden_units = 128    # RNN hidden-state size
num_classes = 3       # number of output classes
|
|
|
|
|
class RNNModel(nn.Module):
    """Single-layer RNN text classifier: embedding -> RNN -> linear -> softmax.

    Args:
        vocab_size: number of distinct token ids (embedding rows).
        embedding_dim: size of each token embedding vector.
        hidden_units: RNN hidden-state size.
        num_classes: number of output classes.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_units, num_classes):
        super(RNNModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        # Fix: the original passed dropout=0.2 here, but nn.RNN only applies
        # dropout *between* stacked layers, so with the default num_layers=1
        # it had no effect and merely triggered a PyTorch UserWarning.
        # Removing it is behavior-identical (dropout adds no parameters, so
        # pretrained state_dicts still load).
        self.rnn = nn.RNN(embedding_dim, hidden_units, batch_first=True)
        self.fc = nn.Linear(hidden_units, num_classes)

    def forward(self, x):
        """Classify a batch of token-id sequences.

        Args:
            x: LongTensor of shape (batch, seq_len) holding token ids.

        Returns:
            FloatTensor of shape (batch, num_classes) with class
            probabilities (each row sums to 1 via softmax).
        """
        x = self.embedding(x)                 # (batch, seq_len, embedding_dim)
        output, _ = self.rnn(x)               # (batch, seq_len, hidden_units)
        x = output[:, -1, :]                  # hidden state at the last time step
        x = self.fc(x)                        # (batch, num_classes) logits
        # Softmax here is fine for inference; training would normally use
        # raw logits with CrossEntropyLoss instead.
        return F.softmax(x, dim=1)
|
|
|
|
|
@st.cache_resource
def load_model(vocab_size):
    """Build the RNN classifier and load its pretrained weights on CPU.

    Cached by Streamlit so the weights are read from disk only once per
    vocab_size. On failure the error is shown in the UI and None is
    returned instead of raising.
    """
    model = RNNModel(vocab_size, embedding_dim, hidden_units, num_classes)
    try:
        weights = torch.load(
            'rnn_classification_model_weights.pth',
            map_location=torch.device('cpu'),
        )
        model.load_state_dict(weights)
    except Exception as e:
        st.error(f"Error loading model: {str(e)}")
        return None
    model.eval()
    return model
|
|
|
|
|
@st.cache_data
def load_data():
    """Load the CSV corpus, build a word-level vocabulary, and return
    pre-padded token-id tensors for every row.

    Returns:
        X_train_pad: LongTensor (num_rows, max_seq_len), left-padded with 0.
        texts: numpy array of the raw text column.
        class_mapping: dict {position_index: label_string} over unique labels.
        vocab_size: number of entries in the tokenizer vocabulary.
    """
    # The raw CSV has unnamed columns '0' (label) and 'a' (text).
    # latin1 avoids decode errors on non-UTF-8 bytes in the file.
    df=pd.read_csv("alldata_1_for_kaggle.csv",encoding='latin1')
    df = df.rename(columns={'0': 'labels', 'a': 'text'})
    texts = df['text'].values
    # Map the model's class index (position in unique()) back to the label
    # string. NOTE(review): this assumes the training labels were encoded in
    # this same unique() order — confirm against the training script.
    unique_classes = (df['labels'].unique())
    class_mapping = {i: f"{idx}" for i,idx in enumerate(unique_classes)}

    def tokenize(text):
        # Simple lowercase whitespace split, used only for vocab counting.
        return text.lower().split()

    # Count word frequencies across the whole corpus.
    word_counts = Counter()
    for text in texts:
        word_counts.update(tokenize(text))

    # Words ordered most-frequent first so common words get small ids.
    sorted_words = [word for word, _ in word_counts.most_common()]

    # Reserve 0 for padding and 1 for unknown tokens.
    vocab = {"<pad>": 0, "<unk>": 1}
    vocab.update({word: idx + 2 for idx, word in enumerate(sorted_words)})

    # NOTE(review): Whitespace pre-tokenizer also splits on punctuation,
    # while the vocab above was built with plain .split(); punctuation-
    # attached tokens therefore map to <unk>. Verify this matches training.
    tokenizer = Tokenizer(WordLevel(vocab, unk_token="<unk>"))
    tokenizer.pre_tokenizer = Whitespace()

    def text_to_sequence(texts):
        # Encode each lowercased text into a list of vocabulary ids.
        return [tokenizer.encode(text.lower()).ids for text in texts]

    X_train_seq = text_to_sequence(texts)
    # NOTE(review): max_len is computed but never used below — pad_sequence
    # infers the max length itself. Left in place to preserve behavior.
    max_len = max([len(seq) for seq in X_train_seq])

    X_train_seq = [torch.tensor(seq) for seq in X_train_seq]

    # Pre-padding trick: pad_sequence only pads on the right, so reverse
    # each sequence, right-pad, then reverse along the time axis — yielding
    # sequences left-padded with 0 (the real tokens end at position -1,
    # which is where RNNModel.forward reads the hidden state).
    X_train_seq_reversed = [seq.flip(0) for seq in X_train_seq]
    X_train_pad_reversed = pad_sequence(X_train_seq_reversed, batch_first=True, padding_value=0)
    X_train_pad = X_train_pad_reversed.flip(1)
    vocab_size = len(tokenizer.get_vocab())

    return X_train_pad, texts, class_mapping, vocab_size
|
|
|
|
|
|
|
|
def main():
    """Streamlit entry point: pick a corpus row, run the RNN, show the class."""
    colored_title("Text Classification using RNN", "black")

    X_test, texts, class_mapping, vocab_size = load_data()

    # Row picker over the loaded corpus.
    colored_subheader("Select a Row for Prediction:", "black")
    row_idx = st.selectbox("Select a row", options=range(len(texts)), index=0)

    # Show the chosen text read-only.
    colored_text("Selected Text:", "black")
    st.text_area("Text Content:", value=texts[row_idx], height=150, disabled=True)

    if not st.button("Predict"):
        return
    model = load_model(vocab_size)
    if model is None:
        # load_model already surfaced the error in the UI.
        return

    # [[row_idx]] keeps a batch dimension of 1 for the model.
    with torch.no_grad():
        probs = model(X_test[[row_idx]])
    predicted_class = torch.argmax(probs, dim=1).item()

    colored_subheader("Prediction Results:", "green")
    colored_text(f"Predicted Class: {class_mapping[predicted_class]}", "green")
|
|
# Standard entry guard: launch the app only when this file is run directly.
if __name__ == "__main__":
    main()