# Repository page header captured together with the file (not part of the program):
# YAMITEK's picture
# Upload 5 files
# 4a75943 verified
import streamlit as st
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import os
from PIL import Image
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tokenizers import Tokenizer
from tokenizers.models import WordLevel
from tokenizers.pre_tokenizers import Whitespace
from collections import Counter
import torch
from torch.nn.utils.rnn import pad_sequence
# Configure the page layout; this is the first Streamlit call in the script.
st.set_page_config(layout="centered")
# Inject a custom stylesheet: background image and translucent overlay for the
# app container, stacking order for the content, and colours / font sizes for
# the select-box label, image captions, main container and text area.
# NOTE(review): two lines inside the stylesheet start with "#" (before
# "position" and before "background-color"). "#" is not CSS comment syntax, so
# these look like attempted comment-outs -- confirm they are meant to be
# inactive. The background-image URL is also empty.
st.markdown("""
<style>
.stApp {
background-image: url("");
background-size: cover;
background-position: center;
background-repeat: no-repeat;
height: auto; /* Allows the page to expand for scrolling */
overflow: auto; /* Enables scrolling if the page content overflows */
# position : relative
}
/* Adjust opacity of overlay to make content more visible */
.stApp::before {
content: "";
position: absolute;
top: 0;
left: 0;
width: 100%;
height: 100%;
background-color: rgba(255, 255, 255, 0.8); /* Slightly higher opacity */
z-index: -1;
}
/* Ensure content appears above the overlay */
.stApp > * {
position: relative;
z-index: 2;
}
/* Ensure the dataframe is visible */
.dataframe {
background-color: rgba(255, 255, 255, 0.9) !important;
z-index: 3;
}
/* Style text elements for better visibility */
h1, h3, span, div {
text-shadow: 1px 1px 2px rgba(255, 255, 255, 0.2);
}
/* Custom CSS for select box heading */
div.stSelectbox > label {
color: #000000 !important; /* Change to your desired color */
# background-color: black !important; /* Background color of the dropdown */
font-size: 24px !important; /* Change font size */
font-weight: bold !important; /* Make text bold */
}
/* Custom CSS for image caption */
.custom-caption {
color: #000000 !important; /* Change to your desired color */
font-size: 24px !important; /* Optional: Change font size */
text-align: center; /* Center-align the caption */
}
.stMainBlockContainer {
background-color: white !important; /* Background color of the dropdown */
}
.stTextArea{
color: #000000 !important
}
</style>
""", unsafe_allow_html=True)
# Custom title styling functions
def colored_title(text, color):
    """Render ``text`` as a level-1 heading drawn in the CSS colour ``color``.

    Both arguments are interpolated into the markup verbatim (no HTML
    escaping is applied), and the markup is rendered as raw HTML.
    """
    heading_html = f"<h1 style='color: {color};'>{text}</h1>"
    st.markdown(heading_html, unsafe_allow_html=True)
def colored_subheader(text, color):
    """Render ``text`` as a level-3 heading drawn in the CSS colour ``color``.

    Both arguments are interpolated into the markup verbatim (no HTML
    escaping is applied), and the markup is rendered as raw HTML.
    """
    subheader_html = f"<h3 style='color: {color};'>{text}</h3>"
    st.markdown(subheader_html, unsafe_allow_html=True)
def colored_text(text, color):
    """Render ``text`` as an inline span drawn in the CSS colour ``color``.

    Both arguments are interpolated into the markup verbatim (no HTML
    escaping is applied), and the markup is rendered as raw HTML.
    """
    span_html = f"<span style='color: {color};'>{text}</span>"
    st.markdown(span_html, unsafe_allow_html=True)
# Hyperparameters handed to RNNModel by load_model().
# NOTE(review): presumably these must equal the values used when
# 'rnn_classification_model_weights.pth' was trained -- confirm before changing.
embedding_dim = 128  # width of each token embedding vector
hidden_units = 128  # size of the recurrent hidden state
num_classes = 3  # number of outputs of the final linear layer
class RNNModel(nn.Module):
    """Single-layer ``nn.RNN`` classifier over sequences of token ids.

    Token ids are embedded, passed through the recurrent layer, and the
    output of the last timestep is projected to ``num_classes`` values that
    are returned as a probability distribution.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_units, num_classes):
        """Create the embedding, recurrent and output layers.

        Args:
            vocab_size: Number of rows in the embedding table.
            embedding_dim: Width of each embedding vector.
            hidden_units: Size of the recurrent hidden state.
            num_classes: Number of output classes.
        """
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        # No ``dropout`` argument: nn.RNN applies dropout only *between*
        # stacked layers, so with the default single layer a non-zero value
        # does nothing except emit a UserWarning on every construction.
        # Parameter names and shapes are unaffected, so existing checkpoints
        # still load.
        self.rnn = nn.RNN(embedding_dim, hidden_units, batch_first=True)
        self.fc = nn.Linear(hidden_units, num_classes)

    def forward(self, x):
        """Return class probabilities for a batch of id sequences.

        Args:
            x: Integer tensor indexed as (batch, timestep), since the
                recurrent layer is built with ``batch_first=True``.

        Returns:
            Tensor with one row per batch item and ``num_classes`` columns;
            each row is a softmax distribution.
        """
        embedded = self.embedding(x)
        output, _ = self.rnn(embedded)
        # Only the final timestep feeds the classifier, so callers should
        # keep real tokens at the end of each sequence (i.e. pad on the left).
        last_step = output[:, -1, :]
        logits = self.fc(last_step)
        return F.softmax(logits, dim=1)
@st.cache_resource
def load_model(vocab_size):
    """Build the RNN classifier and load its trained weights onto the CPU.

    Args:
        vocab_size: Number of rows of the embedding table of the model that
            is constructed before the weights are loaded.

    Returns:
        The model switched to evaluation mode, or ``None`` when the weights
        could not be loaded (the error is shown in the app in that case).
    """
    model = RNNModel(vocab_size, embedding_dim, hidden_units, num_classes)
    try:
        # weights_only=True restricts unpickling to tensors and plain
        # containers, which is all a state dict needs, so a tampered weights
        # file cannot execute arbitrary code while being loaded.
        state_dict = torch.load(
            'rnn_classification_model_weights.pth',
            map_location=torch.device('cpu'),
            weights_only=True,
        )
        model.load_state_dict(state_dict)
    except Exception as e:
        # UI boundary: report the failure in the page instead of crashing.
        st.error(f"Error loading model: {str(e)}")
        return None
    model.eval()
    return model
@st.cache_data
def load_data():
    """Load the CSV corpus and encode every text as a left-padded id sequence.

    Returns:
        A 4-tuple ``(padded, texts, class_mapping, vocab_size)``:

        * ``padded`` -- integer tensor with one row per text, padded on the
          left with 0 (the ``<pad>`` id) up to the longest sequence.
        * ``texts`` -- the raw values of the ``text`` column.
        * ``class_mapping`` -- dict from position to label string, following
          the order in which labels first appear in the file.
        * ``vocab_size`` -- number of entries in the tokenizer vocabulary.
    """
    df = pd.read_csv("alldata_1_for_kaggle.csv", encoding='latin1')
    df = df.rename(columns={'0': 'labels', 'a': 'text'})
    texts = df['text'].values

    # NOTE(review): unique() keeps first-appearance order; confirm this is the
    # same class order that was used when the model was trained.
    unique_classes = df['labels'].unique()
    class_mapping = {
        position: f"{label}" for position, label in enumerate(unique_classes)
    }

    # Count lower-cased, whitespace-separated words over the whole corpus.
    word_counts = Counter()
    for text in texts:
        word_counts.update(text.lower().split())

    # More frequent words receive smaller ids; ids 0 and 1 are reserved.
    # NOTE(review): the vocabulary is built with str.split() while encoding
    # goes through the Whitespace pre-tokenizer; any token the two split
    # differently maps to <unk>. Left untouched because the ids presumably
    # have to match the trained checkpoint.
    vocab = {"<pad>": 0, "<unk>": 1}
    vocab.update(
        {word: rank + 2 for rank, (word, _) in enumerate(word_counts.most_common())}
    )

    tokenizer = Tokenizer(WordLevel(vocab, unk_token="<unk>"))
    tokenizer.pre_tokenizer = Whitespace()

    # Explicit dtype: torch.tensor([]) would otherwise be float32, and an
    # empty text would then corrupt the dtype of the padded batch.
    sequences = [
        torch.tensor(tokenizer.encode(text.lower()).ids, dtype=torch.long)
        for text in texts
    ]

    # pad_sequence pads on the right, so reverse each sequence, pad, then
    # reverse the time axis again to end up with padding on the left.
    reversed_sequences = [seq.flip(0) for seq in sequences]
    padded_reversed = pad_sequence(
        reversed_sequences, batch_first=True, padding_value=0
    )
    X_train_pad = padded_reversed.flip(1)

    vocab_size = len(tokenizer.get_vocab())
    return X_train_pad, texts, class_mapping, vocab_size
def main():
    """Render the page: choose a dataset row, show its text, classify on demand."""
    colored_title("Text Classification using RNN", "black")

    # Padded id sequences, raw texts, position->label mapping, vocabulary size.
    X_test, texts, class_mapping, vocab_size = load_data()

    # Let the user pick which row of the dataset to classify.
    colored_subheader("Select a Row for Prediction:", "black")
    row_choices = range(len(texts))
    selected_index = st.selectbox("Select a row", options=row_choices, index=0)

    # Show the chosen text read-only.
    colored_text("Selected Text:", "black")
    st.text_area(
        "Text Content:",
        value=texts[selected_index],
        height=150,
        disabled=True,
    )

    # Nothing else happens until the button is pressed.
    if not st.button("Predict"):
        return

    # load_model() displays its own error message when it returns None.
    model = load_model(vocab_size)
    if model is None:
        return

    with torch.no_grad():
        # List indexing keeps a leading batch dimension of size one.
        single_row_batch = X_test[[selected_index]]
        output = model(single_row_batch)
        predicted_class = torch.argmax(output, dim=1).item()

    predicted_label = class_mapping[predicted_class]
    colored_subheader("Prediction Results:", "green")
    colored_text(f"Predicted Class: {predicted_label}", "green")


if __name__ == "__main__":
    main()