Spaces:
Build error
Build error
File size: 7,029 Bytes
4a75943 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 |
import streamlit as st
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import os
from PIL import Image
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tokenizers import Tokenizer
from tokenizers.models import WordLevel
from tokenizers.pre_tokenizers import Whitespace
from collections import Counter
import torch
from torch.nn.utils.rnn import pad_sequence
# Use Streamlit's "centered" page layout. This is the first Streamlit call
# made by the script.
st.set_page_config(layout="centered")
# Inject a custom stylesheet into the page. It styles the app container
# (background, translucent white overlay, stacking order), dataframes,
# headings, select-box labels, the ``.custom-caption`` class, the main block
# container and text areas.
# NOTE(review): ``background-image`` points at an empty url(""), so no image
# is referenced as written -- confirm whether a URL was meant to go there.
# NOTE(review): the two lines inside the stylesheet that start with ``#`` are
# not CSS comment syntax (CSS uses /* ... */); they look like declarations
# someone tried to disable -- confirm and convert to real CSS comments.
st.markdown("""
<style>
.stApp {
background-image: url("");
background-size: cover;
background-position: center;
background-repeat: no-repeat;
height: auto; /* Allows the page to expand for scrolling */
overflow: auto; /* Enables scrolling if the page content overflows */
# position : relative
}
/* Adjust opacity of overlay to make content more visible */
.stApp::before {
content: "";
position: absolute;
top: 0;
left: 0;
width: 100%;
height: 100%;
background-color: rgba(255, 255, 255, 0.8); /* Slightly higher opacity */
z-index: -1;
}
/* Ensure content appears above the overlay */
.stApp > * {
position: relative;
z-index: 2;
}
/* Ensure the dataframe is visible */
.dataframe {
background-color: rgba(255, 255, 255, 0.9) !important;
z-index: 3;
}
/* Style text elements for better visibility */
h1, h3, span, div {
text-shadow: 1px 1px 2px rgba(255, 255, 255, 0.2);
}
/* Custom CSS for select box heading */
div.stSelectbox > label {
color: #000000 !important; /* Change to your desired color */
# background-color: black !important; /* Background color of the dropdown */
font-size: 24px !important; /* Change font size */
font-weight: bold !important; /* Make text bold */
}
/* Custom CSS for image caption */
.custom-caption {
color: #000000 !important; /* Change to your desired color */
font-size: 24px !important; /* Optional: Change font size */
text-align: center; /* Center-align the caption */
}
.stMainBlockContainer {
background-color: white !important; /* Background color of the dropdown */
}
.stTextArea{
color: #000000 !important
}
</style>
""", unsafe_allow_html=True)
# Custom title styling functions
def colored_title(text, color):
    """Render ``text`` as an ``<h1>`` heading in the given CSS ``color``.

    The markup is handed to ``st.markdown`` with ``unsafe_allow_html=True``,
    so both arguments are interpolated into the HTML as-is (no escaping).
    """
    heading_html = f"<h1 style='color: {color};'>{text}</h1>"
    st.markdown(heading_html, unsafe_allow_html=True)
def colored_subheader(text, color):
    """Render ``text`` as an ``<h3>`` sub-heading in the given CSS ``color``.

    The markup is handed to ``st.markdown`` with ``unsafe_allow_html=True``,
    so both arguments are interpolated into the HTML as-is (no escaping).
    """
    subheader_html = f"<h3 style='color: {color};'>{text}</h3>"
    st.markdown(subheader_html, unsafe_allow_html=True)
def colored_text(text, color):
    """Render ``text`` inside a ``<span>`` in the given CSS ``color``.

    The markup is handed to ``st.markdown`` with ``unsafe_allow_html=True``,
    so both arguments are interpolated into the HTML as-is (no escaping).
    """
    span_html = f"<span style='color: {color};'>{text}</span>"
    st.markdown(span_html, unsafe_allow_html=True)
# Model hyperparameters; load_model() passes them to RNNModel.
# NOTE(review): presumably these must match the values used when the saved
# weights file was produced -- confirm before changing them.
embedding_dim = 128  # width of each token embedding vector
hidden_units = 128  # size of the RNN hidden state
num_classes = 3  # number of outputs of the final linear layer
class RNNModel(nn.Module):
    """Embedding -> single-layer RNN -> linear classifier.

    Args:
        vocab_size: Number of rows in the embedding table.
        embedding_dim: Width of each embedding vector (RNN input size).
        hidden_units: Size of the RNN hidden state.
        num_classes: Number of output classes.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_units, num_classes):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        # ``dropout`` is deliberately not passed: nn.RNN only applies dropout
        # between stacked layers, so with the default single layer it had no
        # effect and merely triggered a UserWarning at construction time.
        # The layer's parameters (and state-dict keys) are unaffected.
        self.rnn = nn.RNN(embedding_dim, hidden_units, batch_first=True)
        self.fc = nn.Linear(hidden_units, num_classes)

    def forward(self, x):
        """Return class probabilities for a batch of token-id sequences.

        Args:
            x: Integer tensor indexed as (batch, time), as implied by
                ``batch_first=True`` and the ``[:, -1, :]`` slice below.

        Returns:
            Tensor with one row per input sequence and ``num_classes``
            columns; each row is softmax-normalised.
        """
        embedded = self.embedding(x)
        output, _ = self.rnn(embedded)
        # Only the output of the final timestep feeds the classifier.
        last_step = output[:, -1, :]
        logits = self.fc(last_step)
        return F.softmax(logits, dim=1)
@st.cache_resource
def load_model(vocab_size):
    """Build the classifier and load its saved weights onto the CPU.

    Args:
        vocab_size: Vocabulary size used to dimension the embedding table.

    Returns:
        The model switched to evaluation mode, or ``None`` if loading the
        weights failed (the error is reported on the page via ``st.error``).
    """
    net = RNNModel(vocab_size, embedding_dim, hidden_units, num_classes)
    try:
        weights = torch.load(
            'rnn_classification_model_weights.pth',
            map_location=torch.device('cpu'),
        )
        net.load_state_dict(weights)
        net.eval()
    except Exception as e:
        st.error(f"Error loading model: {str(e)}")
        return None
    return net
@st.cache_data
def load_data():
    """Load the CSV corpus and encode every row as a left-padded id sequence.

    Returns:
        tuple: ``(X_train_pad, texts, class_mapping, vocab_size)`` where

        * ``X_train_pad`` is a tensor with one row of token ids per text,
          zero-padded on the left to the length of the longest sequence;
        * ``texts`` is the array of raw strings from the ``text`` column;
        * ``class_mapping`` maps a class index to its label rendered as a
          string;
        * ``vocab_size`` is the number of entries in the tokenizer vocabulary.
    """
    df = pd.read_csv("alldata_1_for_kaggle.csv", encoding='latin1')
    df = df.rename(columns={'0': 'labels', 'a': 'text'})
    texts = df['text'].values

    # Class index -> label string, in order of first appearance in the file.
    # NOTE(review): presumably this ordering matches the label encoding used
    # when the model was trained -- confirm.
    unique_classes = df['labels'].unique()
    class_mapping = {index: f"{label}" for index, label in enumerate(unique_classes)}

    # Count lower-cased, whitespace-separated words across the whole corpus.
    word_counts = Counter()
    for text in texts:
        word_counts.update(text.lower().split())

    # More frequent words get smaller ids; ids 0 and 1 are reserved.
    sorted_words = [word for word, _ in word_counts.most_common()]
    vocab = {"<pad>": 0, "<unk>": 1}
    vocab.update({word: idx + 2 for idx, word in enumerate(sorted_words)})

    # NOTE(review): the vocabulary is built from str.split() tokens while
    # encoding goes through the Whitespace pre-tokenizer; the two may split
    # punctuation differently. Left as is because the saved weights
    # presumably depend on exactly these ids -- confirm.
    tokenizer = Tokenizer(WordLevel(vocab, unk_token="<unk>"))
    tokenizer.pre_tokenizer = Whitespace()

    # Encode each text into its own 1-D tensor of token ids.
    sequences = [torch.tensor(tokenizer.encode(text.lower()).ids) for text in texts]

    # pad_sequence appends padding at the end, so reverse each sequence, pad,
    # then reverse the padded batch to end up with the zeros on the left.
    reversed_sequences = [seq.flip(0) for seq in sequences]
    padded_reversed = pad_sequence(reversed_sequences, batch_first=True, padding_value=0)
    X_train_pad = padded_reversed.flip(1)

    vocab_size = len(tokenizer.get_vocab())
    return X_train_pad, texts, class_mapping, vocab_size
def main():
    """Render the page: pick a dataset row, show its text, classify on demand."""
    colored_title("Text Classification using RNN", "black")

    # Padded id sequences, raw texts, label lookup and vocabulary size.
    X_test, texts, class_mapping, vocab_size = load_data()

    # Let the user choose which row of the dataset to classify.
    colored_subheader("Select a Row for Prediction:", "black")
    selected_index = st.selectbox("Select a row", options=range(len(texts)), index=0)
    colored_text("Selected Text:", "black")
    st.text_area("Text Content:", value=texts[selected_index], height=150, disabled=True)

    # Nothing further happens until the Predict button is pressed.
    if not st.button("Predict"):
        return

    model = load_model(vocab_size)
    if model is None:
        # load_model() has already reported the failure on the page.
        return

    # Run the selected row through the model as a batch of one.
    with torch.no_grad():
        batch = X_test[[selected_index]]
        probabilities = model(batch)
    predicted_class = torch.argmax(probabilities, dim=1).item()

    # Show the predicted label.
    colored_subheader("Prediction Results:", "green")
    colored_text(f"Predicted Class: {class_mapping[predicted_class]}", "green")
# Run the app only when this file is executed as a script. The stray
# trailing "|" after the call was removed: it left the statement
# syntactically incomplete.
if __name__ == "__main__":
    main()