File size: 7,029 Bytes
4a75943
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
import streamlit as st
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import os
from PIL import Image
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tokenizers import Tokenizer
from tokenizers.models import WordLevel
from tokenizers.pre_tokenizers import Whitespace
from collections import Counter
import torch
from torch.nn.utils.rnn import pad_sequence



# Use Streamlit's "centered" page layout.
st.set_page_config(layout="centered")

# Inject a custom stylesheet (background overlay, widget label and caption
# styling). unsafe_allow_html=True is needed so the raw <style> tag is emitted.
# NOTE(review): the lines starting with '#' inside the stylesheet are not CSS
# comment syntax (CSS uses /* ... */); presumably browsers just discard them
# as invalid declarations - confirm that is the intent.
# NOTE(review): background-image is given an empty url(""), so no image is
# referenced here.
st.markdown("""
    <style>
        .stApp {
            background-image: url("");
            background-size: cover;
            background-position: center;
            background-repeat: no-repeat;
            height: auto;  /* Allows the page to expand for scrolling */
            overflow: auto;  /* Enables scrolling if the page content overflows */
            # position : relative
        }

        /* Adjust opacity of overlay to make content more visible */
        .stApp::before {
            content: "";
            position: absolute;
            top: 0;
            left: 0;
            width: 100%;
            height: 100%;
            background-color: rgba(255, 255, 255, 0.8);  /* Slightly higher opacity */
            z-index: -1;
        }

        /* Ensure content appears above the overlay */
        .stApp > * {
            position: relative;
            z-index: 2;
        }

        /* Ensure the dataframe is visible */
        .dataframe {
            background-color: rgba(255, 255, 255, 0.9) !important;
            z-index: 3;
        }

        /* Style text elements for better visibility */
        h1, h3, span, div {
            text-shadow: 1px 1px 2px rgba(255, 255, 255, 0.2);
        }
            
        /* Custom CSS for select box heading */
        div.stSelectbox > label {
            color: #000000 !important;  /* Change to your desired color */
            # background-color: black !important;  /* Background color of the dropdown */
            font-size: 24px !important;  /* Change font size */
            font-weight: bold !important;  /* Make text bold */
        }

        /* Custom CSS for image caption */
        .custom-caption {
            color: #000000 !important;  /* Change to your desired color */
            font-size: 24px !important;  /* Optional: Change font size */
            text-align: center;  /* Center-align the caption */
        }
            
        .stMainBlockContainer {
            background-color: white !important;  /* Background color of the dropdown */
        }

        .stTextArea{
            color: #000000 !important
        }
            
    </style>
""", unsafe_allow_html=True)


# Custom title styling functions
def colored_title(text, color):
    """Render ``text`` as an ``<h1>`` heading in the given CSS ``color``.

    Both arguments are interpolated into raw HTML without escaping, so they
    must already be safe to embed in markup.
    """
    heading_html = f"<h1 style='color: {color};'>{text}</h1>"
    st.markdown(heading_html, unsafe_allow_html=True)

def colored_subheader(text, color):
    """Render ``text`` as an ``<h3>`` sub-heading in the given CSS ``color``.

    Both arguments are interpolated into raw HTML without escaping, so they
    must already be safe to embed in markup.
    """
    subheader_html = f"<h3 style='color: {color};'>{text}</h3>"
    st.markdown(subheader_html, unsafe_allow_html=True)

def colored_text(text, color):
    """Render ``text`` inside a ``<span>`` in the given CSS ``color``.

    Both arguments are interpolated into raw HTML without escaping, so they
    must already be safe to embed in markup.
    """
    span_html = f"<span style='color: {color};'>{text}</span>"
    st.markdown(span_html, unsafe_allow_html=True)


# Architecture hyperparameters passed to RNNModel in load_model().
# NOTE(review): these presumably have to match the values used when
# 'rnn_classification_model_weights.pth' was trained, otherwise
# load_state_dict() will reject the checkpoint - confirm against training code.
embedding_dim = 128  # size of each token embedding vector
hidden_units = 128  # size of the RNN hidden state
num_classes = 3  # number of output classes of the final linear layer

class RNNModel(nn.Module):
    """Single-layer RNN text classifier.

    Token ids are embedded, passed through one ``nn.RNN`` layer, and the RNN
    output at the final timestep is projected to ``num_classes`` values that
    are normalised with softmax.

    Args:
        vocab_size: Number of rows in the embedding table.
        embedding_dim: Size of each token embedding.
        hidden_units: Size of the RNN hidden state.
        num_classes: Number of output classes.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_units, num_classes):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        # No ``dropout=`` argument: nn.RNN only applies dropout between
        # stacked layers, so on this single-layer network the former
        # ``dropout=0.2`` never had any effect and merely triggered a
        # UserWarning at construction. Parameter names and shapes are
        # unaffected, so existing checkpoints still load.
        self.rnn = nn.RNN(embedding_dim, hidden_units, batch_first=True)
        self.fc = nn.Linear(hidden_units, num_classes)

    def forward(self, x):
        """Return class probabilities for a batch of token-id sequences.

        Args:
            x: Integer tensor of token ids indexed as (batch, time), since
                the RNN is built with ``batch_first=True``.

        Returns:
            Tensor with one row per batch item and ``num_classes`` columns;
            each row is a softmax distribution (sums to 1).
        """
        embedded = self.embedding(x)
        outputs, _ = self.rnn(embedded)
        # Classify from the last timestep only; this relies on the caller
        # left-padding sequences so the final position holds a real token.
        last_step = outputs[:, -1, :]
        logits = self.fc(last_step)
        return F.softmax(logits, dim=1)

@st.cache_resource
def load_model(vocab_size):
    """Build an ``RNNModel`` and load its trained weights on the CPU.

    Args:
        vocab_size: Embedding-table size to construct the network with.

    Returns:
        The model switched to eval mode, or ``None`` when the checkpoint
        could not be read or applied (the error is shown via ``st.error``).
    """
    net = RNNModel(vocab_size, embedding_dim, hidden_units, num_classes)
    try:
        # NOTE(review): torch.load unpickles the file; only use checkpoints
        # from a trusted source.
        weights = torch.load(
            'rnn_classification_model_weights.pth',
            map_location=torch.device('cpu'),
        )
        net.load_state_dict(weights)
        net.eval()
    except Exception as e:
        # Report the failure in the UI instead of crashing the app.
        st.error(f"Error loading model: {str(e)}")
        return None
    return net

@st.cache_data
def load_data():
    """Load the CSV corpus and encode every row as a left-padded id sequence.

    Reads ``alldata_1_for_kaggle.csv``, builds a frequency-ordered word
    vocabulary from the text column, encodes each text with a word-level
    tokenizer and left-pads all sequences with id 0 to a common length.

    Returns:
        tuple: ``(X_train_pad, texts, class_mapping, vocab_size)`` where
            ``X_train_pad`` is the padded tensor of token ids (one row per
            text), ``texts`` is the array of raw texts, ``class_mapping``
            maps an integer index to a label string, and ``vocab_size`` is
            the number of entries in the tokenizer vocabulary.

    Raises:
        ValueError: If the CSV contains no rows.
    """
    df = pd.read_csv("alldata_1_for_kaggle.csv", encoding='latin1')
    df = df.rename(columns={'0': 'labels', 'a': 'text'})
    texts = df['text'].values
    if len(texts) == 0:
        # Fail early with a clear message rather than deep inside padding.
        raise ValueError("alldata_1_for_kaggle.csv contains no rows to classify")

    # Index -> label string, in order of first appearance in the CSV.
    # NOTE(review): this only lines up with the model's output indices if
    # training encoded the labels in the same order - confirm.
    unique_classes = df['labels'].unique()
    class_mapping = {i: f"{label}" for i, label in enumerate(unique_classes)}

    # Count lower-cased, whitespace-split words over the whole corpus.
    word_counts = Counter()
    for text in texts:
        word_counts.update(text.lower().split())

    # Most frequent words get the lowest ids; 0 and 1 are reserved.
    sorted_words = [word for word, _ in word_counts.most_common()]
    vocab = {"<pad>": 0, "<unk>": 1}
    vocab.update({word: idx for idx, word in enumerate(sorted_words, start=2)})

    # NOTE(review): the vocabulary above is built with str.split(), while the
    # Whitespace pre-tokenizer presumably also separates punctuation, so some
    # pieces may encode as <unk>. Left unchanged on purpose: the saved weights
    # depend on this exact vocabulary and encoding.
    tokenizer = Tokenizer(WordLevel(vocab, unk_token="<unk>"))
    tokenizer.pre_tokenizer = Whitespace()

    sequences = [torch.tensor(tokenizer.encode(text.lower()).ids) for text in texts]

    # pad_sequence pads on the right; flipping before and after moves the
    # padding to the left so the last timestep always holds the final token.
    reversed_padded = pad_sequence(
        [seq.flip(0) for seq in sequences],
        batch_first=True,
        padding_value=0,
    )
    X_train_pad = reversed_padded.flip(1)
    vocab_size = len(tokenizer.get_vocab())

    return X_train_pad, texts, class_mapping, vocab_size


def main():
    """Render the app: choose a dataset row, show its text, predict on demand."""
    colored_title("Text Classification using RNN", "black")

    # Encoded inputs, raw texts, index->label mapping and vocabulary size.
    padded_inputs, texts, class_mapping, vocab_size = load_data()

    # Let the user pick which row of the dataset to classify.
    colored_subheader("Select a Row for Prediction:", "black")
    row_choices = range(len(texts))
    selected_index = st.selectbox("Select a row", options=row_choices, index=0)

    colored_text("Selected Text:", "black")
    st.text_area("Text Content:", value=texts[selected_index], height=150, disabled=True)

    # Nothing more to do until the Predict button is pressed.
    if not st.button("Predict"):
        return

    model = load_model(vocab_size)
    if model is None:
        # load_model has already reported the failure in the UI.
        return

    with torch.no_grad():
        # Index with a list so the selected row keeps its batch dimension.
        probabilities = model(padded_inputs[[selected_index]])
        predicted_class = torch.argmax(probabilities, dim=1).item()

    # Show the predicted label.
    colored_subheader("Prediction Results:", "green")
    colored_text(f"Predicted Class: {class_mapping[predicted_class]}", "green")
    

            
# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()