# Spaces: Build error
| import streamlit as st | |
| import pandas as pd | |
| import torch | |
| import torch.nn as nn | |
| import torch.nn.functional as F | |
| import numpy as np | |
| import os | |
| from PIL import Image | |
| from sklearn.preprocessing import StandardScaler, LabelEncoder | |
| from tokenizers import Tokenizer | |
| from tokenizers.models import WordLevel | |
| from tokenizers.pre_tokenizers import Whitespace | |
| from collections import Counter | |
| import torch | |
| from torch.nn.utils.rnn import pad_sequence | |
st.set_page_config(layout="centered")

# Inject custom CSS: page background, a translucent white overlay, and
# styling for the dataframe, select box label, captions and text area.
# NOTE(review): the background-image URL is empty in this copy of the file,
# so no image is actually loaded -- confirm the intended asset.
# The two declarations that were disabled with a leading "#" are now wrapped
# in /* ... */, because "#" is not a comment marker in CSS.
st.markdown("""
<style>
.stApp {
    background-image: url("");
    background-size: cover;
    background-position: center;
    background-repeat: no-repeat;
    height: auto; /* Allows the page to expand for scrolling */
    overflow: auto; /* Enables scrolling if the page content overflows */
    /* position: relative; */
}
/* Adjust opacity of overlay to make content more visible */
.stApp::before {
    content: "";
    position: absolute;
    top: 0;
    left: 0;
    width: 100%;
    height: 100%;
    background-color: rgba(255, 255, 255, 0.8); /* Slightly higher opacity */
    z-index: -1;
}
/* Ensure content appears above the overlay */
.stApp > * {
    position: relative;
    z-index: 2;
}
/* Ensure the dataframe is visible */
.dataframe {
    background-color: rgba(255, 255, 255, 0.9) !important;
    z-index: 3;
}
/* Style text elements for better visibility */
h1, h3, span, div {
    text-shadow: 1px 1px 2px rgba(255, 255, 255, 0.2);
}
/* Custom CSS for select box heading */
div.stSelectbox > label {
    color: #000000 !important; /* Change to your desired color */
    /* background-color: black !important; */ /* Background color of the dropdown */
    font-size: 24px !important; /* Change font size */
    font-weight: bold !important; /* Make text bold */
}
/* Custom CSS for image caption */
.custom-caption {
    color: #000000 !important; /* Change to your desired color */
    font-size: 24px !important; /* Optional: Change font size */
    text-align: center; /* Center-align the caption */
}
.stMainBlockContainer {
    background-color: white !important; /* Background color of the dropdown */
}
.stTextArea{
    color: #000000 !important
}
</style>
""", unsafe_allow_html=True)
# Custom title styling functions
def colored_title(text, color):
    """Render ``text`` as an ``<h1>`` heading in the given CSS ``color``.

    The markup is passed to ``st.markdown`` with ``unsafe_allow_html=True``;
    both arguments are interpolated into the HTML as-is, without escaping.
    """
    heading_html = f"<h1 style='color: {color};'>{text}</h1>"
    st.markdown(heading_html, unsafe_allow_html=True)
def colored_subheader(text, color):
    """Render ``text`` as an ``<h3>`` sub-heading in the given CSS ``color``.

    The markup is passed to ``st.markdown`` with ``unsafe_allow_html=True``;
    both arguments are interpolated into the HTML as-is, without escaping.
    """
    subheader_html = f"<h3 style='color: {color};'>{text}</h3>"
    st.markdown(subheader_html, unsafe_allow_html=True)
def colored_text(text, color):
    """Render ``text`` inside a ``<span>`` in the given CSS ``color``.

    The markup is passed to ``st.markdown`` with ``unsafe_allow_html=True``;
    both arguments are interpolated into the HTML as-is, without escaping.
    """
    span_html = f"<span style='color: {color};'>{text}</span>"
    st.markdown(span_html, unsafe_allow_html=True)
# Model hyperparameters. load_model() passes these to RNNModel before loading
# the saved state dict, so they have to match the shapes stored in that file.
embedding_dim: int = 128  # size of each token embedding vector
hidden_units: int = 128  # size of the RNN hidden state
num_classes: int = 3  # width of the final linear layer (one score per class)
class RNNModel(nn.Module):
    """Single-layer RNN text classifier.

    Token ids are embedded, passed through an ``nn.RNN``, and the RNN output
    at the last timestep is projected to ``num_classes`` probabilities.

    Args:
        vocab_size: Number of entries in the embedding table.
        embedding_dim: Size of each token embedding.
        hidden_units: Size of the RNN hidden state.
        num_classes: Number of output classes.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_units, num_classes):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        # No ``dropout`` argument here: nn.RNN only applies dropout between
        # stacked layers, so with the default single layer a non-zero value
        # has no effect other than emitting a UserWarning at construction.
        # Parameter names and shapes are unchanged, so saved weights still load.
        self.rnn = nn.RNN(embedding_dim, hidden_units, batch_first=True)
        self.fc = nn.Linear(hidden_units, num_classes)

    def forward(self, x):
        """Return per-class probabilities for a batch of token-id sequences.

        Args:
            x: Integer tensor indexed as ``(batch, timestep)``.

        Returns:
            Tensor of shape ``(batch, num_classes)`` whose rows sum to 1.
        """
        embedded = self.embedding(x)
        output, _ = self.rnn(embedded)
        # Classify from the output at the final timestep only.
        last_step = output[:, -1, :]
        logits = self.fc(last_step)
        return F.softmax(logits, dim=1)
def load_model(vocab_size):
    """Build an ``RNNModel`` and load its saved weights on the CPU.

    Args:
        vocab_size: Vocabulary size used to size the embedding table.

    Returns:
        The model in eval mode, or ``None`` if loading failed. On failure the
        error is reported in the UI through ``st.error``.
    """
    model = RNNModel(vocab_size, embedding_dim, hidden_units, num_classes)
    weights_path = 'rnn_classification_model_weights.pth'
    try:
        # NOTE(review): torch.load may unpickle arbitrary objects; only use
        # a weights file from a trusted source.
        weights = torch.load(weights_path, map_location=torch.device('cpu'))
        model.load_state_dict(weights)
        model.eval()
    except Exception as e:
        st.error(f"Error loading model: {str(e)}")
        return None
    return model
def load_data():
    """Load the CSV corpus and encode every text as a left-padded id tensor.

    Reads ``alldata_1_for_kaggle.csv`` (latin-1), renames column ``'0'`` to
    ``labels`` and ``'a'`` to ``text``, builds a frequency-ordered word-level
    vocabulary, and encodes each lower-cased text with it.

    Returns:
        A 4-tuple ``(X_train_pad, texts, class_mapping, vocab_size)``:
        ``X_train_pad`` is an int64 tensor with one row per text, padded with
        0 on the left to the longest sequence; ``texts`` is the array of raw
        texts; ``class_mapping`` maps a class index to the label rendered as
        a string; ``vocab_size`` is the tokenizer's vocabulary size.

    Raises:
        ValueError: If the CSV contains no rows.
    """
    df = pd.read_csv("alldata_1_for_kaggle.csv", encoding='latin1')
    df = df.rename(columns={'0': 'labels', 'a': 'text'})
    texts = df['text'].values
    if len(texts) == 0:
        # Fail early with a clear message; there is nothing to pad or predict.
        raise ValueError("alldata_1_for_kaggle.csv contains no rows")

    # unique() yields labels in order of first appearance in the file.
    # NOTE(review): index i must correspond to output unit i of the trained
    # model -- confirm this ordering against the training code.
    unique_classes = df['labels'].unique()
    class_mapping = {index: f"{label}" for index, label in enumerate(unique_classes)}

    # Build vocabulary based on word frequency (similar to Keras Tokenizer):
    # lower-cased, whitespace-split words, most common words get lower indices.
    word_counts = Counter()
    for text in texts:
        word_counts.update(text.lower().split())
    sorted_words = [word for word, _ in word_counts.most_common()]

    # Ids 0 and 1 are reserved for the <pad> and <unk> tokens.
    vocab = {"<pad>": 0, "<unk>": 1}
    vocab.update({word: idx for idx, word in enumerate(sorted_words, start=2)})

    # NOTE(review): the vocabulary is built with str.split() while encoding
    # uses the Whitespace pre-tokenizer; the two may split punctuation
    # differently. Left as-is because the ids must match the trained weights.
    tokenizer = Tokenizer(WordLevel(vocab, unk_token="<unk>"))
    tokenizer.pre_tokenizer = Whitespace()

    # Explicit int64 dtype: an empty id list would otherwise become a float32
    # tensor, which is not a valid embedding index type.
    sequences = [
        torch.tensor(tokenizer.encode(text.lower()).ids, dtype=torch.long)
        for text in texts
    ]

    # pad_sequence pads on the right. Reversing each sequence, padding, then
    # reversing the padded batch puts the padding on the left, so the last
    # timestep of every row is a real token.
    reversed_sequences = [seq.flip(0) for seq in sequences]
    padded_reversed = pad_sequence(reversed_sequences, batch_first=True, padding_value=0)
    X_train_pad = padded_reversed.flip(1)

    vocab_size = len(tokenizer.get_vocab())
    return X_train_pad, texts, class_mapping, vocab_size
def main():
    """Run the Streamlit page: pick a dataset row and classify its text.

    Loads the encoded dataset, lets the user select a row by index, shows the
    row's text, and on "Predict" loads the model and displays the class with
    the highest predicted probability.
    """
    colored_title("Text Classification using RNN", "black")

    # Load the padded sequences, raw texts, label mapping and vocabulary size.
    padded_sequences, texts, class_mapping, vocab_size = load_data()

    # Let the user choose which text row to classify.
    colored_subheader("Select a Row for Prediction:", "black")
    selected_index = st.selectbox("Select a row", options=range(len(texts)), index=0)
    colored_text("Selected Text:", "black")
    st.text_area("Text Content:", value=texts[selected_index], height=150, disabled=True)

    # Nothing more to do until the Predict button is pressed.
    if not st.button("Predict"):
        return

    model = load_model(vocab_size)
    if model is None:
        # load_model has already reported the failure in the UI.
        return

    with torch.no_grad():
        # Index with a list so the selected row keeps its batch dimension.
        probabilities = model(padded_sequences[[selected_index]])
        predicted_class = torch.argmax(probabilities, dim=1).item()

    # Display prediction result
    colored_subheader("Prediction Results:", "green")
    colored_text(f"Predicted Class: {class_mapping[predicted_class]}", "green")


if __name__ == "__main__":
    main()