|
|
import streamlit as st |
|
|
import pandas as pd |
|
|
import torch |
|
|
import torch.nn as nn |
|
|
import torch.nn.functional as F |
|
|
import numpy as np |
|
|
import os |
|
|
from PIL import Image |
|
|
from sklearn.preprocessing import StandardScaler, LabelEncoder |
|
|
from tokenizers import Tokenizer |
|
|
from tokenizers.models import WordLevel |
|
|
from tokenizers.pre_tokenizers import Whitespace |
|
|
from collections import Counter |
|
|
import torch |
|
|
from torch.nn.utils.rnn import pad_sequence |
|
|
|
|
|
|
|
|
|
|
|
# Streamlit page setup: single centered content column.
st.set_page_config(layout="centered")

# Inject global CSS via raw HTML. unsafe_allow_html is required for <style>
# tags. NOTE(review): the background-image URL is empty, so only the white
# overlay below is visible — confirm whether a background image was intended.
st.markdown("""
<style>
.stApp {
background-image: url("");
background-size: cover;
background-position: center;
background-repeat: no-repeat;
height: auto; /* Allows the page to expand for scrolling */
overflow: auto; /* Enables scrolling if the page content overflows */
# position : relative
}

/* Adjust opacity of overlay to make content more visible */
.stApp::before {
content: "";
position: absolute;
top: 0;
left: 0;
width: 100%;
height: 100%;
background-color: rgba(255, 255, 255, 0.8); /* Slightly higher opacity */
z-index: -1;
}

/* Ensure content appears above the overlay */
.stApp > * {
position: relative;
z-index: 2;
}

/* Ensure the dataframe is visible */
.dataframe {
background-color: rgba(255, 255, 255, 0.9) !important;
z-index: 3;
}

/* Style text elements for better visibility */
h1, h3, span, div {
text-shadow: 1px 1px 2px rgba(255, 255, 255, 0.2);
}

/* Custom CSS for select box heading */
div.stSelectbox > label {
color: #000000 !important; /* Change to your desired color */
# background-color: black !important; /* Background color of the dropdown */
font-size: 24px !important; /* Change font size */
font-weight: bold !important; /* Make text bold */
}

/* Custom CSS for image caption */
.custom-caption {
color: #000000 !important; /* Change to your desired color */
font-size: 24px !important; /* Optional: Change font size */
text-align: center; /* Center-align the caption */
}

.stMainBlockContainer {
background-color: white !important; /* Background color of the dropdown */
}

.stTextArea{
color: #000000 !important
}

</style>
""", unsafe_allow_html=True)
|
|
|
|
|
|
|
|
|
|
|
def colored_title(text, color):
    """Render *text* as an <h1> heading in the given CSS color."""
    heading = f"<h1 style='color: {color};'>{text}</h1>"
    st.markdown(heading, unsafe_allow_html=True)
|
|
|
|
|
def colored_subheader(text, color):
    """Render *text* as an <h3> subheading in the given CSS color."""
    subheading = f"<h3 style='color: {color};'>{text}</h3>"
    st.markdown(subheading, unsafe_allow_html=True)
|
|
|
|
|
def colored_text(text, color):
    """Render *text* as an inline <span> in the given CSS color."""
    snippet = f"<span style='color: {color};'>{text}</span>"
    st.markdown(snippet, unsafe_allow_html=True)
|
|
|
|
|
|
|
|
# Model hyperparameters — must match the architecture used to train the
# weights in 'rnn_classification_model_weights.pth', or load_state_dict fails.
embedding_dim = 128   # size of each token embedding vector
hidden_units = 128    # RNN hidden-state size
num_classes = 3       # number of output classes
|
|
|
|
|
class RNNModel(nn.Module):
    """Single-layer RNN text classifier: embedding -> RNN -> linear -> softmax.

    Args:
        vocab_size: number of distinct token ids (embedding rows).
        embedding_dim: size of each token embedding vector.
        hidden_units: RNN hidden-state size.
        num_classes: number of output classes.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_units, num_classes):
        super(RNNModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        # Fix: the original passed dropout=0.2 here, but nn.RNN only applies
        # dropout *between* stacked layers, so with the default num_layers=1
        # it had no effect and merely triggered a PyTorch UserWarning.
        # Removing it is behavior-identical (dropout adds no parameters, so
        # pretrained state_dicts still load).
        self.rnn = nn.RNN(embedding_dim, hidden_units, batch_first=True)
        self.fc = nn.Linear(hidden_units, num_classes)

    def forward(self, x):
        """Classify a batch of token-id sequences.

        Args:
            x: LongTensor of shape (batch, seq_len) holding token ids.

        Returns:
            FloatTensor of shape (batch, num_classes) with class
            probabilities (each row sums to 1 via softmax).
        """
        x = self.embedding(x)                 # (batch, seq_len, embedding_dim)
        output, _ = self.rnn(x)               # (batch, seq_len, hidden_units)
        x = output[:, -1, :]                  # hidden state at the last time step
        x = self.fc(x)                        # (batch, num_classes) logits
        # Softmax here is fine for inference; training would normally use
        # raw logits with CrossEntropyLoss instead.
        return F.softmax(x, dim=1)
|
|
|
|
|
@st.cache_resource
def load_model(vocab_size):
    """Build the RNN classifier and load its pretrained weights on CPU.

    Cached by Streamlit so the weights are read from disk only once per
    vocab_size. On failure the error is shown in the UI and None is
    returned instead of raising.
    """
    model = RNNModel(vocab_size, embedding_dim, hidden_units, num_classes)
    try:
        weights = torch.load(
            'rnn_classification_model_weights.pth',
            map_location=torch.device('cpu'),
        )
        model.load_state_dict(weights)
    except Exception as e:
        st.error(f"Error loading model: {str(e)}")
        return None
    model.eval()
    return model
|
|
|
|
|
@st.cache_data
def load_data():
    """Load the CSV corpus, build a word-level vocabulary, and return
    pre-padded token-id tensors for every row.

    Returns:
        X_train_pad: LongTensor (num_rows, max_seq_len), left-padded with 0.
        texts: numpy array of the raw text column.
        class_mapping: dict {position_index: label_string} over unique labels.
        vocab_size: number of entries in the tokenizer vocabulary.
    """
    # The raw CSV has unnamed columns '0' (label) and 'a' (text).
    # latin1 avoids decode errors on non-UTF-8 bytes in the file.
    df=pd.read_csv("alldata_1_for_kaggle.csv",encoding='latin1')
    df = df.rename(columns={'0': 'labels', 'a': 'text'})
    texts = df['text'].values
    # Map the model's class index (position in unique()) back to the label
    # string. NOTE(review): this assumes the training labels were encoded in
    # this same unique() order — confirm against the training script.
    unique_classes = (df['labels'].unique())
    class_mapping = {i: f"{idx}" for i,idx in enumerate(unique_classes)}

    def tokenize(text):
        # Simple lowercase whitespace split, used only for vocab counting.
        return text.lower().split()

    # Count word frequencies across the whole corpus.
    word_counts = Counter()
    for text in texts:
        word_counts.update(tokenize(text))

    # Words ordered most-frequent first so common words get small ids.
    sorted_words = [word for word, _ in word_counts.most_common()]

    # Reserve 0 for padding and 1 for unknown tokens.
    vocab = {"<pad>": 0, "<unk>": 1}
    vocab.update({word: idx + 2 for idx, word in enumerate(sorted_words)})

    # NOTE(review): Whitespace pre-tokenizer also splits on punctuation,
    # while the vocab above was built with plain .split(); punctuation-
    # attached tokens therefore map to <unk>. Verify this matches training.
    tokenizer = Tokenizer(WordLevel(vocab, unk_token="<unk>"))
    tokenizer.pre_tokenizer = Whitespace()

    def text_to_sequence(texts):
        # Encode each lowercased text into a list of vocabulary ids.
        return [tokenizer.encode(text.lower()).ids for text in texts]

    X_train_seq = text_to_sequence(texts)
    # NOTE(review): max_len is computed but never used below — pad_sequence
    # infers the max length itself. Left in place to preserve behavior.
    max_len = max([len(seq) for seq in X_train_seq])

    X_train_seq = [torch.tensor(seq) for seq in X_train_seq]

    # Pre-padding trick: pad_sequence only pads on the right, so reverse
    # each sequence, right-pad, then reverse along the time axis — yielding
    # sequences left-padded with 0 (the real tokens end at position -1,
    # which is where RNNModel.forward reads the hidden state).
    X_train_seq_reversed = [seq.flip(0) for seq in X_train_seq]
    X_train_pad_reversed = pad_sequence(X_train_seq_reversed, batch_first=True, padding_value=0)
    X_train_pad = X_train_pad_reversed.flip(1)
    vocab_size = len(tokenizer.get_vocab())

    return X_train_pad, texts, class_mapping, vocab_size
|
|
|
|
|
|
|
|
def main():
    """Streamlit entry point: pick a corpus row, run the RNN, show the class."""
    colored_title("Text Classification using RNN", "black")

    X_test, texts, class_mapping, vocab_size = load_data()

    # Row picker over the loaded corpus.
    colored_subheader("Select a Row for Prediction:", "black")
    row_idx = st.selectbox("Select a row", options=range(len(texts)), index=0)

    # Show the chosen text read-only.
    colored_text("Selected Text:", "black")
    st.text_area("Text Content:", value=texts[row_idx], height=150, disabled=True)

    if not st.button("Predict"):
        return
    model = load_model(vocab_size)
    if model is None:
        # load_model already surfaced the error in the UI.
        return

    # [[row_idx]] keeps a batch dimension of 1 for the model.
    with torch.no_grad():
        probs = model(X_test[[row_idx]])
    predicted_class = torch.argmax(probs, dim=1).item()

    colored_subheader("Prediction Results:", "green")
    colored_text(f"Predicted Class: {class_mapping[predicted_class]}", "green")
|
|
# Standard entry guard: launch the app only when this file is run directly.
if __name__ == "__main__":
    main()