Spaces:

YAMITEK
/

Text_Classification_using_RNN

Build error

App Files Files Community

Text_Classification_using_RNN / app.py

YAMITEK

Upload 5 files

4a75943 verified 9 months ago

raw

history blame contribute delete

7.03 kB

	import streamlit as st
	import pandas as pd
	import torch
	import torch.nn as nn
	import torch.nn.functional as F
	import numpy as np
	import os
	from PIL import Image
	from sklearn.preprocessing import StandardScaler, LabelEncoder
	from tokenizers import Tokenizer
	from tokenizers.models import WordLevel
	from tokenizers.pre_tokenizers import Whitespace
	from collections import Counter
	import torch
	from torch.nn.utils.rnn import pad_sequence



	st.set_page_config(layout="centered")

	# Inject the page-wide stylesheet: background, translucent overlay, and
	# colour/size overrides for the select box, captions and text area.
	# Disabled declarations are wrapped in real CSS comments (/* ... */); CSS has
	# no '#' comment syntax, so the previous '# ...' lines were invalid
	# declarations that only worked through the parser's error recovery.
	st.markdown("""
	<style>
	.stApp {
	background-image: url("");
	background-size: cover;
	background-position: center;
	background-repeat: no-repeat;
	height: auto; /* Allows the page to expand for scrolling */
	overflow: auto; /* Enables scrolling if the page content overflows */
	/* position: relative; */
	}

	/* Adjust opacity of overlay to make content more visible */
	.stApp::before {
	content: "";
	position: absolute;
	top: 0;
	left: 0;
	width: 100%;
	height: 100%;
	background-color: rgba(255, 255, 255, 0.8); /* Slightly higher opacity */
	z-index: -1;
	}

	/* Ensure content appears above the overlay */
	.stApp > * {
	position: relative;
	z-index: 2;
	}

	/* Ensure the dataframe is visible */
	.dataframe {
	background-color: rgba(255, 255, 255, 0.9) !important;
	z-index: 3;
	}

	/* Style text elements for better visibility */
	h1, h3, span, div {
	text-shadow: 1px 1px 2px rgba(255, 255, 255, 0.2);
	}

	/* Custom CSS for select box heading */
	div.stSelectbox > label {
	color: #000000 !important; /* Change to your desired color */
	/* background-color: black !important; (background color of the dropdown) */
	font-size: 24px !important; /* Change font size */
	font-weight: bold !important; /* Make text bold */
	}

	/* Custom CSS for image caption */
	.custom-caption {
	color: #000000 !important; /* Change to your desired color */
	font-size: 24px !important; /* Optional: Change font size */
	text-align: center; /* Center-align the caption */
	}

	.stMainBlockContainer {
	background-color: white !important; /* Background color of the dropdown */
	}

	.stTextArea{
	color: #000000 !important;
	}

	</style>
	""", unsafe_allow_html=True)


	# Custom title styling functions
	def colored_title(text, color):
	    """Render ``text`` as an ``<h1>`` heading drawn in the CSS colour ``color``.

	    Both values are interpolated into the HTML as-is (no escaping).
	    """
	    heading_html = f"<h1 style='color: {color};'>{text}</h1>"
	    st.markdown(heading_html, unsafe_allow_html=True)

	def colored_subheader(text, color):
	    """Render ``text`` as an ``<h3>`` sub-heading drawn in the CSS colour ``color``.

	    Both values are interpolated into the HTML as-is (no escaping).
	    """
	    subheading_html = f"<h3 style='color: {color};'>{text}</h3>"
	    st.markdown(subheading_html, unsafe_allow_html=True)

	def colored_text(text, color):
	    """Render ``text`` inside a ``<span>`` drawn in the CSS colour ``color``.

	    Both values are interpolated into the HTML as-is (no escaping).
	    """
	    span_html = f"<span style='color: {color};'>{text}</span>"
	    st.markdown(span_html, unsafe_allow_html=True)


	# Model hyper-parameters passed to RNNModel when the network is rebuilt
	# for inference.
	num_classes = 3      # size of the classifier's output layer
	hidden_units = 128   # width of the recurrent hidden state
	embedding_dim = 128  # width of each token embedding vector

	class RNNModel(nn.Module):
	    """Single-layer RNN text classifier.

	    Token ids are embedded, passed through one ``nn.RNN`` layer, and the
	    output at the final timestep is projected to per-class probabilities.

	    Args:
	        vocab_size: Number of rows in the embedding table.
	        embedding_dim: Width of each token embedding.
	        hidden_units: Width of the recurrent hidden state.
	        num_classes: Number of output classes.
	    """

	    def __init__(self, vocab_size, embedding_dim, hidden_units, num_classes):
	        super().__init__()
	        self.embedding = nn.Embedding(vocab_size, embedding_dim)
	        # No ``dropout`` argument here: nn.RNN only applies dropout between
	        # stacked layers, so with the default single layer it had no effect
	        # and merely raised a UserWarning. Parameter names and shapes are
	        # unchanged, so previously saved weights still load.
	        self.rnn = nn.RNN(embedding_dim, hidden_units, batch_first=True)
	        self.fc = nn.Linear(hidden_units, num_classes)

	    def forward(self, x):
	        """Return class probabilities for a batch of token-id sequences.

	        Args:
	            x: Integer tensor indexed as (batch, timestep), matching
	                ``batch_first=True``.

	        Returns:
	            Tensor of shape (batch, num_classes); each row is a softmax
	            distribution.
	        """
	        x = self.embedding(x)
	        output, _ = self.rnn(x)
	        # Only the last timestep feeds the classifier, so real tokens must
	        # sit at the end of each sequence (i.e. padding goes on the left).
	        x = output[:, -1, :]
	        x = self.fc(x)
	        return F.softmax(x, dim=1)

	@st.cache_resource
	def _load_cached_model(vocab_size):
	    """Build the RNN and load its trained weights; raises if anything fails.

	    Only successful loads are cached, keyed on ``vocab_size``.
	    """
	    model = RNNModel(vocab_size, embedding_dim, hidden_units, num_classes)
	    # NOTE(review): torch.load unpickles the file, so only trusted weight
	    # files must be shipped with the app. Consider ``weights_only=True`` if
	    # the deployed torch version supports it.
	    state_dict = torch.load('rnn_classification_model_weights.pth', map_location=torch.device('cpu'))
	    model.load_state_dict(state_dict)
	    model.eval()  # inference mode
	    return model


	def load_model(vocab_size):
	    """Return the trained model in eval mode, or ``None`` if loading failed.

	    The error handling lives outside the cached loader on purpose: when the
	    ``try/except`` was inside the ``st.cache_resource`` function, a failed
	    load returned ``None`` and that ``None`` was cached, so the load was
	    never retried. Now a failure is reported and the next call tries again.

	    Args:
	        vocab_size: Vocabulary size used to size the embedding table.

	    Returns:
	        The loaded ``RNNModel``, or ``None`` after showing an error message.
	    """
	    try:
	        return _load_cached_model(vocab_size)
	    except Exception as e:  # UI boundary: report any failure, do not crash
	        st.error(f"Error loading model: {str(e)}")
	        return None

	@st.cache_data
	def load_data():
	    """Load the dataset and encode every text as a left-padded id sequence.

	    Reads ``alldata_1_for_kaggle.csv``, builds a frequency-ordered word
	    vocabulary from its texts, encodes each text with a word-level
	    tokenizer, and pads all sequences on the left to a common length.

	    Returns:
	        tuple: ``(padded_ids, texts, class_mapping, vocab_size)`` where
	        ``padded_ids`` is a 2-D tensor with one row per text, ``texts`` is
	        the array of raw texts, ``class_mapping`` maps a class index to its
	        label rendered as a string, and ``vocab_size`` is the tokenizer's
	        vocabulary size.
	    """
	    df = pd.read_csv("alldata_1_for_kaggle.csv", encoding='latin1')
	    df = df.rename(columns={'0': 'labels', 'a': 'text'})
	    texts = df['text'].values

	    # Class index -> label string, numbered in order of first appearance in
	    # the CSV.
	    # NOTE(review): this must match the label encoding used when the model
	    # was trained; that is not visible here - confirm.
	    class_mapping = {
	        index: f"{label}" for index, label in enumerate(df['labels'].unique())
	    }

	    # Count lower-cased, whitespace-split words over the whole corpus.
	    word_counts = Counter()
	    for text in texts:
	        word_counts.update(text.lower().split())

	    # Ids 0 and 1 are reserved; the remaining ids follow descending word
	    # frequency (most common word gets id 2).
	    vocab = {"<pad>": 0, "<unk>": 1}
	    vocab.update(
	        {word: rank + 2 for rank, (word, _) in enumerate(word_counts.most_common())}
	    )

	    tokenizer = Tokenizer(WordLevel(vocab, unk_token="<unk>"))
	    tokenizer.pre_tokenizer = Whitespace()

	    # Encode each text to a tensor of ids. dtype is forced to long so that a
	    # text yielding no tokens gives an empty integer tensor rather than an
	    # empty float one.
	    sequences = [
	        torch.tensor(tokenizer.encode(text.lower()).ids, dtype=torch.long)
	        for text in texts
	    ]

	    # pad_sequence pads on the right; reversing each sequence, padding, and
	    # reversing the time axis back puts the padding on the left instead.
	    reversed_sequences = [seq.flip(0) for seq in sequences]
	    padded_reversed = pad_sequence(reversed_sequences, batch_first=True, padding_value=0)
	    X_train_pad = padded_reversed.flip(1)

	    vocab_size = len(tokenizer.get_vocab())

	    return X_train_pad, texts, class_mapping, vocab_size


	def main():
	    """Render the page: choose a dataset row, show its text, classify on demand."""
	    colored_title("Text Classification using RNN", "black")

	    # Padded id tensor, raw texts, index -> label map and vocabulary size.
	    padded_inputs, texts, class_mapping, vocab_size = load_data()

	    # Row picker: one option per text in the dataset.
	    colored_subheader("Select a Row for Prediction:", "black")
	    row_choices = range(len(texts))
	    selected_index = st.selectbox("Select a row", options=row_choices, index=0)

	    # Read-only preview of the chosen text.
	    colored_text("Selected Text:", "black")
	    st.text_area("Text Content:", value=texts[selected_index], height=150, disabled=True)

	    # Nothing more to do until the user asks for a prediction.
	    if not st.button("Predict"):
	        return

	    model = load_model(vocab_size)
	    if model is None:
	        return

	    with torch.no_grad():
	        # List indexing keeps the batch dimension: one row, all timesteps.
	        single_row_batch = padded_inputs[[selected_index]]
	        output = model(single_row_batch)
	    predicted_class = torch.argmax(output, dim=1).item()

	    # Show the predicted label.
	    colored_subheader("Prediction Results:", "green")
	    colored_text(f"Predicted Class: {class_mapping[predicted_class]}", "green")



	if __name__ == "__main__":
	    main()