# Streamlit Space: Notes Categorization app
# Standard-library imports
import re

# Third-party imports
import numpy as np
import pandas as pd
import streamlit as st
from tensorflow.keras.models import load_model

import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize

# Fetch the NLTK resources preprocessing() needs, once at startup:
# 'punkt' for word_tokenize, 'stopwords' for the Indonesian stopword list,
# and 'wordnet' for WordNetLemmatizer. NLTK corpora load lazily, so
# downloading after the imports above is safe.
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')
# Load the trained RNN text-classification model from disk.
loaded_model = load_model('model_rnn')

# Human-readable (Indonesian) transaction-category names, indexed by the
# model's integer output class (0..16).
_CATEGORIES = [
    'Uang Masuk', 'Uang Keluar', 'Pinjaman', 'Tagihan', 'Top Up',
    'Biaya & Lainnya', 'Transportasi', 'Pendidikan', 'Hadiah & Amal',
    'Belanja', 'Hiburan', 'Makanan & Minuman', 'Kesehatan',
    'Perawatan Diri', 'Hobi & Gaya Hidup', 'Pencairan Investasi',
    'Tabungan & Investasi',
]
label_dict = dict(enumerate(_CATEGORIES))
def preprocessing(text):
    """Clean a raw note string for the classifier.

    Applies lowercasing, digit/punctuation/whitespace normalization,
    word tokenization, Indonesian stopword removal (plus a few informal
    and stray English words), and WordNet lemmatization, then rejoins
    the surviving tokens into a single space-separated string.
    """
    # Lowercase, then strip digits, punctuation, and redundant whitespace.
    cleaned = re.sub(r'\d+', '', text.lower())
    cleaned = re.sub(r'[^\w\s]', '', cleaned)
    cleaned = re.sub(r'\s+', ' ', cleaned).strip()

    # Split into word tokens.
    words = word_tokenize(cleaned)

    # Indonesian stopwords, extended with informal spellings and a few
    # English words seen in the notes data.
    stop_words = set(stopwords.words('indonesian'))
    stop_words.update(['the', 'yg', 'gk', 'nyagak', 'pake', 'pakai', 'i', "and"])

    # Drop stopwords, then lemmatize whatever remains.
    lemmatizer = WordNetLemmatizer()
    kept = [lemmatizer.lemmatize(w) for w in words if w not in stop_words]

    return ' '.join(kept)
def run():
    """Render the Streamlit page: accept a note text and show its predicted category."""
    st.title('Notes Categorization')

    default = "konser twice"
    user_input = st.text_area("Enter the notes text here:", default, height=50)

    if st.button('Predict'):
        # Clean the raw text the same way the training data was prepared.
        text_processed = preprocessing(user_input)

        # The model predicts on batches, so wrap the single sample in a
        # length-1 batch dimension.
        batch = np.expand_dims(text_processed, axis=0)

        # Take the class with the highest predicted probability and map it
        # back to its human-readable category name.
        probabilities = loaded_model.predict(batch)
        best_class = np.argmax(probabilities[0])
        predicted_category = label_dict[best_class]
        st.write(f'The predicted category is: {predicted_category}')
if __name__ == '__main__':
    # BUG FIX: the original called main(), which is not defined anywhere in
    # this file and would raise NameError at startup; the app's entry point
    # is run().
    run()