# Streamlit Space: Notes Categorization app
# Standard-library imports
import re

# Third-party imports
import numpy as np
import pandas as pd
import streamlit as st
from tensorflow.keras.models import load_model

import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize

# Fetch the NLTK resources preprocessing() needs, once at startup:
# 'punkt' for word_tokenize, 'stopwords' for the Indonesian stopword list,
# and 'wordnet' for WordNetLemmatizer. NLTK corpora load lazily, so
# downloading after the imports above is safe.
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')
# Load the trained RNN text-classification model from disk.
loaded_model = load_model('model_rnn')

# Human-readable (Indonesian) transaction-category names, indexed by the
# model's integer output class (0..16).
_CATEGORIES = [
    'Uang Masuk', 'Uang Keluar', 'Pinjaman', 'Tagihan', 'Top Up',
    'Biaya & Lainnya', 'Transportasi', 'Pendidikan', 'Hadiah & Amal',
    'Belanja', 'Hiburan', 'Makanan & Minuman', 'Kesehatan',
    'Perawatan Diri', 'Hobi & Gaya Hidup', 'Pencairan Investasi',
    'Tabungan & Investasi',
]
label_dict = dict(enumerate(_CATEGORIES))
def preprocessing(text):
    """Clean a raw note string for the classifier.

    Applies lowercasing, digit/punctuation/whitespace normalization,
    word tokenization, Indonesian stopword removal (plus a few informal
    and stray English words), and WordNet lemmatization, then rejoins
    the surviving tokens into a single space-separated string.
    """
    # Lowercase, then strip digits, punctuation, and redundant whitespace.
    cleaned = re.sub(r'\d+', '', text.lower())
    cleaned = re.sub(r'[^\w\s]', '', cleaned)
    cleaned = re.sub(r'\s+', ' ', cleaned).strip()

    # Split into word tokens.
    words = word_tokenize(cleaned)

    # Indonesian stopwords, extended with informal spellings and a few
    # English words seen in the notes data.
    stop_words = set(stopwords.words('indonesian'))
    stop_words.update(['the', 'yg', 'gk', 'nyagak', 'pake', 'pakai', 'i', "and"])

    # Drop stopwords, then lemmatize whatever remains.
    lemmatizer = WordNetLemmatizer()
    kept = [lemmatizer.lemmatize(w) for w in words if w not in stop_words]

    return ' '.join(kept)
def run():
    """Render the Streamlit page: accept a note text and show its predicted category."""
    st.title('Notes Categorization')

    default = "konser twice"
    user_input = st.text_area("Enter the notes text here:", default, height=50)

    if st.button('Predict'):
        # Clean the raw text the same way the training data was prepared.
        text_processed = preprocessing(user_input)

        # The model predicts on batches, so wrap the single sample in a
        # length-1 batch dimension.
        batch = np.expand_dims(text_processed, axis=0)

        # Take the class with the highest predicted probability and map it
        # back to its human-readable category name.
        probabilities = loaded_model.predict(batch)
        best_class = np.argmax(probabilities[0])
        predicted_category = label_dict[best_class]
        st.write(f'The predicted category is: {predicted_category}')
if __name__ == '__main__':
    # BUG FIX: the original called main(), which is not defined anywhere in
    # this file and would raise NameError at startup; the app's entry point
    # is run().
    run()