| import streamlit as st |
| import pandas as pd |
| import numpy as np |
| import tensorflow_hub as tf_hub |
| from tensorflow.keras.models import load_model |
| import re |
| import nltk |
| nltk.download('punkt') |
| from nltk.tokenize import word_tokenize |
| nltk.download('stopwords') |
| from nltk.corpus import stopwords |
| nltk.download('wordnet') |
| from nltk.stem import WordNetLemmatizer |
|
|
| |
def _load_classifier():
    """Load the trained Keras model stored at ``model_rnn``."""
    return load_model('model_rnn')


# Streamlit re-executes this script from the top on every widget interaction,
# so an uncached load_model() call reloads the model on each button click.
# st.cache_resource keeps one loaded instance that is reused across reruns;
# on Streamlit releases that lack it, fall back to loading directly as before.
_cache_resource = getattr(st, 'cache_resource', None)
if _cache_resource is not None:
    _load_classifier = _cache_resource(_load_classifier)

loaded_model = _load_classifier()
|
|
| |
# Category labels, listed in class-index order. run() looks up the argmax of
# the model's prediction vector in this mapping, so the position of each label
# in the tuple below is its class index (0..16).
_CATEGORY_LABELS = (
    'Uang Masuk',
    'Uang Keluar',
    'Pinjaman',
    'Tagihan',
    'Top Up',
    'Biaya & Lainnya',
    'Transportasi',
    'Pendidikan',
    'Hadiah & Amal',
    'Belanja',
    'Hiburan',
    'Makanan & Minuman',
    'Kesehatan',
    'Perawatan Diri',
    'Hobi & Gaya Hidup',
    'Pencairan Investasi',
    'Tabungan & Investasi',
)

label_dict = dict(enumerate(_CATEGORY_LABELS))
|
|
def preprocessing(text):
    """Clean a raw note and return it as a single space-joined string.

    Steps, in order:
      1. Lowercase the text.
      2. Remove digit runs, then every character that is neither a word
         character nor whitespace, then collapse whitespace runs to a single
         space and trim both ends.
      3. Tokenize with NLTK's ``word_tokenize``.
      4. Drop tokens found in NLTK's Indonesian stopword list or in the small
         set of extra words added below.
      5. Lemmatize each remaining token with NLTK's ``WordNetLemmatizer``.

    Args:
        text: The raw note text (a ``str``).

    Returns:
        The surviving, lemmatized tokens joined by single spaces. The result
        is an empty string when no token survives.
    """
    cleaned = text.lower()

    # Applied in this exact order: digits, punctuation/symbols, whitespace.
    for pattern, replacement in ((r'\d+', ''), (r'[^\w\s]', ''), (r'\s+', ' ')):
        cleaned = re.sub(pattern, replacement, cleaned)
    cleaned = cleaned.strip()

    # Indonesian stopwords plus a few extra slang/English words.
    ignored = set(stopwords.words('indonesian')) | {
        'the', 'yg', 'gk', 'nyagak', 'pake', 'pakai', 'i', 'and',
    }

    lemmatize = WordNetLemmatizer().lemmatize
    kept = []
    for token in word_tokenize(cleaned):
        if token in ignored:
            continue
        kept.append(lemmatize(token))

    return ' '.join(kept)
|
|
def run():
    """Render the notes-categorization page and display the prediction.

    Shows a title, a text area pre-filled with a sample note, and a
    ``Predict`` button. When the button is pressed, the note is cleaned with
    ``preprocessing``, passed to ``loaded_model`` as a batch of one, and the
    label for the highest-scoring class is looked up in ``label_dict`` and
    written to the page. If nothing is left of the note after cleaning, a
    warning is shown instead of a prediction.

    Returns:
        None. All output goes through Streamlit widgets.
    """
    st.title('Notes Categorization')

    default = "konser twice"

    # st.text_area documents a minimum height of 68 px (two lines of text);
    # recent Streamlit releases raise an error for smaller values such as the
    # previous 50, so use the smallest accepted height.
    user_input = st.text_area("Enter the notes text here:", default, height=68)

    if not st.button('Predict'):
        return

    text_processed = preprocessing(user_input)

    # A note made only of digits, punctuation or stopwords cleans down to an
    # empty string; classifying that would yield a meaningless category.
    if not text_processed:
        st.warning('The note contains no usable words after preprocessing.')
        return

    # Wrap the single cleaned string in a length-1 array so the model receives
    # a batch of one sample.
    preprocessed_notes = np.expand_dims(text_processed, axis=0)

    predictions = loaded_model.predict(preprocessed_notes)

    # Index of the highest score for the only sample; cast from a NumPy
    # integer to a plain int before using it as a dictionary key.
    predicted_class = int(np.argmax(predictions[0]))
    predicted_category = label_dict[predicted_class]

    st.write(f'The predicted category is: {predicted_category}')
|
|
if __name__ == '__main__':
    # run() is the page entry point defined in this module; the previous call
    # to main() referenced a name that is not defined here and would raise
    # NameError when the script is executed directly.
    run()