Delete prediction.py
Browse files- prediction.py +0 -82
prediction.py
DELETED
|
@@ -1,82 +0,0 @@
|
|
| 1 |
-
# Standard library
import re

# Third-party
import nltk
import numpy as np
import pandas as pd
import streamlit as st
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from tensorflow.keras.models import load_model

# Fetch the NLTK data packages required by preprocessing().
# These are no-ops when the data is already present locally.
nltk.download('punkt')      # tokenizer models backing word_tokenize
nltk.download('stopwords')  # stopword corpora (the Indonesian list is used)
nltk.download('wordnet')    # lexical database backing WordNetLemmatizer
|
| 13 |
-
|
| 14 |
-
# Trained RNN text classifier (TensorFlow SavedModel directory).
loaded_model = load_model('model_rnn')

# Category names in model-output order (Indonesian personal-finance
# transaction categories); index i is the model's class i.
_CATEGORIES = (
    'Uang Masuk', 'Uang Keluar', 'Pinjaman', 'Tagihan', 'Top Up',
    'Biaya & Lainnya', 'Transportasi', 'Pendidikan', 'Hadiah & Amal',
    'Belanja', 'Hiburan', 'Makanan & Minuman', 'Kesehatan',
    'Perawatan Diri', 'Hobi & Gaya Hidup', 'Pencairan Investasi',
    'Tabungan & Investasi',
)

# Map predicted class index -> human-readable category name.
label_dict = dict(enumerate(_CATEGORIES))
|
| 23 |
-
|
| 24 |
-
def preprocessing(text):
    """Clean raw note text for the classifier.

    Pipeline: lowercase -> strip digits, punctuation, and extra
    whitespace -> tokenize -> drop (Indonesian + custom) stopwords ->
    lemmatize -> re-join into one space-separated string.
    """
    # Normalize: lowercase, then remove numbers, punctuation,
    # and collapse runs of whitespace.
    cleaned = re.sub(r'\d+', '', text.lower())
    cleaned = re.sub(r'[^\w\s]', '', cleaned)
    cleaned = re.sub(r'\s+', ' ', cleaned).strip()

    # Indonesian stopword list plus a few informal/English extras
    # seen in the notes data.
    stop_words = set(stopwords.words('indonesian'))
    stop_words.update(['the', 'yg', 'gk', 'nyagak', 'pake', 'pakai', 'i', 'and'])

    # Tokenize, filter out stopwords, and lemmatize what remains.
    lemmatizer = WordNetLemmatizer()
    kept = [
        lemmatizer.lemmatize(token)
        for token in word_tokenize(cleaned)
        if token not in stop_words
    ]

    # Re-assemble the surviving tokens into a single string.
    return ' '.join(kept)
|
| 54 |
-
|
| 55 |
-
def run():
    """Render the Streamlit page: take note text and show its predicted category."""
    st.title('Notes Categorization')

    # Pre-filled example so the page works out of the box.
    user_input = st.text_area("Enter the notes text here:", "konser twice", height=50)

    # Nothing to do until the user asks for a prediction.
    if not st.button('Predict'):
        return

    # Clean the raw text the same way the training data was cleaned.
    cleaned = preprocessing(user_input)

    # The model predicts on batches, so wrap the single sample
    # with an extra leading dimension.
    batch = np.expand_dims(cleaned, axis=0)

    # Predict and take the class with the highest probability
    # for the first (only) sample in the batch.
    probabilities = loaded_model.predict(batch)
    predicted_class = np.argmax(probabilities[0])

    # Decode the class index back into the original category name.
    predicted_category = label_dict[predicted_class]
    st.write(f'The predicted category is: {predicted_category}')
|
| 80 |
-
|
| 81 |
-
# Script entry point. The page is defined in run(); the original code
# called main(), which is not defined anywhere in this module and
# raised NameError when the script was executed directly.
if __name__ == '__main__':
    run()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|