Upload 8 files
Browse files- .gitattributes +1 -0
- app.py +24 -0
- model_rnn/fingerprint.pb +3 -0
- model_rnn/keras_metadata.pb +3 -0
- model_rnn/saved_model.pb +3 -0
- model_rnn/variables/variables.data-00000-of-00001 +3 -0
- model_rnn/variables/variables.index +0 -0
- prediction.py +83 -0
- requirements.txt +0 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
model_rnn/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
|
app.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Streamlit entry point: a landing page plus a prediction page served by prediction.py.
import pandas as pd
import streamlit as st

import prediction

# Tab title and layout — st.set_page_config must run before any other st call.
st.set_page_config(
    page_title="Notes Text Classification",
    layout='wide',
    initial_sidebar_state='expanded',
)

# Sidebar navigation between the two pages.
st.sidebar.title("Choose a page:")
page = st.sidebar.selectbox("", ('Landing Page', 'Data Prediction'))

# Route to the page the user picked.
if page == 'Data Prediction':
    prediction.run()
else:
    # Landing page: headline, tagline, and an illustrative image.
    st.title("What category does this note belong to?")
    st.subheader("Find out the category with this space that uses NLP to do predictions.")
    st.image("https://imageio.forbes.com/specials-images/imageserve/60808d87824ab7edc3770486/Note-Pad-and-Pen-on-Yellow-background/960x0.jpg?height=474&width=711&fit=bounds")
|
model_rnn/fingerprint.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:37d606dfcfb95b10bb426d66179918955507942eef68355640a24ec95fd18535
|
| 3 |
+
size 57
|
model_rnn/keras_metadata.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c769156491524cd04ccbcc89614a1c17acee03d76d4e99e6e73b6655a13a022f
|
| 3 |
+
size 35540
|
model_rnn/saved_model.pb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:92fcd21f1a911fc986008fe7fd965c96a46293d0dd31546d0feddfc6e249be0d
|
| 3 |
+
size 5117841
|
model_rnn/variables/variables.data-00000-of-00001
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3077fbe89ebff4ce89b4501a9c4b34782544026b7c815a3fc6e772e1b324a023
|
| 3 |
+
size 275723988
|
model_rnn/variables/variables.index
ADDED
|
Binary file (4.25 kB). View file
|
|
|
prediction.py
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
import pandas as pd
import numpy as np
# NOTE(review): tf_hub does not appear to be used in this module — confirm before removing.
import tensorflow_hub as tf_hub
from tensorflow.keras.models import load_model
import re
import nltk
# NLTK resources are fetched at import time (network side effect on first run);
# each download must precede first use of the corresponding corpus/tokenizer.
nltk.download('punkt')
from nltk.tokenize import word_tokenize
nltk.download('stopwords')
from nltk.corpus import stopwords
nltk.download('wordnet')
from nltk.stem import WordNetLemmatizer

# Load the model
# Loaded once at import time from the SavedModel directory so every
# prediction request reuses the same in-memory model.
loaded_model = load_model('model_rnn')

# Create a dictionary to map the labels to the categories
# Maps the model's integer class indices (argmax output) to the
# human-readable Indonesian category names shown in the UI.
label_dict = {0: 'Uang Masuk', 1: 'Uang Keluar', 2: 'Pinjaman', 3: 'Tagihan', 4: 'Top Up',
              5: 'Biaya & Lainnya', 6: 'Transportasi', 7: 'Pendidikan', 8: 'Hadiah & Amal',
              9: 'Belanja', 10: 'Hiburan',11: 'Makanan & Minuman', 12: 'Kesehatan',
              13: 'Perawatan Diri', 14: 'Hobi & Gaya Hidup', 15: 'Pencairan Investasi',
              16: 'Tabungan & Investasi'}
|
| 24 |
+
|
| 25 |
+
def preprocessing(text):
    """Clean a raw note string for the classifier.

    Lowercases the text, strips digits/punctuation/extra whitespace,
    tokenizes, removes Indonesian stopwords (plus a few informal and
    English extras), lemmatizes, and returns the tokens re-joined into
    a single space-separated string.
    """
    # Normalize: lowercase, then drop digits, punctuation, and whitespace runs.
    cleaned = text.lower()
    cleaned = re.sub(r'\d+', '', cleaned)
    cleaned = re.sub(r'[^\w\s]', '', cleaned)
    cleaned = re.sub(r'\s+', ' ', cleaned).strip()

    # Stopword set: NLTK's Indonesian list plus ad-hoc informal/English terms.
    # (The original comment said "English" — the corpus loaded is Indonesian.)
    blocked = set(stopwords.words('indonesian'))
    blocked.update(['the', 'yg', 'gk', 'nyagak', 'pake', 'pakai', 'i', "and"])

    # Tokenize, drop stopwords, and lemmatize each surviving token.
    lemmatize = WordNetLemmatizer().lemmatize
    kept = [lemmatize(tok) for tok in word_tokenize(cleaned) if tok not in blocked]

    return ' '.join(kept)
|
| 55 |
+
|
| 56 |
+
def run():
    """Render the prediction page: note text in, predicted category out."""
    st.title('Notes Categorization')

    # Pre-filled example so the page demonstrates itself out of the box.
    default = "konser twice"
    user_input = st.text_area("Enter the notes text here:", default, height=50)

    # Only predict when the button is pressed; otherwise just render the form.
    if not st.button('Predict'):
        return

    # Clean the raw text, then add a leading batch dimension of 1 —
    # the model expects batched input even for a single sample.
    cleaned = preprocessing(user_input)
    batch = np.expand_dims(cleaned, axis=0)

    # Run the model and map the highest-probability class index to its label.
    scores = loaded_model.predict(batch)
    predicted_category = label_dict[np.argmax(scores[0])]

    st.write(f'The predicted category is: {predicted_category}')
|
| 81 |
+
|
| 82 |
+
# Allow executing this module directly (e.g. `streamlit run prediction.py`).
if __name__ == '__main__':
    # BUG FIX: the original called main(), which is not defined anywhere in
    # this module, so direct execution raised NameError. The page entry
    # point defined above is run().
    run()
|
requirements.txt
ADDED
|
File without changes
|