Delete prediction.py
Browse files- prediction.py +0 -82
prediction.py
DELETED
|
@@ -1,82 +0,0 @@
|
|
| 1 |
-
# Standard library
import re

# Third-party
import nltk
import numpy as np
import pandas as pd
import streamlit as st
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from tensorflow.keras.models import load_model

# Fetch the NLTK data packages required by preprocessing().
# These are no-ops when the data is already present locally.
nltk.download('punkt')      # tokenizer models backing word_tokenize
nltk.download('stopwords')  # stopword corpora (the Indonesian list is used)
nltk.download('wordnet')    # lexical database backing WordNetLemmatizer
|
| 13 |
-
|
| 14 |
-
# Trained RNN text classifier (TensorFlow SavedModel directory).
loaded_model = load_model('model_rnn')

# Category names in model-output order (Indonesian personal-finance
# transaction categories); index i is the model's class i.
_CATEGORIES = (
    'Uang Masuk', 'Uang Keluar', 'Pinjaman', 'Tagihan', 'Top Up',
    'Biaya & Lainnya', 'Transportasi', 'Pendidikan', 'Hadiah & Amal',
    'Belanja', 'Hiburan', 'Makanan & Minuman', 'Kesehatan',
    'Perawatan Diri', 'Hobi & Gaya Hidup', 'Pencairan Investasi',
    'Tabungan & Investasi',
)

# Map predicted class index -> human-readable category name.
label_dict = dict(enumerate(_CATEGORIES))
|
| 23 |
-
|
| 24 |
-
def preprocessing(text):
    """Clean raw note text for the classifier.

    Pipeline: lowercase -> strip digits, punctuation, and extra
    whitespace -> tokenize -> drop (Indonesian + custom) stopwords ->
    lemmatize -> re-join into one space-separated string.
    """
    # Normalize: lowercase, then remove numbers, punctuation,
    # and collapse runs of whitespace.
    cleaned = re.sub(r'\d+', '', text.lower())
    cleaned = re.sub(r'[^\w\s]', '', cleaned)
    cleaned = re.sub(r'\s+', ' ', cleaned).strip()

    # Indonesian stopword list plus a few informal/English extras
    # seen in the notes data.
    stop_words = set(stopwords.words('indonesian'))
    stop_words.update(['the', 'yg', 'gk', 'nyagak', 'pake', 'pakai', 'i', 'and'])

    # Tokenize, filter out stopwords, and lemmatize what remains.
    lemmatizer = WordNetLemmatizer()
    kept = [
        lemmatizer.lemmatize(token)
        for token in word_tokenize(cleaned)
        if token not in stop_words
    ]

    # Re-assemble the surviving tokens into a single string.
    return ' '.join(kept)
|
| 54 |
-
|
| 55 |
-
def run():
    """Render the Streamlit page: take note text and show its predicted category."""
    st.title('Notes Categorization')

    # Pre-filled example so the page works out of the box.
    user_input = st.text_area("Enter the notes text here:", "konser twice", height=50)

    # Nothing to do until the user asks for a prediction.
    if not st.button('Predict'):
        return

    # Clean the raw text the same way the training data was cleaned.
    cleaned = preprocessing(user_input)

    # The model predicts on batches, so wrap the single sample
    # with an extra leading dimension.
    batch = np.expand_dims(cleaned, axis=0)

    # Predict and take the class with the highest probability
    # for the first (only) sample in the batch.
    probabilities = loaded_model.predict(batch)
    predicted_class = np.argmax(probabilities[0])

    # Decode the class index back into the original category name.
    predicted_category = label_dict[predicted_class]
    st.write(f'The predicted category is: {predicted_category}')
|
| 80 |
-
|
| 81 |
-
# Script entry point. The page is defined in run(); the original code
# called main(), which is not defined anywhere in this module and
# raised NameError when the script was executed directly.
if __name__ == '__main__':
    run()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|