File size: 2,531 Bytes
950b23e bd0ff05 67ef43f bd0ff05 712a8ee bd0ff05 00689b5 bd0ff05 7700352 00689b5 bd0ff05 950b23e bd0ff05 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 |
import streamlit as st
import pandas as pd
import re
import tensorflow as tf
import tensorflow_hub as tf_hub
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from tensorflow.keras.models import load_model
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
import nltk
# Use /tmp for NLTK data (writable in Hugging Face Spaces)
nltk_data_dir = "/tmp/nltk_data"
# Streamlit re-executes this script on every interaction while the `nltk`
# module stays loaded, so avoid appending the same search path repeatedly.
if nltk_data_dir not in nltk.data.path:
    nltk.data.path.append(nltk_data_dir)


def _ensure_nltk_resource(resource_path, package):
    """Download an NLTK package only when it cannot already be found.

    Args:
        resource_path: Resource name as understood by ``nltk.data.find``
            (for example ``"corpora/stopwords"``).
        package: Package identifier passed to ``nltk.download``.
    """
    try:
        nltk.data.find(resource_path)
    except LookupError:
        nltk.download(package, download_dir=nltk_data_dir)


# Make sure the stopwords corpus and the punkt_tab tokenizer data are present
_ensure_nltk_resource('corpora/stopwords', 'stopwords')
_ensure_nltk_resource('tokenizers/punkt_tab', 'punkt_tab')


@st.cache_resource
def _load_model():
    """Load the trained Keras model once and reuse it across script reruns."""
    return tf.keras.models.load_model(
        'src/model_final.keras',
        custom_objects={'KerasLayer': tf_hub.KerasLayer},
    )


@st.cache_resource
def _create_stemmer():
    """Create the Sastrawi stemmer once and reuse it across script reruns."""
    return StemmerFactory().create_stemmer()


# Load the trained model
model = _load_model()

# Define stopwords: the NLTK Indonesian list (de-duplicated) plus 'oh'
stpwds_id = list(set(stopwords.words('indonesian')))
stpwds_id.append('oh')

# Define stemming
stemmer = _create_stemmer()
# Create A Function for Text Preprocessing
def text_preprocessing(text):
    """Clean, tokenize, filter and stem a raw text string.

    The steps run in this order: lowercase; replace @mentions and #hashtags
    with a space; replace literal backslash-n sequences with a space; strip
    surrounding whitespace; replace ``http...`` and ``www....`` URLs with a
    space; replace every character that is not an ASCII letter, whitespace or
    an apostrophe with a space; tokenize with ``word_tokenize``; drop tokens
    found in the module-level ``stpwds_id`` list; stem each remaining token
    with the module-level ``stemmer``.

    Args:
        text: The raw input string.

    Returns:
        The stemmed tokens joined by single spaces (empty if nothing is left).
    """
    # Case folding
    text = text.lower()
    # Mention removal
    text = re.sub(r"@[A-Za-z0-9_]+", " ", text)
    # Hashtags removal
    text = re.sub(r"#[A-Za-z0-9_]+", " ", text)
    # Escaped-newline removal: this matches the two literal characters
    # backslash + "n", not a real line break (real line breaks are whitespace
    # and are consumed by the tokenizer below).
    text = re.sub(r"\\n", " ", text)
    # Whitespace removal
    text = text.strip()
    # URL removal (the dot after "www" is escaped so it only matches a real dot)
    text = re.sub(r"http\S+", " ", text)
    text = re.sub(r"www\.\S+", " ", text)
    # Non-letter removal (such as emoticons, symbols like μ, $, 兀, etc.);
    # raw string so that \s is a regex class, not an invalid string escape.
    text = re.sub(r"[^A-Za-z\s']", " ", text)
    # Tokenization
    tokens = word_tokenize(text)
    # Stopwords removal: build a set once so each lookup is O(1)
    stopword_set = set(stpwds_id)
    # Stemming of the tokens that survive the stopword filter
    stems = [stemmer.stem(word) for word in tokens if word not in stopword_set]
    # Combining tokens
    return ' '.join(stems)
@st.cache_resource
def _load_hub_layer():
    """Build the TF-Hub text embedding layer once and reuse it across reruns.

    Streamlit re-executes the whole script on every widget interaction, so an
    uncached layer would be re-instantiated from the remote handle each time.
    """
    return tf_hub.KerasLayer(
        "https://www.kaggle.com/models/google/nnlm/TensorFlow2/id-dim128-with-normalization/1",
        input_shape=[],
        dtype=tf.string,
        trainable=False,
    )


hub_layer = _load_hub_layer()

# Define the Streamlit interface
st.title('Sentiment Analysis App')

# Get user input
user_input = st.text_area("Enter the text for sentiment analysis:")

if st.button('Analyze'):
    # Treat whitespace-only input the same as empty input
    if user_input and user_input.strip():
        # Preprocess the input text
        processed_text = text_preprocessing(user_input)
        # Embed the single processed string with the hub layer, then classify.
        # NOTE(review): the model is loaded with a KerasLayer custom object;
        # confirm it expects pre-embedded input rather than raw strings.
        data_inf = hub_layer([processed_text])
        prediction = model.predict(data_inf)
        # assumes one score per sample, thresholded at 0.5 — TODO confirm
        sentiment = "Positive" if prediction[0] > 0.5 else "Negative"
        # Display the result
        st.write(f"Sentiment: {sentiment}")
    else:
        st.write("Please enter some text.")