Spaces:

rachman
/

sample_sentiment_analysis_v3

Sleeping

App Files Files Community

sample_sentiment_analysis_v3 / src /streamlit_app.py

rachman

Update src/streamlit_app.py

a9c99fb verified 6 months ago

raw

history blame contribute delete

2.54 kB

	import streamlit as st
	import pandas as pd
	import re
	import tensorflow as tf
	import tensorflow_hub as tf_hub
	from nltk.corpus import stopwords
	from nltk.tokenize import word_tokenize
	from tensorflow.keras.models import load_model
	from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
	import nltk

	# Use /tmp for NLTK data (writable in Hugging Face Spaces)
	nltk_data_dir = "/tmp/nltk_data"
	# Streamlit re-executes this script on every interaction inside the same
	# process, so register the directory only once instead of appending a
	# duplicate entry to nltk.data.path on each rerun.
	if nltk_data_dir not in nltk.data.path:
	    nltk.data.path.append(nltk_data_dir)


	@st.cache_resource
	def _ensure_nltk_resources(download_dir):
	    """Download the 'stopwords' and 'punkt_tab' resources into *download_dir*.

	    Wrapped in ``st.cache_resource`` so the downloads run once per server
	    process (per distinct ``download_dir``) rather than on every rerun.

	    Args:
	        download_dir: Directory passed to ``nltk.download`` as ``download_dir``.

	    Returns:
	        The same ``download_dir`` value, so the cache has a concrete entry.
	    """
	    for resource in ('stopwords', 'punkt_tab'):
	        nltk.download(resource, download_dir=download_dir)
	    return download_dir


	# Download the stopwords and punkt resources
	_ensure_nltk_resources(nltk_data_dir)

	@st.cache_resource
	def _load_sentiment_model(model_path='src/model_final.keras'):
	    """Load the trained Keras model from *model_path*.

	    ``tf_hub.KerasLayer`` is registered as a custom object so a saved model
	    that contains that layer type can be deserialized. The result is cached
	    with ``st.cache_resource`` so the model is read from disk once per
	    server process instead of on every Streamlit rerun.

	    Args:
	        model_path: Path to the saved ``.keras`` model file.

	    Returns:
	        The loaded Keras model.
	    """
	    return tf.keras.models.load_model(
	        model_path,
	        custom_objects={'KerasLayer': tf_hub.KerasLayer},
	    )


	# Load the trained model
	model = _load_sentiment_model()
	# Define stopwords: NLTK's Indonesian list plus the extra token 'oh'.
	# Kept as a set (not a list) so the per-token membership test performed
	# during preprocessing is a hash lookup rather than a linear scan.
	stpwds_id = set(stopwords.words('indonesian'))
	stpwds_id.add('oh')

	# Define Stemming
	stemmer = StemmerFactory().create_stemmer()

	# Create A Function for Text Preprocessing

	def text_preprocessing(text):
	    """Normalize raw text into a cleaned, stemmed, space-joined string.

	    Steps, in order: lowercase; replace @mentions, #hashtags, literal
	    backslash-n sequences and URLs with a space; replace every character
	    that is not an ASCII letter, whitespace or an apostrophe with a space;
	    tokenize with ``word_tokenize``; drop tokens present in ``stpwds_id``;
	    stem each remaining token with ``stemmer``; join tokens with one space.

	    Args:
	        text: The raw input string.

	    Returns:
	        The processed text as a single string. It is empty when no token
	        survives the cleaning and stopword removal.
	    """
	    # Case folding
	    text = text.lower()

	    # Mention removal
	    text = re.sub(r"@[A-Za-z0-9_]+", " ", text)

	    # Hashtags removal
	    text = re.sub(r"#[A-Za-z0-9_]+", " ", text)

	    # Escaped-newline removal: the pattern matches the two-character sequence
	    # backslash + 'n', not a real newline character (those are left in place
	    # here as ordinary whitespace).
	    text = re.sub(r"\\n", " ", text)

	    # Whitespace removal
	    text = text.strip()

	    # URL removal. The dot after "www" is escaped so only a literal "www."
	    # prefix is treated as a URL (an unescaped dot matches any character).
	    # NOTE(review): this runs after mention/hashtag removal, so a URL that
	    # contains '@' or '#' is only partly removed; the order is kept as-is,
	    # presumably to match the preprocessing the model was trained with.
	    text = re.sub(r"http\S+", " ", text)
	    text = re.sub(r"www\.\S+", " ", text)

	    # Non-letter removal (emoticons, digits, symbols, non-ASCII letters, ...).
	    # Raw string avoids the invalid "\s" escape in a normal string literal.
	    text = re.sub(r"[^A-Za-z\s']", " ", text)

	    # Tokenization
	    tokens = word_tokenize(text)

	    # Stopwords removal, then stemming of the surviving tokens
	    stemmed_tokens = [
	        stemmer.stem(word) for word in tokens if word not in stpwds_id
	    ]

	    # Combining tokens
	    return ' '.join(stemmed_tokens)

	@st.cache_resource
	def _load_hub_layer():
	    """Build the TF-Hub text embedding layer used at inference time.

	    The layer takes string inputs (``dtype=tf.string``, scalar
	    ``input_shape=[]``) and is not trainable. It is cached with
	    ``st.cache_resource`` so it is constructed once per server process
	    instead of on every Streamlit rerun.

	    Returns:
	        The constructed ``tf_hub.KerasLayer``.
	    """
	    return tf_hub.KerasLayer(
	        "https://www.kaggle.com/models/google/nnlm/TensorFlow2/id-dim128-with-normalization/1",
	        input_shape=[],
	        dtype=tf.string,
	        trainable=False,
	    )


	hub_layer = _load_hub_layer()
	# Define the Streamlit interface
	st.title('Sentiment Analysis App')

	# Get user input
	user_input = st.text_area("Enter the text for sentiment analysis:")

	if st.button('Analyze'):
	    # Treat whitespace-only input like empty input so the model is never
	    # asked to classify text that contains nothing but spaces/newlines.
	    if user_input and user_input.strip():
	        # Preprocess the input text
	        processed_text = text_preprocessing(user_input)
	        # Embed the single processed string, then run the model on the result
	        data_inf = hub_layer([processed_text])
	        prediction = model.predict(data_inf)
	        # First output of the first (only) sample, thresholded at 0.5
	        sentiment = "Positive" if prediction[0][0] > 0.5 else "Negative"

	        # Display the result
	        st.write(f"Sentiment: {sentiment}")
	    else:
	        st.write("Please enter some text.")