Spaces:

chimithecat
/

Sentiment_Text_CekBansos

Runtime error

App Files Files Community

Sentiment_Text_CekBansos / app.py

chimithecat

Create app.py

1aecb51 verified 7 months ago

raw

history blame contribute delete

3.74 kB

	import gradio as gr
	import joblib
	import re
	import numpy as np
	from sastrawi.stemmer.stemmer_factory import StemmerFactory
	from sastrawi.stopwords.stopwords_factory import StopWordRemoverFactory
	import nltk

	# --- Ensure the NLTK 'punkt' tokenizer data is available ---
	# nltk.data.find() raises LookupError when a resource is not installed.
	# `nltk.downloader` has no `DownloadError` attribute, so naming it in the
	# except clause would raise AttributeError at the exact moment the download
	# is needed. Catch LookupError so the fallback download actually runs.
	try:
	    nltk.data.find('tokenizers/punkt')
	except LookupError:
	    nltk.download('punkt')

	# --- 1. Load Pre-trained Model and Vectorizer ---
	# Both artifacts are addressed by bare file names, so they are looked up
	# relative to the working directory the app is started from (expected to be
	# the directory that holds app.py). The classifier is loaded first, then the
	# TF-IDF vectorizer.
	model, vectorizer = map(
	    joblib.load,
	    ('best_svm_model.pkl', 'tfidf_vectorizer.pkl'),
	)

	# --- 2. Recreate the Preprocessing Functions ---
	# Build the stemmer and the stopword remover once at import time;
	# preprocess_text reuses these module-level instances on every call.
	# NOTE(review): PySastrawi documents its import paths as
	# `Sastrawi.Stemmer.StemmerFactory` and
	# `Sastrawi.StopWordRemover.StopWordRemoverFactory`. Confirm that the
	# lowercase `sastrawi.*` modules imported at the top of this file resolve in
	# the deployed environment.
	stemmer = StemmerFactory().create_stemmer()
	stopword_remover = StopWordRemoverFactory().create_stop_word_remover()

	# Informal token -> standard replacement, applied per whitespace-separated
	# token during preprocessing. A replacement may expand to several words.
	slang_dict = {
	    'yg': 'yang',
	    'ga': 'tidak',
	    'gak': 'tidak',
	    'udh': 'sudah',
	    'tdk': 'tidak',
	    'bgt': 'banget',
	    'dg': 'dengan',
	    'klo': 'kalau',
	    'kalo': 'kalau',
	    'mksh': 'terima kasih',
	    'terimakasih': 'terima kasih',
	    'bgs': 'bagus',
	    'ok': 'oke',
	    'blm': 'belum',
	    'sy': 'saya',
	    'sya': 'saya',
	    'ak': 'aku',
	    'utk': 'untuk',
	    'tpi': 'tapi',
	    'tp': 'tapi',
	    'jd': 'jadi',
	    'jg': 'juga',
	    'trs': 'terus',
	    'skrg': 'sekarang',
	    'bkin': 'bikin',
	    'dr': 'dari',
	    'dn': 'dan',
	    'pke': 'pakai',
	    'gausah': 'tidak usah',
	    'ngga': 'tidak',
	    'bkn': 'bukan',
	    'sdh': 'sudah',
	    'aja': 'saja',
	    'lg': 'lagi',
	    'mls': 'malas',
	    'gk': 'tidak',
	    'knp': 'kenapa',
	    'krn': 'karena',
	    'gmn': 'bagaimana',
	    'gimana': 'bagaimana',
	    'udah': 'sudah',
	    'sm': 'sama',
	    'gbs': 'tidak bisa',
	    'nggak': 'tidak',
	    'mantap': 'bagus',
	    'cek': 'periksa',
	    'bansos': 'bantuan sosial',
	}

	def preprocess_text(text):
	    """Normalize a raw review string for the TF-IDF vectorizer.

	    The steps run in this fixed order: remove digit runs, remove every
	    character that is neither a word character nor whitespace, collapse
	    whitespace runs to a single space and trim, lowercase, replace slang
	    tokens via ``slang_dict``, remove stopwords, and stem.

	    Args:
	        text: The raw input string.

	    Returns:
	        The cleaned, normalized, stemmed string.
	    """
	    # Cleaning passes, applied in sequence; each one is (pattern, replacement).
	    cleaning_passes = (
	        (r'\d+', ''),
	        (r'[^\w\s]', ''),
	        (r'\s+', ' '),
	    )
	    cleaned = text
	    for pattern, replacement in cleaning_passes:
	        cleaned = re.sub(pattern, replacement, cleaned)

	    # Lowercase only after cleaning, so the slang keys (all lowercase) match.
	    lowered = cleaned.strip().lower()

	    # Tokens without a slang entry pass through unchanged.
	    expanded = ' '.join(slang_dict.get(token, token) for token in lowered.split())

	    without_stopwords = stopword_remover.remove(expanded)
	    return stemmer.stem(without_stopwords)

	# --- 3. Prediction Function ---
	def predict_sentiment(sentence):
	    """Predict the sentiment label for a single review sentence.

	    Args:
	        sentence: Raw review text. ``None`` (which the Gradio Textbox can
	            forward when no value is submitted) is treated as an empty
	            string instead of crashing inside the regex cleaning step.

	    Returns:
	        The label predicted by the loaded model, capitalized.
	    """
	    # re.sub() in preprocess_text raises TypeError on None, so normalize first.
	    if sentence is None:
	        sentence = ""

	    processed_text = preprocess_text(sentence)

	    # Vectorize with the loaded TF-IDF vectorizer (one-row matrix).
	    text_vector = vectorizer.transform([processed_text])

	    # The model was trained with an additional 'thumbs_up_log_scaled' feature.
	    # Only a sentence is available here, so a neutral value (0) is assumed.
	    thumbs_up_feature = np.array([[0]])

	    # np.hstack operates on dense arrays, so the TF-IDF row is densified
	    # before the extra feature column is appended as the last column.
	    final_vector = np.hstack([text_vector.toarray(), thumbs_up_feature])

	    prediction = model.predict(final_vector)
	    return prediction[0].capitalize()

	# --- 4. Create Gradio Interface ---
	# One multi-line textbox in, plain text out; each example is a single-field
	# row matching the single input component.
	iface = gr.Interface(
	    fn=predict_sentiment,
	    title="Analisis Sentimen Ulasan Aplikasi",
	    description=(
	        "Analisis sentimen untuk ulasan aplikasi 'Cek Bansos' menggunakan model SVM. "
	        "Masukkan sebuah kalimat untuk memprediksi sentimennya (Positif, Negatif, atau Netral)."
	    ),
	    inputs=gr.Textbox(
	        placeholder="Masukkan kalimat ulasan dalam Bahasa Indonesia...",
	        lines=3,
	    ),
	    outputs="text",
	    examples=[
	        [sample]
	        for sample in (
	            "aplikasinya bagus sekali dan sangat membantu",
	            "tidak bisa daftar, gagal terus padahal sinyal bagus",
	            "aplikasi ini biasa saja, tidak ada yang spesial",
	        )
	    ],
	)

	# --- 5. Launch the App ---
	# Runs at module level: executing this file starts the Gradio server.
	iface.launch()