Spaces:

chimithecat
/

Sentiment_Text_CekBansos

Runtime error

App Files Community

chimithecat committed on Jul 6, 2025

Commit

1aecb51

verified ·

1 Parent(s): 274ff27

Create app.py

Browse files

Files changed (1) hide show

app.py +98 -0

app.py ADDED Viewed

	@@ -0,0 +1,98 @@

+import gradio as gr
+import joblib
+import re
+import numpy as np
+from sastrawi.stemmer.stemmer_factory import StemmerFactory
+from sastrawi.stopwords.stopwords_factory import StopWordRemoverFactory
+import nltk
+# --- Download NLTK data (only needs to run once) ---
+try:
+    nltk.data.find('tokenizers/punkt')
+except nltk.downloader.DownloadError:
+    nltk.download('punkt')
+# --- 1. Load Pre-trained Model and Vectorizer ---
+# These files should be in the same directory as your app.py file.
+model = joblib.load('best_svm_model.pkl')
+vectorizer = joblib.load('tfidf_vectorizer.pkl')
+# --- 2. Recreate the Preprocessing Functions ---
+# Initialize Sastrawi components
+stemmer = StemmerFactory().create_stemmer()
+stopword_remover = StopWordRemoverFactory().create_stop_word_remover()
+# Slang dictionary from your notebook
+slang_dict = {
+    'yg': 'yang', 'ga': 'tidak', 'gak': 'tidak', 'udh': 'sudah', 'tdk': 'tidak',
+    'bgt': 'banget', 'dg': 'dengan', 'klo': 'kalau', 'kalo': 'kalau', 'mksh': 'terima kasih',
+    'terimakasih': 'terima kasih', 'bgs': 'bagus', 'ok': 'oke', 'blm': 'belum', 'sy': 'saya',
+    'sya': 'saya', 'ak': 'aku', 'utk': 'untuk', 'tpi': 'tapi', 'tp': 'tapi', 'jd': 'jadi',
+    'jg': 'juga', 'trs': 'terus', 'skrg': 'sekarang', 'bkin': 'bikin', 'dr': 'dari',
+    'dn': 'dan', 'pke': 'pakai', 'gausah': 'tidak usah', 'ngga': 'tidak', 'bkn': 'bukan',
+    'sdh': 'sudah', 'aja': 'saja', 'lg': 'lagi', 'mls': 'malas', 'gk': 'tidak',
+    'knp': 'kenapa', 'krn': 'karena', 'gmn': 'bagaimana', 'gimana': 'bagaimana',
+    'udah': 'sudah', 'sm': 'sama', 'gbs': 'tidak bisa', 'nggak': 'tidak', 'mantap': 'bagus',
+    'cek': 'periksa', 'bansos': 'bantuan sosial'
+}
+def preprocess_text(text):
+    # 1. Cleaning: numbers, punctuation, extra spaces
+    text = re.sub(r'\d+', '', text)
+    text = re.sub(r'[^\w\s]', '', text)
+    text = re.sub(r'\s+', ' ', text).strip()
+    # 2. Case folding
+    text = text.lower()
+    # 3. Slang normalization
+    words = text.split()
+    normalized_words = [slang_dict.get(word, word) for word in words]
+    text = ' '.join(normalized_words)
+    # 4. Stopword removal
+    text = stopword_remover.remove(text)
+    # 5. Stemming
+    text = stemmer.stem(text)
+    return text
+# --- 3. Prediction Function ---
+def predict_sentiment(sentence):
+    # Preprocess the input sentence
+    processed_text = preprocess_text(sentence)
+    # Vectorize the text using the loaded TF-IDF vectorizer
+    text_vector = vectorizer.transform([processed_text])
+    # The model was trained with an additional 'thumbs_up_log_scaled' feature.
+    # Since we only have a sentence, we'll assume a neutral value (0) for this feature.
+    thumbs_up_feature = np.array([[0]])
+    # Combine the TF-IDF vector with the thumbs_up feature
+    # Note: hstack is used for sparse matrices
+    final_vector = np.hstack([text_vector.toarray(), thumbs_up_feature])
+    # Predict using the loaded model
+    prediction = model.predict(final_vector)
+    # Return the result
+    return prediction[0].capitalize()
+# --- 4. Create Gradio Interface ---
+iface = gr.Interface(
+    fn=predict_sentiment,
+    inputs=gr.Textbox(lines=3, placeholder="Masukkan kalimat ulasan dalam Bahasa Indonesia..."),
+    outputs="text",
+    title="Analisis Sentimen Ulasan Aplikasi",
+    description="Analisis sentimen untuk ulasan aplikasi 'Cek Bansos' menggunakan model SVM. Masukkan sebuah kalimat untuk memprediksi sentimennya (Positif, Negatif, atau Netral).",
+    examples=[
+        ["aplikasinya bagus sekali dan sangat membantu"],
+        ["tidak bisa daftar, gagal terus padahal sinyal bagus"],
+        ["aplikasi ini biasa saja, tidak ada yang spesial"]
+    ]
+)
+# --- 5. Launch the App ---
+iface.launch()