Spaces:
Runtime error
Runtime error
import re
from pathlib import Path

import gradio as gr
import joblib
import nltk
import numpy as np
# PySastrawi's importable package is ``Sastrawi`` (capital S) with CamelCase
# submodules; the lowercase snake_case paths fail on case-sensitive filesystems.
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory
# --- Download NLTK data (only needs to run once) ---
# nltk.data.find() raises LookupError when the resource is not installed, so
# that is the exception to catch before falling back to a download.
# (nltk.downloader has no DownloadError attribute; referencing it in an
# ``except`` clause raises AttributeError exactly when the data is missing.)
try:
    nltk.data.find('tokenizers/punkt')
except LookupError:
    nltk.download('punkt')
# --- 1. Load Pre-trained Model and Vectorizer ---
# These files should be in the same directory as your app.py file.
# Resolve them relative to this file rather than the current working
# directory, so loading works regardless of where the app is launched from.
# NOTE: joblib artifacts are pickles; only load files you produced or trust.
_APP_DIR = Path(__file__).resolve().parent
model = joblib.load(_APP_DIR / 'best_svm_model.pkl')
vectorizer = joblib.load(_APP_DIR / 'tfidf_vectorizer.pkl')
# --- 2. Recreate the Preprocessing Functions ---
# Build the Sastrawi helpers once at import time; preprocess_text() reuses
# these module-level objects on every call.
_stemmer_factory = StemmerFactory()
stemmer = _stemmer_factory.create_stemmer()
_stopword_factory = StopWordRemoverFactory()
stopword_remover = _stopword_factory.create_stop_word_remover()
# Slang dictionary from your notebook.
# Maps an informal/abbreviated token to the replacement text used before
# stopword removal and stemming. Lookups are per whitespace-separated token.
slang_dict = {
    "yg": "yang",
    "ga": "tidak",
    "gak": "tidak",
    "udh": "sudah",
    "tdk": "tidak",
    "bgt": "banget",
    "dg": "dengan",
    "klo": "kalau",
    "kalo": "kalau",
    "mksh": "terima kasih",
    "terimakasih": "terima kasih",
    "bgs": "bagus",
    "ok": "oke",
    "blm": "belum",
    "sy": "saya",
    "sya": "saya",
    "ak": "aku",
    "utk": "untuk",
    "tpi": "tapi",
    "tp": "tapi",
    "jd": "jadi",
    "jg": "juga",
    "trs": "terus",
    "skrg": "sekarang",
    "bkin": "bikin",
    "dr": "dari",
    "dn": "dan",
    "pke": "pakai",
    "gausah": "tidak usah",
    "ngga": "tidak",
    "bkn": "bukan",
    "sdh": "sudah",
    "aja": "saja",
    "lg": "lagi",
    "mls": "malas",
    "gk": "tidak",
    "knp": "kenapa",
    "krn": "karena",
    "gmn": "bagaimana",
    "gimana": "bagaimana",
    "udah": "sudah",
    "sm": "sama",
    "gbs": "tidak bisa",
    "nggak": "tidak",
    "mantap": "bagus",
    "cek": "periksa",
    "bansos": "bantuan sosial",
}
def preprocess_text(text):
    """Normalize a raw review string into the form fed to the vectorizer.

    Steps, in order: strip digits, strip punctuation, collapse whitespace,
    lowercase, replace slang tokens via ``slang_dict``, remove stopwords with
    ``stopword_remover`` and stem with ``stemmer``.

    Args:
        text: The raw review text. Must be a ``str``; ``re.sub`` raises
            ``TypeError`` for ``None``.

    Returns:
        The cleaned, stopword-free, stemmed text.
    """
    # Cleaning passes. Digits and punctuation are deleted outright (not
    # replaced by a space), so words joined only by punctuation get merged.
    # NOTE(review): presumably this mirrors the training-time preprocessing;
    # keep both in sync if either changes.
    cleanup_passes = (
        (r'\d+', ''),
        (r'[^\w\s]', ''),
        (r'\s+', ' '),
    )
    for pattern, replacement in cleanup_passes:
        text = re.sub(pattern, replacement, text)

    # Trim, then case-fold so slang lookups match the lowercase dict keys.
    lowered = text.strip().lower()

    # Slang normalization, token by token; unknown tokens pass through.
    normalized = ' '.join(slang_dict.get(token, token) for token in lowered.split())

    # Stopword removal followed by stemming.
    without_stopwords = stopword_remover.remove(normalized)
    return stemmer.stem(without_stopwords)
# --- 3. Prediction Function ---
def predict_sentiment(sentence):
    """Predict the sentiment label for a single review sentence.

    Args:
        sentence: Raw review text from the UI. ``None`` or a blank string is
            accepted and short-circuits without calling the model.

    Returns:
        The predicted label converted to ``str`` and capitalized, or a short
        prompt (in Indonesian, matching the UI) when no text was provided.
    """
    # Guard: None would crash inside preprocess_text, and blank text would
    # yield a prediction on an all-zero vector, which carries no information.
    if sentence is None or not sentence.strip():
        return "Mohon masukkan kalimat ulasan terlebih dahulu."

    # Preprocess the input sentence
    processed_text = preprocess_text(sentence)

    # Vectorize the text using the loaded TF-IDF vectorizer
    text_vector = vectorizer.transform([processed_text])

    # The model was trained with an additional 'thumbs_up_log_scaled' feature.
    # Since we only have a sentence, a constant 0 is supplied for it.
    # NOTE(review): whether 0 is a "neutral" value depends on how the feature
    # was scaled at training time - confirm against the notebook.
    thumbs_up_feature = np.array([[0]])

    # Densify the single-row TF-IDF vector and append the extra column.
    # (np.hstack works on dense arrays, hence the .toarray() call.)
    final_vector = np.hstack([text_vector.toarray(), thumbs_up_feature])

    # Predict using the loaded model
    prediction = model.predict(final_vector)

    # str() keeps this working even if the model's class labels are not
    # strings (e.g. integer-encoded classes).
    return str(prediction[0]).capitalize()
# --- 4. Create Gradio Interface ---
# Input widget: a three-line textbox with an Indonesian placeholder.
review_input = gr.Textbox(
    lines=3,
    placeholder="Masukkan kalimat ulasan dalam Bahasa Indonesia...",
)

# Sample reviews offered as clickable examples in the UI (one input each).
example_reviews = [
    ["aplikasinya bagus sekali dan sangat membantu"],
    ["tidak bisa daftar, gagal terus padahal sinyal bagus"],
    ["aplikasi ini biasa saja, tidak ada yang spesial"],
]

# Wire predict_sentiment to a text-in / text-out interface.
iface = gr.Interface(
    fn=predict_sentiment,
    inputs=review_input,
    outputs="text",
    title="Analisis Sentimen Ulasan Aplikasi",
    description="Analisis sentimen untuk ulasan aplikasi 'Cek Bansos' menggunakan model SVM. Masukkan sebuah kalimat untuk memprediksi sentimennya (Positif, Negatif, atau Netral).",
    examples=example_reviews,
)

# --- 5. Launch the App ---
# Started unconditionally at module import/run time, as in the original script.
iface.launch()