chimithecat committed on
Commit
1aecb51
·
verified ·
1 Parent(s): 274ff27

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +98 -0
app.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import joblib
3
+ import re
4
+ import numpy as np
5
+ from sastrawi.stemmer.stemmer_factory import StemmerFactory
6
+ from sastrawi.stopwords.stopwords_factory import StopWordRemoverFactory
7
+ import nltk
8
+
9
# --- Ensure the NLTK 'punkt' tokenizer data is available ---
# nltk.data.find() raises LookupError when the resource is missing, so that
# is the exception to catch; the download is triggered only in that case.
try:
    nltk.data.find('tokenizers/punkt')
except LookupError:
    nltk.download('punkt')
14
+
15
# --- 1. Load the trained artifacts ---
# Both pickle files are opened via relative paths, so they are expected to
# sit in the same directory as app.py. The classifier is loaded first, then
# the TF-IDF vectorizer.
model, vectorizer = (
    joblib.load(artifact_path)
    for artifact_path in ('best_svm_model.pkl', 'tfidf_vectorizer.pkl')
)

# --- 2. Recreate the Preprocessing Functions ---
# Sastrawi stop-word remover and stemmer, built once at module load and
# reused by preprocess_text().
# NOTE(review): PySastrawi is usually imported as `Sastrawi.Stemmer...` /
# `Sastrawi.StopWordRemover...`; confirm the lowercase module paths imported
# at the top of this file resolve in the deployment environment.
stopword_remover = StopWordRemoverFactory().create_stop_word_remover()
stemmer = StemmerFactory().create_stemmer()
24
+
25
# Slang/abbreviation lookup carried over from the training notebook.
# Keys are single lowercase tokens; values are the replacement text (which
# may contain more than one word).
slang_dict = {
    'yg': 'yang',
    'ga': 'tidak',
    'gak': 'tidak',
    'udh': 'sudah',
    'tdk': 'tidak',
    'bgt': 'banget',
    'dg': 'dengan',
    'klo': 'kalau',
    'kalo': 'kalau',
    'mksh': 'terima kasih',
    'terimakasih': 'terima kasih',
    'bgs': 'bagus',
    'ok': 'oke',
    'blm': 'belum',
    'sy': 'saya',
    'sya': 'saya',
    'ak': 'aku',
    'utk': 'untuk',
    'tpi': 'tapi',
    'tp': 'tapi',
    'jd': 'jadi',
    'jg': 'juga',
    'trs': 'terus',
    'skrg': 'sekarang',
    'bkin': 'bikin',
    'dr': 'dari',
    'dn': 'dan',
    'pke': 'pakai',
    'gausah': 'tidak usah',
    'ngga': 'tidak',
    'bkn': 'bukan',
    'sdh': 'sudah',
    'aja': 'saja',
    'lg': 'lagi',
    'mls': 'malas',
    'gk': 'tidak',
    'knp': 'kenapa',
    'krn': 'karena',
    'gmn': 'bagaimana',
    'gimana': 'bagaimana',
    'udah': 'sudah',
    'sm': 'sama',
    'gbs': 'tidak bisa',
    'nggak': 'tidak',
    'mantap': 'bagus',
    'cek': 'periksa',
    'bansos': 'bantuan sosial',
}
38
+
39
def preprocess_text(text):
    """Normalize a raw review sentence for sentiment prediction.

    Steps, in order: remove digits, remove punctuation, collapse whitespace,
    lowercase, expand slang tokens via ``slang_dict``, remove stop words,
    and stem.

    Args:
        text: Raw input string.

    Returns:
        The cleaned, stop-word-filtered, stemmed string.
    """
    # Cleaning runs before case folding; the three substitutions are applied
    # in sequence: digits, then non-word/non-space characters, then runs of
    # whitespace.
    for pattern, replacement in ((r'\d+', ''), (r'[^\w\s]', ''), (r'\s+', ' ')):
        text = re.sub(pattern, replacement, text)
    text = text.strip().lower()

    # Swap every whitespace-separated token that has a slang entry.
    text = ' '.join(slang_dict.get(token, token) for token in text.split())

    # Stop-word removal first, stemming last.
    return stemmer.stem(stopword_remover.remove(text))
60
+
61
+ # --- 3. Prediction Function ---
62
def predict_sentiment(sentence):
    """Predict the sentiment label for one review sentence.

    The sentence is preprocessed, converted to a TF-IDF row by the loaded
    vectorizer, extended with one extra numeric column, and classified by
    the loaded model.

    Args:
        sentence: Raw review text. ``None``, empty, or whitespace-only input
            is rejected without calling the model.

    Returns:
        The predicted label with its first letter capitalized, or a short
        prompt asking for input when ``sentence`` is empty.
    """
    # Without this guard, None crashes inside preprocessing and an empty
    # string is classified from an all-zero vector, yielding a meaningless
    # label.
    if not sentence or not sentence.strip():
        return "Silakan masukkan kalimat ulasan terlebih dahulu."

    processed_text = preprocess_text(sentence)

    # One-row TF-IDF matrix for the cleaned text.
    text_vector = vectorizer.transform([processed_text])

    # The model was trained with an additional 'thumbs_up_log_scaled'
    # feature; with only a sentence available it is fixed at 0.
    # NOTE(review): confirm 0 is the right "no information" value given
    # whatever scaling was applied at training time.
    thumbs_up_feature = np.array([[0]])

    # Densify the TF-IDF row so np.hstack can append the extra column;
    # np.hstack works on dense arrays, not scipy sparse matrices.
    final_vector = np.hstack([text_vector.toarray(), thumbs_up_feature])

    prediction = model.predict(final_vector)

    # str() keeps this working even if the class labels are not strings.
    return str(prediction[0]).capitalize()
82
+
83
# --- 4. Create Gradio Interface ---
# One multi-line text box in, plain text out, wired to predict_sentiment,
# with three sample sentences supplied as examples.
iface = gr.Interface(
    title="Analisis Sentimen Ulasan Aplikasi",
    description="Analisis sentimen untuk ulasan aplikasi 'Cek Bansos' menggunakan model SVM. Masukkan sebuah kalimat untuk memprediksi sentimennya (Positif, Negatif, atau Netral).",
    fn=predict_sentiment,
    inputs=gr.Textbox(
        placeholder="Masukkan kalimat ulasan dalam Bahasa Indonesia...",
        lines=3,
    ),
    outputs="text",
    examples=[
        ["aplikasinya bagus sekali dan sangat membantu"],
        ["tidak bisa daftar, gagal terus padahal sinyal bagus"],
        ["aplikasi ini biasa saja, tidak ada yang spesial"],
    ],
)

# --- 5. Launch the App ---
# Runs at module level, so the server starts as soon as this file executes.
iface.launch()