Zepero committed on
Commit
ae5dd4c
·
verified ·
1 Parent(s): 835d26c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +182 -319
app.py CHANGED
@@ -1,319 +1,182 @@
1
- import json
2
- import random
3
- import pickle
4
- import numpy as np
5
- import re
6
- import requests
7
-
8
- from flask import Flask, request, jsonify
9
- from sentence_transformers import SentenceTransformer
10
- from sklearn.metrics.pairwise import cosine_similarity
11
-
12
- class ImprovedBPJSChatbot:
13
- def __init__(self):
14
- self.load_models()
15
- self.load_intents()
16
-
17
- def load_models(self):
18
- """Load semua model yang diperlukan"""
19
- print("Memuat model dan konfigurasi...")
20
-
21
- # Load konfigurasi
22
- with open('model_config.pkl', 'rb') as f:
23
- config = pickle.load(f)
24
-
25
- # Load sentence transformer
26
- self.st_model = SentenceTransformer(config['model_name'])
27
- self.preprocessing_enabled = config['preprocessing_enabled']
28
-
29
- # Load classifier
30
- with open('svm_model.pkl', 'rb') as f:
31
- self.clf = pickle.load(f)
32
-
33
- # Load label encoder
34
- with open('label_encoder.pkl', 'rb') as f:
35
- self.label_encoder = pickle.load(f)
36
-
37
- print("Semua model berhasil dimuat!")
38
-
39
- def load_intents(self):
40
- """Load data intents untuk responses"""
41
- with open('intents.json', 'r', encoding='utf-8') as f:
42
- self.intents_data = json.load(f)
43
-
44
- self.tag_responses = {intent['tag']: intent['responses'] for intent in self.intents_data['intents']}
45
-
46
- # Buat embeddings untuk semua patterns (untuk similarity fallback)
47
- self.pattern_embeddings = []
48
- self.pattern_tags = []
49
-
50
- for intent in self.intents_data['intents']:
51
- for pattern in intent['patterns']:
52
- processed_pattern = self.preprocess_text(pattern) if self.preprocessing_enabled else pattern
53
- embedding = self.st_model.encode(processed_pattern)
54
- self.pattern_embeddings.append(embedding)
55
- self.pattern_tags.append(intent['tag'])
56
-
57
- self.pattern_embeddings = np.array(self.pattern_embeddings)
58
-
59
- def preprocess_text(self, text):
60
- """Preprocessing teks yang sama dengan training"""
61
- text = text.lower()
62
-
63
- # Normalisasi singkatan
64
- text = re.sub(r'\bjkk\b', 'jaminan kecelakaan kerja', text)
65
- text = re.sub(r'\bjkm\b', 'jaminan kematian', text)
66
- text = re.sub(r'\bjht\b', 'jaminan hari tua', text)
67
- text = re.sub(r'\bjp\b', 'jaminan pensiun', text)
68
- text = re.sub(r'\bbpjs\b', 'bpjs ketenagakerjaan', text)
69
-
70
- # Hapus karakter khusus
71
- text = re.sub(r'[^\w\s]', ' ', text)
72
- text = re.sub(r'\s+', ' ', text).strip()
73
-
74
- return text
75
-
76
- def get_prediction_confidence(self, msg_embedding):
77
- """Dapatkan prediksi dengan confidence score"""
78
- # Prediksi probabilitas
79
- probabilities = self.clf.predict_proba(msg_embedding)[0]
80
- max_prob = np.max(probabilities)
81
- predicted_class = np.argmax(probabilities)
82
- predicted_tag = self.label_encoder.inverse_transform([predicted_class])[0]
83
-
84
- return predicted_tag, max_prob
85
-
86
- def similarity_fallback(self, msg_embedding, threshold=0.7):
87
- """Fallback menggunakan cosine similarity"""
88
- similarities = cosine_similarity(msg_embedding, self.pattern_embeddings)[0]
89
- max_similarity_idx = np.argmax(similarities)
90
- max_similarity = similarities[max_similarity_idx]
91
-
92
- if max_similarity >= threshold:
93
- return self.pattern_tags[max_similarity_idx], max_similarity
94
-
95
- return 'fallback', max_similarity
96
-
97
- def get_contextual_response(self, tag, user_message):
98
- """Pilih response yang paling kontekstual"""
99
- responses = self.tag_responses.get(tag, self.tag_responses['fallback'])
100
-
101
- # Jika hanya ada satu response, return langsung
102
- if len(responses) == 1:
103
- return responses[0]
104
-
105
- # Pilih response berdasarkan kata kunci dalam pesan user
106
- user_words = set(user_message.lower().split())
107
-
108
- best_response = responses[0]
109
- best_score = 0
110
-
111
- for response in responses:
112
- response_words = set(response.lower().split())
113
- # Hitung kesamaan kata
114
- common_words = user_words.intersection(response_words)
115
- score = len(common_words)
116
-
117
- if score > best_score:
118
- best_score = score
119
- best_response = response
120
-
121
- # Jika tidak ada yang cocok, pilih random
122
- if best_score == 0:
123
- return random.choice(responses)
124
-
125
- return best_response
126
-
127
- def generate_response(self, message):
128
- """Generate response dengan multiple strategies"""
129
- if not message.strip():
130
- return "Tolong kirim sebuah pesan."
131
-
132
- # Preprocessing
133
- processed_msg = self.preprocess_text(message) if self.preprocessing_enabled else message
134
- msg_embedding = self.st_model.encode(processed_msg).reshape(1, -1)
135
-
136
- # Strategy 1: SVM prediction dengan confidence
137
- predicted_tag, confidence = self.get_prediction_confidence(msg_embedding)
138
-
139
- # Strategy 2: Similarity fallback jika confidence rendah
140
- if confidence < 0.6: # Threshold bisa di-adjust
141
- fallback_tag, similarity = self.similarity_fallback(msg_embedding)
142
- if similarity > confidence:
143
- predicted_tag = fallback_tag
144
-
145
- # Strategy 3: Contextual response selection
146
- response = self.get_contextual_response(predicted_tag, message)
147
-
148
- # Logging untuk debugging
149
- print(f"Input: {message}")
150
- print(f"Processed: {processed_msg}")
151
- print(f"Predicted tag: {predicted_tag} (confidence: {confidence:.3f})")
152
-
153
- return response
154
-
155
- # Inisialisasi chatbot
156
- chatbot = ImprovedBPJSChatbot()
157
-
158
- # Flask app
159
- app = Flask(__name__)
160
-
161
- @app.route('/chat', methods=['POST'])
162
- def chat():
163
- try:
164
- msg = request.json.get("message", "").strip()
165
- response = chatbot.generate_response(msg)
166
- return jsonify({"reply": response})
167
- except Exception as e:
168
- print(f"Error: {e}")
169
- return jsonify({"reply": "Maaf, terjadi kesalahan sistem. Silakan coba lagi."})
170
-
171
- @app.route('/health', methods=['GET'])
172
- def health():
173
- return jsonify({"status": "healthy", "model": "BPJS Chatbot Improved"})
174
-
175
- if __name__ == '__main__':
176
- app.run(port=7860, debug=False) #ganti dari 5000 ke 7860
177
-
178
- # import json
179
- # import random
180
- # import pickle
181
- # import numpy as np
182
- # import re
183
- # import requests
184
-
185
- # from flask import Flask, request, jsonify
186
- # from sentence_transformers import SentenceTransformer
187
- # from sklearn.metrics.pairwise import cosine_similarity
188
-
189
- # DASHBOARD_UPDATE_URL = 'http://localhost:3000/api/update_status' # Ganti ke URL dashboard kamu
190
-
191
- # class ImprovedBPJSChatbot:
192
- # def __init__(self):
193
- # self.load_models()
194
- # self.load_intents()
195
-
196
- # def load_models(self):
197
- # print("Memuat model dan konfigurasi...")
198
- # with open('model_config.pkl', 'rb') as f:
199
- # config = pickle.load(f)
200
- # self.st_model = SentenceTransformer(config['model_name'])
201
- # self.preprocessing_enabled = config['preprocessing_enabled']
202
- # with open('svm_model.pkl', 'rb') as f:
203
- # self.clf = pickle.load(f)
204
- # with open('label_encoder.pkl', 'rb') as f:
205
- # self.label_encoder = pickle.load(f)
206
- # print("Semua model berhasil dimuat!")
207
-
208
- # def load_intents(self):
209
- # with open('intents.json', 'r', encoding='utf-8') as f:
210
- # self.intents_data = json.load(f)
211
- # self.tag_responses = {intent['tag']: intent['responses'] for intent in self.intents_data['intents']}
212
- # self.pattern_embeddings = []
213
- # self.pattern_tags = []
214
- # for intent in self.intents_data['intents']:
215
- # for pattern in intent['patterns']:
216
- # processed_pattern = self.preprocess_text(pattern) if self.preprocessing_enabled else pattern
217
- # embedding = self.st_model.encode(processed_pattern)
218
- # self.pattern_embeddings.append(embedding)
219
- # self.pattern_tags.append(intent['tag'])
220
- # self.pattern_embeddings = np.array(self.pattern_embeddings)
221
-
222
- # def preprocess_text(self, text):
223
- # text = text.lower()
224
- # text = re.sub(r'\bjkk\b', 'jaminan kecelakaan kerja', text)
225
- # text = re.sub(r'\bjkm\b', 'jaminan kematian', text)
226
- # text = re.sub(r'\bjht\b', 'jaminan hari tua', text)
227
- # text = re.sub(r'\bjp\b', 'jaminan pensiun', text)
228
- # text = re.sub(r'\bbpjs\b', 'bpjs ketenagakerjaan', text)
229
- # text = re.sub(r'[^\w\s]', ' ', text)
230
- # text = re.sub(r'\s+', ' ', text).strip()
231
- # return text
232
-
233
- # def get_prediction_confidence(self, msg_embedding):
234
- # probabilities = self.clf.predict_proba(msg_embedding)[0]
235
- # max_prob = np.max(probabilities)
236
- # predicted_class = np.argmax(probabilities)
237
- # predicted_tag = self.label_encoder.inverse_transform([predicted_class])[0]
238
- # return predicted_tag, max_prob
239
-
240
- # def similarity_fallback(self, msg_embedding, threshold=0.7):
241
- # similarities = cosine_similarity(msg_embedding, self.pattern_embeddings)[0]
242
- # max_similarity_idx = np.argmax(similarities)
243
- # max_similarity = similarities[max_similarity_idx]
244
- # if max_similarity >= threshold:
245
- # return self.pattern_tags[max_similarity_idx], max_similarity
246
- # return 'fallback', max_similarity
247
-
248
- # def get_contextual_response(self, tag, user_message):
249
- # responses = self.tag_responses.get(tag, self.tag_responses['fallback'])
250
- # if len(responses) == 1:
251
- # return responses[0]
252
- # user_words = set(user_message.lower().split())
253
- # best_response = responses[0]
254
- # best_score = 0
255
- # for response in responses:
256
- # response_words = set(response.lower().split())
257
- # score = len(user_words.intersection(response_words))
258
- # if score > best_score:
259
- # best_score = score
260
- # best_response = response
261
- # return best_response if best_score > 0 else random.choice(responses)
262
-
263
- # def trigger_dashboard_update(self, user_message):
264
- # """Kirim request ke dashboard untuk update status"""
265
- # try:
266
- # payload = {
267
- # "status": "selesai",
268
- # "message": user_message
269
- # }
270
- # response = requests.post(DASHBOARD_UPDATE_URL, json=payload)
271
- # print(f"[Dashboard] Update status berhasil: {response.status_code}")
272
- # except Exception as e:
273
- # print(f"[Dashboard] Gagal update status: {e}")
274
-
275
- # def generate_response(self, message):
276
- # if not message.strip():
277
- # return "Tolong kirim sebuah pesan."
278
-
279
- # processed_msg = self.preprocess_text(message) if self.preprocessing_enabled else message
280
- # msg_embedding = self.st_model.encode(processed_msg).reshape(1, -1)
281
- # predicted_tag, confidence = self.get_prediction_confidence(msg_embedding)
282
-
283
- # if confidence < 0.6:
284
- # fallback_tag, similarity = self.similarity_fallback(msg_embedding)
285
- # if similarity > confidence:
286
- # predicted_tag = fallback_tag
287
-
288
- # # 🔔 Trigger dashboard jika tag = pembayaran_selesai
289
- # if predicted_tag == "pembayaran_selesai":
290
- # self.trigger_dashboard_update(message)
291
-
292
- # response = self.get_contextual_response(predicted_tag, message)
293
-
294
- # print(f"Input: {message}")
295
- # print(f"Processed: {processed_msg}")
296
- # print(f"Predicted tag: {predicted_tag} (confidence: {confidence:.3f})")
297
-
298
- # return response
299
-
300
- # # Flask app setup
301
- # chatbot = ImprovedBPJSChatbot()
302
- # app = Flask(__name__)
303
-
304
- # @app.route('/chat', methods=['POST'])
305
- # def chat():
306
- # try:
307
- # msg = request.json.get("message", "").strip()
308
- # response = chatbot.generate_response(msg)
309
- # return jsonify({"reply": response})
310
- # except Exception as e:
311
- # print(f"Error: {e}")
312
- # return jsonify({"reply": "Maaf, terjadi kesalahan sistem. Silakan coba lagi."})
313
-
314
- # @app.route('/health', methods=['GET'])
315
- # def health():
316
- # return jsonify({"status": "healthy", "model": "BPJS Chatbot Improved"})
317
-
318
- # if __name__ == '__main__':
319
- # app.run(port=7860, debug=False)
 
1
+ import json
2
+ import random
3
+ import pickle
4
+ import numpy as np
5
+ import re
6
+ from flask import Flask, request, jsonify
7
+ from sentence_transformers import SentenceTransformer
8
+ from sklearn.metrics.pairwise import cosine_similarity
9
+ # import os
10
+
11
+ # os.environ['HF_HOME'] = '/tmp/huggingface'
12
+ # os.environ['TRANSFORMERS_CACHE'] = '/tmp/huggingface/transformers'
13
+ # os.environ['HF_DATASETS_CACHE'] = '/tmp/huggingface/datasets'
14
+ # os.environ['HF_METRICS_CACHE'] = '/tmp/huggingface/metrics'
15
+
16
+
17
class ImprovedBPJSChatbot:
    """Intent chatbot combining an SVM classifier with a similarity fallback.

    A message is embedded with a sentence-transformer model and classified by
    a pickled classifier. When the classifier's top probability is low, the
    message is instead matched against the embeddings of every training
    pattern from ``intents.json`` using cosine similarity. The reply is then
    picked from the responses registered for the resulting tag.
    """

    # Reply used when neither the requested tag nor a 'fallback' intent
    # provides any response text.
    DEFAULT_FALLBACK_RESPONSE = "Maaf, saya belum memahami pertanyaan Anda."

    # Abbreviation expansions, applied in this order on lower-cased text.
    _ABBREVIATIONS = (
        (re.compile(r'\bjkk\b'), 'jaminan kecelakaan kerja'),
        (re.compile(r'\bjkm\b'), 'jaminan kematian'),
        (re.compile(r'\bjht\b'), 'jaminan hari tua'),
        (re.compile(r'\bjp\b'), 'jaminan pensiun'),
        (re.compile(r'\bbpjs\b'), 'bpjs ketenagakerjaan'),
    )
    _NON_WORD = re.compile(r'[^\w\s]')
    _WHITESPACE = re.compile(r'\s+')
    _TOKEN = re.compile(r'\w+')

    def __init__(self):
        """Load the models first, then the intents (which need the models)."""
        self.load_models()
        self.load_intents()

    def load_models(self):
        """Load the config, sentence-transformer, classifier and label encoder.

        Reads ``model_config.pkl``, ``svm_model.pkl`` and ``label_encoder.pkl``
        from the current working directory.

        Raises:
            OSError: if one of the pickle files cannot be opened.
            KeyError: if the config lacks ``preprocessing_enabled``.
        """
        print("Memuat model dan konfigurasi...")

        # SECURITY: pickle.load executes arbitrary code embedded in the file.
        # Only ship pickle files that come from a trusted build.
        with open('model_config.pkl', 'rb') as f:
            config = pickle.load(f)

        # NOTE(review): the model id is hard-coded here and the config is only
        # consulted for 'preprocessing_enabled' - confirm this is intentional.
        self.st_model = SentenceTransformer("Dyna-99/local-st-model")
        self.preprocessing_enabled = config['preprocessing_enabled']

        with open('svm_model.pkl', 'rb') as f:
            self.clf = pickle.load(f)

        with open('label_encoder.pkl', 'rb') as f:
            self.label_encoder = pickle.load(f)

        print("Semua model berhasil dimuat!")

    def load_intents(self):
        """Load ``intents.json`` and pre-compute the pattern embeddings.

        Populates ``intents_data`` (the parsed file), ``tag_responses``
        (tag -> list of responses), ``pattern_tags`` (tag of each pattern)
        and ``pattern_embeddings`` (one row per pattern, same order as
        ``pattern_tags``).
        """
        with open('intents.json', 'r', encoding='utf-8') as f:
            self.intents_data = json.load(f)

        intents = self.intents_data['intents']
        self.tag_responses = {
            intent['tag']: intent['responses'] for intent in intents
        }

        patterns = []
        self.pattern_tags = []
        for intent in intents:
            for pattern in intent['patterns']:
                if self.preprocessing_enabled:
                    pattern = self.preprocess_text(pattern)
                patterns.append(pattern)
                self.pattern_tags.append(intent['tag'])

        if patterns:
            # One batched call instead of one model call per pattern.
            self.pattern_embeddings = np.asarray(self.st_model.encode(patterns))
        else:
            # Nothing to compare against; similarity_fallback handles this.
            self.pattern_embeddings = np.empty((0, 0))

    def preprocess_text(self, text):
        """Normalise ``text`` before it is embedded.

        Lower-cases the text, expands the known abbreviations, replaces every
        character that is neither a word character nor whitespace with a
        space, and collapses runs of whitespace.

        NOTE(review): the original comment states this must mirror the
        preprocessing used during training, so the rules are kept exactly as
        they were. In particular, text that already contains
        "bpjs ketenagakerjaan" becomes "bpjs ketenagakerjaan ketenagakerjaan".
        Change this only together with the training pipeline.

        Args:
            text: raw input string.

        Returns:
            The normalised string.
        """
        text = text.lower()
        for pattern, expansion in self._ABBREVIATIONS:
            text = pattern.sub(expansion, text)
        text = self._NON_WORD.sub(' ', text)
        return self._WHITESPACE.sub(' ', text).strip()

    def get_prediction_confidence(self, msg_embedding):
        """Classify an embedding and report the top class probability.

        Args:
            msg_embedding: array passed straight to ``clf.predict_proba``;
                only the first row of the result is used.

        Returns:
            ``(predicted_tag, max_prob)``.
        """
        probabilities = self.clf.predict_proba(msg_embedding)[0]
        predicted_class = np.argmax(probabilities)
        max_prob = probabilities[predicted_class]
        # NOTE(review): the argmax position is used as the encoded label;
        # this assumes the classifier's classes are 0..n-1 in order - confirm.
        predicted_tag = self.label_encoder.inverse_transform([predicted_class])[0]
        return predicted_tag, max_prob

    def similarity_fallback(self, msg_embedding, threshold=0.7):
        """Find the tag of the stored pattern most similar to the message.

        Args:
            msg_embedding: message embedding, compared against every row of
                ``pattern_embeddings`` with cosine similarity.
            threshold: minimum similarity for accepting the best pattern.

        Returns:
            ``(tag, best_similarity)``. ``tag`` is ``'fallback'`` when the
            best similarity is below ``threshold`` or no patterns exist.
        """
        if len(self.pattern_tags) == 0:
            return 'fallback', 0.0

        similarities = cosine_similarity(msg_embedding, self.pattern_embeddings)[0]
        best_idx = np.argmax(similarities)
        best_similarity = similarities[best_idx]

        if best_similarity >= threshold:
            return self.pattern_tags[best_idx], best_similarity
        return 'fallback', best_similarity

    def get_contextual_response(self, tag, user_message):
        """Pick the response for ``tag`` that shares most words with the message.

        Unknown tags, and tags without any response, use the responses of the
        ``'fallback'`` intent; if that is missing or empty as well,
        ``DEFAULT_FALLBACK_RESPONSE`` is returned. Words are compared
        case-insensitively with punctuation ignored, so "jht?" matches "JHT".
        When no response shares a word with the message, one is chosen at
        random; on a tie the earliest response wins.

        Args:
            tag: intent tag selected for the message.
            user_message: the raw user message.

        Returns:
            The chosen response string.
        """
        # Look the fallback up lazily: evaluating it as the default argument
        # of dict.get() raised KeyError whenever no 'fallback' intent existed.
        responses = self.tag_responses.get(tag)
        if not responses:
            responses = self.tag_responses.get('fallback')
        if not responses:
            return self.DEFAULT_FALLBACK_RESPONSE

        if len(responses) == 1:
            return responses[0]

        user_words = set(self._TOKEN.findall(user_message.lower()))

        best_response = None
        best_score = 0
        for response in responses:
            response_words = set(self._TOKEN.findall(response.lower()))
            score = len(user_words & response_words)
            if score > best_score:
                best_score = score
                best_response = response

        if best_response is None:
            return random.choice(responses)
        return best_response

    def generate_response(self, message, confidence_threshold=0.6):
        """Produce the chatbot reply for ``message``.

        Steps: classify the message embedding; if the top probability is below
        ``confidence_threshold``, consult ``similarity_fallback`` and take its
        tag when its similarity exceeds that probability; finally choose a
        response for the resulting tag.

        Args:
            message: the user message. ``None``, non-strings and blank
                strings produce the "please send a message" reply.
            confidence_threshold: classifier probability below which the
                similarity fallback is consulted.

        Returns:
            The reply string.
        """
        if not isinstance(message, str) or not message.strip():
            return "Tolong kirim sebuah pesan."

        processed_msg = self.preprocess_text(message) if self.preprocessing_enabled else message
        msg_embedding = self.st_model.encode(processed_msg).reshape(1, -1)

        predicted_tag, confidence = self.get_prediction_confidence(msg_embedding)

        if confidence < confidence_threshold:
            fallback_tag, similarity = self.similarity_fallback(msg_embedding)
            if similarity > confidence:
                predicted_tag = fallback_tag

        response = self.get_contextual_response(predicted_tag, message)

        # Debug output; 'confidence' is always the classifier probability,
        # even when the tag came from the similarity fallback.
        print(f"Input: {message}")
        print(f"Processed: {processed_msg}")
        print(f"Predicted tag: {predicted_tag} (confidence: {confidence:.3f})")

        return response
159
+
160
# Build the chatbot once at import time so every request reuses the
# already-loaded models.
chatbot = ImprovedBPJSChatbot()

# Flask application exposing the chatbot over HTTP.
app = Flask(__name__)
165
+
166
@app.route('/chat', methods=['POST'])
def chat():
    """Answer a chat message sent as JSON ``{"message": "..."}``.

    A missing or non-JSON body, a JSON body that is not an object, and a
    ``message`` that is not a string are all treated as an empty message, so
    the client receives the chatbot's "please send a message" reply instead
    of a generic system-error reply.

    Returns:
        A JSON response of the form ``{"reply": <text>}``.
    """
    try:
        # silent=True yields None instead of raising on a bad/missing body.
        payload = request.get_json(silent=True)
        raw = payload.get("message", "") if isinstance(payload, dict) else ""
        msg = raw.strip() if isinstance(raw, str) else ""
        response = chatbot.generate_response(msg)
        return jsonify({"reply": response})
    except Exception as e:
        # Top-level boundary: report the failure and keep the service alive.
        print(f"Error: {e}")
        return jsonify({"reply": "Maaf, terjadi kesalahan sistem. Silakan coba lagi."})
175
+
176
@app.route('/health', methods=['GET'])
def health():
    """Return a static JSON payload for health checks."""
    return jsonify({"status": "healthy", "model": "BPJS Chatbot Improved"})
179
+
180
if __name__ == '__main__':
    # Listen on all interfaces; the port was changed from 5000 to 7860.
    app.run(host='0.0.0.0', port=7860, debug=False)
182
+