Dyna-99 committed on
Commit
f24cd0e
·
verified ·
1 Parent(s): 27472a0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +173 -173
app.py CHANGED
@@ -1,174 +1,174 @@
1
- import json
2
- import random
3
- import pickle
4
- import numpy as np
5
- import re
6
- from flask import Flask, request, jsonify
7
- from sentence_transformers import SentenceTransformer
8
- from sklearn.metrics.pairwise import cosine_similarity
9
-
10
- class ImprovedBPJSChatbot:
11
- def __init__(self):
12
- self.load_models()
13
- self.load_intents()
14
-
15
- def load_models(self):
16
- """Load semua model yang diperlukan"""
17
- print("Memuat model dan konfigurasi...")
18
-
19
- # Load konfigurasi
20
- with open('model_config.pkl', 'rb') as f:
21
- config = pickle.load(f)
22
-
23
- # Load sentence transformer
24
- self.st_model = SentenceTransformer(config['model_name'])
25
- self.preprocessing_enabled = config['preprocessing_enabled']
26
-
27
- # Load classifier
28
- with open('svm_model.pkl', 'rb') as f:
29
- self.clf = pickle.load(f)
30
-
31
- # Load label encoder
32
- with open('label_encoder.pkl', 'rb') as f:
33
- self.label_encoder = pickle.load(f)
34
-
35
- print("Semua model berhasil dimuat!")
36
-
37
- def load_intents(self):
38
- """Load data intents untuk responses"""
39
- with open('intents.json', 'r', encoding='utf-8') as f:
40
- self.intents_data = json.load(f)
41
-
42
- self.tag_responses = {intent['tag']: intent['responses'] for intent in self.intents_data['intents']}
43
-
44
- # Buat embeddings untuk semua patterns (untuk similarity fallback)
45
- self.pattern_embeddings = []
46
- self.pattern_tags = []
47
-
48
- for intent in self.intents_data['intents']:
49
- for pattern in intent['patterns']:
50
- processed_pattern = self.preprocess_text(pattern) if self.preprocessing_enabled else pattern
51
- embedding = self.st_model.encode(processed_pattern)
52
- self.pattern_embeddings.append(embedding)
53
- self.pattern_tags.append(intent['tag'])
54
-
55
- self.pattern_embeddings = np.array(self.pattern_embeddings)
56
-
57
- def preprocess_text(self, text):
58
- """Preprocessing teks yang sama dengan training"""
59
- text = text.lower()
60
-
61
- # Normalisasi singkatan
62
- text = re.sub(r'\bjkk\b', 'jaminan kecelakaan kerja', text)
63
- text = re.sub(r'\bjkm\b', 'jaminan kematian', text)
64
- text = re.sub(r'\bjht\b', 'jaminan hari tua', text)
65
- text = re.sub(r'\bjp\b', 'jaminan pensiun', text)
66
- text = re.sub(r'\bbpjs\b', 'bpjs ketenagakerjaan', text)
67
-
68
- # Hapus karakter khusus
69
- text = re.sub(r'[^\w\s]', ' ', text)
70
- text = re.sub(r'\s+', ' ', text).strip()
71
-
72
- return text
73
-
74
- def get_prediction_confidence(self, msg_embedding):
75
- """Dapatkan prediksi dengan confidence score"""
76
- # Prediksi probabilitas
77
- probabilities = self.clf.predict_proba(msg_embedding)[0]
78
- max_prob = np.max(probabilities)
79
- predicted_class = np.argmax(probabilities)
80
- predicted_tag = self.label_encoder.inverse_transform([predicted_class])[0]
81
-
82
- return predicted_tag, max_prob
83
-
84
- def similarity_fallback(self, msg_embedding, threshold=0.7):
85
- """Fallback menggunakan cosine similarity"""
86
- similarities = cosine_similarity(msg_embedding, self.pattern_embeddings)[0]
87
- max_similarity_idx = np.argmax(similarities)
88
- max_similarity = similarities[max_similarity_idx]
89
-
90
- if max_similarity >= threshold:
91
- return self.pattern_tags[max_similarity_idx], max_similarity
92
-
93
- return 'fallback', max_similarity
94
-
95
- def get_contextual_response(self, tag, user_message):
96
- """Pilih response yang paling kontekstual"""
97
- responses = self.tag_responses.get(tag, self.tag_responses['fallback'])
98
-
99
- # Jika hanya ada satu response, return langsung
100
- if len(responses) == 1:
101
- return responses[0]
102
-
103
- # Pilih response berdasarkan kata kunci dalam pesan user
104
- user_words = set(user_message.lower().split())
105
-
106
- best_response = responses[0]
107
- best_score = 0
108
-
109
- for response in responses:
110
- response_words = set(response.lower().split())
111
- # Hitung kesamaan kata
112
- common_words = user_words.intersection(response_words)
113
- score = len(common_words)
114
-
115
- if score > best_score:
116
- best_score = score
117
- best_response = response
118
-
119
- # Jika tidak ada yang cocok, pilih random
120
- if best_score == 0:
121
- return random.choice(responses)
122
-
123
- return best_response
124
-
125
- def generate_response(self, message):
126
- """Generate response dengan multiple strategies"""
127
- if not message.strip():
128
- return "Tolong kirim sebuah pesan."
129
-
130
- # Preprocessing
131
- processed_msg = self.preprocess_text(message) if self.preprocessing_enabled else message
132
- msg_embedding = self.st_model.encode(processed_msg).reshape(1, -1)
133
-
134
- # Strategy 1: SVM prediction dengan confidence
135
- predicted_tag, confidence = self.get_prediction_confidence(msg_embedding)
136
-
137
- # Strategy 2: Similarity fallback jika confidence rendah
138
- if confidence < 0.6: # Threshold bisa di-adjust
139
- fallback_tag, similarity = self.similarity_fallback(msg_embedding)
140
- if similarity > confidence:
141
- predicted_tag = fallback_tag
142
-
143
- # Strategy 3: Contextual response selection
144
- response = self.get_contextual_response(predicted_tag, message)
145
-
146
- # Logging untuk debugging
147
- print(f"Input: {message}")
148
- print(f"Processed: {processed_msg}")
149
- print(f"Predicted tag: {predicted_tag} (confidence: {confidence:.3f})")
150
-
151
- return response
152
-
153
- # Inisialisasi chatbot
154
- chatbot = ImprovedBPJSChatbot()
155
-
156
- # Flask app
157
- app = Flask(__name__)
158
-
159
- @app.route('/chat', methods=['POST'])
160
- def chat():
161
- try:
162
- msg = request.json.get("message", "").strip()
163
- response = chatbot.generate_response(msg)
164
- return jsonify({"reply": response})
165
- except Exception as e:
166
- print(f"Error: {e}")
167
- return jsonify({"reply": "Maaf, terjadi kesalahan sistem. Silakan coba lagi."})
168
-
169
- @app.route('/health', methods=['GET'])
170
- def health():
171
- return jsonify({"status": "healthy", "model": "BPJS Chatbot Improved"})
172
-
173
- if __name__ == '__main__':
174
  app.run(port=7860, debug=False) #ganti dari 5000 ke 7860
 
1
+ import json
2
+ import random
3
+ import pickle
4
+ import numpy as np
5
+ import re
6
+ from flask import Flask, request, jsonify
7
+ from sentence_transformers import SentenceTransformer
8
+ from sklearn.metrics.pairwise import cosine_similarity
9
+
10
class ImprovedBPJSChatbot:
    """Intent chatbot that answers BPJS questions from a fixed set of intents.

    A message is embedded with a sentence-transformer model, classified by a
    pickled classifier, optionally re-tagged through a cosine-similarity
    lookup against the intent patterns when the classifier is unsure, and
    finally answered with one of the responses registered for the tag.
    """

    # Reply used when neither the chosen tag nor a 'fallback' intent
    # provides any response.
    DEFAULT_REPLY = "Maaf, saya belum memahami pertanyaan Anda."

    # Abbreviation expansions, applied in this exact order by preprocess_text.
    # NOTE(review): the 'bpjs' rule also fires on text that already reads
    # "bpjs ketenagakerjaan", duplicating the word. It is kept as is because
    # preprocess_text is documented as mirroring the training-time
    # preprocessing; changing it here alone would skew the embeddings.
    _ABBREVIATIONS = (
        (r'\bjkk\b', 'jaminan kecelakaan kerja'),
        (r'\bjkm\b', 'jaminan kematian'),
        (r'\bjht\b', 'jaminan hari tua'),
        (r'\bjp\b', 'jaminan pensiun'),
        (r'\bbpjs\b', 'bpjs ketenagakerjaan'),
    )

    def __init__(self):
        """Load every model artifact, then the intents and their embeddings."""
        self.load_models()
        self.load_intents()

    def load_models(self):
        """Load the config, sentence encoder, classifier and label encoder.

        Reads ``model_config.pkl``, ``svm_model.pkl`` and ``label_encoder.pkl``
        from the working directory and the encoder from ``./local_st_model``.
        """
        print("Memuat model dan konfigurasi...")

        # NOTE(security): pickle.load can execute arbitrary code contained in
        # the file. These artifacts must only ever come from a trusted source.
        with open('model_config.pkl', 'rb') as f:
            config = pickle.load(f)

        # The sentence transformer is read from a local directory.
        self.st_model = SentenceTransformer("./local_st_model")
        self.preprocessing_enabled = config['preprocessing_enabled']

        with open('svm_model.pkl', 'rb') as f:
            self.clf = pickle.load(f)

        with open('label_encoder.pkl', 'rb') as f:
            self.label_encoder = pickle.load(f)

        print("Semua model berhasil dimuat!")

    def load_intents(self):
        """Load ``intents.json`` and embed every pattern for the fallback.

        Populates ``intents_data``, ``tag_responses`` (tag -> responses),
        ``pattern_tags`` and ``pattern_embeddings`` (row i belongs to
        ``pattern_tags[i]``).
        """
        with open('intents.json', 'r', encoding='utf-8') as f:
            self.intents_data = json.load(f)

        intents = self.intents_data['intents']
        self.tag_responses = {intent['tag']: intent['responses'] for intent in intents}

        patterns = []
        self.pattern_tags = []
        for intent in intents:
            # An intent may legitimately have no patterns.
            for pattern in intent.get('patterns', []):
                if self.preprocessing_enabled:
                    pattern = self.preprocess_text(pattern)
                patterns.append(pattern)
                self.pattern_tags.append(intent['tag'])

        if patterns:
            # One batched call instead of one encoder call per pattern.
            self.pattern_embeddings = np.asarray(self.st_model.encode(patterns))
        else:
            self.pattern_embeddings = np.empty((0, 0))

    def preprocess_text(self, text):
        """Normalise *text*: lowercase, expand abbreviations, strip symbols.

        Every character that is neither a word character nor whitespace is
        replaced by a space, then runs of whitespace collapse to one space.
        """
        text = text.lower()

        for pattern, expansion in self._ABBREVIATIONS:
            text = re.sub(pattern, expansion, text)

        text = re.sub(r'[^\w\s]', ' ', text)
        return re.sub(r'\s+', ' ', text).strip()

    def get_prediction_confidence(self, msg_embedding):
        """Return ``(tag, probability)`` of the classifier's most likely class.

        *msg_embedding* is passed to ``clf.predict_proba`` unchanged; only the
        first row of the result is used.
        """
        probabilities = np.asarray(self.clf.predict_proba(msg_embedding)[0])
        best_idx = int(np.argmax(probabilities))
        max_prob = probabilities[best_idx]

        # predict_proba columns follow clf.classes_, which is not guaranteed
        # to equal 0..n-1, so map the column index back to the class label.
        classes = getattr(self.clf, 'classes_', None)
        predicted_class = best_idx if classes is None else classes[best_idx]
        predicted_tag = self.label_encoder.inverse_transform([predicted_class])[0]

        return predicted_tag, max_prob

    def similarity_fallback(self, msg_embedding, threshold=0.7):
        """Return ``(tag, similarity)`` of the closest known pattern.

        The tag is ``'fallback'`` when the best cosine similarity is below
        *threshold*, or when no pattern embeddings exist at all.
        """
        if len(self.pattern_embeddings) == 0:
            return 'fallback', 0.0

        similarities = cosine_similarity(msg_embedding, self.pattern_embeddings)[0]
        best_idx = np.argmax(similarities)
        max_similarity = similarities[best_idx]

        if max_similarity >= threshold:
            return self.pattern_tags[best_idx], max_similarity

        return 'fallback', max_similarity

    def get_contextual_response(self, tag, user_message):
        """Pick the response for *tag* sharing the most words with the message.

        Falls back to the ``'fallback'`` intent when *tag* has no responses,
        and to ``DEFAULT_REPLY`` when that is missing too. When no response
        shares a word with the message, one is chosen at random.
        """
        # Look 'fallback' up lazily so a missing fallback intent cannot break
        # the lookup of tags that do exist.
        responses = self.tag_responses.get(tag) or self.tag_responses.get('fallback')
        if not responses:
            return self.DEFAULT_REPLY

        if len(responses) == 1:
            return responses[0]

        user_words = set(user_message.lower().split())
        scores = [len(user_words & set(response.lower().split())) for response in responses]
        best_score = max(scores)

        if best_score == 0:
            return random.choice(responses)

        # index() returns the first best-scoring response, so ties resolve
        # to the earliest one.
        return responses[scores.index(best_score)]

    def generate_response(self, message):
        """Return the chatbot reply for *message*.

        A missing, non-string or blank message yields a prompt asking the
        user to send a message.
        """
        if not isinstance(message, str) or not message.strip():
            return "Tolong kirim sebuah pesan."

        processed_msg = self.preprocess_text(message) if self.preprocessing_enabled else message
        msg_embedding = self.st_model.encode(processed_msg).reshape(1, -1)

        # Strategy 1: classifier prediction with its confidence.
        predicted_tag, confidence = self.get_prediction_confidence(msg_embedding)

        # Strategy 2: similarity lookup when the classifier is unsure
        # (the 0.6 threshold is tunable).
        if confidence < 0.6:
            fallback_tag, similarity = self.similarity_fallback(msg_embedding)
            if similarity > confidence:
                predicted_tag = fallback_tag

        # Strategy 3: choose the response closest to the user's wording.
        response = self.get_contextual_response(predicted_tag, message)

        # Debug logging.
        print(f"Input: {message}")
        print(f"Processed: {processed_msg}")
        print(f"Predicted tag: {predicted_tag} (confidence: {confidence:.3f})")

        return response
152
+
153
# Build the chatbot once at import time; this loads every model artifact
# and embeds all intent patterns before the first request is served.
chatbot = ImprovedBPJSChatbot()

# Flask application that exposes the chatbot over HTTP.
app = Flask(__name__)
158
+
159
@app.route('/chat', methods=['POST'])
def chat():
    """Answer a chat message posted as JSON ``{"message": "..."}``.

    Always responds with JSON ``{"reply": "..."}``. A missing or non-JSON
    body, or a missing / non-string ``message``, is treated as an empty
    message so the user is asked to send one instead of seeing a system
    error.
    """
    try:
        # silent=True returns None instead of raising on a bad or absent body.
        payload = request.get_json(silent=True)
        msg = payload.get("message") if isinstance(payload, dict) else None
        if not isinstance(msg, str):
            msg = ""

        response = chatbot.generate_response(msg.strip())
        return jsonify({"reply": response})
    except Exception:
        # Top-level boundary: record the full traceback, keep the reply generic.
        app.logger.exception("Error while handling /chat request")
        return jsonify({"reply": "Maaf, terjadi kesalahan sistem. Silakan coba lagi."})
168
+
169
@app.route('/health', methods=['GET'])
def health():
    """Report a static liveness payload as JSON."""
    payload = {"status": "healthy", "model": "BPJS Chatbot Improved"}
    return jsonify(payload)
172
+
173
if __name__ == '__main__':
    import os

    # Port 7860 replaced the previous 5000. HOST and PORT may be overridden
    # through the environment (e.g. HOST=0.0.0.0 inside a container); when
    # HOST is unset Flask keeps its own default bind address.
    app.run(
        host=os.environ.get("HOST"),
        port=int(os.environ.get("PORT", "7860")),
        debug=False,
    )