Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,174 +1,174 @@
|
|
| 1 |
-
import json
|
| 2 |
-
import random
|
| 3 |
-
import pickle
|
| 4 |
-
import numpy as np
|
| 5 |
-
import re
|
| 6 |
-
from flask import Flask, request, jsonify
|
| 7 |
-
from sentence_transformers import SentenceTransformer
|
| 8 |
-
from sklearn.metrics.pairwise import cosine_similarity
|
| 9 |
-
|
| 10 |
-
class ImprovedBPJSChatbot:
    """Intent-based chatbot for BPJS Ketenagakerjaan questions.

    A message is embedded with a sentence-transformer model and classified by
    a pickled classifier. When the classifier is unsure, the message is
    compared against the intent patterns by cosine similarity. The reply is
    one of the canned responses of the chosen intent.
    """

    # (regex, expansion) pairs applied in this order to lower-cased text.
    _ABBREVIATIONS = (
        (r'\bjkk\b', 'jaminan kecelakaan kerja'),
        (r'\bjkm\b', 'jaminan kematian'),
        (r'\bjht\b', 'jaminan hari tua'),
        (r'\bjp\b', 'jaminan pensiun'),
        (r'\bbpjs\b', 'bpjs ketenagakerjaan'),
    )

    def __init__(self):
        """Load the models first, then the intents (which need the encoder)."""
        self.load_models()
        self.load_intents()

    def load_models(self):
        """Load the config, sentence encoder, classifier and label encoder.

        Reads ``model_config.pkl``, ``svm_model.pkl`` and
        ``label_encoder.pkl`` relative to the current working directory.

        Raises:
            OSError: if one of the files cannot be opened.
            KeyError: if the config has no ``'preprocessing_enabled'`` entry.
        """
        print("Memuat model dan konfigurasi...")

        # SECURITY: pickle.load runs arbitrary code stored in the file. Only
        # load .pkl artifacts produced by a trusted training pipeline.
        with open('model_config.pkl', 'rb') as f:
            config = pickle.load(f)

        # NOTE(review): the argument of this call was truncated in the
        # captured source (the call was left unterminated). The path below is
        # the one used by the revised copy of this file -- confirm.
        self.st_model = SentenceTransformer("./local_st_model")
        self.preprocessing_enabled = config['preprocessing_enabled']

        with open('svm_model.pkl', 'rb') as f:
            self.clf = pickle.load(f)

        with open('label_encoder.pkl', 'rb') as f:
            self.label_encoder = pickle.load(f)

        print("Semua model berhasil dimuat!")

    def load_intents(self):
        """Load ``intents.json`` and pre-compute the pattern embeddings.

        Populates ``tag_responses`` (tag -> list of responses),
        ``pattern_tags`` (one tag per pattern) and ``pattern_embeddings``
        (one row per pattern, aligned with ``pattern_tags``).
        """
        with open('intents.json', 'r', encoding='utf-8') as f:
            self.intents_data = json.load(f)

        intents = self.intents_data['intents']
        self.tag_responses = {intent['tag']: intent['responses'] for intent in intents}

        # Collect every pattern (preprocessed like user messages) with its tag.
        patterns = []
        self.pattern_tags = []
        for intent in intents:
            for pattern in intent['patterns']:
                if self.preprocessing_enabled:
                    pattern = self.preprocess_text(pattern)
                patterns.append(pattern)
                self.pattern_tags.append(intent['tag'])

        if patterns:
            # One batched encode call instead of one model call per pattern.
            self.pattern_embeddings = np.asarray(self.st_model.encode(patterns))
        else:
            # Keep the array 2-D so later code never sees a shape-(0,) array.
            self.pattern_embeddings = np.empty((0, 0))

    def preprocess_text(self, text):
        """Normalise ``text`` before it is embedded.

        Lower-cases the text, expands the known abbreviations, replaces every
        character that is neither a word character nor whitespace with a
        space, and collapses whitespace runs.
        """
        text = text.lower()

        # NOTE(review): an input that already says "bpjs ketenagakerjaan"
        # comes out as "bpjs ketenagakerjaan ketenagakerjaan". Left unchanged
        # because the original docstring states this must match the
        # preprocessing used at training time -- confirm before changing.
        for pattern, expansion in self._ABBREVIATIONS:
            text = re.sub(pattern, expansion, text)

        text = re.sub(r'[^\w\s]', ' ', text)
        return re.sub(r'\s+', ' ', text).strip()

    def get_prediction_confidence(self, msg_embedding):
        """Return ``(tag, probability)`` for the classifier's top class.

        ``msg_embedding`` is handed to ``clf.predict_proba`` as is; only the
        first row of the result is used.
        """
        probabilities = self.clf.predict_proba(msg_embedding)[0]
        predicted_class = np.argmax(probabilities)
        # NOTE(review): the arg-max column index is passed to the label
        # encoder, which presumes the classifier's class order equals the
        # encoder's -- confirm against the training script.
        predicted_tag = self.label_encoder.inverse_transform([predicted_class])[0]
        return predicted_tag, probabilities[predicted_class]

    def similarity_fallback(self, msg_embedding, threshold=0.7):
        """Pick the intent whose pattern is most similar to the message.

        Returns:
            ``(tag, similarity)``. ``tag`` is ``'fallback'`` when the best
            cosine similarity is below ``threshold`` or when there are no
            patterns at all (similarity ``0.0`` in that case).
        """
        if not self.pattern_tags:
            return 'fallback', 0.0

        similarities = cosine_similarity(msg_embedding, self.pattern_embeddings)[0]
        best_idx = int(np.argmax(similarities))
        best_similarity = similarities[best_idx]

        if best_similarity >= threshold:
            return self.pattern_tags[best_idx], best_similarity
        return 'fallback', best_similarity

    def get_contextual_response(self, tag, user_message):
        """Choose the response of ``tag`` that shares most words with the message.

        Unknown tags and tags with an empty response list use the
        ``'fallback'`` responses. When no response shares a word with the
        message, one is picked at random. Ties go to the earliest response.

        Raises:
            KeyError: if the fallback is needed but no ``'fallback'`` tag exists.
        """
        # Look the fallback up lazily so a known tag works without one.
        responses = self.tag_responses.get(tag) or self.tag_responses['fallback']

        if len(responses) == 1:
            return responses[0]

        # Tokenise on word characters so "jht?" still matches "jht".
        user_words = set(re.findall(r'\w+', user_message.lower()))

        best_response = None
        best_score = 0
        for response in responses:
            response_words = set(re.findall(r'\w+', response.lower()))
            score = len(user_words & response_words)
            if score > best_score:
                best_score = score
                best_response = response

        if best_response is None:
            return random.choice(responses)
        return best_response

    def generate_response(self, message, confidence_threshold=0.6):
        """Produce the reply for one user message.

        Args:
            message: the raw user text. Anything that is not a non-blank
                string yields the "please send a message" prompt.
            confidence_threshold: classifier probability below which the
                cosine-similarity lookup is consulted.

        Returns:
            The reply text.
        """
        if not isinstance(message, str) or not message.strip():
            return "Tolong kirim sebuah pesan."

        processed_msg = self.preprocess_text(message) if self.preprocessing_enabled else message
        msg_embedding = self.st_model.encode(processed_msg).reshape(1, -1)

        # Strategy 1: classifier prediction with its probability.
        predicted_tag, confidence = self.get_prediction_confidence(msg_embedding)

        # Strategy 2: when the classifier is unsure, let the similarity
        # lookup override it if its score beats the classifier probability.
        if confidence < confidence_threshold:
            fallback_tag, similarity = self.similarity_fallback(msg_embedding)
            if similarity > confidence:
                predicted_tag = fallback_tag

        # Strategy 3: choose the best-matching response of the chosen intent.
        response = self.get_contextual_response(predicted_tag, message)

        # Debug logging.
        print(f"Input: {message}")
        print(f"Processed: {processed_msg}")
        print(f"Predicted tag: {predicted_tag} (confidence: {confidence:.3f})")

        return response
|
| 152 |
-
|
| 153 |
-
# Flask application object; the routes in this file are registered on it.
app = Flask(__name__)

# Build the chatbot once at import time; the /chat handler uses this instance.
chatbot = ImprovedBPJSChatbot()
|
| 158 |
-
|
| 159 |
-
@app.route('/chat', methods=['POST'])
def chat():
    """Handle ``POST /chat``.

    Reads the ``"message"`` field of the JSON body and answers with
    ``{"reply": <text>}``. A missing or invalid JSON body, or a message that
    is not a string, is treated as an empty message so the client gets the
    chatbot's "please send a message" prompt instead of a system error.
    """
    try:
        # silent=True returns None instead of raising on a bad/missing body.
        payload = request.get_json(silent=True)
        msg = payload.get("message", "") if isinstance(payload, dict) else ""
        if not isinstance(msg, str):
            msg = ""
        response = chatbot.generate_response(msg.strip())
        return jsonify({"reply": response})
    except Exception:
        # Top-level boundary: keep the traceback in the log and still give
        # the client a well-formed reply.
        app.logger.exception("Error while handling /chat request")
        return jsonify({"reply": "Maaf, terjadi kesalahan sistem. Silakan coba lagi."})
|
| 168 |
-
|
| 169 |
-
@app.route('/health', methods=['GET'])
def health():
    """Handle ``GET /health`` by returning a fixed status payload as JSON."""
    payload = {"status": "healthy", "model": "BPJS Chatbot Improved"}
    return jsonify(payload)
|
| 172 |
-
|
| 173 |
-
if __name__ == '__main__':
    # Port changed from 5000 to 7860.
    # Bind to all interfaces: Flask's default host (127.0.0.1) is not
    # reachable from outside a container.
    # NOTE(review): assumes this script is the container entry point of the
    # Space -- confirm; drop host= if it is only ever run locally.
    app.run(host="0.0.0.0", port=7860, debug=False)
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import random
|
| 3 |
+
import pickle
|
| 4 |
+
import numpy as np
|
| 5 |
+
import re
|
| 6 |
+
from flask import Flask, request, jsonify
|
| 7 |
+
from sentence_transformers import SentenceTransformer
|
| 8 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
| 9 |
+
|
| 10 |
+
class ImprovedBPJSChatbot:
    """Intent-based chatbot for BPJS Ketenagakerjaan questions.

    A message is embedded with a sentence-transformer model and classified by
    a pickled classifier. When the classifier is unsure, the message is
    compared against the intent patterns by cosine similarity. The reply is
    one of the canned responses of the chosen intent.
    """

    # (regex, expansion) pairs applied in this order to lower-cased text.
    _ABBREVIATIONS = (
        (r'\bjkk\b', 'jaminan kecelakaan kerja'),
        (r'\bjkm\b', 'jaminan kematian'),
        (r'\bjht\b', 'jaminan hari tua'),
        (r'\bjp\b', 'jaminan pensiun'),
        (r'\bbpjs\b', 'bpjs ketenagakerjaan'),
    )

    def __init__(self):
        """Load the models first, then the intents (which need the encoder)."""
        self.load_models()
        self.load_intents()

    def load_models(self):
        """Load the config, sentence encoder, classifier and label encoder.

        Reads ``model_config.pkl``, ``svm_model.pkl`` and
        ``label_encoder.pkl`` relative to the current working directory, and
        the sentence-transformer model from ``./local_st_model``.

        Raises:
            OSError: if one of the files cannot be opened.
            KeyError: if the config has no ``'preprocessing_enabled'`` entry.
        """
        print("Memuat model dan konfigurasi...")

        # SECURITY: pickle.load runs arbitrary code stored in the file. Only
        # load .pkl artifacts produced by a trusted training pipeline.
        with open('model_config.pkl', 'rb') as f:
            config = pickle.load(f)

        self.st_model = SentenceTransformer("./local_st_model")
        self.preprocessing_enabled = config['preprocessing_enabled']

        with open('svm_model.pkl', 'rb') as f:
            self.clf = pickle.load(f)

        with open('label_encoder.pkl', 'rb') as f:
            self.label_encoder = pickle.load(f)

        print("Semua model berhasil dimuat!")

    def load_intents(self):
        """Load ``intents.json`` and pre-compute the pattern embeddings.

        Populates ``tag_responses`` (tag -> list of responses),
        ``pattern_tags`` (one tag per pattern) and ``pattern_embeddings``
        (one row per pattern, aligned with ``pattern_tags``).
        """
        with open('intents.json', 'r', encoding='utf-8') as f:
            self.intents_data = json.load(f)

        intents = self.intents_data['intents']
        self.tag_responses = {intent['tag']: intent['responses'] for intent in intents}

        # Collect every pattern (preprocessed like user messages) with its tag.
        patterns = []
        self.pattern_tags = []
        for intent in intents:
            for pattern in intent['patterns']:
                if self.preprocessing_enabled:
                    pattern = self.preprocess_text(pattern)
                patterns.append(pattern)
                self.pattern_tags.append(intent['tag'])

        if patterns:
            # One batched encode call instead of one model call per pattern.
            self.pattern_embeddings = np.asarray(self.st_model.encode(patterns))
        else:
            # Keep the array 2-D so later code never sees a shape-(0,) array.
            self.pattern_embeddings = np.empty((0, 0))

    def preprocess_text(self, text):
        """Normalise ``text`` before it is embedded.

        Lower-cases the text, expands the known abbreviations, replaces every
        character that is neither a word character nor whitespace with a
        space, and collapses whitespace runs.
        """
        text = text.lower()

        # NOTE(review): an input that already says "bpjs ketenagakerjaan"
        # comes out as "bpjs ketenagakerjaan ketenagakerjaan". Left unchanged
        # because the original docstring states this must match the
        # preprocessing used at training time -- confirm before changing.
        for pattern, expansion in self._ABBREVIATIONS:
            text = re.sub(pattern, expansion, text)

        text = re.sub(r'[^\w\s]', ' ', text)
        return re.sub(r'\s+', ' ', text).strip()

    def get_prediction_confidence(self, msg_embedding):
        """Return ``(tag, probability)`` for the classifier's top class.

        ``msg_embedding`` is handed to ``clf.predict_proba`` as is; only the
        first row of the result is used.
        """
        probabilities = self.clf.predict_proba(msg_embedding)[0]
        predicted_class = np.argmax(probabilities)
        # NOTE(review): the arg-max column index is passed to the label
        # encoder, which presumes the classifier's class order equals the
        # encoder's -- confirm against the training script.
        predicted_tag = self.label_encoder.inverse_transform([predicted_class])[0]
        return predicted_tag, probabilities[predicted_class]

    def similarity_fallback(self, msg_embedding, threshold=0.7):
        """Pick the intent whose pattern is most similar to the message.

        Returns:
            ``(tag, similarity)``. ``tag`` is ``'fallback'`` when the best
            cosine similarity is below ``threshold`` or when there are no
            patterns at all (similarity ``0.0`` in that case).
        """
        if not self.pattern_tags:
            return 'fallback', 0.0

        similarities = cosine_similarity(msg_embedding, self.pattern_embeddings)[0]
        best_idx = int(np.argmax(similarities))
        best_similarity = similarities[best_idx]

        if best_similarity >= threshold:
            return self.pattern_tags[best_idx], best_similarity
        return 'fallback', best_similarity

    def get_contextual_response(self, tag, user_message):
        """Choose the response of ``tag`` that shares most words with the message.

        Unknown tags and tags with an empty response list use the
        ``'fallback'`` responses. When no response shares a word with the
        message, one is picked at random. Ties go to the earliest response.

        Raises:
            KeyError: if the fallback is needed but no ``'fallback'`` tag exists.
        """
        # Look the fallback up lazily so a known tag works without one.
        responses = self.tag_responses.get(tag) or self.tag_responses['fallback']

        if len(responses) == 1:
            return responses[0]

        # Tokenise on word characters so "jht?" still matches "jht".
        user_words = set(re.findall(r'\w+', user_message.lower()))

        best_response = None
        best_score = 0
        for response in responses:
            response_words = set(re.findall(r'\w+', response.lower()))
            score = len(user_words & response_words)
            if score > best_score:
                best_score = score
                best_response = response

        if best_response is None:
            return random.choice(responses)
        return best_response

    def generate_response(self, message, confidence_threshold=0.6):
        """Produce the reply for one user message.

        Args:
            message: the raw user text. Anything that is not a non-blank
                string yields the "please send a message" prompt.
            confidence_threshold: classifier probability below which the
                cosine-similarity lookup is consulted.

        Returns:
            The reply text.
        """
        if not isinstance(message, str) or not message.strip():
            return "Tolong kirim sebuah pesan."

        processed_msg = self.preprocess_text(message) if self.preprocessing_enabled else message
        msg_embedding = self.st_model.encode(processed_msg).reshape(1, -1)

        # Strategy 1: classifier prediction with its probability.
        predicted_tag, confidence = self.get_prediction_confidence(msg_embedding)

        # Strategy 2: when the classifier is unsure, let the similarity
        # lookup override it if its score beats the classifier probability.
        if confidence < confidence_threshold:
            fallback_tag, similarity = self.similarity_fallback(msg_embedding)
            if similarity > confidence:
                predicted_tag = fallback_tag

        # Strategy 3: choose the best-matching response of the chosen intent.
        response = self.get_contextual_response(predicted_tag, message)

        # Debug logging.
        print(f"Input: {message}")
        print(f"Processed: {processed_msg}")
        print(f"Predicted tag: {predicted_tag} (confidence: {confidence:.3f})")

        return response
|
| 152 |
+
|
| 153 |
+
# Flask application object; the routes in this file are registered on it.
app = Flask(__name__)

# Build the chatbot once at import time; the /chat handler uses this instance.
chatbot = ImprovedBPJSChatbot()
|
| 158 |
+
|
| 159 |
+
@app.route('/chat', methods=['POST'])
def chat():
    """Handle ``POST /chat``.

    Reads the ``"message"`` field of the JSON body and answers with
    ``{"reply": <text>}``. A missing or invalid JSON body, or a message that
    is not a string, is treated as an empty message so the client gets the
    chatbot's "please send a message" prompt instead of a system error.
    """
    try:
        # silent=True returns None instead of raising on a bad/missing body.
        payload = request.get_json(silent=True)
        msg = payload.get("message", "") if isinstance(payload, dict) else ""
        if not isinstance(msg, str):
            msg = ""
        response = chatbot.generate_response(msg.strip())
        return jsonify({"reply": response})
    except Exception:
        # Top-level boundary: keep the traceback in the log and still give
        # the client a well-formed reply.
        app.logger.exception("Error while handling /chat request")
        return jsonify({"reply": "Maaf, terjadi kesalahan sistem. Silakan coba lagi."})
|
| 168 |
+
|
| 169 |
+
@app.route('/health', methods=['GET'])
def health():
    """Handle ``GET /health`` by returning a fixed status payload as JSON."""
    payload = {"status": "healthy", "model": "BPJS Chatbot Improved"}
    return jsonify(payload)
|
| 172 |
+
|
| 173 |
+
if __name__ == '__main__':
    # Port changed from 5000 to 7860.
    # Bind to all interfaces: Flask's default host (127.0.0.1) is not
    # reachable from outside a container.
    # NOTE(review): assumes this script is the container entry point of the
    # Space -- confirm; drop host= if it is only ever run locally.
    app.run(host="0.0.0.0", port=7860, debug=False)
|