fariedalfarizi committed on
Commit
4480a83
·
1 Parent(s): 897c408

Fix structure false positive - prevent 'assalamualaikum' from being detected as closing due to the 'salam' substring

Browse files
Files changed (1) hide show
  1. app/services/structure.py +25 -8
app/services/structure.py CHANGED
@@ -108,19 +108,36 @@ class StructureService:
108
  if probs_closing > 0.8:
109
  predictions[i]['predicted_label'] = 'closing'
110
 
111
- # Rule 3: Detect keywords
112
- closing_keywords = ['demikian', 'terima kasih', 'sekian', 'akhir kata',
113
- 'wassalam', 'selamat pagi dan', 'sampai jumpa']
114
- opening_keywords = ['selamat pagi', 'selamat siang', 'assalamualaikum',
115
- 'hadirin', 'pertama-tama', 'izinkan saya']
 
 
 
 
 
 
 
 
 
 
116
 
117
  for pred in predictions:
118
  text_lower = pred['text'].lower()
119
 
120
- if any(kw in text_lower for kw in closing_keywords):
121
- pred['predicted_label'] = 'closing'
122
- elif any(kw in text_lower for kw in opening_keywords):
 
 
 
 
 
123
  pred['predicted_label'] = 'opening'
 
 
124
 
125
  return predictions
126
 
 
108
  if probs_closing > 0.8:
109
  predictions[i]['predicted_label'] = 'closing'
110
 
111
+ # Rule 3: Detect keywords dengan exact matching
112
+ # CLOSING: Hanya kalimat dengan kata kunci penutup yang JELAS
113
+ closing_keywords = [
114
+ 'demikian', 'terima kasih', 'terimakasih', 'sekian', 'akhir kata',
115
+ 'wassalamualaikum', 'wassalam', 'waalaikumsalam',
116
+ 'sampai jumpa', 'sampai bertemu', 'salam penutup'
117
+ ]
118
+
119
+ # OPENING: Kata kunci pembuka
120
+ opening_keywords = [
121
+ 'selamat pagi', 'selamat siang', 'selamat sore', 'selamat malam',
122
+ 'assalamualaikum', 'assalamu alaikum',
123
+ 'hadirin yang', 'bapak ibu', 'pertama-tama', 'izinkan saya',
124
+ 'perkenalkan', 'yang terhormat'
125
+ ]
126
 
127
  for pred in predictions:
128
  text_lower = pred['text'].lower()
129
 
130
+ # Check OPENING first (lebih prioritas untuk kalimat awal)
131
+ is_opening_keyword = any(kw in text_lower for kw in opening_keywords)
132
+
133
+ # Check CLOSING - tapi EXCLUDE jika ada opening keyword
134
+ # Ini prevent "assalamualaikum" salah dideteksi sebagai closing karena "salam"
135
+ is_closing_keyword = any(kw in text_lower for kw in closing_keywords)
136
+
137
+ if is_opening_keyword and not is_closing_keyword:
138
  pred['predicted_label'] = 'opening'
139
+ elif is_closing_keyword and not is_opening_keyword:
140
+ pred['predicted_label'] = 'closing'
141
 
142
  return predictions
143