fariedalfarizi committed on
Commit
e05726c
·
1 Parent(s): 4480a83

Fix structure detection: require keywords for the opening/closing rules to prevent false positives from blindly trusting confidence

Browse files
Files changed (1) hide show
  1. app/services/structure.py +18 -17
app/services/structure.py CHANGED
@@ -94,29 +94,13 @@ class StructureService:
94
 
95
  n = len(predictions)
96
 
97
- # Rule 1: 2 kalimat pertama cenderung opening
98
- for i in range(min(2, n)):
99
- if predictions[i]['confidence'] > 0.5:
100
- probs_opening = predictions[i].get('confidence', 0)
101
- if probs_opening > 0.8:
102
- predictions[i]['predicted_label'] = 'opening'
103
-
104
- # Rule 2: 2 kalimat terakhir cenderung closing
105
- for i in range(max(0, n-2), n):
106
- if predictions[i]['confidence'] > 0.5:
107
- probs_closing = predictions[i].get('confidence', 0)
108
- if probs_closing > 0.8:
109
- predictions[i]['predicted_label'] = 'closing'
110
-
111
- # Rule 3: Detect keywords dengan exact matching
112
- # CLOSING: Hanya kalimat dengan kata kunci penutup yang JELAS
113
  closing_keywords = [
114
  'demikian', 'terima kasih', 'terimakasih', 'sekian', 'akhir kata',
115
  'wassalamualaikum', 'wassalam', 'waalaikumsalam',
116
  'sampai jumpa', 'sampai bertemu', 'salam penutup'
117
  ]
118
 
119
- # OPENING: Kata kunci pembuka
120
  opening_keywords = [
121
  'selamat pagi', 'selamat siang', 'selamat sore', 'selamat malam',
122
  'assalamualaikum', 'assalamu alaikum',
@@ -124,6 +108,23 @@ class StructureService:
124
  'perkenalkan', 'yang terhormat'
125
  ]
126
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
  for pred in predictions:
128
  text_lower = pred['text'].lower()
129
 
 
94
 
95
  n = len(predictions)
96
 
97
+ # Define keywords first (will be used in rules)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
  closing_keywords = [
99
  'demikian', 'terima kasih', 'terimakasih', 'sekian', 'akhir kata',
100
  'wassalamualaikum', 'wassalam', 'waalaikumsalam',
101
  'sampai jumpa', 'sampai bertemu', 'salam penutup'
102
  ]
103
 
 
104
  opening_keywords = [
105
  'selamat pagi', 'selamat siang', 'selamat sore', 'selamat malam',
106
  'assalamualaikum', 'assalamu alaikum',
 
108
  'perkenalkan', 'yang terhormat'
109
  ]
110
 
111
+ # Rule 1: 2 kalimat pertama cenderung opening (HANYA jika ada opening keyword)
112
+ for i in range(min(2, n)):
113
+ text_lower = predictions[i]['text'].lower()
114
+ has_opening_kw = any(kw in text_lower for kw in opening_keywords)
115
+
116
+ if has_opening_kw and predictions[i]['confidence'] > 0.5:
117
+ predictions[i]['predicted_label'] = 'opening'
118
+
119
+ # Rule 2: 2 kalimat terakhir cenderung closing (HANYA jika ada closing keyword)
120
+ for i in range(max(0, n-2), n):
121
+ text_lower = predictions[i]['text'].lower()
122
+ has_closing_kw = any(kw in text_lower for kw in closing_keywords)
123
+
124
+ if has_closing_kw and predictions[i]['confidence'] > 0.5:
125
+ predictions[i]['predicted_label'] = 'closing'
126
+
127
+ # Rule 3: Keyword detection untuk semua kalimat (override model prediction)
128
  for pred in predictions:
129
  text_lower = pred['text'].lower()
130