rblueeyes committed on
Commit
6c83ef5
·
verified ·
1 Parent(s): eb3e6ed

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -21
app.py CHANGED
@@ -107,14 +107,6 @@ def sliding_window(kalimat, window=4):
107
  for i in range(len(kalimat) - window + 1)
108
  ]
109
 
110
- def event_completeness(text):
111
- text = text.lower()
112
- return sum(any(w in text for w in words) for words in EVENT_UNITS.values())
113
-
114
- def is_metaphor(text):
115
- text = text.lower()
116
- return any(a in text and b in text for a, b in METAPHOR_PAIRS)
117
-
118
  def rating_usia(kategori):
119
  if "sadisme" in kategori:
120
  return 21
@@ -125,7 +117,7 @@ def rating_usia(kategori):
125
  return 0
126
 
127
  # ======================
128
- # CORE ANALYSIS + DEBUG (JANGAN DIUBAH)
129
  # ======================
130
  def analyze_text(judul, isi):
131
  kalimat = split_kalimat(isi)
@@ -166,21 +158,28 @@ def analyze_text(judul, isi):
166
  return usia, ", ".join(sorted(detected))
167
 
168
  # ======================
169
- # FILTER ABUSIVE (TERPISAH, AMAN)
170
  # ======================
171
  def filter_abusive(isi):
 
 
 
 
172
  paragraphs = re.split(r'\n+', isi)
173
  output = []
174
 
175
- for para in paragraphs:
176
  para = para.strip()
177
  if not para:
178
  continue
179
 
 
180
  sentences = re.split(r'(?<=[.!?])\s+', para)
181
  notes = []
182
 
183
- for s in sentences:
 
 
184
  inputs = abusive_tokenizer(
185
  s,
186
  return_tensors="pt",
@@ -195,20 +194,30 @@ def filter_abusive(isi):
195
  probs = F.softmax(out.logits, dim=-1)
196
  pred = torch.argmax(probs, dim=-1).item()
197
 
198
- if id2label[pred] == "Abusif":
199
- notes.append(
200
- f'Kalimat "{s}" mengandung kalimat abusif, tidak baik diucapkan'
201
- )
 
 
 
202
 
203
  output.append(para)
204
  output.extend(notes)
205
 
206
- return "\n".join(output)
 
 
 
 
 
207
 
208
  # ======================
209
- # ROUTER /ANALYZE (MODE-BASED)
210
  # ======================
211
  def analyze_router(judul, isi, mode):
 
 
212
  if mode == "rating":
213
  usia, kategori = analyze_text(judul, isi)
214
  return {
@@ -217,8 +226,9 @@ def analyze_router(judul, isi, mode):
217
  }
218
 
219
  if mode == "abusive":
 
220
  return {
221
- "filtered_text": filter_abusive(isi)
222
  }
223
 
224
  return {
@@ -226,7 +236,7 @@ def analyze_router(judul, isi, mode):
226
  }
227
 
228
  # ======================
229
- # GRADIO API (ROUTE TETAP /analyze)
230
  # ======================
231
  demo = gr.Interface(
232
  fn=analyze_router,
@@ -248,4 +258,4 @@ if __name__ == "__main__":
248
  server_port=7860,
249
  ssr_mode=False,
250
  show_error=True
251
- )
 
107
  for i in range(len(kalimat) - window + 1)
108
  ]
109
 
 
 
 
 
 
 
 
 
110
  def rating_usia(kategori):
111
  if "sadisme" in kategori:
112
  return 21
 
117
  return 0
118
 
119
  # ======================
120
+ # CORE ANALYSIS + DEBUG (ASLI — JANGAN DIUBAH)
121
  # ======================
122
  def analyze_text(judul, isi):
123
  kalimat = split_kalimat(isi)
 
158
  return usia, ", ".join(sorted(detected))
159
 
160
  # ======================
161
+ # FILTER ABUSIVE (TERPISAH + LOG LENGKAP)
162
  # ======================
163
  def filter_abusive(isi):
164
+ log("\n" + "=" * 80)
165
+ log("[ABUSIVE] START")
166
+ log(f"[ABUSIVE] INPUT:\n{isi}")
167
+
168
  paragraphs = re.split(r'\n+', isi)
169
  output = []
170
 
171
+ for p_idx, para in enumerate(paragraphs):
172
  para = para.strip()
173
  if not para:
174
  continue
175
 
176
+ log(f"\n[ABUSIVE] PARAGRAPH {p_idx}: {para}")
177
  sentences = re.split(r'(?<=[.!?])\s+', para)
178
  notes = []
179
 
180
+ for s_idx, s in enumerate(sentences):
181
+ log(f"[ABUSIVE] SENTENCE {s_idx}: {s}")
182
+
183
  inputs = abusive_tokenizer(
184
  s,
185
  return_tensors="pt",
 
194
  probs = F.softmax(out.logits, dim=-1)
195
  pred = torch.argmax(probs, dim=-1).item()
196
 
197
+ label = id2label[pred]
198
+ log(f"[ABUSIVE] PRED={label}")
199
+
200
+ if label == "Abusif":
201
+ note = f'Kalimat "{s}" mengandung kalimat abusif, tidak baik diucapkan'
202
+ notes.append(note)
203
+ log(f"[ABUSIVE] ⚠️ {note}")
204
 
205
  output.append(para)
206
  output.extend(notes)
207
 
208
+ result = "\n".join(output)
209
+ log("\n[ABUSIVE] OUTPUT:")
210
+ log(result)
211
+ log("=" * 80)
212
+
213
+ return result
214
 
215
  # ======================
216
+ # ROUTER /ANALYZE (MODE BASED — TETAP)
217
  # ======================
218
  def analyze_router(judul, isi, mode):
219
+ log(f"\n[ROUTER] MODE = {mode}")
220
+
221
  if mode == "rating":
222
  usia, kategori = analyze_text(judul, isi)
223
  return {
 
226
  }
227
 
228
  if mode == "abusive":
229
+ filtered = filter_abusive(isi)
230
  return {
231
+ "filtered_text": filtered
232
  }
233
 
234
  return {
 
236
  }
237
 
238
  # ======================
239
+ # GRADIO API ROUTE TETAP /analyze
240
  # ======================
241
  demo = gr.Interface(
242
  fn=analyze_router,
 
258
  server_port=7860,
259
  ssr_mode=False,
260
  show_error=True
261
+ )