rblueeyes committed on
Commit
7c5f4f3
·
verified ·
1 Parent(s): 9fb96e1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +108 -58
app.py CHANGED
@@ -1,54 +1,76 @@
1
  import re
 
2
  import gradio as gr
3
  from transformers import pipeline
4
 
5
- # =========================
6
- # LOAD MODEL (ONCE)
7
- # =========================
# Build the zero-shot classification pipeline once at import time so every
# analysis call reuses the same loaded model.
classifier = pipeline(
    task="zero-shot-classification",
    model="joeddav/xlm-roberta-large-xnli",
    device=-1,  # per the transformers pipeline docs, -1 means "run on CPU"
    use_fast=False,  # request the non-fast tokenizer implementation
)
14
 
15
- # =========================
16
- # SEMANTIC EVENT DEFINITIONS
17
- # =========================
# Category name -> candidate descriptions handed to the zero-shot classifier.
# The descriptions are runtime strings (Indonesian) and must stay verbatim.
SEMANTIC_EVENT = {
    "sadisme": [
        "kekerasan ekstrem yang menyebabkan penderitaan fisik parah pada manusia",
        "penyiksaan tubuh manusia hingga kematian",
        "pengorbanan manusia secara kejam",
    ],
    "kekerasan_fisik": [
        "tindakan fisik yang melukai tubuh manusia",
        "serangan fisik yang menyebabkan cedera atau kematian",
    ],
    "kekerasan_verbal": [
        "ancaman serius terhadap seseorang",
        "penghinaan keras dan intimidasi verbal",
    ],
    "seksual": [
        "aktivitas seksual eksplisit antara manusia",
        "tindakan seksual yang digambarkan secara jelas",
    ],
    "perjudian": [
        "aktivitas perjudian dengan taruhan uang atau barang",
    ],
    "narkoba": [
        "penggunaan atau peredaran narkotika atau obat terlarang",
    ],
}
43
 
44
- # =========================
45
- # UTILITIES
46
- # =========================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
def split_kalimat(text):
    """Split ``text`` into sentences and keep only the long ones.

    The text is cut at whitespace that follows ``.``, ``!`` or ``?``. Each
    piece is stripped, and pieces shorter than 30 characters are discarded.

    Returns:
        A list of the remaining sentences, in their original order.
    """
    sentences = []
    for piece in re.split(r'(?<=[.!?])\s+', text):
        cleaned = piece.strip()
        if len(cleaned) < 30:
            continue
        sentences.append(cleaned)
    return sentences
53
 
54
  def sliding_window(kalimat, window=4):
@@ -57,62 +79,90 @@ def sliding_window(kalimat, window=4):
57
  for i in range(len(kalimat) - window + 1)
58
  ]
59
 
 
 
 
 
 
 
 
 
 
 
 
 
def rating_usia(kategori):
    """Map a collection of detected categories to a minimum age rating.

    The tiers are checked from strictest to mildest and the first tier with
    a matching label wins: 21 for "sadisme"; 17 for "seksual", "perjudian"
    or "narkoba"; 13 for "kekerasan_fisik" or "kekerasan_verbal"; otherwise 0.
    """
    tiers = (
        (21, ("sadisme",)),
        (17, ("seksual", "perjudian", "narkoba")),
        (13, ("kekerasan_fisik", "kekerasan_verbal")),
    )
    for age, labels in tiers:
        for label in labels:
            if label in kategori:
                return age
    return 0
68
 
69
- # =========================
70
- # CORE ANALYSIS FUNCTION
71
- # =========================
def analyze_story(judul, isi):
    """Detect sensitive content categories in a story and derive an age rating.

    Args:
        judul: Story title. Accepted for interface compatibility; it is not
            used by the analysis.
        isi: Story body. Anything that is not a string of at least 100
            characters is treated as "nothing to analyse".

    Returns:
        A dict with ``"categories"`` (sorted list of detected category names)
        and ``"age_rating"`` (the value of ``rating_usia`` for them).
    """
    # Guard first: None, non-text values and very short bodies yield the
    # neutral result instead of raising from len() or the regex split.
    if not isinstance(isi, str) or len(isi) < 100:
        return {
            "categories": [],
            "age_rating": 0
        }

    kalimat = split_kalimat(isi)
    windows = sliding_window(kalimat)

    detected = set()

    for window in windows:
        for kategori, desc in SEMANTIC_EVENT.items():
            # A category only needs to fire once; skip further model calls
            # for it, since inference is the expensive step here.
            if kategori in detected:
                continue
            result = classifier(
                window,
                desc,
                hypothesis_template="Teks ini mengandung {}."
            )
            if max(result["scores"]) >= 0.80:
                detected.add(kategori)

        # Every category already found: later windows cannot change the result.
        if len(detected) == len(SEMANTIC_EVENT):
            break

    return {
        "categories": sorted(detected),
        "age_rating": rating_usia(detected)
    }
98
 
99
- # =========================
100
- # GRADIO INTERFACE
101
- # =========================
def api_wrapper(judul, isi):
    """Gradio callback: forward the title and body to ``analyze_story``."""
    return analyze_story(judul, isi)
 
 
 
 
 
 
 
 
 
 
 
105
 
# Gradio UI: a title box and a story box go in, the analysis dict comes out
# rendered as JSON.
demo = gr.Interface(
    fn=api_wrapper,
    inputs=[
        gr.Textbox(label="Judul", lines=1),
        gr.Textbox(label="Isi Cerita", lines=12),
    ],
    outputs=gr.JSON(label="Hasil Analisis"),
    title="READEAR — Semantic Content Rating",
    description="Analisis konten cerita berbasis semantic (zero-shot NLP)",
)
116
 
 
 
 
if __name__ == "__main__":
    # Only start the Gradio server when the file is executed as a script;
    # no launch arguments are passed, so Gradio's own defaults apply.
    demo.launch()
 
1
  import re
2
+ from flask import Flask, request, jsonify
3
  import gradio as gr
4
  from transformers import pipeline
5
 
6
+ # ======================
7
+ # MODEL
8
+ # ======================
# Build the zero-shot classification pipeline once at import time so every
# analysis call reuses the same loaded model.
classifier = pipeline(
    task="zero-shot-classification",
    model="joeddav/xlm-roberta-large-xnli",
    framework="pt",  # "pt" is the transformers identifier for PyTorch
    use_fast=False,  # request the non-fast tokenizer implementation
)
15
 
16
+ # ======================
17
+ # CONFIG
18
+ # ======================
# Category name -> candidate descriptions handed to the zero-shot classifier.
# The descriptions are runtime strings (Indonesian) and must stay verbatim.
SEMANTIC_EVENT = {
    "sadisme": [
        "kekerasan ekstrem yang menyebabkan penderitaan fisik parah pada manusia",
        "tubuh manusia dipotong atau dimutilasi",
        "orang tua membunuh anaknya sendiri",
        "manusia dikorbankan dengan cara kejam",
    ],
    "kekerasan fisik": [
        "tindakan fisik yang melukai tubuh manusia",
        "kekerasan yang menyebabkan cedera atau kematian",
    ],
    "kekerasan verbal": [
        "ancaman serius atau penghinaan keras",
    ],
    "seksual": [
        "aktivitas seksual eksplisit",
    ],
    "perjudian": [
        "aktivitas perjudian dengan taruhan",
    ],
    "narkoba": [
        "penggunaan atau peredaran narkoba",
    ],
}
43
 
# Keyword groups used by event_completeness(): one list of lowercase
# Indonesian keywords per group (aktor, korban, aksi, dampak).
EVENT_UNITS = {
    "aktor": ["ayah", "ibu", "orang", "dia", "mereka"],
    "korban": ["anak", "putri", "manusia"],
    "aksi": ["membunuh", "menusuk", "memukul", "menyiksa", "memotong"],
    "dampak": ["mati", "meninggal", "darah", "luka"],
}

# Word pairs checked by is_metaphor(): a text containing both members of any
# pair is reported as metaphorical.
METAPHOR_PAIRS = [
    ("menusuk", "hati"),
    ("terbunuh", "rindu"),
    ("terluka", "perasaan"),
]

# Minimum classifier score required per category before it can be reported.
THRESHOLD = {
    "sadisme": 0.48,
    "kekerasan fisik": 0.50,
    "kekerasan verbal": 0.55,
    "seksual": 0.58,
    "perjudian": 0.60,
    "narkoba": 0.60,
}
65
+
66
+ # ======================
67
+ # UTIL
68
+ # ======================
def split_kalimat(text):
    """Break ``text`` into sentences, discarding the short fragments.

    Splitting happens at whitespace preceded by ``.``, ``!`` or ``?``. Every
    fragment is stripped, and only fragments of 30 or more characters are
    returned, in their original order.
    """
    fragments = (part.strip() for part in re.split(r'(?<=[.!?])\s+', text))
    kept = []
    for fragment in fragments:
        if len(fragment) >= 30:
            kept.append(fragment)
    return kept
75
 
76
  def sliding_window(kalimat, window=4):
 
79
  for i in range(len(kalimat) - window + 1)
80
  ]
81
 
def event_completeness(text):
    """Count how many ``EVENT_UNITS`` groups are represented in ``text``.

    The text is lowercased, and a group counts once if at least one of its
    keywords occurs as a substring. The result therefore ranges from 0 to
    the number of groups in ``EVENT_UNITS``.
    """
    lowered = text.lower()
    return sum(
        1
        for keywords in EVENT_UNITS.values()
        if any(keyword in lowered for keyword in keywords)
    )
89
+
def is_metaphor(text):
    """Return True when ``text`` contains both words of any ``METAPHOR_PAIRS`` entry.

    Matching is a case-insensitive substring test on the lowercased text.
    """
    lowered = text.lower()
    for first, second in METAPHOR_PAIRS:
        if first in lowered and second in lowered:
            return True
    return False
93
+
def rating_usia(kategori):
    """Translate detected categories into a minimum age rating.

    The strictest matching tier wins: 21 for "sadisme"; 17 for "seksual",
    "perjudian" or "narkoba"; 13 for "kekerasan fisik" or "kekerasan verbal";
    0 when nothing matches.
    """
    tiers = (
        (21, ("sadisme",)),
        (17, ("seksual", "perjudian", "narkoba")),
        (13, ("kekerasan fisik", "kekerasan verbal")),
    )
    return next(
        (
            age
            for age, labels in tiers
            if any(label in kategori for label in labels)
        ),
        0,
    )
102
 
103
+ # ======================
104
+ # CORE ANALYSIS
105
+ # ======================
def analyze_text(judul, isi):
    """Detect sensitive content categories in a story and derive an age rating.

    Each text window is scored against every category in ``SEMANTIC_EVENT``.
    A category is reported when its best score reaches the category's
    ``THRESHOLD``. Scores below 0.60 must additionally mention at least two
    ``EVENT_UNITS`` groups and must not look like a known metaphor.

    Args:
        judul: Story title. Accepted for interface compatibility; it is not
            used by the analysis.
        isi: Story body. ``None``, non-string values and blank strings are
            treated as "nothing to analyse".

    Returns:
        A dict with ``"rating_usia"`` (minimum age) and ``"kategori"``
        (sorted list of detected category names).
    """
    # Guard first: the API layer can hand over null / non-text values, which
    # would otherwise raise inside the regex-based sentence split.
    if not isinstance(isi, str) or not isi.strip():
        return {
            "rating_usia": 0,
            "kategori": []
        }

    kalimat = split_kalimat(isi)
    windows = sliding_window(kalimat)

    # sliding_window() iterates range(len(kalimat) - window + 1), so a text
    # with fewer sentences than the window size produces no windows at all
    # and would be rated 0 without ever being classified. Fall back to one
    # window holding every sentence.
    # NOTE(review): assumes windows are space-joined sentence strings - confirm
    # against sliding_window().
    if not windows and kalimat:
        windows = [" ".join(kalimat)]

    detected = set()

    for w in windows:
        for kategori, desc in SEMANTIC_EVENT.items():
            # A category only needs to fire once; skip further model calls
            # for it, since inference is the expensive step here.
            if kategori in detected:
                continue

            res = classifier(w, desc, hypothesis_template="Teks ini menggambarkan {}.")
            score = max(res["scores"])

            if score < THRESHOLD[kategori]:
                continue

            # Low-confidence hits need corroboration from the keyword
            # heuristics before they are accepted.
            if score < 0.60 and (event_completeness(w) < 2 or is_metaphor(w)):
                continue

            detected.add(kategori)

        # Every category already found: later windows cannot change the result.
        if len(detected) == len(SEMANTIC_EVENT):
            break

    return {
        "rating_usia": rating_usia(detected),
        "kategori": sorted(detected)
    }
132
 
133
+ # ======================
134
+ # FLASK API (FlutterFlow)
135
+ # ======================
app = Flask(__name__)


@app.route("/analyze", methods=["POST"])
def analyze_api():
    """Handle POST /analyze: rate the story sent as a JSON object.

    Expects a JSON object with optional string fields ``"judul"`` and
    ``"isi"``; missing or null fields are treated as empty strings.

    Returns:
        The ``analyze_text`` result as JSON, or a 400 response with an
        ``"error"`` message when the body is not a JSON object or a field is
        not a string.
    """
    # silent=True yields None instead of raising when the body is missing,
    # malformed, or not sent as JSON, so bad requests get a clean 400.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    # "or ''" also covers explicit JSON nulls, which .get(key, "") lets through.
    judul = data.get("judul") or ""
    isi = data.get("isi") or ""
    if not isinstance(judul, str) or not isinstance(isi, str):
        return jsonify({"error": "'judul' and 'isi' must be strings"}), 400

    result = analyze_text(judul, isi)
    return jsonify(result)
143
+
144
+ # ======================
145
+ # GRADIO DEMO (JANGAN DIHILANGKAN)
146
+ # ======================
def gradio_analyze(judul, isi):
    """Gradio callback: run ``analyze_text`` and unpack it for the two outputs.

    Returns:
        A ``(rating, categories)`` tuple where ``categories`` is the detected
        category names joined with ``", "``.
    """
    hasil = analyze_text(judul, isi)
    rating = hasil["rating_usia"]
    kategori_text = ", ".join(hasil["kategori"])
    return rating, kategori_text
150
 
# Gradio UI: a title box and a story box go in; the age rating and the
# comma-separated category list come out.
demo = gr.Interface(
    fn=gradio_analyze,
    inputs=[
        gr.Textbox(label="Judul"),
        gr.Textbox(label="Isi Cerita", lines=10),
    ],
    outputs=[
        gr.Number(label="Rating Usia"),
        gr.Textbox(label="Kategori"),
    ],
    title="Semantic Content Rating Demo",
)
163
 
164
+ # ======================
165
+ # ENTRY POINT
166
+ # ======================
if __name__ == "__main__":
    # Serve the Gradio demo on all interfaces, port 8080.
    # NOTE(review): only the Gradio demo is launched here; nothing visible in
    # this file starts the Flask `app`, so the /analyze route is presumably
    # not served by this entry point - confirm how it is deployed.
    demo.launch(
        server_name="0.0.0.0",
        server_port=8080,
    )