Dzeisonov committed on
Commit
d56fb76
·
1 Parent(s): 1c99454

Upload App

Browse files
.idea/.gitignore ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ # Default ignored files
2
+ /shelf/
3
+ /workspace.xml
4
+ # Editor-based HTTP Client requests
5
+ /httpRequests/
6
+ # Datasource local storage ignored files
7
+ /dataSources/
8
+ /dataSources.local.xml
.idea/FlaskProject1.iml ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <module type="PYTHON_MODULE" version="4">
3
+ <component name="Flask">
4
+ <option name="enabled" value="true" />
5
+ </component>
6
+ <component name="NewModuleRootManager">
7
+ <content url="file://$MODULE_DIR$" />
8
+ <orderEntry type="jdk" jdkName="Python 3.13 (PythonProject) (2)" jdkType="Python SDK" />
9
+ <orderEntry type="sourceFolder" forTests="false" />
10
+ </component>
11
+ <component name="TemplatesService">
12
+ <option name="TEMPLATE_CONFIGURATION" value="Jinja2" />
13
+ <option name="TEMPLATE_FOLDERS">
14
+ <list>
15
+ <option value="$MODULE_DIR$/../FlaskProject1\templates" />
16
+ </list>
17
+ </option>
18
+ </component>
19
+ </module>
.idea/inspectionProfiles/profiles_settings.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <settings>
3
+ <option name="USE_PROJECT_PROFILE" value="false" />
4
+ <version value="1.0" />
5
+ </settings>
6
+ </component>
.idea/misc.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="Black">
4
+ <option name="sdkName" value="Python 3.13 (PythonProject) (2)" />
5
+ </component>
6
+ </project>
.idea/modules.xml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectModuleManager">
4
+ <modules>
5
+ <module fileurl="file://$PROJECT_DIR$/.idea/FlaskProject1.iml" filepath="$PROJECT_DIR$/.idea/FlaskProject1.iml" />
6
+ </modules>
7
+ </component>
8
+ </project>
.idea/vcs.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="VcsDirectoryMappings">
4
+ <mapping directory="$PROJECT_DIR$" vcs="Git" />
5
+ </component>
6
+ </project>
Dockerfile ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use Python 3.9
FROM python:3.9

# Set the working directory
WORKDIR /code

# Copy requirements first so the dependency layer is reused between builds
COPY ./requirements.txt /code/requirements.txt
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

# Copy the rest of the application code into the image
COPY . .

# Model cache directory. It must be writable by the runtime user: Hugging Face
# Spaces runs the container as UID 1000 rather than root, so a root-owned
# directory would make the first model download fail with a permission error.
RUN mkdir -p /code/cache && chmod -R 777 /code/cache
# HF_HOME is the supported cache setting; TRANSFORMERS_CACHE is deprecated.
ENV HF_HOME=/code/cache

# Expose port 7860 (the port Hugging Face Spaces expects)
EXPOSE 7860

# Run the app with Gunicorn. Models are loaded lazily on the first request,
# which can take longer than Gunicorn's default 30 s worker timeout.
CMD ["gunicorn", "-b", "0.0.0.0:7860", "--timeout", "300", "app:app"]
__pycache__/app.cpython-311.pyc ADDED
Binary file (2.24 kB). View file
 
__pycache__/model_utils.cpython-311.pyc ADDED
Binary file (5.74 kB). View file
 
app.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, render_template, request, redirect, url_for
2
+ from model_utils import predict_text, process_file, AVAILABLE_MODELS
3
+
4
+ app = Flask(__name__)
5
+
6
@app.route('/', methods=['GET', 'POST'])
def index():
    """Render the input form and handle analysis submissions.

    GET shows the form. POST runs the classifier: an uploaded file takes
    precedence and is rendered with ``result_file.html``; otherwise the
    manually typed text is rendered with ``result.html``. A POST carrying
    neither falls through and shows the form again.
    """
    if request.method == 'POST':
        model_choice = request.form.get('model_choice', 'toxic_bert')
        # Normalise unknown keys here so the model name shown to the user is
        # the model that actually runs (the backend falls back to toxic_bert).
        if model_choice not in AVAILABLE_MODELS:
            model_choice = 'toxic_bert'
        model_info = AVAILABLE_MODELS.get(model_choice, {}).get('desc', 'Unknown Model')

        # 1. Uploaded file
        uploaded = request.files.get('file_input')
        if uploaded is not None and uploaded.filename:
            results = process_file(uploaded, model_choice)
            return render_template('result_file.html', results=results, model_name=model_info)

        # 2. Manual text input; whitespace-only input counts as empty
        raw_text = request.form.get('user_text', '')
        if raw_text.strip():
            result = predict_text(raw_text, model_choice)
            return render_template('result.html', data=result, model_name=model_info)

    return render_template('index.html', models=AVAILABLE_MODELS)
27
+
28
@app.route('/ulang')
def ulang():
    """Send the user back to the main form to run another analysis."""
    home_url = url_for('index')
    return redirect(home_url)
31
+
32
if __name__ == '__main__':
    import os

    # Entry point for running the file directly. The Werkzeug debugger allows
    # arbitrary code execution, so it can be switched off with FLASK_DEBUG=0;
    # the default stays on to keep the existing behaviour.
    app.run(debug=os.environ.get('FLASK_DEBUG', '1') != '0', port=5000)
model_utils.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn.functional as F
3
+ import re
4
+ import string
5
+ import pandas as pd
6
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
7
+
8
# ==========================================
# MODEL CONFIGURATION
# ==========================================
# Maps each selectable model key to the identifier passed to from_pretrained
# ("name") and the human-readable label shown in the UI ("desc").
AVAILABLE_MODELS = dict(
    toxic_bert=dict(
        name="Dzeisonov/indobert-toxic-classifier",
        desc="IndoBERT (Base) - Toxic Classifier",
    ),
    toxic_roberta=dict(
        name="Dzeisonov/indoroberta-toxic-classifier",
        desc="IndoRoBERTa (Base) - Toxic Classifier",
    ),
)

# Global cache of already-loaded tokenizer/model pairs, keyed by model key
loaded_models = dict()
24
+
25
def get_model_and_tokenizer(model_key):
    """Return the ``(tokenizer, model)`` pair for ``model_key``, loading it on first use.

    Unknown keys fall back to ``"toxic_bert"``. Successfully loaded pairs are
    stored in ``loaded_models`` and reused on later calls. If loading raises,
    the error is printed and ``(None, None)`` is returned; nothing is cached,
    so the next call tries again.
    """
    key = model_key if model_key in AVAILABLE_MODELS else "toxic_bert"

    cached = loaded_models.get(key)
    if cached is not None:
        return cached['tokenizer'], cached['model']

    repo_id = AVAILABLE_MODELS[key]['name']
    print(f"⏳ Sedang memuat model baru: {repo_id} ...")

    try:
        tok = AutoTokenizer.from_pretrained(repo_id)
        net = AutoModelForSequenceClassification.from_pretrained(repo_id)
        loaded_models[key] = {'tokenizer': tok, 'model': net}
        print("✅ Model berhasil dimuat!")
    except Exception as err:
        print(f"❌ Gagal memuat model: {err}")
        return None, None
    return tok, net
45
+
46
# Noise removed before classification: URLs, @mentions, the '#' sign and digits.
# The dot after "www" is escaped so that ordinary words containing "www"
# (e.g. "awww nice") are not swallowed together with the word that follows.
_NOISE_RE = re.compile(r"http\S+|www\.\S+|@\w+|#|\d+")
_WHITESPACE_RE = re.compile(r"\s+")
_PUNCT_TABLE = str.maketrans("", "", string.punctuation)


def preprocess_text(text):
    """Normalise raw text before it is passed to the classifier.

    Lower-cases the text, removes URLs, @mentions, '#' signs, digits and ASCII
    punctuation, then collapses runs of whitespace into single spaces.

    Args:
        text: The raw input. Anything that is not a non-empty ``str`` is
            treated as empty.

    Returns:
        The cleaned text, or ``""`` when nothing usable remains.
    """
    if not isinstance(text, str) or not text:
        return ""
    text = _NOISE_RE.sub("", text.lower())
    text = text.translate(_PUNCT_TABLE)
    return _WHITESPACE_RE.sub(" ", text).strip()
53
+
54
def predict_text(text, model_key):
    """Classify a single sentence as toxic or non-toxic.

    Args:
        text: The raw sentence to classify.
        model_key: Key of the model to use (see ``AVAILABLE_MODELS``).

    Returns:
        A dict with ``original_text``, ``text_clean``, ``label`` and ``score``.
        ``label`` is ``"Toxic"`` or ``"Non-Toxic"`` for a real prediction,
        ``"ERROR"`` when the model could not be loaded, and ``"Kosong"`` when
        nothing is left of the text after cleaning. ``score`` is the
        confidence formatted as a percentage string.
    """
    tokenizer, model = get_model_and_tokenizer(model_key)

    # A failed load is signalled with (None, None); test for None explicitly
    # instead of relying on the truthiness of the library objects.
    if model is None or tokenizer is None:
        return {"original_text": text, "text_clean": "", "label": "ERROR", "score": "0%"}

    clean_text = preprocess_text(text)
    if not clean_text:
        return {"original_text": text, "text_clean": "", "label": "Kosong", "score": "0%"}

    inputs = tokenizer(clean_text, return_tensors="pt", truncation=True, max_length=512)

    with torch.no_grad():
        outputs = model(**inputs)
        probs = F.softmax(outputs.logits, dim=-1)
        label_id = torch.argmax(probs, dim=1).item()
        confidence = probs[0][label_id].item()

    predicted_label = model.config.id2label[label_id]

    # Standardise the label (Toxic / Non-Toxic). Matching is case-insensitive
    # so variants such as "TOXIC" or "label_1" are recognised as well.
    normalized = str(predicted_label).strip().lower()
    final_label = "Toxic" if normalized in {"label_1", "toxic", "1"} else "Non-Toxic"

    return {
        "original_text": text,
        "text_clean": clean_text,
        "label": final_label,
        "score": f"{confidence:.1%}",  # percentage string, e.g. 98.5%
    }
87
+
88
def _first_column_texts(df):
    """Return the non-empty cells of the first column of ``df`` as strings."""
    # Drop missing cells first; otherwise astype(str) turns them into the
    # literal text "nan", which would then be classified like a real sentence.
    return df.iloc[:, 0].dropna().astype(str).tolist()


def process_file(file_obj, model_key, max_rows=50):
    """Classify the texts contained in an uploaded CSV, Excel or TXT file.

    CSV and Excel files are read with pandas and only their first column is
    used; any other file is decoded as UTF-8 text with one sentence per line.
    Blank entries are skipped and do not count towards ``max_rows``.

    NOTE: pandas treats the first row of a CSV/Excel file as the header, so
    that row is not classified.
    NOTE(review): reading ``.xls`` requires the xlrd engine, which
    requirements.txt does not list -- confirm it is installed.

    Args:
        file_obj: Uploaded file; must provide ``filename`` and be readable.
        model_key: Key of the model to use (see ``AVAILABLE_MODELS``).
        max_rows: Maximum number of texts to classify, so that a large upload
            cannot stall the server.

    Returns:
        A list of ``predict_text`` result dicts. Any error while reading or
        classifying is printed and an empty list is returned.
    """
    try:
        filename = (file_obj.filename or "").lower()

        # 1. CSV file
        if filename.endswith('.csv'):
            texts = _first_column_texts(pd.read_csv(file_obj))

        # 2. Excel file (.xlsx / .xls)
        elif filename.endswith(('.xlsx', '.xls')):
            texts = _first_column_texts(pd.read_excel(file_obj))

        # 3. Anything else is treated as plain text (fallback)
        else:
            # "utf-8-sig" also strips a leading byte-order mark, which would
            # otherwise end up inside the first line.
            texts = file_obj.read().decode("utf-8-sig").splitlines()

        candidates = [t for t in texts if t.strip()][:max(max_rows, 0)]
        return [predict_text(t, model_key) for t in candidates]

    except Exception as e:
        # Best effort: the caller shows an "empty / wrong format" message.
        print(f"Error processing file: {e}")
        return []
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ flask
2
+ torch
3
+ transformers
4
+ pandas
5
+ openpyxl
6
+ gunicorn
7
+ uvicorn
templates/index.html ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="id">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Indo Toxic Detector</title>
7
+ <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
8
+ <style>
9
+ body { background-color: #f0f2f5; font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; }
10
+ .main-card {
11
+ border-radius: 20px;
12
+ border: none;
13
+ box-shadow: 0 15px 35px rgba(0,0,0,0.1);
14
+ background: white;
15
+ }
16
+ .header-title { color: #2c3e50; font-weight: 800; }
17
+ .btn-custom { background-color: #2c3e50; color: white; border-radius: 10px; padding: 12px; }
18
+ .btn-custom:hover { background-color: #1a252f; color: white; }
19
+ .info-box {
20
+ background-color: #e8f4fd;
21
+ border-left: 4px solid #0d6efd;
22
+ border-radius: 5px;
23
+ font-size: 0.9rem;
24
+ }
25
+ </style>
26
+ </head>
27
+ <body>
28
+ <div class="container py-5 min-vh-100 d-flex justify-content-center align-items-center">
29
+ <div class="col-lg-6 col-md-8">
30
+ <div class="text-center mb-4">
31
+ <h1 class="header-title">🛡️ Indo Toxicity Detector</h1>
32
+ <p class="text-muted">Klasifikasi ujaran kebencian dengan AI</p>
33
+ </div>
34
+
35
+ <div class="card main-card p-4">
36
+ <form action="/" method="POST" enctype="multipart/form-data">
37
+
38
+ <!-- Pilihan Model -->
39
+ <div class="mb-4">
40
+ <label class="form-label fw-bold text-uppercase small text-secondary">Pilih Model AI</label>
41
+ <select name="model_choice" class="form-select bg-light border-0 py-3">
42
+ {% for key, val in models.items() %}
43
+ <option value="{{ key }}">{{ val.desc }}</option>
44
+ {% endfor %}
45
+ </select>
46
+ </div>
47
+
48
+ <hr class="my-4">
49
+
50
+ <!-- Input Manual -->
51
+ <div class="mb-3">
52
+ <label class="form-label fw-bold">📝 Input Manual</label>
53
+ <textarea class="form-control bg-light" name="user_text" rows="3" placeholder="Ketik kalimat di sini..."></textarea>
54
+ </div>
55
+
56
+ <div class="text-center text-muted small my-2">- ATAU -</div>
57
+
58
+ <!-- Input File -->
59
+ <div class="mb-4">
60
+ <label class="form-label fw-bold">📂 Upload File</label>
61
+ <input type="file" class="form-control mb-2" name="file_input" accept=".csv, .txt, .xlsx, .xls">
62
+
63
+ <!-- PANDUAN FORMAT FILE -->
64
+ <div class="info-box p-3 mt-3">
65
+ <h6 class="fw-bold mb-2">ℹ️ Panduan Format File:</h6>
66
+ <ul class="mb-0 ps-3">
67
+ <li class="mb-1">
68
+ <strong>Excel / CSV:</strong> Teks harus berada di
69
+ <span class="badge bg-primary">Kolom Pertama (A)</span>.
70
+ </li>
71
+ <li>
72
+ <strong>TXT:</strong> Satu kalimat per baris.
73
+ </li>
74
+ </ul>
75
+ <div class="text-muted small mt-2 fst-italic">*Maksimal 50 baris akan diproses.</div>
76
+ </div>
77
+ </div>
78
+
79
+ <button type="submit" class="btn btn-custom w-100 fw-bold">
80
+ 🔍 Mulai Analisis
81
+ </button>
82
+ </form>
83
+ </div>
84
+ </div>
85
+ </div>
86
+ </body>
87
+ </html>
templates/result.html ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="id">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <title>Hasil Analisis</title>
6
+ <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
7
+ <style>
8
+ body { background-color: #f8f9fa; display: flex; align-items: center; min-height: 100vh;}
9
+ .card { border-radius: 20px; border: none; box-shadow: 0 10px 30px rgba(0,0,0,0.08); }
10
+ .badge-status { font-size: 1.2rem; padding: 10px 25px; border-radius: 50px; }
11
+ .bg-toxic { background-color: #e74c3c; color: white; }
12
+ .bg-safe { background-color: #27ae60; color: white; }
13
+ </style>
14
+ </head>
15
+ <body>
16
+ <div class="container">
17
+ <div class="row justify-content-center">
18
+ <div class="col-md-7">
19
+ <div class="card p-5 text-center">
20
+
21
+ <h6 class="text-muted text-uppercase ls-2">Teks Asli</h6>
22
+ <p class="lead fst-italic mb-4">"{{ data.original_text }}"</p>
23
+ <hr>
24
+
25
+ <h6 class="text-muted text-uppercase mt-4">Hasil Prediksi</h6>
26
+ <p class="small text-muted mb-3">Model: {{ model_name }}</p>
27
+
28
<div>
    {% if data.label == 'Toxic' %}
    <span class="badge badge-status bg-toxic">☣️ TOXIC</span>
    <div class="alert alert-danger mt-4 border-0 bg-danger bg-opacity-10 text-danger">
        <strong>Peringatan!</strong> Kalimat ini mengandung unsur negatif/kasar.
    </div>
    {% elif data.label == 'Non-Toxic' %}
    <span class="badge badge-status bg-safe">✅ NON-TOXIC</span>
    <div class="alert alert-success mt-4 border-0 bg-success bg-opacity-10 text-success">
        <strong>Aman.</strong> Kalimat ini bersih.
    </div>
    {% else %}
    {# Any other label (e.g. 'ERROR', 'Kosong') means no prediction was made; never present it as safe. #}
    <span class="badge badge-status bg-secondary">⚠️ {{ data.label }}</span>
    <div class="alert alert-warning mt-4 border-0">
        <strong>Tidak dapat dianalisis.</strong> Teks kosong setelah dibersihkan atau model gagal dimuat.
    </div>
    {% endif %}
</div>
41
+
42
+ <p class="mt-2 text-muted small">Confidence: <strong>{{ data.score }}</strong></p>
43
+
44
+ <a href="{{ url_for('ulang') }}" class="btn btn-outline-dark mt-4 px-5 rounded-pill">Cek Lagi</a>
45
+ </div>
46
+ </div>
47
+ </div>
48
+ </div>
49
+ </body>
50
+ </html>
templates/result_file.html ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="id">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <title>Hasil Batch Analysis</title>
6
+ <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
7
+ <style>
8
+ .badge-toxic { background-color: #e74c3c; color: white; }
9
+ .badge-safe { background-color: #27ae60; color: white; }
10
+ </style>
11
+ </head>
12
+ <body class="bg-light p-5">
13
+ <div class="container bg-white p-5 rounded shadow-sm">
14
+ <div class="d-flex justify-content-between align-items-center mb-4">
15
+ <h2>📄 Hasil Analisis File</h2>
16
+ <a href="{{ url_for('ulang') }}" class="btn btn-outline-secondary">⬅️ Kembali</a>
17
+ </div>
18
+
19
+ <p class="text-muted">Model: <strong>{{ model_name }}</strong></p>
20
+
21
+ <div class="table-responsive">
22
+ <table class="table table-hover align-middle">
23
+ <thead class="table-dark">
24
+ <tr>
25
+ <th style="width: 60%;">Teks</th>
26
+ <th style="width: 20%;">Prediksi</th>
27
+ <th style="width: 20%;">Confidence</th>
28
+ </tr>
29
+ </thead>
30
+ <tbody>
31
+ {% for row in results %}
32
+ <tr>
33
+ <td class="text-break">{{ row.original_text }}</td>
34
<td>
    {% if row.label == 'Toxic' %}
    <span class="badge badge-toxic rounded-pill px-3">Toxic</span>
    {% elif row.label == 'Non-Toxic' %}
    <span class="badge badge-safe rounded-pill px-3">Non-Toxic</span>
    {% else %}
    {# Any other label (e.g. 'ERROR', 'Kosong') means no prediction was made; show it as-is instead of "Non-Toxic". #}
    <span class="badge bg-secondary rounded-pill px-3">{{ row.label }}</span>
    {% endif %}
</td>
41
+ <td>{{ row.score }}</td>
42
+ </tr>
43
+ {% else %}
44
+ <tr>
45
+ <td colspan="3" class="text-center text-muted py-4">Tidak ada data atau format file salah.</td>
46
+ </tr>
47
+ {% endfor %}
48
+ </tbody>
49
+ </table>
50
+ </div>
51
+ </div>
52
+ </body>
53
+ </html>