Spaces:

ramadn
/

gambling-detector

Sleeping

App Files Community

rdsarjito committed on Nov 8, 2025

Commit

dab4200

1 Parent(s): c0f13b0

[UPDATE]UI

Browse files

Files changed (1) hide show

app.py +35 -341

app.py CHANGED Viewed

@@ -7,7 +7,7 @@ import torch.nn as nn
 from PIL import Image
 import requests
 import easyocr
-from transformers import AutoTokenizer, AutoModel
 from torchvision import transforms
 from torchvision import models
 from torchvision.transforms import functional as F
@@ -75,60 +75,18 @@ class LateFusionModel(nn.Module):
         return fused_logits, image_logits, text_logits, weights
-# Load Fusion Model
-# First, create the model architecture
-print("Creating fusion model architecture...")
-# Create image model (same as image-only model)
-fusion_image_model = models.efficientnet_b3(weights=models.EfficientNet_B3_Weights.DEFAULT)
-num_features = fusion_image_model.classifier[1].in_features
-fusion_image_model.classifier = nn.Linear(num_features, 1)
-# Create text model (IndoBERT)
-fusion_text_model_base = AutoModel.from_pretrained('indobenchmark/indobert-base-p1')
-# Add classification head for text model
-class TextClassifier(nn.Module):
-    def __init__(self, base_model):
-        super(TextClassifier, self).__init__()
-        self.base_model = base_model
-        self.classifier = nn.Linear(base_model.config.hidden_size, 1)
-    def forward(self, input_ids, attention_mask):
-        outputs = self.base_model(input_ids=input_ids, attention_mask=attention_mask)
-        pooled_output = outputs.pooler_output
-        logits = self.classifier(pooled_output)
-        # Return object with logits attribute
-        class ModelOutput:
-            def __init__(self, logits):
-                self.logits = logits
-        return ModelOutput(logits)
-fusion_text_model = TextClassifier(fusion_text_model_base)
-# Create fusion model
-fusion_model = LateFusionModel(fusion_image_model, fusion_text_model)
-# Load state_dict
-fusion_model_path = "models/best_mlp_fusion_model_state_dict.pt"
-if os.path.exists(fusion_model_path):
-    try:
-        state_dict = torch.load(fusion_model_path, map_location=device, weights_only=False)
-        # Handle potential DataParallel prefix
-        if any(key.startswith('module.') for key in state_dict.keys()):
-            state_dict = {key.replace('module.', ''): value for key, value in state_dict.items()}
-        fusion_model.load_state_dict(state_dict, strict=False)
-        fusion_model.to(device)
-        fusion_model.eval()
-        print("Fusion model loaded successfully!")
-    except Exception as e:
-        print(f"Warning: Error loading fusion model state_dict: {e}")
-        print("Using fusion model with default weights...")
-        fusion_model.to(device)
-        fusion_model.eval()
 else:
-    print(f"Warning: Fusion model not found at {fusion_model_path}")
-    print("Creating fusion model with default weights...")
-    fusion_model.to(device)
-    fusion_model.eval()
 # Load Image-Only Model
 # Load image model from state_dict
@@ -349,8 +307,7 @@ def predict_single_url(url):
     print(f"Processing URL: {url}")
     screenshot_path = take_screenshot(url)
     if not screenshot_path:
-        error_msg = f"❌ Gagal mengambil screenshot untuk {url}\n\nKemungkinan penyebab:\n• Terlalu banyak redirect\n• Website memblokir akses otomatis\n• Masalah koneksi jaringan\n• URL tidak valid"
-        return {"Gambling": 0.0, "Non-Gambling": 1.0}, f"Error: Screenshot capture failed", None, "", ""
     text = extract_text_from_image(screenshot_path)
     raw_text = text  # Store raw text before cleaning
@@ -371,11 +328,7 @@ def predict_single_url(url):
         confidence = image_probs[0].item() if is_gambling else 1 - image_probs[0].item()
         print(f"[Image-Only] URL: {url}")
         print(f"Prediction: {label} | Confidence: {confidence:.2f}\n")
-        # Format label output as dictionary for better display
-        label_dict = {"Gambling": confidence if is_gambling else 0.0,
-                     "Non-Gambling": 1 - confidence if is_gambling else confidence}
-        return label_dict, f"{confidence:.1%} (Image-Only Model)", screenshot_path, raw_text, ""
     else:
         clean_text_data = clean_text(text)
@@ -399,10 +352,7 @@ def predict_single_url(url):
         print(f"Text Model Prediction Probability: {text_probs[0]:.2f}")
         print(f"Fusion Final Prediction: {label} | Confidence: {confidence:.2f}\n")
-        # Format label output as dictionary for better display
-        label_dict = {"Gambling": confidence if is_gambling else 0.0,
-                     "Non-Gambling": 1 - confidence if is_gambling else confidence}
-        return label_dict, f"{confidence:.1%} (Fusion Model)", screenshot_path, raw_text, clean_text_data
 def predict_batch_urls(file_obj):
     results = []
@@ -425,253 +375,26 @@ def predict_batch_urls(file_obj):
 # --- Gradio App ---
-# Custom CSS for professional styling
-custom_css = """
-/* Main container styling */
-.gradio-container {
-    font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
-    max-width: 1200px !important;
-}
-/* Header styling */
-.main-header {
-    text-align: center;
-    padding: 2rem 0;
-    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-    color: white;
-    border-radius: 12px;
-    margin-bottom: 2rem;
-    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
-}
-.main-header h1 {
-    margin: 0;
-    font-size: 2.5rem;
-    font-weight: 700;
-    letter-spacing: -0.5px;
-}
-.main-header p {
-    margin: 0.5rem 0 0 0;
-    font-size: 1.1rem;
-    opacity: 0.95;
-}
-/* Tab styling */
-.tab-nav {
-    background: #f8f9fa;
-    border-radius: 8px;
-    padding: 0.5rem;
-    margin-bottom: 1.5rem;
-}
-/* Input section styling */
-.input-section {
-    background: #ffffff;
-    padding: 1.5rem;
-    border-radius: 12px;
-    box-shadow: 0 2px 8px rgba(0, 0, 0, 0.08);
-    margin-bottom: 1.5rem;
-}
-/* Button styling */
-.primary-button {
-    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
-    color: white !important;
-    border: none !important;
-    padding: 0.75rem 2rem !important;
-    font-size: 1rem !important;
-    font-weight: 600 !important;
-    border-radius: 8px !important;
-    transition: all 0.3s ease !important;
-    box-shadow: 0 4px 6px rgba(102, 126, 234, 0.3) !important;
-}
-.primary-button:hover {
-    transform: translateY(-2px);
-    box-shadow: 0 6px 12px rgba(102, 126, 234, 0.4) !important;
-}
-/* Output section styling */
-.output-section {
-    background: #f8f9fa;
-    padding: 1.5rem;
-    border-radius: 12px;
-    margin-top: 1.5rem;
-}
-/* Label output styling */
-.label-container {
-    background: white;
-    padding: 1.5rem;
-    border-radius: 10px;
-    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.05);
-    text-align: center;
-}
-.label-gambling {
-    color: #dc3545;
-    font-size: 1.5rem;
-    font-weight: 700;
-}
-.label-non-gambling {
-    color: #28a745;
-    font-size: 1.5rem;
-    font-weight: 700;
-}
-/* Confidence badge */
-.confidence-badge {
-    display: inline-block;
-    padding: 0.5rem 1rem;
-    border-radius: 20px;
-    font-weight: 600;
-    background: #e9ecef;
-    color: #495057;
-}
-/* Image container */
-.image-container {
-    border-radius: 10px;
-    overflow: hidden;
-    box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
-}
-/* Text output styling */
-.text-output {
-    background: white;
-    padding: 1rem;
-    border-radius: 8px;
-    border: 1px solid #e9ecef;
-    font-family: 'Monaco', 'Courier New', monospace;
-    font-size: 0.9rem;
-}
-/* Info box */
-.info-box {
-    background: #e7f3ff;
-    border-left: 4px solid #2196F3;
-    padding: 1rem;
-    border-radius: 4px;
-    margin: 1rem 0;
-}
-/* Section titles */
-.section-title {
-    font-size: 1.25rem;
-    font-weight: 600;
-    color: #495057;
-    margin-bottom: 1rem;
-    display: flex;
-    align-items: center;
-    gap: 0.5rem;
-}
-/* Card styling */
-.card {
-    background: white;
-    border-radius: 10px;
-    padding: 1.5rem;
-    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.05);
-    margin-bottom: 1rem;
-}
-/* Loading animation */
-@keyframes pulse {
-    0%, 100% {
-        opacity: 1;
-    }
-    50% {
-        opacity: 0.5;
-    }
-}
-.loading {
-    animation: pulse 2s cubic-bezier(0.4, 0, 0.6, 1) infinite;
-}
-"""
-with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as app:
-    # Header Section
-    gr.HTML("""
-    <div class="main-header">
-        <h1>🕵️ Gambling Website Detection</h1>
-        <p>AI-Powered Detection System untuk Identifikasi Website Perjudian</p>
-    </div>
-    """)
-    gr.Markdown("""
-    <div style="text-align: center; color: #6c757d; margin-bottom: 2rem;">
-        Sistem deteksi cerdas yang menggunakan <strong>Fusion Model</strong> (Image + Text) untuk mengidentifikasi
-        website perjudian dengan akurasi tinggi. Upload URL atau batch file untuk analisis.
-    </div>
-    """)
-    with gr.Tab("🔍 Single URL Detection"):
-        with gr.Row():
-            with gr.Column(scale=1):
-                gr.Markdown("### 📝 Input URL")
-                url_input = gr.Textbox(
-                    label="Masukkan URL Website",
-                    placeholder="Contoh: https://example.com",
-                    info="Masukkan URL lengkap website yang ingin dianalisis",
-                    scale=1
-                )
-                predict_button = gr.Button(
-                    "🚀 Analisis Website",
-                    variant="primary",
-                    scale=1,
-                    elem_classes="primary-button"
-                )
-        gr.Markdown("---")
         with gr.Row():
-            with gr.Column(scale=1):
-                gr.Markdown("### 📊 Hasil Prediksi")
-                label_output = gr.Label(
-                    label="Status Deteksi",
-                    elem_classes="label-container"
-                )
-                confidence_output = gr.Textbox(
-                    label="Tingkat Keyakinan",
-                    interactive=False,
-                    elem_classes="confidence-badge"
-                )
-            with gr.Column(scale=1):
-                gr.Markdown("### 📸 Screenshot Website")
-                screenshot_output = gr.Image(
-                    label="Screenshot",
-                    type="filepath",
-                    elem_classes="image-container"
-                )
         with gr.Row():
-            with gr.Column(scale=1):
-                gr.Markdown("### 📄 Raw OCR Text")
-                raw_text_output = gr.Textbox(
-                    label="Teks yang Diekstrak dari Gambar",
-                    lines=6,
-                    interactive=False,
-                    elem_classes="text-output"
-                )
-            with gr.Column(scale=1):
-                gr.Markdown("### ✨ Cleaned Text")
-                cleaned_text_output = gr.Textbox(
-                    label="Teks yang Sudah Dibersihkan",
-                    lines=6,
-                    interactive=False,
-                    elem_classes="text-output"
-                )
-        gr.Markdown("""
-        <div class="info-box">
-            <strong>ℹ️ Informasi:</strong> Sistem akan mengambil screenshot website, mengekstrak teks menggunakan OCR,
-            dan menganalisis menggunakan model AI untuk menentukan apakah website tersebut terkait perjudian.
-        </div>
-        """)
         predict_button.click(
             fn=predict_single_url,
@@ -685,40 +408,11 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as app:
             ]
         )
-    with gr.Tab("📦 Batch URL Detection"):
-        gr.Markdown("### 📤 Upload File Batch")
-        gr.Markdown("""
-        <div class="info-box">
-            <strong>📋 Format File:</strong> Upload file .txt yang berisi daftar URL, satu URL per baris.
-            Sistem akan memproses semua URL secara berurutan dan menampilkan hasil dalam tabel.
-        </div>
-        """)
-        with gr.Row():
-            with gr.Column(scale=2):
-                file_input = gr.File(
-                    label="Pilih File .txt",
-                    file_types=[".txt"],
-                    type="filepath"
-                )
-            with gr.Column(scale=1):
-                batch_predict_button = gr.Button(
-                    "🚀 Proses Batch",
-                    variant="primary",
-                    elem_classes="primary-button"
-                )
-        gr.Markdown("### 📊 Hasil Batch Processing")
-        batch_output = gr.DataFrame(
-            label="Hasil Analisis",
-            wrap=True,
-            interactive=False
-        )
-        batch_predict_button.click(
-            fn=predict_batch_urls,
-            inputs=file_input,
-            outputs=batch_output
-        )
 app.launch()

 from PIL import Image
 import requests
 import easyocr
+from transformers import AutoTokenizer
 from torchvision import transforms
 from torchvision import models
 from torchvision.transforms import functional as F
         return fused_logits, image_logits, text_logits, weights
+# Load model
+model_path = "models/best_fusion_model.pt"
+if os.path.exists(model_path):
+    fusion_model = torch.load(model_path, map_location=device, weights_only=False)
 else:
+    model_path = hf_hub_download(repo_id="azzandr/gambling-fusion-model", filename="best_fusion_model.pt")
+    fusion_model = torch.load(model_path, map_location=device, weights_only=False)
+# fusion_model = unwrap_dataparallel(fusion_model)
+fusion_model.to(device)
+fusion_model.eval()
+print("Fusion model loaded successfully!")
 # Load Image-Only Model
 # Load image model from state_dict
     print(f"Processing URL: {url}")
     screenshot_path = take_screenshot(url)
     if not screenshot_path:
+        return f"❌ Error: Unable to capture screenshot for {url}. This may be due to:\n• Too many redirects\n• Website blocking automated access\n• Network connectivity issues\n• Invalid URL", "Screenshot capture failed", None, "", ""
     text = extract_text_from_image(screenshot_path)
     raw_text = text  # Store raw text before cleaning
         confidence = image_probs[0].item() if is_gambling else 1 - image_probs[0].item()
         print(f"[Image-Only] URL: {url}")
         print(f"Prediction: {label} | Confidence: {confidence:.2f}\n")
+        return label, f"Confidence: {confidence:.2f} (Image-Only Model)", screenshot_path, raw_text, ""
     else:
         clean_text_data = clean_text(text)
         print(f"Text Model Prediction Probability: {text_probs[0]:.2f}")
         print(f"Fusion Final Prediction: {label} | Confidence: {confidence:.2f}\n")
+        return label, f"Confidence: {confidence:.2f} (Fusion Model)", screenshot_path, raw_text, clean_text_data
 def predict_batch_urls(file_obj):
     results = []
 # --- Gradio App ---
+with gr.Blocks() as app:
+    gr.Markdown("# 🕵️ Gambling Website Detection (URL Based)")
+    with gr.Tab("Single URL"):
+        url_input = gr.Textbox(label="Enter Website URL")
+        predict_button = gr.Button("Predict")
         with gr.Row():
+            with gr.Column():
+                label_output = gr.Label()
+                confidence_output = gr.Textbox(label="Confidence", interactive=False)
+            with gr.Column():
+                screenshot_output = gr.Image(label="Screenshot", type="filepath")
         with gr.Row():
+            with gr.Column():
+                raw_text_output = gr.Textbox(label="Raw OCR Text", lines=5)
+            with gr.Column():
+                cleaned_text_output = gr.Textbox(label="Cleaned Text", lines=5)
         predict_button.click(
             fn=predict_single_url,
             ]
         )
+    with gr.Tab("Batch URLs"):
+        file_input = gr.File(label="Upload .txt file with URLs (one per line)")
+        batch_predict_button = gr.Button("Batch Predict")
+        batch_output = gr.DataFrame()
+        batch_predict_button.click(fn=predict_batch_urls, inputs=file_input, outputs=batch_output)
 app.launch()