Spaces:

ramadn
/

gambling-detector

Sleeping

App Files Community

rdsarjito committed on Nov 8, 2025

Commit

c48b7e8

1 Parent(s): 83d56a4

[UPDATE]UI

Browse files

Files changed (1) hide show

app.py +236 -46

app.py CHANGED Viewed

@@ -346,7 +346,9 @@ def predict_single_url(url):
     print(f"Processing URL: {url}")
     screenshot_path = take_screenshot(url)
     if not screenshot_path:
-        return f"❌ Error: Unable to capture screenshot for {url}. This may be due to:\n• Too many redirects\n• Website blocking automated access\n• Network connectivity issues\n• Invalid URL", "Screenshot capture failed", None, "", ""
     text = extract_text_from_image(screenshot_path)
     raw_text = text  # Store raw text before cleaning
@@ -363,11 +365,22 @@ def predict_single_url(url):
             threshold = 0.6
             is_gambling = image_probs[0] > threshold
-        label = "Gambling" if is_gambling else "Non-Gambling"
-        confidence = image_probs[0].item() if is_gambling else 1 - image_probs[0].item()
         print(f"[Image-Only] URL: {url}")
-        print(f"Prediction: {label} | Confidence: {confidence:.2f}\n")
-        return label, f"Confidence: {confidence:.2f} (Image-Only Model)", screenshot_path, raw_text, ""
     else:
         clean_text_data = clean_text(text)
@@ -382,16 +395,36 @@ def predict_single_url(url):
             threshold = 0.6
             is_gambling = fused_probs[0] > threshold
-        label = "Gambling" if is_gambling else "Non-Gambling"
-        confidence = fused_probs[0].item() if is_gambling else 1 - fused_probs[0].item()
         # ✨ Log detail
         print(f"[Fusion Model] URL: {url}")
         print(f"Image Model Prediction Probability: {image_probs[0]:.2f}")
         print(f"Text Model Prediction Probability: {text_probs[0]:.2f}")
-        print(f"Fusion Final Prediction: {label} | Confidence: {confidence:.2f}\n")
-        return label, f"Confidence: {confidence:.2f} (Fusion Model)", screenshot_path, raw_text, clean_text_data
 def predict_batch_urls(file_obj):
     results = []
@@ -414,44 +447,201 @@ def predict_batch_urls(file_obj):
 # --- Gradio App ---
-with gr.Blocks() as app:
-    gr.Markdown("# 🕵️ Gambling Website Detection (URL Based)")
-    with gr.Tab("Single URL"):
-        url_input = gr.Textbox(label="Enter Website URL")
-        predict_button = gr.Button("Predict")
-        with gr.Row():
-            with gr.Column():
-                label_output = gr.Label()
-                confidence_output = gr.Textbox(label="Confidence", interactive=False)
-            with gr.Column():
-                screenshot_output = gr.Image(label="Screenshot", type="filepath")
-        with gr.Row():
-            with gr.Column():
-                raw_text_output = gr.Textbox(label="Raw OCR Text", lines=5)
-            with gr.Column():
-                cleaned_text_output = gr.Textbox(label="Cleaned Text", lines=5)
-        predict_button.click(
-            fn=predict_single_url,
-            inputs=url_input,
-            outputs=[
-                label_output,
-                confidence_output,
-                screenshot_output,
-                raw_text_output,
-                cleaned_text_output
-            ]
-        )
-    with gr.Tab("Batch URLs"):
-        file_input = gr.File(label="Upload .txt file with URLs (one per line)")
-        batch_predict_button = gr.Button("Batch Predict")
-        batch_output = gr.DataFrame()
-        batch_predict_button.click(fn=predict_batch_urls, inputs=file_input, outputs=batch_output)
 app.launch()

     print(f"Processing URL: {url}")
     screenshot_path = take_screenshot(url)
     if not screenshot_path:
+        error_label = {"Error": 1.0, "Non-Gambling": 0.0, "Gambling": 0.0}
+        error_msg = f"**❌ Error:** Unable to capture screenshot for `{url}`\n\n**Possible reasons:**\n• Too many redirects\n• Website blocking automated access\n• Network connectivity issues\n• Invalid URL"
+        return error_label, error_msg, None, "", "", "**Model:** Screenshot capture failed"
     text = extract_text_from_image(screenshot_path)
     raw_text = text  # Store raw text before cleaning
             threshold = 0.6
             is_gambling = image_probs[0] > threshold
+        gambling_prob = image_probs[0].item()
+        non_gambling_prob = 1 - gambling_prob
+        label_dict = {
+            "Gambling": gambling_prob,
+            "Non-Gambling": non_gambling_prob
+        }
+        confidence = gambling_prob if is_gambling else non_gambling_prob
+        confidence_md = f"**Confidence:** {confidence:.1%}\n\n**Model Used:** Image-Only Model (EfficientNet-B3)\n\n**Prediction:** {'🟥 Gambling' if is_gambling else '🟩 Non-Gambling'}"
+        model_info = f"**Model Type:** Image-Only\n**Architecture:** EfficientNet-B3\n**Gambling Probability:** {gambling_prob:.1%}\n**Non-Gambling Probability:** {non_gambling_prob:.1%}"
         print(f"[Image-Only] URL: {url}")
+        print(f"Prediction: {'Gambling' if is_gambling else 'Non-Gambling'} | Confidence: {confidence:.2f}\n")
+        return label_dict, confidence_md, screenshot_path, raw_text, "", model_info
     else:
         clean_text_data = clean_text(text)
             threshold = 0.6
             is_gambling = fused_probs[0] > threshold
+        gambling_prob = fused_probs[0].item()
+        non_gambling_prob = 1 - gambling_prob
+        label_dict = {
+            "Gambling": gambling_prob,
+            "Non-Gambling": non_gambling_prob
+        }
+        confidence = gambling_prob if is_gambling else non_gambling_prob
+        image_weight = weights[0].item()
+        text_weight = weights[1].item()
+        confidence_md = f"**Confidence:** {confidence:.1%}\n\n**Model Used:** Fusion Model (Image + Text)\n\n**Prediction:** {'🟥 Gambling' if is_gambling else '🟩 Non-Gambling'}"
+        model_info = f"""**Model Type:** Fusion Model
+**Image Model:** EfficientNet-B3 (Weight: {image_weight:.1%})
+**Text Model:** IndoBERT (Weight: {text_weight:.1%})
+**Individual Predictions:**
+- 🖼️ Image Model: {image_probs[0].item():.1%}
+- 📝 Text Model: {text_probs[0].item():.1%}
+- 🔗 Fusion Result: {gambling_prob:.1%}"""
         # ✨ Log detail
         print(f"[Fusion Model] URL: {url}")
         print(f"Image Model Prediction Probability: {image_probs[0]:.2f}")
         print(f"Text Model Prediction Probability: {text_probs[0]:.2f}")
+        print(f"Fusion Final Prediction: {'Gambling' if is_gambling else 'Non-Gambling'} | Confidence: {confidence:.2f}\n")
+        return label_dict, confidence_md, screenshot_path, raw_text, clean_text_data, model_info
 def predict_batch_urls(file_obj):
     results = []
 # --- Gradio App ---
+# Custom CSS for professional styling
+custom_css = """
+    .main-header {
+        text-align: center;
+        padding: 2rem 0;
+        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+        color: white;
+        border-radius: 10px;
+        margin-bottom: 2rem;
+    }
+    .main-header h1 {
+        margin: 0;
+        font-size: 2.5rem;
+        font-weight: 700;
+    }
+    .main-header p {
+        margin: 0.5rem 0 0 0;
+        font-size: 1.1rem;
+        opacity: 0.9;
+    }
+    .result-card {
+        background: #f8f9fa;
+        padding: 1.5rem;
+        border-radius: 10px;
+        border: 2px solid #e9ecef;
+        margin: 1rem 0;
+    }
+    .info-box {
+        background: #e7f3ff;
+        padding: 1rem;
+        border-radius: 8px;
+        border-left: 4px solid #2196F3;
+        margin: 1rem 0;
+    }
+    .success-box {
+        background: #d4edda;
+        border-left-color: #28a745;
+    }
+    .warning-box {
+        background: #fff3cd;
+        border-left-color: #ffc107;
+    }
+    .gradio-container {
+        max-width: 1200px;
+        margin: 0 auto;
+    }
+"""
+with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, title="Gambling Website Detector") as app:
+    # Header Section
+    with gr.Row():
+        gr.HTML("""
+            <div class="main-header">
+                <h1>🕵️ Gambling Website Detection System</h1>
+                <p>AI-Powered URL Analysis using Deep Learning Fusion Model</p>
+            </div>
+        """)
+    # Info Section
+    with gr.Row():
+        gr.Markdown("""
+        ### 📋 About This Tool
+        This advanced detection system uses a **fusion model** combining:
+        - 🖼️ **Image Analysis**: EfficientNet-B3 for visual content detection
+        - 📝 **Text Analysis**: IndoBERT for Indonesian text understanding
+        - 🔗 **Fusion Learning**: Intelligent combination of both modalities
+        Simply enter a website URL to analyze whether it contains gambling-related content.
+        """)
+    with gr.Tabs():
+        with gr.Tab("🔍 Single URL Analysis", id="single"):
+            with gr.Row():
+                with gr.Column(scale=2):
+                    gr.Markdown("### Enter Website URL")
+                    url_input = gr.Textbox(
+                        label="Website URL",
+                        placeholder="https://example.com",
+                        info="Enter the full URL of the website you want to analyze",
+                        lines=1
+                    )
+                    predict_button = gr.Button(
+                        "🔎 Analyze Website",
+                        variant="primary",
+                        size="lg"
+                    )
+            gr.Markdown("---")
+            # Results Section
+            with gr.Row():
+                with gr.Column(scale=1):
+                    gr.Markdown("### 📊 Detection Results")
+                    label_output = gr.Label(
+                        label="Prediction Result",
+                        value={"Gambling": 0.0, "Non-Gambling": 0.0},
+                        num_top_classes=2
+                    )
+                    confidence_output = gr.Markdown(
+                        value="**Confidence:** Waiting for analysis...",
+                        label="Confidence Score"
+                    )
+                    model_info_output = gr.Markdown(
+                        value="",
+                        label="Model Information"
+                    )
+                with gr.Column(scale=1):
+                    gr.Markdown("### 📸 Website Screenshot")
+                    screenshot_output = gr.Image(
+                        label="Captured Screenshot",
+                        type="filepath",
+                        height=400
+                    )
+            gr.Markdown("---")
+            # Text Analysis Section
+            with gr.Accordion("📝 Text Analysis Details", open=False):
+                with gr.Row():
+                    with gr.Column():
+                        gr.Markdown("#### Raw OCR Text")
+                        raw_text_output = gr.Textbox(
+                            label="Extracted Text (Raw)",
+                            lines=8,
+                            interactive=False,
+                            placeholder="Raw text extracted from the screenshot will appear here..."
+                        )
+                    with gr.Column():
+                        gr.Markdown("#### Processed Text")
+                        cleaned_text_output = gr.Textbox(
+                            label="Cleaned Text (Processed)",
+                            lines=8,
+                            interactive=False,
+                            placeholder="Processed and cleaned text will appear here..."
+                        )
+            predict_button.click(
+                fn=predict_single_url,
+                inputs=url_input,
+                outputs=[
+                    label_output,
+                    confidence_output,
+                    screenshot_output,
+                    raw_text_output,
+                    cleaned_text_output,
+                    model_info_output
+                ]
+            )
+        with gr.Tab("📦 Batch URL Analysis", id="batch"):
+            gr.Markdown("""
+            ### Batch Processing
+            Upload a text file containing multiple URLs (one per line) to analyze them all at once.
+            The results will be displayed in a table format.
+            """)
+            with gr.Row():
+                with gr.Column():
+                    file_input = gr.File(
+                        label="Upload URL File (.txt)",
+                        file_types=[".txt"],
+                        info="Upload a .txt file with one URL per line"
+                    )
+                    batch_predict_button = gr.Button(
+                        "🚀 Process Batch",
+                        variant="primary",
+                        size="lg"
+                    )
+            gr.Markdown("---")
+            with gr.Row():
+                gr.Markdown("### 📋 Batch Results")
+                batch_output = gr.DataFrame(
+                    label="Analysis Results",
+                    wrap=True,
+                    interactive=False
+                )
+            batch_predict_button.click(
+                fn=predict_batch_urls,
+                inputs=file_input,
+                outputs=batch_output
+            )
+    # Footer
+    gr.Markdown("---")
+    gr.Markdown("""
+    <div style="text-align: center; color: #666; padding: 1rem;">
+        <p>Powered by PyTorch • Gradio • EfficientNet • IndoBERT</p>
+        <p style="font-size: 0.9rem;">⚠️ This tool is for educational and research purposes only</p>
+    </div>
+    """)
 app.launch()