rdsarjito committed on
Commit
b89e337
·
1 Parent(s): f9fed11
Files changed (1) hide show
  1. app.py +177 -118
app.py CHANGED
@@ -362,8 +362,8 @@ def predict_single_url(url):
362
  screenshot_path = take_screenshot(url)
363
  if not screenshot_path:
364
  error_label = {"Error": 1.0, "Non-Gambling": 0.0, "Gambling": 0.0}
365
- error_msg = f"**Error:** Unable to capture screenshot for `{url}`\n\n**Possible reasons:**\n• Too many redirects\n• Website blocking automated access\n• Network connectivity issues\n• Invalid URL"
366
- return error_label, error_msg, None, "", "", "**Model:** Screenshot capture failed"
367
 
368
  text = extract_text_from_image(screenshot_path)
369
  raw_text = text # Store raw text before cleaning
@@ -389,9 +389,10 @@ def predict_single_url(url):
389
  }
390
 
391
  confidence = gambling_prob if is_gambling else non_gambling_prob
392
- confidence_md = f"**Confidence:** {confidence:.1%}\n\n**Model Used:** Image-Only Model (EfficientNet-B3)\n\n**Prediction:** {'Gambling' if is_gambling else 'Non-Gambling'}"
 
393
 
394
- model_info = f"**Model Type:** Image-Only\n**Architecture:** EfficientNet-B3\n**Gambling Probability:** {gambling_prob:.1%}\n**Non-Gambling Probability:** {non_gambling_prob:.1%}"
395
 
396
  print(f"[Image-Only] URL: {url}")
397
  print(f"Prediction: {'Gambling' if is_gambling else 'Non-Gambling'} | Confidence: {confidence:.2f}\n")
@@ -431,16 +432,17 @@ def predict_single_url(url):
431
  image_weight = 0.5
432
  text_weight = 0.5
433
 
434
- confidence_md = f"**Confidence:** {confidence:.1%}\n\n**Model Used:** Fusion Model (Image + Text)\n\n**Prediction:** {'Gambling' if is_gambling else 'Non-Gambling'}"
 
435
 
436
- model_info = f"""**Model Type:** Fusion Model (MLP)
437
  **Image Model:** EfficientNet-B3
438
  **Text Model:** IndoBERT
439
 
440
- **Individual Predictions:**
441
  - Image Model: {image_probs[0].item():.1%}
442
  - Text Model: {text_probs[0].item():.1%}
443
- - Fusion Result: {gambling_prob:.1%}"""
444
 
445
  # ✨ Log detail
446
  print(f"[Fusion Model] URL: {url}")
@@ -471,143 +473,199 @@ def predict_batch_urls(file_obj):
471
 
472
  # --- Gradio App ---
473
 
474
- # Custom CSS for professional styling
475
  custom_css = """
476
- .main-header {
477
- text-align: center;
478
- padding: 2rem 0;
479
- background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
480
- color: white;
481
- border-radius: 10px;
482
- margin-bottom: 2rem;
483
  }
484
- .main-header h1 {
 
 
 
485
  margin: 0;
486
- font-size: 2.5rem;
487
- font-weight: 700;
488
  }
489
- .main-header p {
490
- margin: 0.5rem 0 0 0;
491
- font-size: 1.1rem;
492
- opacity: 0.9;
493
  }
494
- .result-card {
495
- background: #f8f9fa;
496
- padding: 1.5rem;
497
- border-radius: 10px;
498
- border: 2px solid #e9ecef;
499
- margin: 1rem 0;
500
  }
501
- .info-box {
502
- background: #e7f3ff;
503
- padding: 1rem;
504
  border-radius: 8px;
505
- border-left: 4px solid #2196F3;
506
- margin: 1rem 0;
507
  }
508
- .success-box {
509
- background: #d4edda;
510
- border-left-color: #28a745;
 
 
 
 
511
  }
512
- .warning-box {
513
- background: #fff3cd;
514
- border-left-color: #ffc107;
 
 
515
  }
516
- .gradio-container {
517
- max-width: 1200px;
518
- margin: 0 auto;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
519
  }
520
  """
521
 
522
- with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, title="Gambling Website Detector") as app:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
523
  # Header Section
524
  with gr.Row():
525
  gr.HTML("""
526
- <div class="main-header">
527
- <h1>Gambling Website Detection System</h1>
528
- <p>AI-Powered URL Analysis using Deep Learning Fusion Model</p>
 
 
529
  </div>
530
  """)
531
 
532
- # Info Section
533
  with gr.Row():
534
- gr.Markdown("""
535
- ### About This Tool
536
-
537
- This advanced detection system uses a **fusion model** combining:
538
- - **Image Analysis**: EfficientNet-B3 for visual content detection
539
- - **Text Analysis**: IndoBERT for Indonesian text understanding
540
- - **Fusion Learning**: Intelligent combination of both modalities
541
-
542
- Simply enter a website URL to analyze whether it contains gambling-related content.
543
- """)
544
 
545
  with gr.Tabs():
546
- with gr.Tab("Single URL Analysis", id="single"):
547
  with gr.Row():
548
- with gr.Column(scale=2):
549
- gr.Markdown("### Enter Website URL")
 
 
 
 
 
 
 
 
 
 
550
  url_input = gr.Textbox(
551
- label="Website URL",
552
  placeholder="https://example.com",
553
- info="Enter the full URL of the website you want to analyze",
554
  lines=1
555
  )
556
  predict_button = gr.Button(
557
- "Analyze Website",
558
  variant="primary",
559
  size="lg"
560
  )
561
 
562
- gr.Markdown("---")
 
 
 
 
 
 
 
 
563
 
564
- # Results Section
565
  with gr.Row():
566
  with gr.Column(scale=1):
567
- gr.Markdown("### Detection Results")
568
  label_output = gr.Label(
569
- label="Prediction Result",
570
  value={"Gambling": 0.0, "Non-Gambling": 0.0},
571
  num_top_classes=2
572
  )
573
  confidence_output = gr.Markdown(
574
- value="**Confidence:** Waiting for analysis...",
575
- label="Confidence Score"
576
  )
577
  model_info_output = gr.Markdown(
578
  value="",
579
- label="Model Information"
580
  )
581
 
582
  with gr.Column(scale=1):
583
- gr.Markdown("### Website Screenshot")
584
  screenshot_output = gr.Image(
585
- label="Captured Screenshot",
586
  type="filepath",
587
  height=400
588
  )
589
 
590
- gr.Markdown("---")
591
-
592
- # Text Analysis Section
593
- with gr.Accordion("Text Analysis Details", open=False):
594
- with gr.Row():
595
- with gr.Column():
596
- gr.Markdown("#### Raw OCR Text")
597
- raw_text_output = gr.Textbox(
598
- label="Extracted Text (Raw)",
599
- lines=8,
600
- interactive=False,
601
- placeholder="Raw text extracted from the screenshot will appear here..."
602
- )
603
- with gr.Column():
604
- gr.Markdown("#### Processed Text")
605
- cleaned_text_output = gr.Textbox(
606
- label="Cleaned Text (Processed)",
607
- lines=8,
608
- interactive=False,
609
- placeholder="Processed and cleaned text will appear here..."
610
- )
611
 
612
  predict_button.click(
613
  fn=predict_single_url,
@@ -622,36 +680,38 @@ with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, title="Gambling Website D
622
  ]
623
  )
624
 
625
- with gr.Tab("Batch URL Analysis", id="batch"):
626
- gr.Markdown("""
627
- ### Batch Processing
628
-
629
- Upload a text file containing multiple URLs (one per line) to analyze them all at once.
630
- The results will be displayed in a table format.
631
- """)
 
 
 
 
632
 
633
  with gr.Row():
634
  with gr.Column():
635
  file_input = gr.File(
636
- label="Upload URL File (.txt)",
637
  file_types=[".txt"]
638
  )
639
- gr.Markdown("**Tip:** Upload a .txt file with one URL per line")
640
  batch_predict_button = gr.Button(
641
- "Process Batch",
642
  variant="primary",
643
  size="lg"
644
  )
645
 
646
- gr.Markdown("---")
647
-
648
  with gr.Row():
649
- gr.Markdown("### Batch Results")
650
- batch_output = gr.DataFrame(
651
- label="Analysis Results",
652
- wrap=True,
653
- interactive=False
654
- )
655
 
656
  batch_predict_button.click(
657
  fn=predict_batch_urls,
@@ -660,12 +720,11 @@ with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, title="Gambling Website D
660
  )
661
 
662
  # Footer
663
- gr.Markdown("---")
664
- gr.Markdown("""
665
- <div style="text-align: center; color: #666; padding: 1rem;">
666
- <p>Powered by PyTorch Gradio EfficientNet IndoBERT</p>
667
- <p style="font-size: 0.9rem;">This tool is for educational and research purposes only</p>
668
- </div>
669
  """)
670
 
671
  app.launch()
 
362
  screenshot_path = take_screenshot(url)
363
  if not screenshot_path:
364
  error_label = {"Error": 1.0, "Non-Gambling": 0.0, "Gambling": 0.0}
365
+ error_msg = f"**Gagal mengambil screenshot**\n\nURL: `{url}`\n\n**Kemungkinan penyebab:**\n• Terlalu banyak redirect\n• Website memblokir akses otomatis\n• Masalah koneksi jaringan\n• URL tidak valid"
366
+ return error_label, error_msg, None, "", "", "**Status:** Gagal mengambil screenshot"
367
 
368
  text = extract_text_from_image(screenshot_path)
369
  raw_text = text # Store raw text before cleaning
 
389
  }
390
 
391
  confidence = gambling_prob if is_gambling else non_gambling_prob
392
+ result_text = "Gambling" if is_gambling else "Non-Gambling"
393
+ confidence_md = f"**Tingkat Keyakinan:** {confidence:.1%}\n\n**Model:** Image-Only (EfficientNet-B3)\n\n**Hasil:** {result_text}"
394
 
395
+ model_info = f"**Tipe Model:** Image-Only\n**Arsitektur:** EfficientNet-B3\n**Probabilitas Gambling:** {gambling_prob:.1%}\n**Probabilitas Non-Gambling:** {non_gambling_prob:.1%}"
396
 
397
  print(f"[Image-Only] URL: {url}")
398
  print(f"Prediction: {'Gambling' if is_gambling else 'Non-Gambling'} | Confidence: {confidence:.2f}\n")
 
432
  image_weight = 0.5
433
  text_weight = 0.5
434
 
435
+ result_text = "Gambling" if is_gambling else "Non-Gambling"
436
+ confidence_md = f"**Tingkat Keyakinan:** {confidence:.1%}\n\n**Model:** Fusion Model (Image + Text)\n\n**Hasil:** {result_text}"
437
 
438
+ model_info = f"""**Tipe Model:** Fusion Model (MLP)
439
  **Image Model:** EfficientNet-B3
440
  **Text Model:** IndoBERT
441
 
442
+ **Prediksi Individual:**
443
  - Image Model: {image_probs[0].item():.1%}
444
  - Text Model: {text_probs[0].item():.1%}
445
+ - Hasil Fusion: {gambling_prob:.1%}"""
446
 
447
  # ✨ Log detail
448
  print(f"[Fusion Model] URL: {url}")
 
473
 
474
  # --- Gradio App ---
475
 
476
+ # Custom CSS - Tokopedia style
477
  custom_css = """
478
+ .header-container {
479
+ background: #fff;
480
+ border-bottom: 1px solid #e5e5e5;
481
+ padding: 20px 0;
482
+ margin-bottom: 30px;
 
 
483
  }
484
+ .header-title {
485
+ font-size: 24px;
486
+ font-weight: 600;
487
+ color: #333;
488
  margin: 0;
489
+ padding: 0;
 
490
  }
491
+ .header-subtitle {
492
+ font-size: 14px;
493
+ color: #666;
494
+ margin: 5px 0 0 0;
495
  }
496
+ .content-container {
497
+ max-width: 1200px;
498
+ margin: 0 auto;
499
+ padding: 0 20px;
 
 
500
  }
501
+ .card {
502
+ background: #fff;
503
+ border: 1px solid #e5e5e5;
504
  border-radius: 8px;
505
+ padding: 24px;
506
+ margin-bottom: 20px;
507
  }
508
+ .section-title {
509
+ font-size: 18px;
510
+ font-weight: 600;
511
+ color: #333;
512
+ margin: 0 0 20px 0;
513
+ padding-bottom: 12px;
514
+ border-bottom: 2px solid #42b549;
515
  }
516
+ .info-text {
517
+ font-size: 14px;
518
+ color: #666;
519
+ line-height: 1.6;
520
+ margin: 0;
521
  }
522
+ .button-primary {
523
+ background: #42b549;
524
+ color: #fff;
525
+ border: none;
526
+ padding: 12px 32px;
527
+ border-radius: 4px;
528
+ font-weight: 500;
529
+ cursor: pointer;
530
+ }
531
+ .button-primary:hover {
532
+ background: #3aa040;
533
+ }
534
+ .result-box {
535
+ background: #f8f9fa;
536
+ border: 1px solid #e5e5e5;
537
+ border-radius: 8px;
538
+ padding: 20px;
539
+ margin: 15px 0;
540
+ }
541
+ .footer-text {
542
+ text-align: center;
543
+ color: #999;
544
+ font-size: 12px;
545
+ padding: 20px 0;
546
+ border-top: 1px solid #e5e5e5;
547
+ margin-top: 40px;
548
  }
549
  """
550
 
551
+ # Create custom theme with Tokopedia colors
552
+ tokopedia_theme = gr.themes.Default(
553
+ primary_hue=gr.themes.colors.green,
554
+ font=("Inter", "ui-sans-serif", "system-ui", "sans-serif"),
555
+ ).set(
556
+ button_primary_background_fill="#42b549",
557
+ button_primary_background_fill_hover="#3aa040",
558
+ button_primary_text_color="#ffffff",
559
+ border_color_accent="#42b549",
560
+ border_color_primary="#e5e5e5",
561
+ background_fill_primary="#ffffff",
562
+ background_fill_secondary="#f8f9fa",
563
+ body_text_color="#333333",
564
+ body_text_color_subdued="#666666",
565
+ )
566
+
567
+ with gr.Blocks(theme=tokopedia_theme, css=custom_css, title="Gambling Website Detector") as app:
568
  # Header Section
569
  with gr.Row():
570
  gr.HTML("""
571
+ <div class="header-container">
572
+ <div class="content-container">
573
+ <h1 class="header-title">Gambling Website Detector</h1>
574
+ <p class="header-subtitle">Analisis website untuk mendeteksi konten perjudian menggunakan teknologi deep learning</p>
575
+ </div>
576
  </div>
577
  """)
578
 
579
+ # Main Content
580
  with gr.Row():
581
+ with gr.Column():
582
+ gr.HTML("""
583
+ <div class="content-container">
584
+ <div class="card">
585
+ <p class="info-text">
586
+ Sistem ini menggunakan model fusion yang menggabungkan analisis gambar dan teks untuk mendeteksi konten perjudian pada website. Masukkan URL website yang ingin dianalisis.
587
+ </p>
588
+ </div>
589
+ </div>
590
+ """)
591
 
592
  with gr.Tabs():
593
+ with gr.Tab("Analisis URL", id="single"):
594
  with gr.Row():
595
+ with gr.Column():
596
+ gr.HTML("""
597
+ <div class="content-container">
598
+ <div class="card">
599
+ <h2 class="section-title">Masukkan URL Website</h2>
600
+ <p class="info-text" style="margin-bottom: 20px;">Masukkan URL lengkap website yang ingin dianalisis. Sistem akan mengambil screenshot dan menganalisis kontennya.</p>
601
+ </div>
602
+ </div>
603
+ """)
604
+
605
+ with gr.Row():
606
+ with gr.Column():
607
  url_input = gr.Textbox(
608
+ label="URL Website",
609
  placeholder="https://example.com",
 
610
  lines=1
611
  )
612
  predict_button = gr.Button(
613
+ "Analisis Website",
614
  variant="primary",
615
  size="lg"
616
  )
617
 
618
+ with gr.Row():
619
+ with gr.Column():
620
+ gr.HTML("""
621
+ <div class="content-container">
622
+ <div class="card">
623
+ <h2 class="section-title">Hasil Analisis</h2>
624
+ </div>
625
+ </div>
626
+ """)
627
 
 
628
  with gr.Row():
629
  with gr.Column(scale=1):
 
630
  label_output = gr.Label(
631
+ label="Hasil Prediksi",
632
  value={"Gambling": 0.0, "Non-Gambling": 0.0},
633
  num_top_classes=2
634
  )
635
  confidence_output = gr.Markdown(
636
+ value="",
637
+ label="Tingkat Keyakinan"
638
  )
639
  model_info_output = gr.Markdown(
640
  value="",
641
+ label="Informasi Model"
642
  )
643
 
644
  with gr.Column(scale=1):
 
645
  screenshot_output = gr.Image(
646
+ label="Screenshot Website",
647
  type="filepath",
648
  height=400
649
  )
650
 
651
+ with gr.Row():
652
+ with gr.Column():
653
+ with gr.Accordion("Detail Analisis Teks", open=False):
654
+ with gr.Row():
655
+ with gr.Column():
656
+ raw_text_output = gr.Textbox(
657
+ label="Teks Mentah (Raw OCR)",
658
+ lines=6,
659
+ interactive=False,
660
+ placeholder="Teks yang diekstrak dari screenshot akan muncul di sini..."
661
+ )
662
+ with gr.Column():
663
+ cleaned_text_output = gr.Textbox(
664
+ label="Teks yang Diproses",
665
+ lines=6,
666
+ interactive=False,
667
+ placeholder="Teks yang sudah dibersihkan akan muncul di sini..."
668
+ )
 
 
 
669
 
670
  predict_button.click(
671
  fn=predict_single_url,
 
680
  ]
681
  )
682
 
683
+ with gr.Tab("Analisis Batch", id="batch"):
684
+ with gr.Row():
685
+ with gr.Column():
686
+ gr.HTML("""
687
+ <div class="content-container">
688
+ <div class="card">
689
+ <h2 class="section-title">Analisis Multiple URL</h2>
690
+ <p class="info-text">Upload file teks (.txt) yang berisi beberapa URL (satu URL per baris) untuk dianalisis sekaligus. Hasil akan ditampilkan dalam format tabel.</p>
691
+ </div>
692
+ </div>
693
+ """)
694
 
695
  with gr.Row():
696
  with gr.Column():
697
  file_input = gr.File(
698
+ label="Upload File URL (.txt)",
699
  file_types=[".txt"]
700
  )
701
+ gr.Markdown("**Format file:** Satu URL per baris")
702
  batch_predict_button = gr.Button(
703
+ "Proses Batch",
704
  variant="primary",
705
  size="lg"
706
  )
707
 
 
 
708
  with gr.Row():
709
+ with gr.Column():
710
+ batch_output = gr.DataFrame(
711
+ label="Hasil Analisis",
712
+ wrap=True,
713
+ interactive=False
714
+ )
715
 
716
  batch_predict_button.click(
717
  fn=predict_batch_urls,
 
720
  )
721
 
722
  # Footer
723
+ gr.HTML("""
724
+ <div class="footer-text">
725
+ <p>Powered by PyTorch Gradio EfficientNet • IndoBERT</p>
726
+ <p style="margin-top: 8px;">Tool ini untuk keperluan edukasi dan penelitian</p>
727
+ </div>
 
728
  """)
729
 
730
  app.launch()