rdsarjito commited on
Commit
5a643a9
·
1 Parent(s): b89e337
Files changed (1) hide show
  1. app.py +193 -165
app.py CHANGED
@@ -362,8 +362,8 @@ def predict_single_url(url):
362
  screenshot_path = take_screenshot(url)
363
  if not screenshot_path:
364
  error_label = {"Error": 1.0, "Non-Gambling": 0.0, "Gambling": 0.0}
365
- error_msg = f"**Gagal mengambil screenshot**\n\nURL: `{url}`\n\n**Kemungkinan penyebab:**\n• Terlalu banyak redirect\n• Website memblokir akses otomatis\n• Masalah koneksi jaringan\n• URL tidak valid"
366
- return error_label, error_msg, None, "", "", "**Status:** Gagal mengambil screenshot"
367
 
368
  text = extract_text_from_image(screenshot_path)
369
  raw_text = text # Store raw text before cleaning
@@ -389,10 +389,9 @@ def predict_single_url(url):
389
  }
390
 
391
  confidence = gambling_prob if is_gambling else non_gambling_prob
392
- result_text = "Gambling" if is_gambling else "Non-Gambling"
393
- confidence_md = f"**Tingkat Keyakinan:** {confidence:.1%}\n\n**Model:** Image-Only (EfficientNet-B3)\n\n**Hasil:** {result_text}"
394
 
395
- model_info = f"**Tipe Model:** Image-Only\n**Arsitektur:** EfficientNet-B3\n**Probabilitas Gambling:** {gambling_prob:.1%}\n**Probabilitas Non-Gambling:** {non_gambling_prob:.1%}"
396
 
397
  print(f"[Image-Only] URL: {url}")
398
  print(f"Prediction: {'Gambling' if is_gambling else 'Non-Gambling'} | Confidence: {confidence:.2f}\n")
@@ -432,17 +431,16 @@ def predict_single_url(url):
432
  image_weight = 0.5
433
  text_weight = 0.5
434
 
435
- result_text = "Gambling" if is_gambling else "Non-Gambling"
436
- confidence_md = f"**Tingkat Keyakinan:** {confidence:.1%}\n\n**Model:** Fusion Model (Image + Text)\n\n**Hasil:** {result_text}"
437
 
438
- model_info = f"""**Tipe Model:** Fusion Model (MLP)
439
  **Image Model:** EfficientNet-B3
440
  **Text Model:** IndoBERT
441
 
442
- **Prediksi Individual:**
443
  - Image Model: {image_probs[0].item():.1%}
444
  - Text Model: {text_probs[0].item():.1%}
445
- - Hasil Fusion: {gambling_prob:.1%}"""
446
 
447
  # ✨ Log detail
448
  print(f"[Fusion Model] URL: {url}")
@@ -473,199 +471,230 @@ def predict_batch_urls(file_obj):
473
 
474
  # --- Gradio App ---
475
 
476
- # Custom CSS - Tokopedia style
477
  custom_css = """
478
- .header-container {
479
- background: #fff;
480
- border-bottom: 1px solid #e5e5e5;
481
- padding: 20px 0;
482
- margin-bottom: 30px;
483
  }
484
- .header-title {
485
- font-size: 24px;
486
- font-weight: 600;
487
- color: #333;
 
 
 
 
 
 
488
  margin: 0;
489
- padding: 0;
 
 
 
490
  }
491
- .header-subtitle {
492
- font-size: 14px;
493
- color: #666;
494
- margin: 5px 0 0 0;
 
 
495
  }
496
- .content-container {
497
- max-width: 1200px;
498
  margin: 0 auto;
499
- padding: 0 20px;
500
  }
501
- .card {
502
- background: #fff;
503
- border: 1px solid #e5e5e5;
504
- border-radius: 8px;
505
- padding: 24px;
506
- margin-bottom: 20px;
507
  }
508
- .section-title {
509
- font-size: 18px;
510
  font-weight: 600;
511
- color: #333;
512
- margin: 0 0 20px 0;
513
- padding-bottom: 12px;
514
- border-bottom: 2px solid #42b549;
515
  }
516
- .info-text {
517
- font-size: 14px;
518
- color: #666;
519
  line-height: 1.6;
520
- margin: 0;
521
  }
522
- .button-primary {
523
- background: #42b549;
524
- color: #fff;
525
  border: none;
526
- padding: 12px 32px;
527
- border-radius: 4px;
528
  font-weight: 500;
529
- cursor: pointer;
530
  }
531
- .button-primary:hover {
532
- background: #3aa040;
 
 
533
  }
534
- .result-box {
535
- background: #f8f9fa;
536
- border: 1px solid #e5e5e5;
537
- border-radius: 8px;
538
- padding: 20px;
539
- margin: 15px 0;
540
  }
541
- .footer-text {
542
- text-align: center;
543
- color: #999;
544
- font-size: 12px;
545
- padding: 20px 0;
546
- border-top: 1px solid #e5e5e5;
547
- margin-top: 40px;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
548
  }
549
  """
550
 
551
- # Create custom theme with Tokopedia colors
552
- tokopedia_theme = gr.themes.Default(
553
- primary_hue=gr.themes.colors.green,
554
- font=("Inter", "ui-sans-serif", "system-ui", "sans-serif"),
555
  ).set(
556
- button_primary_background_fill="#42b549",
557
- button_primary_background_fill_hover="#3aa040",
558
  button_primary_text_color="#ffffff",
559
- border_color_accent="#42b549",
560
- border_color_primary="#e5e5e5",
 
561
  background_fill_primary="#ffffff",
562
- background_fill_secondary="#f8f9fa",
563
- body_text_color="#333333",
564
- body_text_color_subdued="#666666",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
565
  )
566
 
567
- with gr.Blocks(theme=tokopedia_theme, css=custom_css, title="Gambling Website Detector") as app:
568
  # Header Section
569
  with gr.Row():
570
  gr.HTML("""
571
- <div class="header-container">
572
- <div class="content-container">
573
- <h1 class="header-title">Gambling Website Detector</h1>
574
- <p class="header-subtitle">Analisis website untuk mendeteksi konten perjudian menggunakan teknologi deep learning</p>
575
- </div>
576
  </div>
577
  """)
578
 
579
- # Main Content
580
  with gr.Row():
581
- with gr.Column():
582
- gr.HTML("""
583
- <div class="content-container">
584
- <div class="card">
585
- <p class="info-text">
586
- Sistem ini menggunakan model fusion yang menggabungkan analisis gambar dan teks untuk mendeteksi konten perjudian pada website. Masukkan URL website yang ingin dianalisis.
587
- </p>
588
- </div>
589
- </div>
590
- """)
591
 
592
  with gr.Tabs():
593
- with gr.Tab("Analisis URL", id="single"):
594
- with gr.Row():
595
- with gr.Column():
596
- gr.HTML("""
597
- <div class="content-container">
598
- <div class="card">
599
- <h2 class="section-title">Masukkan URL Website</h2>
600
- <p class="info-text" style="margin-bottom: 20px;">Masukkan URL lengkap website yang ingin dianalisis. Sistem akan mengambil screenshot dan menganalisis kontennya.</p>
601
- </div>
602
- </div>
603
- """)
604
-
605
  with gr.Row():
606
- with gr.Column():
 
607
  url_input = gr.Textbox(
608
- label="URL Website",
609
  placeholder="https://example.com",
 
610
  lines=1
611
  )
612
  predict_button = gr.Button(
613
- "Analisis Website",
614
  variant="primary",
615
  size="lg"
616
  )
617
 
618
- with gr.Row():
619
- with gr.Column():
620
- gr.HTML("""
621
- <div class="content-container">
622
- <div class="card">
623
- <h2 class="section-title">Hasil Analisis</h2>
624
- </div>
625
- </div>
626
- """)
627
 
 
628
  with gr.Row():
629
  with gr.Column(scale=1):
 
630
  label_output = gr.Label(
631
- label="Hasil Prediksi",
632
  value={"Gambling": 0.0, "Non-Gambling": 0.0},
633
  num_top_classes=2
634
  )
635
  confidence_output = gr.Markdown(
636
- value="",
637
- label="Tingkat Keyakinan"
638
  )
639
  model_info_output = gr.Markdown(
640
  value="",
641
- label="Informasi Model"
642
  )
643
 
644
  with gr.Column(scale=1):
 
645
  screenshot_output = gr.Image(
646
- label="Screenshot Website",
647
  type="filepath",
648
  height=400
649
  )
650
 
651
- with gr.Row():
652
- with gr.Column():
653
- with gr.Accordion("Detail Analisis Teks", open=False):
654
- with gr.Row():
655
- with gr.Column():
656
- raw_text_output = gr.Textbox(
657
- label="Teks Mentah (Raw OCR)",
658
- lines=6,
659
- interactive=False,
660
- placeholder="Teks yang diekstrak dari screenshot akan muncul di sini..."
661
- )
662
- with gr.Column():
663
- cleaned_text_output = gr.Textbox(
664
- label="Teks yang Diproses",
665
- lines=6,
666
- interactive=False,
667
- placeholder="Teks yang sudah dibersihkan akan muncul di sini..."
668
- )
 
 
 
669
 
670
  predict_button.click(
671
  fn=predict_single_url,
@@ -680,38 +709,36 @@ with gr.Blocks(theme=tokopedia_theme, css=custom_css, title="Gambling Website De
680
  ]
681
  )
682
 
683
- with gr.Tab("Analisis Batch", id="batch"):
684
- with gr.Row():
685
- with gr.Column():
686
- gr.HTML("""
687
- <div class="content-container">
688
- <div class="card">
689
- <h2 class="section-title">Analisis Multiple URL</h2>
690
- <p class="info-text">Upload file teks (.txt) yang berisi beberapa URL (satu URL per baris) untuk dianalisis sekaligus. Hasil akan ditampilkan dalam format tabel.</p>
691
- </div>
692
- </div>
693
- """)
694
 
695
  with gr.Row():
696
  with gr.Column():
697
  file_input = gr.File(
698
- label="Upload File URL (.txt)",
699
  file_types=[".txt"]
700
  )
701
- gr.Markdown("**Format file:** Satu URL per baris")
702
  batch_predict_button = gr.Button(
703
- "Proses Batch",
704
  variant="primary",
705
  size="lg"
706
  )
707
 
 
 
708
  with gr.Row():
709
- with gr.Column():
710
- batch_output = gr.DataFrame(
711
- label="Hasil Analisis",
712
- wrap=True,
713
- interactive=False
714
- )
715
 
716
  batch_predict_button.click(
717
  fn=predict_batch_urls,
@@ -720,11 +747,12 @@ with gr.Blocks(theme=tokopedia_theme, css=custom_css, title="Gambling Website De
720
  )
721
 
722
  # Footer
723
- gr.HTML("""
724
- <div class="footer-text">
725
- <p>Powered by PyTorch Gradio EfficientNet IndoBERT</p>
726
- <p style="margin-top: 8px;">Tool ini untuk keperluan edukasi dan penelitian</p>
727
- </div>
 
728
  """)
729
 
730
  app.launch()
 
362
  screenshot_path = take_screenshot(url)
363
  if not screenshot_path:
364
  error_label = {"Error": 1.0, "Non-Gambling": 0.0, "Gambling": 0.0}
365
+ error_msg = f"**Error:** Unable to capture screenshot for `{url}`\n\n**Possible reasons:**\n• Too many redirects\n• Website blocking automated access\n• Network connectivity issues\n• Invalid URL"
366
+ return error_label, error_msg, None, "", "", "**Model:** Screenshot capture failed"
367
 
368
  text = extract_text_from_image(screenshot_path)
369
  raw_text = text # Store raw text before cleaning
 
389
  }
390
 
391
  confidence = gambling_prob if is_gambling else non_gambling_prob
392
+ confidence_md = f"**Confidence:** {confidence:.1%}\n\n**Model Used:** Image-Only Model (EfficientNet-B3)\n\n**Prediction:** {'Gambling' if is_gambling else 'Non-Gambling'}"
 
393
 
394
+ model_info = f"**Model Type:** Image-Only\n**Architecture:** EfficientNet-B3\n**Gambling Probability:** {gambling_prob:.1%}\n**Non-Gambling Probability:** {non_gambling_prob:.1%}"
395
 
396
  print(f"[Image-Only] URL: {url}")
397
  print(f"Prediction: {'Gambling' if is_gambling else 'Non-Gambling'} | Confidence: {confidence:.2f}\n")
 
431
  image_weight = 0.5
432
  text_weight = 0.5
433
 
434
+ confidence_md = f"**Confidence:** {confidence:.1%}\n\n**Model Used:** Fusion Model (Image + Text)\n\n**Prediction:** {'Gambling' if is_gambling else 'Non-Gambling'}"
 
435
 
436
+ model_info = f"""**Model Type:** Fusion Model (MLP)
437
  **Image Model:** EfficientNet-B3
438
  **Text Model:** IndoBERT
439
 
440
+ **Individual Predictions:**
441
  - Image Model: {image_probs[0].item():.1%}
442
  - Text Model: {text_probs[0].item():.1%}
443
+ - Fusion Result: {gambling_prob:.1%}"""
444
 
445
  # ✨ Log detail
446
  print(f"[Fusion Model] URL: {url}")
 
471
 
472
  # --- Gradio App ---
473
 
474
+ # Custom CSS for professional styling
475
  custom_css = """
476
+ * {
477
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Helvetica Neue', Arial, sans-serif;
 
 
 
478
  }
479
+ .main-header {
480
+ text-align: center;
481
+ padding: 3rem 2rem;
482
+ background: linear-gradient(135deg, #1e3a5f 0%, #2c5282 100%);
483
+ color: #ffffff;
484
+ border-radius: 8px;
485
+ margin-bottom: 2.5rem;
486
+ box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
487
+ }
488
+ .main-header h1 {
489
  margin: 0;
490
+ font-size: 2.25rem;
491
+ font-weight: 600;
492
+ letter-spacing: -0.5px;
493
+ color: #ffffff;
494
  }
495
+ .main-header p {
496
+ margin: 0.75rem 0 0 0;
497
+ font-size: 1rem;
498
+ opacity: 0.95;
499
+ font-weight: 400;
500
+ color: #e2e8f0;
501
  }
502
+ .gradio-container {
503
+ max-width: 1400px;
504
  margin: 0 auto;
505
+ padding: 0 1.5rem;
506
  }
507
+ .gradio-block {
508
+ background: #ffffff;
 
 
 
 
509
  }
510
+ h2, h3, h4 {
511
+ color: #1a202c;
512
  font-weight: 600;
513
+ letter-spacing: -0.3px;
 
 
 
514
  }
515
+ .gr-markdown {
516
+ color: #4a5568;
 
517
  line-height: 1.6;
 
518
  }
519
+ .gr-button {
520
+ background: #2c5282;
521
+ color: #ffffff;
522
  border: none;
 
 
523
  font-weight: 500;
524
+ transition: all 0.2s ease;
525
  }
526
+ .gr-button:hover {
527
+ background: #2a4d7a;
528
+ transform: translateY(-1px);
529
+ box-shadow: 0 4px 8px rgba(44, 82, 130, 0.3);
530
  }
531
+ .gr-textbox, .gr-textbox input {
532
+ border: 1px solid #cbd5e0;
533
+ border-radius: 6px;
534
+ transition: border-color 0.2s ease;
 
 
535
  }
536
+ .gr-textbox:focus-within, .gr-textbox input:focus {
537
+ border-color: #2c5282;
538
+ box-shadow: 0 0 0 3px rgba(44, 82, 130, 0.1);
539
+ }
540
+ .gr-tabs {
541
+ border-bottom: 2px solid #e2e8f0;
542
+ }
543
+ .gr-tab {
544
+ color: #4a5568;
545
+ font-weight: 500;
546
+ padding: 1rem 1.5rem;
547
+ }
548
+ .gr-tab.selected {
549
+ color: #2c5282;
550
+ border-bottom: 2px solid #2c5282;
551
+ }
552
+ .gr-label {
553
+ background: #f7fafc;
554
+ border: 1px solid #e2e8f0;
555
+ border-radius: 6px;
556
+ }
557
+ .gr-image {
558
+ border: 1px solid #e2e8f0;
559
+ border-radius: 6px;
560
+ overflow: hidden;
561
+ }
562
+ .gr-accordion {
563
+ border: 1px solid #e2e8f0;
564
+ border-radius: 6px;
565
+ background: #f7fafc;
566
+ }
567
+ .gr-dataframe {
568
+ border: 1px solid #e2e8f0;
569
+ border-radius: 6px;
570
+ }
571
+ hr {
572
+ border: none;
573
+ border-top: 1px solid #e2e8f0;
574
+ margin: 2rem 0;
575
  }
576
  """
577
 
578
+ # Professional theme configuration
579
+ professional_theme = gr.themes.Default(
580
+ primary_hue=gr.themes.colors.blue,
581
+ font=("-apple-system", "BlinkMacSystemFont", "Segoe UI", "Roboto", "Helvetica Neue", "Arial", "sans-serif"),
582
  ).set(
583
+ button_primary_background_fill="#2c5282",
584
+ button_primary_background_fill_hover="#2a4d7a",
585
  button_primary_text_color="#ffffff",
586
+ button_primary_background_fill_dark="#2c5282",
587
+ border_color_primary="#e2e8f0",
588
+ border_color_accent="#2c5282",
589
  background_fill_primary="#ffffff",
590
+ background_fill_secondary="#f7fafc",
591
+ body_text_color="#1a202c",
592
+ body_text_color_subdued="#4a5568",
593
+ block_background_fill="#ffffff",
594
+ block_border_color="#e2e8f0",
595
+ block_border_width="1px",
596
+ block_radius="8px",
597
+ block_shadow="0 2px 4px rgba(0,0,0,0.05)",
598
+ input_background_fill="#ffffff",
599
+ input_border_color="#cbd5e0",
600
+ input_border_width="1px",
601
+ input_radius="6px",
602
+ checkbox_background_color="#2c5282",
603
+ checkbox_border_color="#cbd5e0",
604
+ checkbox_border_radius="4px",
605
+ slider_color="#2c5282",
606
+ progress_color="#2c5282",
607
  )
608
 
609
+ with gr.Blocks(theme=professional_theme, css=custom_css, title="Gambling Website Detector") as app:
610
  # Header Section
611
  with gr.Row():
612
  gr.HTML("""
613
+ <div class="main-header">
614
+ <h1>Gambling Website Detection System</h1>
615
+ <p>AI-Powered URL Analysis using Deep Learning Fusion Model</p>
 
 
616
  </div>
617
  """)
618
 
619
+ # Info Section
620
  with gr.Row():
621
+ gr.Markdown("""
622
+ ### About This Tool
623
+
624
+ This advanced detection system uses a **fusion model** combining:
625
+ - **Image Analysis**: EfficientNet-B3 for visual content detection
626
+ - **Text Analysis**: IndoBERT for Indonesian text understanding
627
+ - **Fusion Learning**: Intelligent combination of both modalities
628
+
629
+ Simply enter a website URL to analyze whether it contains gambling-related content.
630
+ """)
631
 
632
  with gr.Tabs():
633
+ with gr.Tab("Single URL Analysis", id="single"):
 
 
 
 
 
 
 
 
 
 
 
634
  with gr.Row():
635
+ with gr.Column(scale=2):
636
+ gr.Markdown("### Enter Website URL")
637
  url_input = gr.Textbox(
638
+ label="Website URL",
639
  placeholder="https://example.com",
640
+ info="Enter the full URL of the website you want to analyze",
641
  lines=1
642
  )
643
  predict_button = gr.Button(
644
+ "Analyze Website",
645
  variant="primary",
646
  size="lg"
647
  )
648
 
649
+ gr.Markdown("---")
 
 
 
 
 
 
 
 
650
 
651
+ # Results Section
652
  with gr.Row():
653
  with gr.Column(scale=1):
654
+ gr.Markdown("### Detection Results")
655
  label_output = gr.Label(
656
+ label="Prediction Result",
657
  value={"Gambling": 0.0, "Non-Gambling": 0.0},
658
  num_top_classes=2
659
  )
660
  confidence_output = gr.Markdown(
661
+ value="**Confidence:** Waiting for analysis...",
662
+ label="Confidence Score"
663
  )
664
  model_info_output = gr.Markdown(
665
  value="",
666
+ label="Model Information"
667
  )
668
 
669
  with gr.Column(scale=1):
670
+ gr.Markdown("### Website Screenshot")
671
  screenshot_output = gr.Image(
672
+ label="Captured Screenshot",
673
  type="filepath",
674
  height=400
675
  )
676
 
677
+ gr.Markdown("---")
678
+
679
+ # Text Analysis Section
680
+ with gr.Accordion("Text Analysis Details", open=False):
681
+ with gr.Row():
682
+ with gr.Column():
683
+ gr.Markdown("#### Raw OCR Text")
684
+ raw_text_output = gr.Textbox(
685
+ label="Extracted Text (Raw)",
686
+ lines=8,
687
+ interactive=False,
688
+ placeholder="Raw text extracted from the screenshot will appear here..."
689
+ )
690
+ with gr.Column():
691
+ gr.Markdown("#### Processed Text")
692
+ cleaned_text_output = gr.Textbox(
693
+ label="Cleaned Text (Processed)",
694
+ lines=8,
695
+ interactive=False,
696
+ placeholder="Processed and cleaned text will appear here..."
697
+ )
698
 
699
  predict_button.click(
700
  fn=predict_single_url,
 
709
  ]
710
  )
711
 
712
+ with gr.Tab("Batch URL Analysis", id="batch"):
713
+ gr.Markdown("""
714
+ ### Batch Processing
715
+
716
+ Upload a text file containing multiple URLs (one per line) to analyze them all at once.
717
+ The results will be displayed in a table format.
718
+ """)
 
 
 
 
719
 
720
  with gr.Row():
721
  with gr.Column():
722
  file_input = gr.File(
723
+ label="Upload URL File (.txt)",
724
  file_types=[".txt"]
725
  )
726
+ gr.Markdown("**Tip:** Upload a .txt file with one URL per line")
727
  batch_predict_button = gr.Button(
728
+ "Process Batch",
729
  variant="primary",
730
  size="lg"
731
  )
732
 
733
+ gr.Markdown("---")
734
+
735
  with gr.Row():
736
+ gr.Markdown("### Batch Results")
737
+ batch_output = gr.DataFrame(
738
+ label="Analysis Results",
739
+ wrap=True,
740
+ interactive=False
741
+ )
742
 
743
  batch_predict_button.click(
744
  fn=predict_batch_urls,
 
747
  )
748
 
749
  # Footer
750
+ gr.Markdown("---")
751
+ gr.Markdown("""
752
+ <div style="text-align: center; color: #4a5568; padding: 2rem 1rem; font-size: 0.875rem; line-height: 1.6;">
753
+ <p style="margin: 0.5rem 0; color: #2c5282; font-weight: 500;">Powered by PyTorch Gradio EfficientNet • IndoBERT</p>
754
+ <p style="margin: 0.5rem 0; color: #718096;">This tool is for educational and research purposes only</p>
755
+ </div>
756
  """)
757
 
758
  app.launch()