rdsarjito committed on
Commit
dab4200
·
1 Parent(s): c0f13b0

[UPDATE]UI

Browse files
Files changed (1) hide show
  1. app.py +35 -341
app.py CHANGED
@@ -7,7 +7,7 @@ import torch.nn as nn
7
  from PIL import Image
8
  import requests
9
  import easyocr
10
- from transformers import AutoTokenizer, AutoModel
11
  from torchvision import transforms
12
  from torchvision import models
13
  from torchvision.transforms import functional as F
@@ -75,60 +75,18 @@ class LateFusionModel(nn.Module):
75
 
76
  return fused_logits, image_logits, text_logits, weights
77
 
78
- # Load Fusion Model
79
- # First, create the model architecture
80
- print("Creating fusion model architecture...")
81
- # Create image model (same as image-only model)
82
- fusion_image_model = models.efficientnet_b3(weights=models.EfficientNet_B3_Weights.DEFAULT)
83
- num_features = fusion_image_model.classifier[1].in_features
84
- fusion_image_model.classifier = nn.Linear(num_features, 1)
85
-
86
- # Create text model (IndoBERT)
87
- fusion_text_model_base = AutoModel.from_pretrained('indobenchmark/indobert-base-p1')
88
- # Add classification head for text model
89
- class TextClassifier(nn.Module):
90
- def __init__(self, base_model):
91
- super(TextClassifier, self).__init__()
92
- self.base_model = base_model
93
- self.classifier = nn.Linear(base_model.config.hidden_size, 1)
94
-
95
- def forward(self, input_ids, attention_mask):
96
- outputs = self.base_model(input_ids=input_ids, attention_mask=attention_mask)
97
- pooled_output = outputs.pooler_output
98
- logits = self.classifier(pooled_output)
99
- # Return object with logits attribute
100
- class ModelOutput:
101
- def __init__(self, logits):
102
- self.logits = logits
103
- return ModelOutput(logits)
104
-
105
- fusion_text_model = TextClassifier(fusion_text_model_base)
106
-
107
- # Create fusion model
108
- fusion_model = LateFusionModel(fusion_image_model, fusion_text_model)
109
-
110
- # Load state_dict
111
- fusion_model_path = "models/best_mlp_fusion_model_state_dict.pt"
112
- if os.path.exists(fusion_model_path):
113
- try:
114
- state_dict = torch.load(fusion_model_path, map_location=device, weights_only=False)
115
- # Handle potential DataParallel prefix
116
- if any(key.startswith('module.') for key in state_dict.keys()):
117
- state_dict = {key.replace('module.', ''): value for key, value in state_dict.items()}
118
- fusion_model.load_state_dict(state_dict, strict=False)
119
- fusion_model.to(device)
120
- fusion_model.eval()
121
- print("Fusion model loaded successfully!")
122
- except Exception as e:
123
- print(f"Warning: Error loading fusion model state_dict: {e}")
124
- print("Using fusion model with default weights...")
125
- fusion_model.to(device)
126
- fusion_model.eval()
127
  else:
128
- print(f"Warning: Fusion model not found at {fusion_model_path}")
129
- print("Creating fusion model with default weights...")
130
- fusion_model.to(device)
131
- fusion_model.eval()
 
 
 
132
 
133
  # Load Image-Only Model
134
  # Load image model from state_dict
@@ -349,8 +307,7 @@ def predict_single_url(url):
349
  print(f"Processing URL: {url}")
350
  screenshot_path = take_screenshot(url)
351
  if not screenshot_path:
352
- error_msg = f"❌ Gagal mengambil screenshot untuk {url}\n\nKemungkinan penyebab:\n• Terlalu banyak redirect\n• Website memblokir akses otomatis\n• Masalah koneksi jaringan\n• URL tidak valid"
353
- return {"Gambling": 0.0, "Non-Gambling": 1.0}, f"Error: Screenshot capture failed", None, "", ""
354
 
355
  text = extract_text_from_image(screenshot_path)
356
  raw_text = text # Store raw text before cleaning
@@ -371,11 +328,7 @@ def predict_single_url(url):
371
  confidence = image_probs[0].item() if is_gambling else 1 - image_probs[0].item()
372
  print(f"[Image-Only] URL: {url}")
373
  print(f"Prediction: {label} | Confidence: {confidence:.2f}\n")
374
-
375
- # Format label output as dictionary for better display
376
- label_dict = {"Gambling": confidence if is_gambling else 0.0,
377
- "Non-Gambling": 1 - confidence if is_gambling else confidence}
378
- return label_dict, f"{confidence:.1%} (Image-Only Model)", screenshot_path, raw_text, ""
379
 
380
  else:
381
  clean_text_data = clean_text(text)
@@ -399,10 +352,7 @@ def predict_single_url(url):
399
  print(f"Text Model Prediction Probability: {text_probs[0]:.2f}")
400
  print(f"Fusion Final Prediction: {label} | Confidence: {confidence:.2f}\n")
401
 
402
- # Format label output as dictionary for better display
403
- label_dict = {"Gambling": confidence if is_gambling else 0.0,
404
- "Non-Gambling": 1 - confidence if is_gambling else confidence}
405
- return label_dict, f"{confidence:.1%} (Fusion Model)", screenshot_path, raw_text, clean_text_data
406
 
407
  def predict_batch_urls(file_obj):
408
  results = []
@@ -425,253 +375,26 @@ def predict_batch_urls(file_obj):
425
 
426
  # --- Gradio App ---
427
 
428
- # Custom CSS for professional styling
429
- custom_css = """
430
- /* Main container styling */
431
- .gradio-container {
432
- font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
433
- max-width: 1200px !important;
434
- }
435
-
436
- /* Header styling */
437
- .main-header {
438
- text-align: center;
439
- padding: 2rem 0;
440
- background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
441
- color: white;
442
- border-radius: 12px;
443
- margin-bottom: 2rem;
444
- box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
445
- }
446
-
447
- .main-header h1 {
448
- margin: 0;
449
- font-size: 2.5rem;
450
- font-weight: 700;
451
- letter-spacing: -0.5px;
452
- }
453
-
454
- .main-header p {
455
- margin: 0.5rem 0 0 0;
456
- font-size: 1.1rem;
457
- opacity: 0.95;
458
- }
459
-
460
- /* Tab styling */
461
- .tab-nav {
462
- background: #f8f9fa;
463
- border-radius: 8px;
464
- padding: 0.5rem;
465
- margin-bottom: 1.5rem;
466
- }
467
-
468
- /* Input section styling */
469
- .input-section {
470
- background: #ffffff;
471
- padding: 1.5rem;
472
- border-radius: 12px;
473
- box-shadow: 0 2px 8px rgba(0, 0, 0, 0.08);
474
- margin-bottom: 1.5rem;
475
- }
476
-
477
- /* Button styling */
478
- .primary-button {
479
- background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
480
- color: white !important;
481
- border: none !important;
482
- padding: 0.75rem 2rem !important;
483
- font-size: 1rem !important;
484
- font-weight: 600 !important;
485
- border-radius: 8px !important;
486
- transition: all 0.3s ease !important;
487
- box-shadow: 0 4px 6px rgba(102, 126, 234, 0.3) !important;
488
- }
489
-
490
- .primary-button:hover {
491
- transform: translateY(-2px);
492
- box-shadow: 0 6px 12px rgba(102, 126, 234, 0.4) !important;
493
- }
494
-
495
- /* Output section styling */
496
- .output-section {
497
- background: #f8f9fa;
498
- padding: 1.5rem;
499
- border-radius: 12px;
500
- margin-top: 1.5rem;
501
- }
502
-
503
- /* Label output styling */
504
- .label-container {
505
- background: white;
506
- padding: 1.5rem;
507
- border-radius: 10px;
508
- box-shadow: 0 2px 4px rgba(0, 0, 0, 0.05);
509
- text-align: center;
510
- }
511
-
512
- .label-gambling {
513
- color: #dc3545;
514
- font-size: 1.5rem;
515
- font-weight: 700;
516
- }
517
-
518
- .label-non-gambling {
519
- color: #28a745;
520
- font-size: 1.5rem;
521
- font-weight: 700;
522
- }
523
-
524
- /* Confidence badge */
525
- .confidence-badge {
526
- display: inline-block;
527
- padding: 0.5rem 1rem;
528
- border-radius: 20px;
529
- font-weight: 600;
530
- background: #e9ecef;
531
- color: #495057;
532
- }
533
-
534
- /* Image container */
535
- .image-container {
536
- border-radius: 10px;
537
- overflow: hidden;
538
- box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
539
- }
540
-
541
- /* Text output styling */
542
- .text-output {
543
- background: white;
544
- padding: 1rem;
545
- border-radius: 8px;
546
- border: 1px solid #e9ecef;
547
- font-family: 'Monaco', 'Courier New', monospace;
548
- font-size: 0.9rem;
549
- }
550
-
551
- /* Info box */
552
- .info-box {
553
- background: #e7f3ff;
554
- border-left: 4px solid #2196F3;
555
- padding: 1rem;
556
- border-radius: 4px;
557
- margin: 1rem 0;
558
- }
559
-
560
- /* Section titles */
561
- .section-title {
562
- font-size: 1.25rem;
563
- font-weight: 600;
564
- color: #495057;
565
- margin-bottom: 1rem;
566
- display: flex;
567
- align-items: center;
568
- gap: 0.5rem;
569
- }
570
-
571
- /* Card styling */
572
- .card {
573
- background: white;
574
- border-radius: 10px;
575
- padding: 1.5rem;
576
- box-shadow: 0 2px 4px rgba(0, 0, 0, 0.05);
577
- margin-bottom: 1rem;
578
- }
579
-
580
- /* Loading animation */
581
- @keyframes pulse {
582
- 0%, 100% {
583
- opacity: 1;
584
- }
585
- 50% {
586
- opacity: 0.5;
587
- }
588
- }
589
-
590
- .loading {
591
- animation: pulse 2s cubic-bezier(0.4, 0, 0.6, 1) infinite;
592
- }
593
- """
594
-
595
- with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as app:
596
- # Header Section
597
- gr.HTML("""
598
- <div class="main-header">
599
- <h1>🕵️ Gambling Website Detection</h1>
600
- <p>AI-Powered Detection System untuk Identifikasi Website Perjudian</p>
601
- </div>
602
- """)
603
-
604
- gr.Markdown("""
605
- <div style="text-align: center; color: #6c757d; margin-bottom: 2rem;">
606
- Sistem deteksi cerdas yang menggunakan <strong>Fusion Model</strong> (Image + Text) untuk mengidentifikasi
607
- website perjudian dengan akurasi tinggi. Upload URL atau batch file untuk analisis.
608
- </div>
609
- """)
610
-
611
- with gr.Tab("🔍 Single URL Detection"):
612
- with gr.Row():
613
- with gr.Column(scale=1):
614
- gr.Markdown("### 📝 Input URL")
615
- url_input = gr.Textbox(
616
- label="Masukkan URL Website",
617
- placeholder="Contoh: https://example.com",
618
- info="Masukkan URL lengkap website yang ingin dianalisis",
619
- scale=1
620
- )
621
- predict_button = gr.Button(
622
- "🚀 Analisis Website",
623
- variant="primary",
624
- scale=1,
625
- elem_classes="primary-button"
626
- )
627
-
628
- gr.Markdown("---")
629
 
630
  with gr.Row():
631
- with gr.Column(scale=1):
632
- gr.Markdown("### 📊 Hasil Prediksi")
633
- label_output = gr.Label(
634
- label="Status Deteksi",
635
- elem_classes="label-container"
636
- )
637
- confidence_output = gr.Textbox(
638
- label="Tingkat Keyakinan",
639
- interactive=False,
640
- elem_classes="confidence-badge"
641
- )
642
 
643
- with gr.Column(scale=1):
644
- gr.Markdown("### 📸 Screenshot Website")
645
- screenshot_output = gr.Image(
646
- label="Screenshot",
647
- type="filepath",
648
- elem_classes="image-container"
649
- )
650
 
651
  with gr.Row():
652
- with gr.Column(scale=1):
653
- gr.Markdown("### 📄 Raw OCR Text")
654
- raw_text_output = gr.Textbox(
655
- label="Teks yang Diekstrak dari Gambar",
656
- lines=6,
657
- interactive=False,
658
- elem_classes="text-output"
659
- )
660
- with gr.Column(scale=1):
661
- gr.Markdown("### ✨ Cleaned Text")
662
- cleaned_text_output = gr.Textbox(
663
- label="Teks yang Sudah Dibersihkan",
664
- lines=6,
665
- interactive=False,
666
- elem_classes="text-output"
667
- )
668
-
669
- gr.Markdown("""
670
- <div class="info-box">
671
- <strong>ℹ️ Informasi:</strong> Sistem akan mengambil screenshot website, mengekstrak teks menggunakan OCR,
672
- dan menganalisis menggunakan model AI untuk menentukan apakah website tersebut terkait perjudian.
673
- </div>
674
- """)
675
 
676
  predict_button.click(
677
  fn=predict_single_url,
@@ -685,40 +408,11 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as app:
685
  ]
686
  )
687
 
688
- with gr.Tab("📦 Batch URL Detection"):
689
- gr.Markdown("### 📤 Upload File Batch")
690
- gr.Markdown("""
691
- <div class="info-box">
692
- <strong>📋 Format File:</strong> Upload file .txt yang berisi daftar URL, satu URL per baris.
693
- Sistem akan memproses semua URL secara berurutan dan menampilkan hasil dalam tabel.
694
- </div>
695
- """)
696
-
697
- with gr.Row():
698
- with gr.Column(scale=2):
699
- file_input = gr.File(
700
- label="Pilih File .txt",
701
- file_types=[".txt"],
702
- type="filepath"
703
- )
704
- with gr.Column(scale=1):
705
- batch_predict_button = gr.Button(
706
- "🚀 Proses Batch",
707
- variant="primary",
708
- elem_classes="primary-button"
709
- )
710
-
711
- gr.Markdown("### 📊 Hasil Batch Processing")
712
- batch_output = gr.DataFrame(
713
- label="Hasil Analisis",
714
- wrap=True,
715
- interactive=False
716
- )
717
 
718
- batch_predict_button.click(
719
- fn=predict_batch_urls,
720
- inputs=file_input,
721
- outputs=batch_output
722
- )
723
 
724
  app.launch()
 
7
  from PIL import Image
8
  import requests
9
  import easyocr
10
+ from transformers import AutoTokenizer
11
  from torchvision import transforms
12
  from torchvision import models
13
  from torchvision.transforms import functional as F
 
75
 
76
  return fused_logits, image_logits, text_logits, weights
77
 
78
+ # Load model
79
+ model_path = "models/best_fusion_model.pt"
80
+ if os.path.exists(model_path):
81
+ fusion_model = torch.load(model_path, map_location=device, weights_only=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  else:
83
+ model_path = hf_hub_download(repo_id="azzandr/gambling-fusion-model", filename="best_fusion_model.pt")
84
+ fusion_model = torch.load(model_path, map_location=device, weights_only=False)
85
+
86
+ # fusion_model = unwrap_dataparallel(fusion_model)
87
+ fusion_model.to(device)
88
+ fusion_model.eval()
89
+ print("Fusion model loaded successfully!")
90
 
91
  # Load Image-Only Model
92
  # Load image model from state_dict
 
307
  print(f"Processing URL: {url}")
308
  screenshot_path = take_screenshot(url)
309
  if not screenshot_path:
310
+ return f"❌ Error: Unable to capture screenshot for {url}. This may be due to:\n• Too many redirects\n• Website blocking automated access\n• Network connectivity issues\n• Invalid URL", "Screenshot capture failed", None, "", ""
 
311
 
312
  text = extract_text_from_image(screenshot_path)
313
  raw_text = text # Store raw text before cleaning
 
328
  confidence = image_probs[0].item() if is_gambling else 1 - image_probs[0].item()
329
  print(f"[Image-Only] URL: {url}")
330
  print(f"Prediction: {label} | Confidence: {confidence:.2f}\n")
331
+ return label, f"Confidence: {confidence:.2f} (Image-Only Model)", screenshot_path, raw_text, ""
 
 
 
 
332
 
333
  else:
334
  clean_text_data = clean_text(text)
 
352
  print(f"Text Model Prediction Probability: {text_probs[0]:.2f}")
353
  print(f"Fusion Final Prediction: {label} | Confidence: {confidence:.2f}\n")
354
 
355
+ return label, f"Confidence: {confidence:.2f} (Fusion Model)", screenshot_path, raw_text, clean_text_data
 
 
 
356
 
357
  def predict_batch_urls(file_obj):
358
  results = []
 
375
 
376
  # --- Gradio App ---
377
 
378
+ with gr.Blocks() as app:
379
+ gr.Markdown("# 🕵️ Gambling Website Detection (URL Based)")
380
+
381
+ with gr.Tab("Single URL"):
382
+ url_input = gr.Textbox(label="Enter Website URL")
383
+ predict_button = gr.Button("Predict")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
384
 
385
  with gr.Row():
386
+ with gr.Column():
387
+ label_output = gr.Label()
388
+ confidence_output = gr.Textbox(label="Confidence", interactive=False)
 
 
 
 
 
 
 
 
389
 
390
+ with gr.Column():
391
+ screenshot_output = gr.Image(label="Screenshot", type="filepath")
 
 
 
 
 
392
 
393
  with gr.Row():
394
+ with gr.Column():
395
+ raw_text_output = gr.Textbox(label="Raw OCR Text", lines=5)
396
+ with gr.Column():
397
+ cleaned_text_output = gr.Textbox(label="Cleaned Text", lines=5)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
398
 
399
  predict_button.click(
400
  fn=predict_single_url,
 
408
  ]
409
  )
410
 
411
+ with gr.Tab("Batch URLs"):
412
+ file_input = gr.File(label="Upload .txt file with URLs (one per line)")
413
+ batch_predict_button = gr.Button("Batch Predict")
414
+ batch_output = gr.DataFrame()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
415
 
416
+ batch_predict_button.click(fn=predict_batch_urls, inputs=file_input, outputs=batch_output)
 
 
 
 
417
 
418
  app.launch()