rdsarjito committed on
Commit
c48b7e8
·
1 Parent(s): 83d56a4

[UPDATE]UI

Browse files
Files changed (1) hide show
  1. app.py +236 -46
app.py CHANGED
@@ -346,7 +346,9 @@ def predict_single_url(url):
346
  print(f"Processing URL: {url}")
347
  screenshot_path = take_screenshot(url)
348
  if not screenshot_path:
349
- return f"❌ Error: Unable to capture screenshot for {url}. This may be due to:\n• Too many redirects\n• Website blocking automated access\n• Network connectivity issues\n• Invalid URL", "Screenshot capture failed", None, "", ""
 
 
350
 
351
  text = extract_text_from_image(screenshot_path)
352
  raw_text = text # Store raw text before cleaning
@@ -363,11 +365,22 @@ def predict_single_url(url):
363
  threshold = 0.6
364
  is_gambling = image_probs[0] > threshold
365
 
366
- label = "Gambling" if is_gambling else "Non-Gambling"
367
- confidence = image_probs[0].item() if is_gambling else 1 - image_probs[0].item()
 
 
 
 
 
 
 
 
 
 
 
368
  print(f"[Image-Only] URL: {url}")
369
- print(f"Prediction: {label} | Confidence: {confidence:.2f}\n")
370
- return label, f"Confidence: {confidence:.2f} (Image-Only Model)", screenshot_path, raw_text, ""
371
 
372
  else:
373
  clean_text_data = clean_text(text)
@@ -382,16 +395,36 @@ def predict_single_url(url):
382
  threshold = 0.6
383
  is_gambling = fused_probs[0] > threshold
384
 
385
- label = "Gambling" if is_gambling else "Non-Gambling"
386
- confidence = fused_probs[0].item() if is_gambling else 1 - fused_probs[0].item()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
387
 
388
  # ✨ Log detail
389
  print(f"[Fusion Model] URL: {url}")
390
  print(f"Image Model Prediction Probability: {image_probs[0]:.2f}")
391
  print(f"Text Model Prediction Probability: {text_probs[0]:.2f}")
392
- print(f"Fusion Final Prediction: {label} | Confidence: {confidence:.2f}\n")
393
 
394
- return label, f"Confidence: {confidence:.2f} (Fusion Model)", screenshot_path, raw_text, clean_text_data
395
 
396
  def predict_batch_urls(file_obj):
397
  results = []
@@ -414,44 +447,201 @@ def predict_batch_urls(file_obj):
414
 
415
  # --- Gradio App ---
416
 
417
- with gr.Blocks() as app:
418
- gr.Markdown("# 🕵️ Gambling Website Detection (URL Based)")
419
-
420
- with gr.Tab("Single URL"):
421
- url_input = gr.Textbox(label="Enter Website URL")
422
- predict_button = gr.Button("Predict")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
423
 
424
- with gr.Row():
425
- with gr.Column():
426
- label_output = gr.Label()
427
- confidence_output = gr.Textbox(label="Confidence", interactive=False)
428
-
429
- with gr.Column():
430
- screenshot_output = gr.Image(label="Screenshot", type="filepath")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
431
 
432
- with gr.Row():
433
- with gr.Column():
434
- raw_text_output = gr.Textbox(label="Raw OCR Text", lines=5)
435
- with gr.Column():
436
- cleaned_text_output = gr.Textbox(label="Cleaned Text", lines=5)
437
-
438
- predict_button.click(
439
- fn=predict_single_url,
440
- inputs=url_input,
441
- outputs=[
442
- label_output,
443
- confidence_output,
444
- screenshot_output,
445
- raw_text_output,
446
- cleaned_text_output
447
- ]
448
- )
449
-
450
- with gr.Tab("Batch URLs"):
451
- file_input = gr.File(label="Upload .txt file with URLs (one per line)")
452
- batch_predict_button = gr.Button("Batch Predict")
453
- batch_output = gr.DataFrame()
454
-
455
- batch_predict_button.click(fn=predict_batch_urls, inputs=file_input, outputs=batch_output)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
456
 
457
  app.launch()
 
346
  print(f"Processing URL: {url}")
347
  screenshot_path = take_screenshot(url)
348
  if not screenshot_path:
349
+ error_label = {"Error": 1.0, "Non-Gambling": 0.0, "Gambling": 0.0}
350
+ error_msg = f"**❌ Error:** Unable to capture screenshot for `{url}`\n\n**Possible reasons:**\n• Too many redirects\n• Website blocking automated access\n• Network connectivity issues\n• Invalid URL"
351
+ return error_label, error_msg, None, "", "", "**Model:** Screenshot capture failed"
352
 
353
  text = extract_text_from_image(screenshot_path)
354
  raw_text = text # Store raw text before cleaning
 
365
  threshold = 0.6
366
  is_gambling = image_probs[0] > threshold
367
 
368
+ gambling_prob = image_probs[0].item()
369
+ non_gambling_prob = 1 - gambling_prob
370
+
371
+ label_dict = {
372
+ "Gambling": gambling_prob,
373
+ "Non-Gambling": non_gambling_prob
374
+ }
375
+
376
+ confidence = gambling_prob if is_gambling else non_gambling_prob
377
+ confidence_md = f"**Confidence:** {confidence:.1%}\n\n**Model Used:** Image-Only Model (EfficientNet-B3)\n\n**Prediction:** {'🟥 Gambling' if is_gambling else '🟩 Non-Gambling'}"
378
+
379
+ model_info = f"**Model Type:** Image-Only\n**Architecture:** EfficientNet-B3\n**Gambling Probability:** {gambling_prob:.1%}\n**Non-Gambling Probability:** {non_gambling_prob:.1%}"
380
+
381
  print(f"[Image-Only] URL: {url}")
382
+ print(f"Prediction: {'Gambling' if is_gambling else 'Non-Gambling'} | Confidence: {confidence:.2f}\n")
383
+ return label_dict, confidence_md, screenshot_path, raw_text, "", model_info
384
 
385
  else:
386
  clean_text_data = clean_text(text)
 
395
  threshold = 0.6
396
  is_gambling = fused_probs[0] > threshold
397
 
398
+ gambling_prob = fused_probs[0].item()
399
+ non_gambling_prob = 1 - gambling_prob
400
+
401
+ label_dict = {
402
+ "Gambling": gambling_prob,
403
+ "Non-Gambling": non_gambling_prob
404
+ }
405
+
406
+ confidence = gambling_prob if is_gambling else non_gambling_prob
407
+ image_weight = weights[0].item()
408
+ text_weight = weights[1].item()
409
+
410
+ confidence_md = f"**Confidence:** {confidence:.1%}\n\n**Model Used:** Fusion Model (Image + Text)\n\n**Prediction:** {'🟥 Gambling' if is_gambling else '🟩 Non-Gambling'}"
411
+
412
+ model_info = f"""**Model Type:** Fusion Model
413
+ **Image Model:** EfficientNet-B3 (Weight: {image_weight:.1%})
414
+ **Text Model:** IndoBERT (Weight: {text_weight:.1%})
415
+
416
+ **Individual Predictions:**
417
+ - 🖼️ Image Model: {image_probs[0].item():.1%}
418
+ - 📝 Text Model: {text_probs[0].item():.1%}
419
+ - 🔗 Fusion Result: {gambling_prob:.1%}"""
420
 
421
  # ✨ Log detail
422
  print(f"[Fusion Model] URL: {url}")
423
  print(f"Image Model Prediction Probability: {image_probs[0]:.2f}")
424
  print(f"Text Model Prediction Probability: {text_probs[0]:.2f}")
425
+ print(f"Fusion Final Prediction: {'Gambling' if is_gambling else 'Non-Gambling'} | Confidence: {confidence:.2f}\n")
426
 
427
+ return label_dict, confidence_md, screenshot_path, raw_text, clean_text_data, model_info
428
 
429
  def predict_batch_urls(file_obj):
430
  results = []
 
447
 
448
  # --- Gradio App ---
449
 
450
+ # Custom CSS for professional styling
451
+ custom_css = """
452
+ .main-header {
453
+ text-align: center;
454
+ padding: 2rem 0;
455
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
456
+ color: white;
457
+ border-radius: 10px;
458
+ margin-bottom: 2rem;
459
+ }
460
+ .main-header h1 {
461
+ margin: 0;
462
+ font-size: 2.5rem;
463
+ font-weight: 700;
464
+ }
465
+ .main-header p {
466
+ margin: 0.5rem 0 0 0;
467
+ font-size: 1.1rem;
468
+ opacity: 0.9;
469
+ }
470
+ .result-card {
471
+ background: #f8f9fa;
472
+ padding: 1.5rem;
473
+ border-radius: 10px;
474
+ border: 2px solid #e9ecef;
475
+ margin: 1rem 0;
476
+ }
477
+ .info-box {
478
+ background: #e7f3ff;
479
+ padding: 1rem;
480
+ border-radius: 8px;
481
+ border-left: 4px solid #2196F3;
482
+ margin: 1rem 0;
483
+ }
484
+ .success-box {
485
+ background: #d4edda;
486
+ border-left-color: #28a745;
487
+ }
488
+ .warning-box {
489
+ background: #fff3cd;
490
+ border-left-color: #ffc107;
491
+ }
492
+ .gradio-container {
493
+ max-width: 1200px;
494
+ margin: 0 auto;
495
+ }
496
+ """
497
+
498
+ with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, title="Gambling Website Detector") as app:
499
+ # Header Section
500
+ with gr.Row():
501
+ gr.HTML("""
502
+ <div class="main-header">
503
+ <h1>🕵️ Gambling Website Detection System</h1>
504
+ <p>AI-Powered URL Analysis using Deep Learning Fusion Model</p>
505
+ </div>
506
+ """)
507
+
508
+ # Info Section
509
+ with gr.Row():
510
+ gr.Markdown("""
511
+ ### 📋 About This Tool
512
 
513
+ This advanced detection system uses a **fusion model** combining:
514
+ - 🖼️ **Image Analysis**: EfficientNet-B3 for visual content detection
515
+ - 📝 **Text Analysis**: IndoBERT for Indonesian text understanding
516
+ - 🔗 **Fusion Learning**: Intelligent combination of both modalities
517
+
518
+ Simply enter a website URL to analyze whether it contains gambling-related content.
519
+ """)
520
+
521
+ with gr.Tabs():
522
+ with gr.Tab("🔍 Single URL Analysis", id="single"):
523
+ with gr.Row():
524
+ with gr.Column(scale=2):
525
+ gr.Markdown("### Enter Website URL")
526
+ url_input = gr.Textbox(
527
+ label="Website URL",
528
+ placeholder="https://example.com",
529
+ info="Enter the full URL of the website you want to analyze",
530
+ lines=1
531
+ )
532
+ predict_button = gr.Button(
533
+ "🔎 Analyze Website",
534
+ variant="primary",
535
+ size="lg"
536
+ )
537
+
538
+ gr.Markdown("---")
539
+
540
+ # Results Section
541
+ with gr.Row():
542
+ with gr.Column(scale=1):
543
+ gr.Markdown("### 📊 Detection Results")
544
+ label_output = gr.Label(
545
+ label="Prediction Result",
546
+ value={"Gambling": 0.0, "Non-Gambling": 0.0},
547
+ num_top_classes=2
548
+ )
549
+ confidence_output = gr.Markdown(
550
+ value="**Confidence:** Waiting for analysis...",
551
+ label="Confidence Score"
552
+ )
553
+ model_info_output = gr.Markdown(
554
+ value="",
555
+ label="Model Information"
556
+ )
557
 
558
+ with gr.Column(scale=1):
559
+ gr.Markdown("### 📸 Website Screenshot")
560
+ screenshot_output = gr.Image(
561
+ label="Captured Screenshot",
562
+ type="filepath",
563
+ height=400
564
+ )
565
+
566
+ gr.Markdown("---")
567
+
568
+ # Text Analysis Section
569
+ with gr.Accordion("📝 Text Analysis Details", open=False):
570
+ with gr.Row():
571
+ with gr.Column():
572
+ gr.Markdown("#### Raw OCR Text")
573
+ raw_text_output = gr.Textbox(
574
+ label="Extracted Text (Raw)",
575
+ lines=8,
576
+ interactive=False,
577
+ placeholder="Raw text extracted from the screenshot will appear here..."
578
+ )
579
+ with gr.Column():
580
+ gr.Markdown("#### Processed Text")
581
+ cleaned_text_output = gr.Textbox(
582
+ label="Cleaned Text (Processed)",
583
+ lines=8,
584
+ interactive=False,
585
+ placeholder="Processed and cleaned text will appear here..."
586
+ )
587
+
588
+ predict_button.click(
589
+ fn=predict_single_url,
590
+ inputs=url_input,
591
+ outputs=[
592
+ label_output,
593
+ confidence_output,
594
+ screenshot_output,
595
+ raw_text_output,
596
+ cleaned_text_output,
597
+ model_info_output
598
+ ]
599
+ )
600
+
601
+ with gr.Tab("📦 Batch URL Analysis", id="batch"):
602
+ gr.Markdown("""
603
+ ### Batch Processing
604
+
605
+ Upload a text file containing multiple URLs (one per line) to analyze them all at once.
606
+ The results will be displayed in a table format.
607
+ """)
608
+
609
+ with gr.Row():
610
+ with gr.Column():
611
+ file_input = gr.File(
612
+ label="Upload URL File (.txt)",
613
+ file_types=[".txt"],
614
+ info="Upload a .txt file with one URL per line"
615
+ )
616
+ batch_predict_button = gr.Button(
617
+ "🚀 Process Batch",
618
+ variant="primary",
619
+ size="lg"
620
+ )
621
+
622
+ gr.Markdown("---")
623
+
624
+ with gr.Row():
625
+ gr.Markdown("### 📋 Batch Results")
626
+ batch_output = gr.DataFrame(
627
+ label="Analysis Results",
628
+ wrap=True,
629
+ interactive=False
630
+ )
631
+
632
+ batch_predict_button.click(
633
+ fn=predict_batch_urls,
634
+ inputs=file_input,
635
+ outputs=batch_output
636
+ )
637
+
638
+ # Footer
639
+ gr.Markdown("---")
640
+ gr.Markdown("""
641
+ <div style="text-align: center; color: #666; padding: 1rem;">
642
+ <p>Powered by PyTorch • Gradio • EfficientNet • IndoBERT</p>
643
+ <p style="font-size: 0.9rem;">⚠️ This tool is for educational and research purposes only</p>
644
+ </div>
645
+ """)
646
 
647
  app.launch()