Spaces:
Sleeping
Sleeping
rdsarjito
committed on
Commit
·
c4aea7e
1
Parent(s):
6afe076
[FIX]UI
Browse files
app.py
CHANGED
|
@@ -362,8 +362,8 @@ def predict_single_url(url):
|
|
| 362 |
screenshot_path = take_screenshot(url)
|
| 363 |
if not screenshot_path:
|
| 364 |
error_label = {"Error": 1.0, "Non-Gambling": 0.0, "Gambling": 0.0}
|
| 365 |
-
error_msg = f"
|
| 366 |
-
return error_label, error_msg, None, "", "", "**
|
| 367 |
|
| 368 |
text = extract_text_from_image(screenshot_path)
|
| 369 |
raw_text = text # Store raw text before cleaning
|
|
@@ -389,9 +389,10 @@ def predict_single_url(url):
|
|
| 389 |
}
|
| 390 |
|
| 391 |
confidence = gambling_prob if is_gambling else non_gambling_prob
|
| 392 |
-
|
|
|
|
| 393 |
|
| 394 |
-
model_info = f"**Model
|
| 395 |
|
| 396 |
print(f"[Image-Only] URL: {url}")
|
| 397 |
print(f"Prediction: {'Gambling' if is_gambling else 'Non-Gambling'} | Confidence: {confidence:.2f}\n")
|
|
@@ -431,16 +432,17 @@ def predict_single_url(url):
|
|
| 431 |
image_weight = 0.5
|
| 432 |
text_weight = 0.5
|
| 433 |
|
| 434 |
-
|
|
|
|
| 435 |
|
| 436 |
-
model_info = f"""**Model
|
| 437 |
**Image Model:** EfficientNet-B3
|
| 438 |
**Text Model:** IndoBERT
|
| 439 |
|
| 440 |
-
**Individual
|
| 441 |
-
-
|
| 442 |
-
-
|
| 443 |
-
-
|
| 444 |
|
| 445 |
# ✨ Log detail
|
| 446 |
print(f"[Fusion Model] URL: {url}")
|
|
@@ -471,143 +473,190 @@ def predict_batch_urls(file_obj):
|
|
| 471 |
|
| 472 |
# --- Gradio App ---
|
| 473 |
|
| 474 |
-
# Custom CSS
|
| 475 |
custom_css = """
|
| 476 |
-
.
|
| 477 |
-
|
| 478 |
-
|
| 479 |
-
|
| 480 |
-
|
| 481 |
-
border-radius: 10px;
|
| 482 |
-
margin-bottom: 2rem;
|
| 483 |
}
|
| 484 |
-
.
|
|
|
|
|
|
|
|
|
|
| 485 |
margin: 0;
|
| 486 |
-
|
| 487 |
-
font-weight: 700;
|
| 488 |
}
|
| 489 |
-
.
|
| 490 |
-
|
| 491 |
-
|
| 492 |
-
|
| 493 |
}
|
| 494 |
-
.
|
| 495 |
-
|
| 496 |
-
|
| 497 |
-
|
| 498 |
-
border: 2px solid #e9ecef;
|
| 499 |
-
margin: 1rem 0;
|
| 500 |
}
|
| 501 |
-
.
|
| 502 |
-
background: #
|
| 503 |
-
|
| 504 |
border-radius: 8px;
|
| 505 |
-
|
| 506 |
-
margin:
|
| 507 |
}
|
| 508 |
-
.
|
| 509 |
-
|
| 510 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 511 |
}
|
| 512 |
-
.
|
| 513 |
-
|
| 514 |
-
|
|
|
|
|
|
|
| 515 |
}
|
| 516 |
-
.
|
| 517 |
-
|
| 518 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 519 |
}
|
| 520 |
"""
|
| 521 |
|
| 522 |
-
with gr.Blocks(theme=gr.themes.
|
| 523 |
-
# Header
|
| 524 |
with gr.Row():
|
| 525 |
gr.HTML("""
|
| 526 |
-
<div class="
|
| 527 |
-
<
|
| 528 |
-
|
|
|
|
|
|
|
| 529 |
</div>
|
| 530 |
""")
|
| 531 |
|
| 532 |
-
#
|
| 533 |
with gr.Row():
|
| 534 |
-
gr.
|
| 535 |
-
|
| 536 |
-
|
| 537 |
-
|
| 538 |
-
|
| 539 |
-
|
| 540 |
-
|
| 541 |
-
|
| 542 |
-
|
| 543 |
-
|
| 544 |
|
| 545 |
with gr.Tabs():
|
| 546 |
-
with gr.Tab("
|
| 547 |
with gr.Row():
|
| 548 |
-
with gr.Column(
|
| 549 |
-
gr.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 550 |
url_input = gr.Textbox(
|
| 551 |
-
label="Website
|
| 552 |
placeholder="https://example.com",
|
| 553 |
-
|
| 554 |
-
|
| 555 |
)
|
| 556 |
predict_button = gr.Button(
|
| 557 |
-
"
|
| 558 |
variant="primary",
|
| 559 |
size="lg"
|
| 560 |
)
|
| 561 |
|
| 562 |
-
gr.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 563 |
|
| 564 |
-
# Results Section
|
| 565 |
with gr.Row():
|
| 566 |
with gr.Column(scale=1):
|
| 567 |
-
gr.Markdown("### 📊 Detection Results")
|
| 568 |
label_output = gr.Label(
|
| 569 |
-
label="
|
| 570 |
value={"Gambling": 0.0, "Non-Gambling": 0.0},
|
| 571 |
-
num_top_classes=2
|
|
|
|
| 572 |
)
|
| 573 |
confidence_output = gr.Markdown(
|
| 574 |
-
value="
|
| 575 |
-
label="
|
|
|
|
| 576 |
)
|
| 577 |
model_info_output = gr.Markdown(
|
| 578 |
value="",
|
| 579 |
-
label="Model
|
|
|
|
| 580 |
)
|
| 581 |
|
| 582 |
with gr.Column(scale=1):
|
| 583 |
-
gr.Markdown("### 📸 Website Screenshot")
|
| 584 |
screenshot_output = gr.Image(
|
| 585 |
-
label="
|
| 586 |
type="filepath",
|
| 587 |
-
height=400
|
|
|
|
| 588 |
)
|
| 589 |
|
| 590 |
-
gr.
|
| 591 |
-
|
| 592 |
-
|
| 593 |
-
|
| 594 |
-
|
| 595 |
-
|
| 596 |
-
|
| 597 |
-
|
| 598 |
-
|
| 599 |
-
|
| 600 |
-
|
| 601 |
-
|
| 602 |
-
|
| 603 |
-
|
| 604 |
-
|
| 605 |
-
|
| 606 |
-
|
| 607 |
-
|
| 608 |
-
|
| 609 |
-
|
| 610 |
-
)
|
| 611 |
|
| 612 |
predict_button.click(
|
| 613 |
fn=predict_single_url,
|
|
@@ -622,36 +671,40 @@ with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, title="Gambling Website D
|
|
| 622 |
]
|
| 623 |
)
|
| 624 |
|
| 625 |
-
with gr.Tab("
|
| 626 |
-
gr.
|
| 627 |
-
|
| 628 |
-
|
| 629 |
-
|
| 630 |
-
|
| 631 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 632 |
|
| 633 |
with gr.Row():
|
| 634 |
with gr.Column():
|
| 635 |
file_input = gr.File(
|
| 636 |
-
label="Upload URL
|
| 637 |
-
file_types=[".txt"]
|
|
|
|
| 638 |
)
|
| 639 |
-
gr.Markdown("
|
| 640 |
batch_predict_button = gr.Button(
|
| 641 |
-
"
|
| 642 |
variant="primary",
|
| 643 |
size="lg"
|
| 644 |
)
|
| 645 |
|
| 646 |
-
gr.Markdown("---")
|
| 647 |
-
|
| 648 |
with gr.Row():
|
| 649 |
-
gr.
|
| 650 |
-
|
| 651 |
-
|
| 652 |
-
|
| 653 |
-
|
| 654 |
-
|
|
|
|
| 655 |
|
| 656 |
batch_predict_button.click(
|
| 657 |
fn=predict_batch_urls,
|
|
@@ -660,12 +713,11 @@ with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, title="Gambling Website D
|
|
| 660 |
)
|
| 661 |
|
| 662 |
# Footer
|
| 663 |
-
gr.
|
| 664 |
-
|
| 665 |
-
|
| 666 |
-
|
| 667 |
-
|
| 668 |
-
</div>
|
| 669 |
""")
|
| 670 |
|
| 671 |
app.launch()
|
|
|
|
| 362 |
screenshot_path = take_screenshot(url)
|
| 363 |
if not screenshot_path:
|
| 364 |
error_label = {"Error": 1.0, "Non-Gambling": 0.0, "Gambling": 0.0}
|
| 365 |
+
error_msg = f"**Gagal mengambil screenshot**\n\nURL: `{url}`\n\n**Kemungkinan penyebab:**\n• Terlalu banyak redirect\n• Website memblokir akses otomatis\n• Masalah koneksi jaringan\n• URL tidak valid"
|
| 366 |
+
return error_label, error_msg, None, "", "", "**Status:** Gagal mengambil screenshot"
|
| 367 |
|
| 368 |
text = extract_text_from_image(screenshot_path)
|
| 369 |
raw_text = text # Store raw text before cleaning
|
|
|
|
| 389 |
}
|
| 390 |
|
| 391 |
confidence = gambling_prob if is_gambling else non_gambling_prob
|
| 392 |
+
result_text = "Gambling" if is_gambling else "Non-Gambling"
|
| 393 |
+
confidence_md = f"**Tingkat Keyakinan:** {confidence:.1%}\n\n**Model:** Image-Only (EfficientNet-B3)\n\n**Hasil:** {result_text}"
|
| 394 |
|
| 395 |
+
model_info = f"**Tipe Model:** Image-Only\n**Arsitektur:** EfficientNet-B3\n**Probabilitas Gambling:** {gambling_prob:.1%}\n**Probabilitas Non-Gambling:** {non_gambling_prob:.1%}"
|
| 396 |
|
| 397 |
print(f"[Image-Only] URL: {url}")
|
| 398 |
print(f"Prediction: {'Gambling' if is_gambling else 'Non-Gambling'} | Confidence: {confidence:.2f}\n")
|
|
|
|
| 432 |
image_weight = 0.5
|
| 433 |
text_weight = 0.5
|
| 434 |
|
| 435 |
+
result_text = "Gambling" if is_gambling else "Non-Gambling"
|
| 436 |
+
confidence_md = f"**Tingkat Keyakinan:** {confidence:.1%}\n\n**Model:** Fusion Model (Image + Text)\n\n**Hasil:** {result_text}"
|
| 437 |
|
| 438 |
+
model_info = f"""**Tipe Model:** Fusion Model (MLP)
|
| 439 |
**Image Model:** EfficientNet-B3
|
| 440 |
**Text Model:** IndoBERT
|
| 441 |
|
| 442 |
+
**Prediksi Individual:**
|
| 443 |
+
- Image Model: {image_probs[0].item():.1%}
|
| 444 |
+
- Text Model: {text_probs[0].item():.1%}
|
| 445 |
+
- Hasil Fusion: {gambling_prob:.1%}"""
|
| 446 |
|
| 447 |
# ✨ Log detail
|
| 448 |
print(f"[Fusion Model] URL: {url}")
|
|
|
|
| 473 |
|
| 474 |
# --- Gradio App ---
|
| 475 |
|
| 476 |
+
# Custom CSS - Tokopedia style
|
| 477 |
custom_css = """
|
| 478 |
+
.header-container {
|
| 479 |
+
background: #fff;
|
| 480 |
+
border-bottom: 1px solid #e5e5e5;
|
| 481 |
+
padding: 20px 0;
|
| 482 |
+
margin-bottom: 30px;
|
|
|
|
|
|
|
| 483 |
}
|
| 484 |
+
.header-title {
|
| 485 |
+
font-size: 24px;
|
| 486 |
+
font-weight: 600;
|
| 487 |
+
color: #333;
|
| 488 |
margin: 0;
|
| 489 |
+
padding: 0;
|
|
|
|
| 490 |
}
|
| 491 |
+
.header-subtitle {
|
| 492 |
+
font-size: 14px;
|
| 493 |
+
color: #666;
|
| 494 |
+
margin: 5px 0 0 0;
|
| 495 |
}
|
| 496 |
+
.content-container {
|
| 497 |
+
max-width: 1200px;
|
| 498 |
+
margin: 0 auto;
|
| 499 |
+
padding: 0 20px;
|
|
|
|
|
|
|
| 500 |
}
|
| 501 |
+
.card {
|
| 502 |
+
background: #fff;
|
| 503 |
+
border: 1px solid #e5e5e5;
|
| 504 |
border-radius: 8px;
|
| 505 |
+
padding: 24px;
|
| 506 |
+
margin-bottom: 20px;
|
| 507 |
}
|
| 508 |
+
.section-title {
|
| 509 |
+
font-size: 18px;
|
| 510 |
+
font-weight: 600;
|
| 511 |
+
color: #333;
|
| 512 |
+
margin: 0 0 20px 0;
|
| 513 |
+
padding-bottom: 12px;
|
| 514 |
+
border-bottom: 2px solid #42b549;
|
| 515 |
}
|
| 516 |
+
.info-text {
|
| 517 |
+
font-size: 14px;
|
| 518 |
+
color: #666;
|
| 519 |
+
line-height: 1.6;
|
| 520 |
+
margin: 0;
|
| 521 |
}
|
| 522 |
+
.button-primary {
|
| 523 |
+
background: #42b549;
|
| 524 |
+
color: #fff;
|
| 525 |
+
border: none;
|
| 526 |
+
padding: 12px 32px;
|
| 527 |
+
border-radius: 4px;
|
| 528 |
+
font-weight: 500;
|
| 529 |
+
cursor: pointer;
|
| 530 |
+
}
|
| 531 |
+
.button-primary:hover {
|
| 532 |
+
background: #3aa040;
|
| 533 |
+
}
|
| 534 |
+
.result-box {
|
| 535 |
+
background: #f8f9fa;
|
| 536 |
+
border: 1px solid #e5e5e5;
|
| 537 |
+
border-radius: 8px;
|
| 538 |
+
padding: 20px;
|
| 539 |
+
margin: 15px 0;
|
| 540 |
+
}
|
| 541 |
+
.footer-text {
|
| 542 |
+
text-align: center;
|
| 543 |
+
color: #999;
|
| 544 |
+
font-size: 12px;
|
| 545 |
+
padding: 20px 0;
|
| 546 |
+
border-top: 1px solid #e5e5e5;
|
| 547 |
+
margin-top: 40px;
|
| 548 |
}
|
| 549 |
"""
|
| 550 |
|
| 551 |
+
with gr.Blocks(theme=gr.themes.Default(), css=custom_css, title="Gambling Website Detector") as app:
|
| 552 |
+
# Header
|
| 553 |
with gr.Row():
|
| 554 |
gr.HTML("""
|
| 555 |
+
<div class="header-container">
|
| 556 |
+
<div class="content-container">
|
| 557 |
+
<h1 class="header-title">Gambling Website Detector</h1>
|
| 558 |
+
<p class="header-subtitle">Analisis website untuk mendeteksi konten perjudian menggunakan teknologi deep learning</p>
|
| 559 |
+
</div>
|
| 560 |
</div>
|
| 561 |
""")
|
| 562 |
|
| 563 |
+
# Main Content
|
| 564 |
with gr.Row():
|
| 565 |
+
with gr.Column():
|
| 566 |
+
gr.HTML("""
|
| 567 |
+
<div class="content-container">
|
| 568 |
+
<div class="card">
|
| 569 |
+
<p class="info-text">
|
| 570 |
+
Sistem ini menggunakan model fusion yang menggabungkan analisis gambar dan teks untuk mendeteksi konten perjudian pada website. Masukkan URL website yang ingin dianalisis.
|
| 571 |
+
</p>
|
| 572 |
+
</div>
|
| 573 |
+
</div>
|
| 574 |
+
""")
|
| 575 |
|
| 576 |
with gr.Tabs():
|
| 577 |
+
with gr.Tab("Analisis URL", id="single"):
|
| 578 |
with gr.Row():
|
| 579 |
+
with gr.Column():
|
| 580 |
+
gr.HTML("""
|
| 581 |
+
<div class="content-container">
|
| 582 |
+
<div class="card">
|
| 583 |
+
<h2 class="section-title">Masukkan URL Website</h2>
|
| 584 |
+
<p class="info-text" style="margin-bottom: 20px;">Masukkan URL lengkap website yang ingin dianalisis. Sistem akan mengambil screenshot dan menganalisis kontennya.</p>
|
| 585 |
+
</div>
|
| 586 |
+
</div>
|
| 587 |
+
""")
|
| 588 |
+
|
| 589 |
+
with gr.Row():
|
| 590 |
+
with gr.Column():
|
| 591 |
url_input = gr.Textbox(
|
| 592 |
+
label="URL Website",
|
| 593 |
placeholder="https://example.com",
|
| 594 |
+
lines=1,
|
| 595 |
+
container=False
|
| 596 |
)
|
| 597 |
predict_button = gr.Button(
|
| 598 |
+
"Analisis Website",
|
| 599 |
variant="primary",
|
| 600 |
size="lg"
|
| 601 |
)
|
| 602 |
|
| 603 |
+
with gr.Row():
|
| 604 |
+
with gr.Column():
|
| 605 |
+
gr.HTML("""
|
| 606 |
+
<div class="content-container">
|
| 607 |
+
<div class="card">
|
| 608 |
+
<h2 class="section-title">Hasil Analisis</h2>
|
| 609 |
+
</div>
|
| 610 |
+
</div>
|
| 611 |
+
""")
|
| 612 |
|
|
|
|
| 613 |
with gr.Row():
|
| 614 |
with gr.Column(scale=1):
|
|
|
|
| 615 |
label_output = gr.Label(
|
| 616 |
+
label="Hasil Prediksi",
|
| 617 |
value={"Gambling": 0.0, "Non-Gambling": 0.0},
|
| 618 |
+
num_top_classes=2,
|
| 619 |
+
container=False
|
| 620 |
)
|
| 621 |
confidence_output = gr.Markdown(
|
| 622 |
+
value="",
|
| 623 |
+
label="Tingkat Keyakinan",
|
| 624 |
+
container=False
|
| 625 |
)
|
| 626 |
model_info_output = gr.Markdown(
|
| 627 |
value="",
|
| 628 |
+
label="Informasi Model",
|
| 629 |
+
container=False
|
| 630 |
)
|
| 631 |
|
| 632 |
with gr.Column(scale=1):
|
|
|
|
| 633 |
screenshot_output = gr.Image(
|
| 634 |
+
label="Screenshot Website",
|
| 635 |
type="filepath",
|
| 636 |
+
height=400,
|
| 637 |
+
container=False
|
| 638 |
)
|
| 639 |
|
| 640 |
+
with gr.Row():
|
| 641 |
+
with gr.Column():
|
| 642 |
+
with gr.Accordion("Detail Analisis Teks", open=False):
|
| 643 |
+
with gr.Row():
|
| 644 |
+
with gr.Column():
|
| 645 |
+
raw_text_output = gr.Textbox(
|
| 646 |
+
label="Teks Mentah (Raw OCR)",
|
| 647 |
+
lines=6,
|
| 648 |
+
interactive=False,
|
| 649 |
+
placeholder="Teks yang diekstrak dari screenshot akan muncul di sini...",
|
| 650 |
+
container=False
|
| 651 |
+
)
|
| 652 |
+
with gr.Column():
|
| 653 |
+
cleaned_text_output = gr.Textbox(
|
| 654 |
+
label="Teks yang Diproses",
|
| 655 |
+
lines=6,
|
| 656 |
+
interactive=False,
|
| 657 |
+
placeholder="Teks yang sudah dibersihkan akan muncul di sini...",
|
| 658 |
+
container=False
|
| 659 |
+
)
|
|
|
|
| 660 |
|
| 661 |
predict_button.click(
|
| 662 |
fn=predict_single_url,
|
|
|
|
| 671 |
]
|
| 672 |
)
|
| 673 |
|
| 674 |
+
with gr.Tab("Analisis Batch", id="batch"):
|
| 675 |
+
with gr.Row():
|
| 676 |
+
with gr.Column():
|
| 677 |
+
gr.HTML("""
|
| 678 |
+
<div class="content-container">
|
| 679 |
+
<div class="card">
|
| 680 |
+
<h2 class="section-title">Analisis Multiple URL</h2>
|
| 681 |
+
<p class="info-text">Upload file teks (.txt) yang berisi beberapa URL (satu URL per baris) untuk dianalisis sekaligus. Hasil akan ditampilkan dalam format tabel.</p>
|
| 682 |
+
</div>
|
| 683 |
+
</div>
|
| 684 |
+
""")
|
| 685 |
|
| 686 |
with gr.Row():
|
| 687 |
with gr.Column():
|
| 688 |
file_input = gr.File(
|
| 689 |
+
label="Upload File URL (.txt)",
|
| 690 |
+
file_types=[".txt"],
|
| 691 |
+
container=False
|
| 692 |
)
|
| 693 |
+
gr.Markdown("**Format file:** Satu URL per baris", container=False)
|
| 694 |
batch_predict_button = gr.Button(
|
| 695 |
+
"Proses Batch",
|
| 696 |
variant="primary",
|
| 697 |
size="lg"
|
| 698 |
)
|
| 699 |
|
|
|
|
|
|
|
| 700 |
with gr.Row():
|
| 701 |
+
with gr.Column():
|
| 702 |
+
batch_output = gr.DataFrame(
|
| 703 |
+
label="Hasil Analisis",
|
| 704 |
+
wrap=True,
|
| 705 |
+
interactive=False,
|
| 706 |
+
container=False
|
| 707 |
+
)
|
| 708 |
|
| 709 |
batch_predict_button.click(
|
| 710 |
fn=predict_batch_urls,
|
|
|
|
| 713 |
)
|
| 714 |
|
| 715 |
# Footer
|
| 716 |
+
gr.HTML("""
|
| 717 |
+
<div class="footer-text">
|
| 718 |
+
<p>Powered by PyTorch • Gradio • EfficientNet • IndoBERT</p>
|
| 719 |
+
<p style="margin-top: 8px;">Tool ini untuk keperluan edukasi dan penelitian</p>
|
| 720 |
+
</div>
|
|
|
|
| 721 |
""")
|
| 722 |
|
| 723 |
app.launch()
|