# (Hugging Face Spaces page residue removed — Space status banner, not code.)
| import gradio as gr | |
| import os | |
| import re | |
| import time | |
| import torch | |
| import torch.nn as nn | |
| from PIL import Image | |
| import requests | |
| import easyocr | |
| from transformers import AutoTokenizer, AutoModel | |
| from torchvision import transforms | |
| from torchvision import models | |
| from torchvision.transforms import functional as F | |
| import pandas as pd | |
| from huggingface_hub import hf_hub_download | |
| import warnings | |
| warnings.filterwarnings("ignore") | |
# --- Setup ---
# Device setup: prefer GPU when available; every model and tensor below
# is moved to this device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
# Load tokenizer for the Indonesian IndoBERT text branch.
tokenizer = AutoTokenizer.from_pretrained('indobenchmark/indobert-base-p1')
| # Image transformation | |
class ResizePadToSquare:
    """Resize an image to fit inside a target square, then pad with black.

    Aspect ratio is preserved via thumbnail(); the image is centred and
    the remainder filled with constant (zero) padding so the output is
    exactly target_size x target_size.
    """

    def __init__(self, target_size=300):
        self.target_size = target_size

    def __call__(self, img):
        side = self.target_size
        img = img.convert("RGB")
        # thumbnail() shrinks in place, keeping aspect ratio.
        img.thumbnail((side, side), Image.BILINEAR)
        w, h = img.size
        pad_w = side - w
        pad_h = side - h
        left = pad_w // 2
        top = pad_h // 2
        # (left, top, right, bottom) padding centres the image.
        img = F.pad(img, (left, top, pad_w - left, pad_h - top), fill=0, padding_mode='constant')
        return img
# Preprocessing for both image models: square-pad to 300px, convert to a
# tensor, then normalize with ImageNet statistics (matches the
# EfficientNet-B3 pretraining distribution).
transform = transforms.Compose([
    ResizePadToSquare(300),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])
# Screenshot folder: captured pages are written here before OCR.
SCREENSHOT_DIR = "screenshots"
os.makedirs(SCREENSHOT_DIR, exist_ok=True)
# Create OCR reader once at startup (initialization is expensive).
reader = easyocr.Reader(['id'])  # Indonesian language
print("OCR reader initialized.")
| # --- Model --- | |
class TextModelWithClassifier(nn.Module):
    """IndoBERT encoder with a single-logit binary classification head.

    The encoder attribute is deliberately named ``bert`` so the keys of
    previously saved state_dicts still line up on load.
    """

    def __init__(self, base_model):
        super().__init__()
        self.bert = base_model
        self.classifier = nn.Linear(base_model.config.hidden_size, 1)

    def forward(self, input_ids, attention_mask):
        encoded = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        # Prefer the pooled [CLS] representation; fall back to the raw
        # [CLS] hidden state for encoders without a pooler.
        if hasattr(encoded, 'pooler_output'):
            pooled = encoded.pooler_output
        else:
            pooled = encoded.last_hidden_state[:, 0]
        logits = self.classifier(pooled)
        # Mimic a HuggingFace-style output object exposing .logits.
        return type('Output', (), {'logits': logits})()
class LateFusionModel(nn.Module):
    """Late fusion of an image and a text classifier through a small MLP.

    Both branch models are frozen at inference time (no_grad); only their
    scalar logits are combined. The MLP layer sizes mirror the saved
    checkpoint: Linear(2, 16) -> ReLU -> Dropout -> Linear(16, 1).
    """

    def __init__(self, image_model, text_model):
        super().__init__()
        self.image_model = image_model
        self.text_model = text_model
        # Hidden width must match the checkpoint shapes [16,2]/[16]/[1,16].
        hidden_dim = 16
        self.fusion_mlp = nn.Sequential(
            nn.Linear(2, hidden_dim),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_dim, 1),
        )

    def forward(self, images, input_ids, attention_mask):
        # Branch predictions are treated as fixed features.
        with torch.no_grad():
            image_logits = self.image_model(images).squeeze(1)
            text_logits = self.text_model(
                input_ids=input_ids, attention_mask=attention_mask
            ).logits.squeeze(1)
        paired = torch.stack([image_logits, text_logits], dim=1)
        fused_logits = self.fusion_mlp(paired).squeeze(1)
        # Constant dummy weights kept for API compatibility with callers
        # that expect per-branch fusion weights.
        weights = torch.tensor([0.5, 0.5], device=fused_logits.device)
        return fused_logits, image_logits, text_logits, weights
# --- Load Fusion Model ---
# Build the architecture first, then load the fine-tuned weights on top.
image_model_for_fusion = models.efficientnet_b3(weights=models.EfficientNet_B3_Weights.DEFAULT)
num_features = image_model_for_fusion.classifier[1].in_features
# Match the saved checkpoint layout: weights live under classifier.1.
image_model_for_fusion.classifier = nn.Sequential(
    nn.Dropout(p=0.3, inplace=True),
    nn.Linear(num_features, 1)
)
text_base_model = AutoModel.from_pretrained('indobenchmark/indobert-base-p1')
text_model = TextModelWithClassifier(text_base_model)
fusion_model = LateFusionModel(image_model_for_fusion, text_model)


def _load_fusion_state(path, success_msg):
    """Load a state_dict into fusion_model, tolerating partial key mismatches.

    Fix: the strict/strict=False fallback was duplicated verbatim in the
    local and download branches; it now lives in one place.
    """
    state_dict = torch.load(path, map_location=device)
    try:
        fusion_model.load_state_dict(state_dict, strict=True)
        print(success_msg)
    except RuntimeError as e:
        # Partial load so a renamed/extra key does not prevent startup;
        # mismatches are logged for inspection.
        print("Warning: Some keys didn't match. Trying with strict=False...")
        print(f"Error details: {str(e)[:500]}")
        fusion_model.load_state_dict(state_dict, strict=False)
        print("Fusion model loaded with strict=False (some keys may be missing)")


# Prefer a local checkpoint; otherwise pull it from the Hugging Face Hub.
model_path = "models/best_mlp_fusion_model_state_dict.pt"
if os.path.exists(model_path):
    _load_fusion_state(model_path, "Fusion model loaded from local state_dict successfully!")
else:
    print("Fusion model not found locally. Downloading from Hugging Face Hub...")
    model_path = hf_hub_download(repo_id="azzandr/gambling-fusion-model",
                                 filename="best_mlp_fusion_model_state_dict.pt")
    _load_fusion_state(model_path, "Fusion model downloaded and loaded successfully!")
fusion_model.to(device)
fusion_model.eval()
print("Fusion model ready!")
# --- Load Image-Only Model ---
def _load_image_only_model(weights_path):
    """Build an EfficientNet-B3 binary classifier and load fine-tuned weights.

    Fix: model construction + loading was duplicated verbatim in the
    local and download branches; it now lives in one helper.
    """
    model = models.efficientnet_b3(weights=models.EfficientNet_B3_Weights.DEFAULT)
    # Single-logit head replaces the 1000-class ImageNet classifier.
    model.classifier = nn.Linear(model.classifier[1].in_features, 1)
    model.load_state_dict(torch.load(weights_path, map_location=device))
    model.to(device)
    model.eval()
    return model


# Prefer a local checkpoint; otherwise pull it from the Hugging Face Hub.
image_model_path = "models/best_image_model_Adam_lr0.0001_bs32_state_dict.pt"
if os.path.exists(image_model_path):
    image_only_model = _load_image_only_model(image_model_path)
    print("Image-only model loaded from state_dict successfully!")
else:
    print("Image-only model not found locally. Downloading from Hugging Face Hub...")
    image_model_path = hf_hub_download(repo_id="azzandr/gambling-image-model",
                                       filename="best_image_model_Adam_lr0.0001_bs32_state_dict.pt")
    image_only_model = _load_image_only_model(image_model_path)
    print("Image-only model downloaded and loaded successfully!")
| # --- Functions --- | |
def clean_text(text):
    """Normalize OCR text for the IndoBERT branch.

    Strips URLs and punctuation, lowercases, drops short/noisy tokens and
    returns "" when fewer than 5 words survive — the caller treats an
    empty string as "fall back to the image-only model".
    """
    keep_short = {"di", "ke", "ya"}

    # Basic cleaning: drop URLs, newlines and non-letter characters.
    text = re.sub(r"http\S+", "", text)
    text = re.sub(r"\n", " ", text)
    text = re.sub(r"[^a-zA-Z']", " ", text)
    text = re.sub(r"\s{2,}", " ", text).strip().lower()

    # Keep words longer than 2 chars plus whitelisted short Indonesian words.
    text = ' '.join(w for w in text.split() if len(w) > 2 or w in keep_short)

    # Drop noise tokens: all-vowel words, all-consonant words, very long words.
    for pattern in (r'\b[aeiou]+\b', r'\b[^aeiou\s]+\b', r'\b\w{20,}\b'):
        text = re.sub(pattern, '', text)
    text = re.sub(r'\s+', ' ', text).strip()

    # Too little text to trust the text branch.
    if len(text.split()) < 5:
        print(f"Cleaned text too short ({len(text.split())} words). Ignoring text.")
        return ""
    return text
# Screenshot API credential comes from the environment so no secret is
# committed to the repo.
SCREENSHOT_API_KEY = os.getenv("SCREENSHOT_API_KEY")  # taken from environment variable
# Constants for screenshot configuration.
# Phrases indicating the screenshot API hit a Cloudflare challenge page.
CLOUDFLARE_CHECK_KEYWORDS = ["Checking your browser", "Just a moment", "Cloudflare"]
def ensure_http(url):
    """Prepend "http://" when *url* lacks an explicit http(s) scheme."""
    if url.startswith(('http://', 'https://')):
        return url
    return 'http://' + url
def sanitize_filename(url):
    """Flatten *url* into a safe filename: unsafe characters become '_'."""
    unsafe = re.compile(r'[^\w\-_\. ]')
    return unsafe.sub('_', url)
def take_screenshot(url):
    """Capture a full-page screenshot of *url* via the APIFlash API.

    Returns the saved PNG path, or None when the API key is missing, the
    API fails, or a Cloudflare challenge cannot be bypassed.

    Fix: both requests.get calls now pass a timeout — without one a
    stalled API call would hang the worker indefinitely (a Timeout is
    caught by the outer except and reported as a failed screenshot).
    """
    url = ensure_http(url)
    filename = sanitize_filename(url) + '.png'
    filepath = os.path.join(SCREENSHOT_DIR, filename)
    try:
        if not SCREENSHOT_API_KEY:
            print("SCREENSHOT_API_KEY not found in environment.")
            return None
        api_url = "https://api.apiflash.com/v1/urltoimage"
        # Base parameters - only using supported parameters
        params = {
            "access_key": SCREENSHOT_API_KEY,
            "url": url,
            "format": "png",
            "wait_until": "network_idle",
            "delay": 2,
            "fail_on_status": "400,401,402,403,404,500,502,503,504",
            "fresh": "true",  # don't use a cached version
            "response_type": "image",
            "wait_for": "body"  # wait for <body> to be present
        }
        print(f"Taking screenshot of: {url}")
        response = requests.get(api_url, params=params, timeout=90)
        if response.status_code == 200:
            # Check the payload really is an image, not a JSON error body.
            if response.headers.get('content-type', '').startswith('image'):
                with open(filepath, 'wb') as f:
                    f.write(response.content)
                print(f"Screenshot taken successfully for URL: {url}")
                return filepath
            else:
                print("API returned non-image content")
                return None
        else:
            error_msg = response.text
            print(f"Screenshot failed: {error_msg}")
            # Check for Cloudflare detection
            if any(keyword.lower() in error_msg.lower() for keyword in CLOUDFLARE_CHECK_KEYWORDS):
                print("Cloudflare challenge detected, retrying with different parameters...")
                # Retry with a laxer wait condition and a longer delay.
                params.update({
                    "wait_until": "load",
                    "delay": 5
                })
                response = requests.get(api_url, params=params, timeout=90)
                if response.status_code == 200 and response.headers.get('content-type', '').startswith('image'):
                    with open(filepath, 'wb') as f:
                        f.write(response.content)
                    print("Screenshot taken successfully after Cloudflare retry")
                    return filepath
            return None
    except Exception as e:
        print(f"Error taking screenshot: {e}")
        return None
def resize_if_needed(image_path, max_mb=1, target_height=720):
    """Downscale the image in place to *target_height* when the file
    exceeds *max_mb* megabytes; aspect ratio is preserved."""
    size_mb = os.path.getsize(image_path) / (1024 * 1024)  # size in MB
    if size_mb <= max_mb:
        return
    try:
        with Image.open(image_path) as img:
            width, height = img.size
            if height > target_height:
                scale = target_height / float(height)
                new_width = int(float(width) * scale)
                resized = img.resize((new_width, target_height), Image.Resampling.LANCZOS)
                resized.save(image_path, optimize=True, quality=85)
                print(f"Image resized to {new_width}x{target_height}")
    except Exception as e:
        print(f"Resize error: {e}")
def easyocr_extract(image_path):
    """Run the module-level EasyOCR reader and join the detected fragments
    into one space-separated string; returns "" on any OCR failure."""
    try:
        fragments = reader.readtext(image_path, detail=0)
    except Exception as e:
        print(f"EasyOCR error: {e}")
        return ""
    text = " ".join(fragments)
    print(f"OCR text extracted from EasyOCR: {len(text)} characters")
    return text.strip()
| # def extract_text_from_image(image_path): | |
| # print("Skipping OCR. Forcing Image-Only prediction.") | |
| # return "" | |
def extract_text_from_image(image_path):
    """OCR a screenshot, preferring the OCR.Space API for small files.

    Falls back to the local EasyOCR reader when the API key is missing,
    the API errors out or returns no results, or the file exceeds the
    free-tier size limit. Returns "" on unrecoverable errors.

    Fixes: requests.post now has a timeout (a stalled call would hang
    the app), and an empty/missing 'ParsedResults' no longer raises —
    it falls back to EasyOCR like the other API failure paths.
    """
    try:
        resize_if_needed(image_path, max_mb=1, target_height=720)  # shrink first
        file_size = os.path.getsize(image_path) / (1024 * 1024)  # size in MB
        if file_size >= 1:
            print(f"Using EasyOCR for image ({file_size:.2f} MB)")
            return easyocr_extract(image_path)
        print(f"Using OCR.Space API for image ({file_size:.2f} MB)")
        api_key = os.getenv("OCR_SPACE_API_KEY")
        if not api_key:
            print("OCR_SPACE_API_KEY not found in environment. Using EasyOCR as fallback.")
            return easyocr_extract(image_path)
        with open(image_path, 'rb') as f:
            payload = {
                'isOverlayRequired': False,
                'apikey': api_key,
                'language': 'eng'
            }
            r = requests.post('https://api.ocr.space/parse/image',
                              files={'filename': f},
                              data=payload,
                              timeout=60)
        result = r.json()
        if result.get('IsErroredOnProcessing', False):
            print(f"OCR.Space API Error: {result.get('ErrorMessage')}")
            return easyocr_extract(image_path)
        parsed = result.get('ParsedResults') or []
        if not parsed:
            # API succeeded but returned nothing usable.
            print("OCR.Space returned no parsed results. Using EasyOCR as fallback.")
            return easyocr_extract(image_path)
        text = parsed[0].get('ParsedText', '')
        print(f"OCR text extracted from OCR.Space: {len(text)} characters")
        return text.strip()
    except Exception as e:
        print(f"OCR error: {e}")
        return ""
def prepare_data_for_model(image_path, text):
    """Build the (image_tensor, input_ids, attention_mask) triple consumed
    by the fusion model; all tensors are moved to the global device."""
    image_tensor = transform(Image.open(image_path)).unsqueeze(0).to(device)
    encoded = tokenizer.encode_plus(
        clean_text(text),
        add_special_tokens=True,
        max_length=128,
        padding='max_length',
        truncation=True,
        return_tensors='pt'
    )
    return (
        image_tensor,
        encoded['input_ids'].to(device),
        encoded['attention_mask'].to(device),
    )
def predict_single_url(url):
    """Classify one URL as Gambling vs Non-Gambling.

    Pipeline: screenshot -> OCR -> fusion model (image + text) when
    enough text is extracted, otherwise the image-only model.

    Returns a 6-tuple matching the Gradio outputs:
    (label_dict, confidence_md, screenshot_path, raw_text,
     cleaned_text, model_info_md).
    """
    print(f"Processing URL: {url}")
    screenshot_path = take_screenshot(url)
    if not screenshot_path:
        # Screenshot failed: surface an "Error" label plus a Markdown
        # explanation; the remaining outputs are emptied.
        error_label = {"Error": 1.0, "Non-Gambling": 0.0, "Gambling": 0.0}
        error_msg = f"**Gagal mengambil screenshot**\n\nURL: `{url}`\n\n**Kemungkinan penyebab:**\n• Terlalu banyak redirect\n• Website memblokir akses otomatis\n• Masalah koneksi jaringan\n• URL tidak valid"
        return error_label, error_msg, None, "", "", "**Status:** Gagal mengambil screenshot"
    text = extract_text_from_image(screenshot_path)
    raw_text = text  # store raw text before cleaning
    if not text.strip():  # if text is empty
        # --- Image-only path: no usable OCR text. ---
        print(f"No OCR text found for {url}. Using Image-Only Model.")
        image = Image.open(screenshot_path)
        image_tensor = transform(image).unsqueeze(0).to(device)
        with torch.no_grad():
            image_logits = image_only_model(image_tensor).squeeze(1)
            image_probs = torch.sigmoid(image_logits)
        # NOTE(review): 0.6 biases toward Non-Gambling — confirm it
        # matches the threshold chosen during validation.
        threshold = 0.6
        is_gambling = image_probs[0] > threshold
        gambling_prob = image_probs[0].item()
        non_gambling_prob = 1 - gambling_prob
        label_dict = {
            "Gambling": gambling_prob,
            "Non-Gambling": non_gambling_prob
        }
        confidence = gambling_prob if is_gambling else non_gambling_prob
        result_text = "Gambling" if is_gambling else "Non-Gambling"
        confidence_md = f"**Tingkat Keyakinan:** {confidence:.1%}\n\n**Model:** Image-Only (EfficientNet-B3)\n\n**Hasil:** {result_text}"
        model_info = f"**Tipe Model:** Image-Only\n**Arsitektur:** EfficientNet-B3\n**Probabilitas Gambling:** {gambling_prob:.1%}\n**Probabilitas Non-Gambling:** {non_gambling_prob:.1%}"
        print(f"[Image-Only] URL: {url}")
        print(f"Prediction: {'Gambling' if is_gambling else 'Non-Gambling'} | Confidence: {confidence:.2f}\n")
        # cleaned_text slot is empty: no text was used.
        return label_dict, confidence_md, screenshot_path, raw_text, "", model_info
    else:
        # --- Fusion path: combine image and text logits via the MLP head. ---
        clean_text_data = clean_text(text)
        image_tensor, input_ids, attention_mask = prepare_data_for_model(screenshot_path, text)
        with torch.no_grad():
            fused_logits, image_logits, text_logits, weights = fusion_model(image_tensor, input_ids, attention_mask)
            fused_probs = torch.sigmoid(fused_logits)
            image_probs = torch.sigmoid(image_logits)
            text_probs = torch.sigmoid(text_logits)
        threshold = 0.6
        is_gambling = fused_probs[0] > threshold
        gambling_prob = fused_probs[0].item()
        non_gambling_prob = 1 - gambling_prob
        label_dict = {
            "Gambling": gambling_prob,
            "Non-Gambling": non_gambling_prob
        }
        confidence = gambling_prob if is_gambling else non_gambling_prob
        # Calculate relative contribution (approximation for MLP fusion):
        # distance of each branch's probability from 0.5, normalized.
        image_contrib = abs(image_probs[0].item() - 0.5)
        text_contrib = abs(text_probs[0].item() - 0.5)
        total_contrib = image_contrib + text_contrib
        if total_contrib > 0:
            image_weight = image_contrib / total_contrib
            text_weight = text_contrib / total_contrib
        else:
            image_weight = 0.5
            text_weight = 0.5
        result_text = "Gambling" if is_gambling else "Non-Gambling"
        confidence_md = f"**Tingkat Keyakinan:** {confidence:.1%}\n\n**Model:** Fusion Model (Image + Text)\n\n**Hasil:** {result_text}"
        model_info = f"""**Tipe Model:** Fusion Model (MLP)
**Image Model:** EfficientNet-B3
**Text Model:** IndoBERT
**Prediksi Individual:**
- Image Model: {image_probs[0].item():.1%}
- Text Model: {text_probs[0].item():.1%}
- Hasil Fusion: {gambling_prob:.1%}"""
        # Detailed console log for debugging.
        print(f"[Fusion Model] URL: {url}")
        print(f"Image Model Prediction Probability: {image_probs[0]:.2f}")
        print(f"Text Model Prediction Probability: {text_probs[0]:.2f}")
        print(f"Fusion Final Prediction: {'Gambling' if is_gambling else 'Non-Gambling'} | Confidence: {confidence:.2f}\n")
        return label_dict, confidence_md, screenshot_path, raw_text, clean_text_data, model_info
def predict_batch_urls(file_obj):
    """Run predict_single_url over every URL in an uploaded .txt file
    (one URL per line) and return the results as a DataFrame.

    Bug fix: predict_single_url returns SIX values; the old code unpacked
    five, so every batch run raised ValueError. The sixth value
    (model_info) is unpacked and ignored here.
    """
    results = []
    # Depending on the Gradio version/config, file_obj may be a file-like
    # object, a tempfile wrapper exposing .name, or a plain path string.
    if hasattr(file_obj, 'read'):
        content = file_obj.read()
        if isinstance(content, bytes):
            content = content.decode('utf-8')
    else:
        path = getattr(file_obj, 'name', file_obj)
        with open(path, 'r', encoding='utf-8') as fh:
            content = fh.read()
    urls = [line.strip() for line in content.splitlines() if line.strip()]
    for url in urls:
        label, confidence, screenshot_path, raw_text, cleaned_text, _model_info = predict_single_url(url)
        results.append({
            "url": url,
            "label": label,
            "confidence": confidence,
            "screenshot_path": screenshot_path,
            "raw_text": raw_text,
            "cleaned_text": cleaned_text
        })
    df = pd.DataFrame(results)
    print(f"Batch prediction completed for {len(urls)} URLs.")
    return df
# --- Gradio App ---
# Custom CSS - Tokopedia style (green accents, card layout). The string
# is passed verbatim to gr.Blocks(css=...).
custom_css = """
.header-container {
background: #fff;
border-bottom: 1px solid #e5e5e5;
padding: 20px 0;
margin-bottom: 30px;
}
.header-title {
font-size: 24px;
font-weight: 600;
color: #333;
margin: 0;
padding: 0;
}
.header-subtitle {
font-size: 14px;
color: #666;
margin: 5px 0 0 0;
}
.content-container {
max-width: 1200px;
margin: 0 auto;
padding: 0 20px;
}
.card {
background: #fff;
border: 1px solid #e5e5e5;
border-radius: 8px;
padding: 24px;
margin-bottom: 20px;
}
.section-title {
font-size: 18px;
font-weight: 600;
color: #333;
margin: 0 0 20px 0;
padding-bottom: 12px;
border-bottom: 2px solid #42b549;
}
.info-text {
font-size: 14px;
color: #666;
line-height: 1.6;
margin: 0;
}
.button-primary {
background: #42b549;
color: #fff;
border: none;
padding: 12px 32px;
border-radius: 4px;
font-weight: 500;
cursor: pointer;
}
.button-primary:hover {
background: #3aa040;
}
.result-box {
background: #f8f9fa;
border: 1px solid #e5e5e5;
border-radius: 8px;
padding: 20px;
margin: 15px 0;
}
.footer-text {
text-align: center;
color: #999;
font-size: 12px;
padding: 20px 0;
border-top: 1px solid #e5e5e5;
margin-top: 40px;
}
"""
# Assemble the UI: header, intro card, two tabs (single-URL analysis and
# batch analysis from a .txt upload), and a footer.
with gr.Blocks(theme=gr.themes.Default(), css=custom_css, title="Gambling Website Detector") as app:
    # Header
    with gr.Row():
        gr.HTML("""
        <div class="header-container">
        <div class="content-container">
        <h1 class="header-title">Gambling Website Detector</h1>
        <p class="header-subtitle">Analisis website untuk mendeteksi konten perjudian menggunakan teknologi deep learning</p>
        </div>
        </div>
        """)
    # Main Content: intro card explaining the fusion approach.
    with gr.Row():
        with gr.Column():
            gr.HTML("""
            <div class="content-container">
            <div class="card">
            <p class="info-text">
            Sistem ini menggunakan model fusion yang menggabungkan analisis gambar dan teks untuk mendeteksi konten perjudian pada website. Masukkan URL website yang ingin dianalisis.
            </p>
            </div>
            </div>
            """)
    with gr.Tabs():
        # --- Tab 1: single-URL analysis ---
        with gr.Tab("Analisis URL", id="single"):
            with gr.Row():
                with gr.Column():
                    gr.HTML("""
                    <div class="content-container">
                    <div class="card">
                    <h2 class="section-title">Masukkan URL Website</h2>
                    <p class="info-text" style="margin-bottom: 20px;">Masukkan URL lengkap website yang ingin dianalisis. Sistem akan mengambil screenshot dan menganalisis kontennya.</p>
                    </div>
                    </div>
                    """)
            with gr.Row():
                with gr.Column():
                    url_input = gr.Textbox(
                        label="URL Website",
                        placeholder="https://example.com",
                        lines=1,
                        container=False
                    )
                    predict_button = gr.Button(
                        "Analisis Website",
                        variant="primary",
                        size="lg"
                    )
            with gr.Row():
                with gr.Column():
                    gr.HTML("""
                    <div class="content-container">
                    <div class="card">
                    <h2 class="section-title">Hasil Analisis</h2>
                    </div>
                    </div>
                    """)
            with gr.Row():
                # Left column: prediction label + Markdown details.
                with gr.Column(scale=1):
                    label_output = gr.Label(
                        label="Hasil Prediksi",
                        value={"Gambling": 0.0, "Non-Gambling": 0.0},
                        num_top_classes=2,
                        container=False
                    )
                    confidence_output = gr.Markdown(
                        value="",
                        label="Tingkat Keyakinan",
                        container=False
                    )
                    model_info_output = gr.Markdown(
                        value="",
                        label="Informasi Model",
                        container=False
                    )
                # Right column: the captured screenshot.
                with gr.Column(scale=1):
                    screenshot_output = gr.Image(
                        label="Screenshot Website",
                        type="filepath",
                        height=400,
                        container=False
                    )
            with gr.Row():
                with gr.Column():
                    # Collapsible raw vs. cleaned OCR text comparison.
                    with gr.Accordion("Detail Analisis Teks", open=False):
                        with gr.Row():
                            with gr.Column():
                                raw_text_output = gr.Textbox(
                                    label="Teks Mentah (Raw OCR)",
                                    lines=6,
                                    interactive=False,
                                    placeholder="Teks yang diekstrak dari screenshot akan muncul di sini...",
                                    container=False
                                )
                            with gr.Column():
                                cleaned_text_output = gr.Textbox(
                                    label="Teks yang Diproses",
                                    lines=6,
                                    interactive=False,
                                    placeholder="Teks yang sudah dibersihkan akan muncul di sini...",
                                    container=False
                                )
            # Wire the single-URL flow; output order must match the
            # 6-tuple returned by predict_single_url.
            predict_button.click(
                fn=predict_single_url,
                inputs=url_input,
                outputs=[
                    label_output,
                    confidence_output,
                    screenshot_output,
                    raw_text_output,
                    cleaned_text_output,
                    model_info_output
                ]
            )
        # --- Tab 2: batch analysis from a .txt upload ---
        with gr.Tab("Analisis Batch", id="batch"):
            with gr.Row():
                with gr.Column():
                    gr.HTML("""
                    <div class="content-container">
                    <div class="card">
                    <h2 class="section-title">Analisis Multiple URL</h2>
                    <p class="info-text">Upload file teks (.txt) yang berisi beberapa URL (satu URL per baris) untuk dianalisis sekaligus. Hasil akan ditampilkan dalam format tabel.</p>
                    </div>
                    </div>
                    """)
            with gr.Row():
                with gr.Column():
                    file_input = gr.File(
                        label="Upload File URL (.txt)",
                        file_types=[".txt"],
                        container=False
                    )
                    gr.Markdown("**Format file:** Satu URL per baris", container=False)
                    batch_predict_button = gr.Button(
                        "Proses Batch",
                        variant="primary",
                        size="lg"
                    )
            with gr.Row():
                with gr.Column():
                    batch_output = gr.DataFrame(
                        label="Hasil Analisis",
                        wrap=True,
                        interactive=False,
                        container=False
                    )
            # Wire the batch flow.
            batch_predict_button.click(
                fn=predict_batch_urls,
                inputs=file_input,
                outputs=batch_output
            )
    # Footer
    gr.HTML("""
    <div class="footer-text">
    <p>Powered by PyTorch • Gradio • EfficientNet • IndoBERT</p>
    <p style="margin-top: 8px;">Tool ini untuk keperluan edukasi dan penelitian</p>
    </div>
    """)
# Start the web app (blocking call).
app.launch()