doniramdani820 committed on
Commit
de22214
Β·
verified Β·
1 Parent(s): b4e54e7

Upload 4 files

Browse files
Files changed (4) hide show
  1. app.py +428 -0
  2. model55x140.onnx +3 -0
  3. model90x280.onnx +3 -0
  4. requirements.txt +6 -0
app.py ADDED
@@ -0,0 +1,428 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import cv2
3
+ import numpy as np
4
+ import base64
5
+ import onnxruntime as ort
6
+ from PIL import Image, ImageDraw, ImageFont
7
+ import io
8
+ import json
9
+ import logging
10
+ from difflib import SequenceMatcher
11
+
12
+ # Set up logging
13
+ logging.basicConfig(level=logging.INFO)
14
+ logger = logging.getLogger(__name__)
15
+
16
+ # Global variables for ONNX models
17
+ title_model = None
18
+ button_model = None
19
+
20
def load_models():
    """Load ONNX models for title and button detection.

    Populates the module-level ``title_model`` and ``button_model``
    sessions; re-raises any load failure so callers can abort startup.
    """
    global title_model, button_model

    try:
        # Title OCR first, then button OCR; either failure aborts the load
        # so the app never runs half-initialized.
        title_model = ort.InferenceSession("model90x280.onnx")
        logger.info("βœ… Title model (model90x280.onnx) loaded successfully")

        button_model = ort.InferenceSession("model55x140.onnx")
        logger.info("βœ… Button model (model55x140.onnx) loaded successfully")
    except Exception as exc:
        logger.error(f"❌ Error loading models: {str(exc)}")
        raise exc
36
+
37
def decode_base64_image(base64_str):
    """Convert base64 string to numpy array.

    Accepts either a bare base64 payload or a full ``data:image/...``
    data URL; always returns an RGB HxWx3 uint8 array.
    """
    try:
        # Strip a "data:image/...;base64," prefix when present.
        if base64_str.startswith('data:image'):
            base64_str = base64_str.split(',')[1]

        raw_bytes = base64.b64decode(base64_str)
        pil_img = Image.open(io.BytesIO(raw_bytes))

        # Force RGB so downstream slicing/resizing sees 3 channels.
        if pil_img.mode != 'RGB':
            pil_img = pil_img.convert('RGB')

        return np.array(pil_img)
    except Exception as exc:
        logger.error(f"❌ Error decoding base64 image: {str(exc)}")
        raise exc
61
+
62
def crop_title_area(image):
    """Return the fixed title region of the captcha image.

    The title occupies a 280x100 px box anchored at (x=0, y=220),
    i.e. rows 220..319 and columns 0..279 of the decoded image.
    """
    try:
        cropped = image[220:320, 0:280]  # numpy slicing is [y1:y2, x1:x2]
        logger.info(f"πŸ“ Title area cropped: {cropped.shape}")
        return cropped
    except Exception as exc:
        logger.error(f"❌ Error cropping title area: {str(exc)}")
        raise exc
76
+
77
def crop_button_areas(image):
    """Slice the answer-button regions out of the captcha image.

    The button grid is 280x320 px; each layout entry is (x, y, w, h)
    relative to the grid's top-left corner, which itself sits at
    y=350 in the full image.  Returns {button_id: crop-array}.
    NOTE(review): the original doc mentioned buttons 1-9 but only
    positions 1-8 are defined — confirm against the actual captcha grid.
    """
    try:
        # (x, y, width, height) for each button, relative to the grid origin.
        layout = {
            1: (0, 0, 140, 60),
            2: (140, 0, 140, 60),
            3: (0, 60, 140, 60),
            4: (140, 60, 140, 50),
            5: (0, 115, 140, 50),
            6: (140, 110, 140, 60),
            7: (0, 170, 140, 50),
            8: (140, 170, 140, 50),
        }

        # Vertical offset of the grid inside the full image (adjust as needed).
        grid_top = 350

        crops = {}
        for btn_id, (x, y, w, h) in layout.items():
            top = grid_top + y
            crops[btn_id] = image[top:top + h, x:x + w]
            logger.info(f"πŸ”² Button {btn_id} cropped: {crops[btn_id].shape}")

        return crops
    except Exception as exc:
        logger.error(f"❌ Error cropping button areas: {str(exc)}")
        raise exc
115
+
116
def preprocess_for_ocr(image, target_size):
    """Resize, grayscale and normalize a crop into an NCHW float32 tensor.

    ``target_size`` is (width, height) as cv2.resize expects; the returned
    tensor has shape (1, 1, height, width) with values scaled to [0, 1].
    """
    try:
        resized = cv2.resize(image, target_size)

        # Collapse RGB to a single channel; pass grayscale input through.
        if len(resized.shape) == 3:
            gray = cv2.cvtColor(resized, cv2.COLOR_RGB2GRAY)
        else:
            gray = resized

        scaled = gray.astype(np.float32) / 255.0

        # (H, W) -> (batch=1, channel=1, H, W); note target_size is (W, H).
        return scaled.reshape(1, 1, target_size[1], target_size[0])
    except Exception as exc:
        logger.error(f"❌ Error preprocessing image: {str(exc)}")
        raise exc
138
+
139
def predict_title(title_crop):
    """Run the title OCR model on the cropped title area and return its text.

    Raises ValueError when load_models() has not populated ``title_model``.
    """
    try:
        if title_model is None:
            raise ValueError("Title model not loaded")

        # Model expects 280x90 (width x height, per the model file name).
        tensor = preprocess_for_ocr(title_crop, (280, 90))

        feed = {title_model.get_inputs()[0].name: tensor}
        raw_outputs = title_model.run(None, feed)

        # Decode the raw network output into a string; the exact decoding
        # depends on the model's output format (see process_model_output).
        text = process_model_output(raw_outputs[0])

        logger.info(f"πŸ”€ Title prediction: '{text}'")
        return text
    except Exception as exc:
        logger.error(f"❌ Error predicting title: {str(exc)}")
        raise exc
163
+
164
def predict_button_text(button_crop):
    """Run the button OCR model on a cropped button area and return its text.

    Raises ValueError when load_models() has not populated ``button_model``.
    """
    try:
        if button_model is None:
            raise ValueError("Button model not loaded")

        # Model expects 140x55 (width x height, per the model file name).
        tensor = preprocess_for_ocr(button_crop, (140, 55))

        feed = {button_model.get_inputs()[0].name: tensor}
        raw_outputs = button_model.run(None, feed)

        return process_model_output(raw_outputs[0])
    except Exception as exc:
        logger.error(f"❌ Error predicting button text: {str(exc)}")
        raise exc
186
+
187
def process_model_output(output):
    """
    Decode an ONNX model output array into text.

    Two layouts are supported:
      * (batch, timesteps, classes): per-timestep character probabilities,
        decoded greedily (argmax per timestep) with consecutive duplicate
        characters collapsed, CTC-style.
      * (batch, classes): a single character prediction.

    Any class index outside the known alphabet is treated as a blank and
    skipped.  Returns "" for unrecognized layouts or on any error.
    NOTE(review): placeholder decoding — adjust to the real model's output
    format (alphabet order, blank index) once it is known.
    """
    # Index -> character mapping; defined once instead of per-branch.
    chars = "abcdefghijklmnopqrstuvwxyz0123456789"
    try:
        if isinstance(output, np.ndarray):
            if len(output.shape) == 3:  # sequence of character probabilities
                # Greedy decode: argmax per timestep, dropping out-of-alphabet
                # (blank) indices.  Vectorized argmax + join replaces the
                # original quadratic string concatenation.
                indices = np.argmax(output[0], axis=-1)
                text = "".join(chars[i] for i in indices if i < len(chars))

                # Collapse runs of repeated characters (CTC-like cleanup);
                # groupby is the stdlib equivalent of the manual prev-char loop.
                from itertools import groupby
                return "".join(ch for ch, _ in groupby(text)).strip()

            elif len(output.shape) == 2:  # single prediction
                char_idx = int(np.argmax(output[0]))
                if char_idx < len(chars):
                    return chars[char_idx]

        # Unknown layout or out-of-alphabet single prediction.
        return ""
    except Exception as e:
        logger.error(f"❌ Error processing model output: {str(e)}")
        return ""
226
+
227
def split_title(title_text):
    """
    Break the detected title into the two fragments printed on the buttons.

    Split rules (mirrors the captcha layout):
      6+ chars: split at the midpoint (abcdef -> abc / def)
      5 chars:  first 3 / last 2     (abcde  -> abc / de)
      4 chars:  split in half        (abcd   -> ab / cd)
      <4 chars: best-effort midpoint split (part2 may be empty for 1 char)

    Returns ("", "") if anything goes wrong.
    """
    try:
        n = len(title_text)

        # Pick the cut position, then slice once.
        if n == 5:
            cut = 3
        elif n == 4:
            cut = 2
        elif n >= 6:
            cut = n // 2
        else:
            # Edge case for very short titles.
            cut = max(1, n // 2)

        part1, part2 = title_text[:cut], title_text[cut:]

        logger.info(f"βœ‚οΈ Title split: '{title_text}' β†’ '{part1}' + '{part2}'")
        return part1, part2
    except Exception as exc:
        logger.error(f"❌ Error splitting title: {str(exc)}")
        return "", ""
261
+
262
def find_matching_buttons(part1, part2, button_predictions):
    """
    Find the buttons whose OCR'd text matches the two title fragments.

    Uses difflib.SequenceMatcher fuzzy matching (threshold 0.6) so small
    OCR errors still match.  Returns a list of up to two button ids: the
    best match for part1, then the best *distinct* match for part2.

    Fix: previously, when part2's top match was the button already chosen
    for part1 it was discarded outright; now we fall back to part2's
    next-best distinct candidate so two buttons can still be returned.
    """
    try:
        matching_buttons = []

        # Normalize both fragments for comparison.
        part1_lower = part1.lower().strip()
        part2_lower = part2.lower().strip()

        logger.info(f"πŸ” Looking for buttons matching: '{part1_lower}' and '{part2_lower}'")

        threshold = 0.6  # minimum similarity ratio to count as a match
        part1_matches = []
        part2_matches = []

        for button_id, button_text in button_predictions.items():
            button_text_lower = button_text.lower().strip()

            # Similarity of this button's text against each title fragment.
            part1_similarity = SequenceMatcher(None, part1_lower, button_text_lower).ratio()
            part2_similarity = SequenceMatcher(None, part2_lower, button_text_lower).ratio()

            if part1_similarity >= threshold:
                part1_matches.append((button_id, part1_similarity, button_text))
                logger.info(f"  πŸ“ Button {button_id} matches part1 '{part1_lower}': '{button_text_lower}' (similarity: {part1_similarity:.2f})")

            if part2_similarity >= threshold:
                part2_matches.append((button_id, part2_similarity, button_text))
                logger.info(f"  πŸ“ Button {button_id} matches part2 '{part2_lower}': '{button_text_lower}' (similarity: {part2_similarity:.2f})")

        # Highest similarity first.
        part1_matches.sort(key=lambda x: x[1], reverse=True)
        part2_matches.sort(key=lambda x: x[1], reverse=True)

        if part1_matches:
            best_part1_match = part1_matches[0]
            matching_buttons.append(best_part1_match[0])
            logger.info(f"🎯 Best match for part1: Button {best_part1_match[0]} ('{best_part1_match[2]}', score: {best_part1_match[1]:.2f})")

        # Take the best part2 candidate that is not already used for part1,
        # rather than dropping part2 entirely on a collision.
        for candidate in part2_matches:
            if candidate[0] not in matching_buttons:
                matching_buttons.append(candidate[0])
                logger.info(f"🎯 Best match for part2: Button {candidate[0]} ('{candidate[2]}', score: {candidate[1]:.2f})")
                break

        logger.info(f"βœ… Final matching buttons: {matching_buttons}")
        return matching_buttons
    except Exception as e:
        logger.error(f"❌ Error finding matching buttons: {str(e)}")
        return []
320
+
321
def solve_assemble_captcha(base64_image):
    """
    End-to-end solver for the "assemble from 2 elements" captcha.

    Pipeline: decode image -> OCR the title -> split it into two fragments
    -> OCR every button -> fuzzy-match fragments to buttons.  Returns a
    dict with a success flag, the detected texts and the button ids to
    click; on any failure returns {"success": False, "error": ..., "message": ...}.
    """
    try:
        logger.info("πŸš€ Starting assemble captcha solving...")

        image = decode_base64_image(base64_image)
        logger.info(f"πŸ“Έ Image decoded: {image.shape}")

        # Steps 1-2: locate and read the title.
        title_text = predict_title(crop_title_area(image))
        if not title_text:
            raise ValueError("Could not detect title text")

        # Step 3: split the title into the two fragments shown on buttons.
        part1, part2 = split_title(title_text)
        if not part1 or not part2:
            raise ValueError("Could not split title into valid parts")

        # Steps 4-5: read the text on every button.
        button_predictions = {}
        for button_id, button_crop in crop_button_areas(image).items():
            text = predict_button_text(button_crop)
            button_predictions[button_id] = text
            logger.info(f"πŸ”² Button {button_id} prediction: '{text}'")

        # Step 6: match fragments to buttons.
        matching_buttons = find_matching_buttons(part1, part2, button_predictions)
        if not matching_buttons:
            raise ValueError("No matching buttons found")

        result = {
            "success": True,
            "title_detected": title_text,
            "title_part1": part1,
            "title_part2": part2,
            "button_predictions": button_predictions,
            "buttons_to_click": matching_buttons,
            "message": f"Found {len(matching_buttons)} matching buttons for '{title_text}' ('{part1}' + '{part2}')"
        }

        logger.info("βœ… Assemble captcha solved successfully!")
        logger.info(f"πŸ“Š Result: {json.dumps(result, indent=2)}")
        return result

    except Exception as e:
        logger.error(f"❌ Error solving assemble captcha: {str(e)}")
        return {
            "success": False,
            "error": str(e),
            "message": "Failed to solve assemble captcha"
        }
384
+
385
+ # Initialize models when app starts
386
+ try:
387
+ load_models()
388
+ except Exception as e:
389
+ logger.error(f"❌ Failed to initialize models: {str(e)}")
390
+
391
+ # Gradio interface
392
def gradio_solve(base64_image):
    """Gradio wrapper: run the solver and return its result as pretty JSON.

    Never raises — any unexpected failure is folded into an error payload
    so the UI always shows valid JSON.
    """
    try:
        return json.dumps(solve_assemble_captcha(base64_image), indent=2)
    except Exception as exc:
        payload = {
            "success": False,
            "error": str(exc),
            "message": "Internal server error"
        }
        return json.dumps(payload, indent=2)
404
+
405
+ # Create Gradio interface
406
+ iface = gr.Interface(
407
+ fn=gradio_solve,
408
+ inputs=gr.Textbox(
409
+ label="Base64 Image",
410
+ placeholder="Paste base64 encoded captcha image here...",
411
+ lines=3
412
+ ),
413
+ outputs=gr.Textbox(
414
+ label="Solution Result",
415
+ lines=10
416
+ ),
417
+ title="XCaptcha2 Assemble Solver",
418
+ description="Solve 'Assemble from 2 elements' type captchas by detecting title and matching buttons",
419
+ examples=[]
420
+ )
421
+
422
+ # For Hugging Face Spaces API
423
+ def solve(image_base64):
424
+ """API endpoint for solving captcha"""
425
+ return solve_assemble_captcha(image_base64)
426
+
427
+ if __name__ == "__main__":
428
+ iface.launch()
model55x140.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1cb0b7a5aa05fe95f7110a99dfc7a210151229744c7a4b1bf3ca279e8cdc1cea
3
+ size 1935908
model90x280.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:161aa69dce99ef1c5c291d9b35163479808ea228ba6fe8903c926e5ca2bc7a77
3
+ size 1938087
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ gradio
2
+ opencv-python
3
+ numpy
4
+ onnxruntime
5
+ pillow
6
+ # NOTE: "difflib" removed from requirements — it is part of the Python standard library, not a pip package