Spaces:

huijio
/

zeracap2

Paused

App Files Files Community

huijio committed on Nov 5, 2025

Commit

bdf51cf

verified ·

1 Parent(s): 84df808

Create app.py

Browse files

Files changed (1) hide show

app.py +661 -0

app.py ADDED Viewed

	@@ -0,0 +1,661 @@

+import gradio as gr
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torchvision import models, transforms
+from PIL import Image
+import numpy as np
+import cv2
+from scipy import stats
+import requests
+from io import BytesIO
+import base64
+from fastapi import FastAPI, HTTPException, Request
+from pydantic import BaseModel
+import uvicorn
+from fastapi.middleware.cors import CORSMiddleware
+import json
+import warnings
+import threading
+import time
+warnings.filterwarnings('ignore')
+# ==================== KEEP-ALIVE SERVICE ====================
+def keep_alive_ping():
+    """Ping the space every 20 minutes to prevent sleeping"""
+    def ping():
+        time.sleep(10)  # Wait for app to start
+        while True:
+            try:
+                # Ping your own space
+                requests.get("https://huijio-zeracap2.hf.space/api/health", timeout=10)
+                print("🔄 Keep-alive ping sent - Preventing sleep")
+            except Exception as e:
+                print(f"❌ Keep-alive ping failed: {e}")
+            time.sleep(1200)  # 20 minutes
+    # Start in background thread
+    thread = threading.Thread(target=ping, daemon=True)
+    thread.start()
+    print("✅ Keep-alive service started")
+# ==================== MODEL DEFINITIONS ====================
+class DualPathSiamese(nn.Module):
+    def __init__(self, embedding_dim=256):
+        super(DualPathSiamese, self).__init__()
+        # Deep learning path
+        resnet = models.resnet50(weights=None)
+        self.cnn_backbone = nn.Sequential(*list(resnet.children())[:-1])
+        self.cnn_embedding = nn.Sequential(
+            nn.Linear(2048, 512),
+            nn.BatchNorm1d(512),
+            nn.ReLU(),
+            nn.Dropout(0.5),
+            nn.Linear(512, embedding_dim)
+        )
+        # Traditional CV path
+        self.feature_embedding = nn.Sequential(
+            nn.Linear(29, 128),
+            nn.BatchNorm1d(128),
+            nn.ReLU(),
+            nn.Dropout(0.3),
+            nn.Linear(128, 64)
+        )
+        # Fusion layer
+        self.fusion = nn.Sequential(
+            nn.Linear(embedding_dim + 64, 256),
+            nn.BatchNorm1d(256),
+            nn.ReLU(),
+            nn.Dropout(0.3),
+            nn.Linear(256, embedding_dim)
+        )
+    def forward_once(self, img, features):
+        cnn_out = self.cnn_backbone(img)
+        cnn_out = cnn_out.view(cnn_out.size(0), -1)
+        cnn_embed = self.cnn_embedding(cnn_out)
+        feat_embed = self.feature_embedding(features)
+        combined = torch.cat([cnn_embed, feat_embed], dim=1)
+        output = self.fusion(combined)
+        return F.normalize(output, p=2, dim=1)
+    def forward(self, img1, img2, features):
+        feat1 = features[:, :29]
+        feat2 = features[:, 29:]
+        output1 = self.forward_once(img1, feat1)
+        output2 = self.forward_once(img2, feat2)
+        return output1, output2
+class EnsembleSiamese:
+    def __init__(self, device='cpu'):
+        self.device = device
+        self.models = {}
+        self.model_names = ['dualpath', 'resnet50', 'efficientnet']
+        self.weights = [0.34, 0.33, 0.33]
+        self.models_loaded = False
+        try:
+            # Load DualPath model
+            self.models['dualpath'] = DualPathSiamese(embedding_dim=256).to(device)
+            # Load ResNet50 model
+            resnet = models.resnet50(weights=None)
+            self.models['resnet50'] = self.create_resnet_siamese(resnet, 2048, 256).to(device)
+            # Load EfficientNet model
+            from torchvision.models import efficientnet_b3
+            efficientnet = efficientnet_b3(weights=None)
+            self.models['efficientnet'] = self.create_efficientnet_siamese(efficientnet, 256).to(device)
+            # Load trained weights with proper settings
+            self.load_weights()
+            self.models_loaded = True
+            print("✅ Ensemble model initialized successfully!")
+        except Exception as e:
+            print(f"❌ Error initializing models: {e}")
+            self.models_loaded = False
+    def create_resnet_siamese(self, resnet, in_features, embedding_dim):
+        class ResNetSiam(nn.Module):
+            def __init__(self):
+                super(ResNetSiam, self).__init__()
+                self.backbone = nn.Sequential(*list(resnet.children())[:-1])
+                self.embedding = nn.Sequential(
+                    nn.Linear(in_features, 512),
+                    nn.BatchNorm1d(512),
+                    nn.ReLU(),
+                    nn.Dropout(0.5),
+                    nn.Linear(512, embedding_dim)
+                )
+            def forward_once(self, x):
+                x = self.backbone(x)
+                x = x.view(x.size(0), -1)
+                x = self.embedding(x)
+                return F.normalize(x, p=2, dim=1)
+            def forward(self, img1, img2, features=None):
+                return self.forward_once(img1), self.forward_once(img2)
+        return ResNetSiam()
+    def create_efficientnet_siamese(self, efficientnet, embedding_dim):
+        class EfficientNetSiam(nn.Module):
+            def __init__(self):
+                super(EfficientNetSiam, self).__init__()
+                self.backbone = efficientnet.features
+                self.avgpool = nn.AdaptiveAvgPool2d(1)
+                self.embedding = nn.Sequential(
+                    nn.Linear(1536, 512),
+                    nn.BatchNorm1d(512),
+                    nn.ReLU(),
+                    nn.Dropout(0.4),
+                    nn.Linear(512, embedding_dim)
+                )
+            def forward_once(self, x):
+                x = self.backbone(x)
+                x = self.avgpool(x)
+                x = x.view(x.size(0), -1)
+                x = self.embedding(x)
+                return F.normalize(x, p=2, dim=1)
+            def forward(self, img1, img2, features=None):
+                return self.forward_once(img1), self.forward_once(img2)
+        return EfficientNetSiam()
+    def load_weights(self):
+        """Load trained model weights with proper error handling"""
+        try:
+            # Load DualPath with weights_only=False for compatibility
+            dualpath_state = torch.load('ensemble_dualpath.pth', map_location=self.device, weights_only=False)
+            self.models['dualpath'].load_state_dict(dualpath_state['model_state_dict'])
+            print("✅ DualPath weights loaded")
+            # Load ResNet50
+            resnet_state = torch.load('ensemble_resnet50.pth', map_location=self.device, weights_only=False)
+            self.models['resnet50'].load_state_dict(resnet_state['model_state_dict'])
+            print("✅ ResNet50 weights loaded")
+            # Load EfficientNet
+            efficient_state = torch.load('ensemble_efficientnet.pth', map_location=self.device, weights_only=False)
+            self.models['efficientnet'].load_state_dict(efficient_state['model_state_dict'])
+            print("✅ EfficientNet weights loaded")
+        except Exception as e:
+            print(f"⚠️ Partial weight loading error: {e}")
+            # Initialize with random weights if loading fails
+            for name, model in self.models.items():
+                model.apply(self._init_weights)
+            print("🔄 Models initialized with random weights")
+    def _init_weights(self, m):
+        """Initialize weights for models"""
+        if isinstance(m, nn.Linear):
+            torch.nn.init.xavier_uniform_(m.weight)
+            if m.bias is not None:
+                m.bias.data.fill_(0.01)
+    def extract_handcrafted_features(self, img_array):
+        """Extract traditional CV features from numpy array"""
+        if img_array is None:
+            return np.zeros(29)
+        try:
+            features = []
+            # Color histogram
+            for i in range(3):
+                hist = cv2.calcHist([img_array], [i], None, [8], [0, 256])
+                features.extend(hist.flatten() / (hist.sum() + 1e-6))
+            # HSV features
+            hsv = cv2.cvtColor(img_array, cv2.COLOR_RGB2HSV)
+            features.extend([hsv[:,:,i].mean() for i in range(3)])
+            # Edge density
+            gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)
+            edges = cv2.Canny(gray, 50, 150)
+            features.append(edges.sum() / (edges.size + 1e-6))
+            # Texture
+            features.append(cv2.Laplacian(gray, cv2.CV_64F).var())
+            return np.array(features, dtype=np.float32)
+        except Exception as e:
+            print(f"Feature extraction error: {e}")
+            return np.zeros(29)
+    def predict_detailed(self, question_img, answer_imgs, threshold=0.312):
+        """Predict similarity with detailed model breakdown"""
+        if not self.models_loaded:
+            return [{
+                'answer_index': i,
+                'model_predictions': {
+                    'dualpath': {'distance': 1.0, 'confidence': 0.0, 'is_match': False},
+                    'resnet50': {'distance': 1.0, 'confidence': 0.0, 'is_match': False},
+                    'efficientnet': {'distance': 1.0, 'confidence': 0.0, 'is_match': False}
+                },
+                'ensemble_confidence': 0.0,
+                'ensemble_distance': 1.0,
+                'ensemble_match': False,
+                'final_decision': False
+            } for i in range(len(answer_imgs))]
+        transform = transforms.Compose([
+            transforms.Resize((224, 224)),
+            transforms.ToTensor(),
+            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
+        ])
+        all_results = []
+        for answer_idx, answer_img in enumerate(answer_imgs):
+            try:
+                # Preprocess images
+                q_img = transform(question_img.convert('RGB')).unsqueeze(0).to(self.device)
+                a_img = transform(answer_img.convert('RGB')).unsqueeze(0).to(self.device)
+                # Extract features
+                q_features = self.extract_handcrafted_features(np.array(question_img))
+                a_features = self.extract_handcrafted_features(np.array(answer_img))
+                features = np.concatenate([q_features, a_features])
+                features_tensor = torch.tensor(features, dtype=torch.float32).unsqueeze(0).to(self.device)
+                # Get predictions from all models
+                model_predictions = {}
+                distances = []
+                confidences = []
+                for name, model in self.models.items():
+                    model.eval()
+                    with torch.no_grad():
+                        if name == 'dualpath':
+                            out1, out2 = model(q_img, a_img, features_tensor)
+                        else:
+                            out1, out2 = model(q_img, a_img)
+                        dist = F.pairwise_distance(out1, out2)
+                        confidence = max(0, 100 * (1 - dist.item()))
+                        model_predictions[name] = {
+                            'distance': float(dist.item()),
+                            'confidence': float(confidence),
+                            'is_match': bool(dist.item() < threshold)
+                        }
+                        distances.append(dist.item())
+                        confidences.append(confidence)
+                # Weighted average
+                weighted_distance = sum(w * d for w, d in zip(self.weights, distances))
+                weighted_confidence = sum(w * c for w, c in zip(self.weights, confidences))
+                is_match = weighted_distance < threshold
+                answer_result = {
+                    'answer_index': answer_idx,
+                    'model_predictions': model_predictions,
+                    'ensemble_distance': float(weighted_distance),
+                    'ensemble_confidence': float(weighted_confidence),
+                    'ensemble_match': bool(is_match),
+                    'final_decision': is_match
+                }
+                all_results.append(answer_result)
+            except Exception as e:
+                print(f"Error processing answer {answer_idx}: {e}")
+                # Add fallback result
+                all_results.append({
+                    'answer_index': answer_idx,
+                    'model_predictions': {
+                        'dualpath': {'distance': 1.0, 'confidence': 0.0, 'is_match': False},
+                        'resnet50': {'distance': 1.0, 'confidence': 0.0, 'is_match': False},
+                        'efficientnet': {'distance': 1.0, 'confidence': 0.0, 'is_match': False}
+                    },
+                    'ensemble_confidence': 0.0,
+                    'ensemble_distance': 1.0,
+                    'ensemble_match': False,
+                    'final_decision': False
+                })
+        return all_results
+# ==================== INITIALIZE MODEL ====================
+device = 'cuda' if torch.cuda.is_available() else 'cpu'
+print(f"🔧 Using device: {device}")
+ensemble_model = EnsembleSiamese(device=device)
+# ==================== FASTAPI SETUP ====================
+app = FastAPI(title="CAPTCHA Solver API", version="1.0")
+# Add CORS middleware
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# One candidate answer image, tagged with the caller-supplied captcha ID.
+class AnswerData(BaseModel):
+    captcha_id: str  # echoed back in the prediction entry for this answer
+    image_base64: str  # image payload, decoded with base64_to_image
+# Request body of POST /api/predict: one question image plus ID-tagged answers.
+class CAPTCHAPredictionRequest(BaseModel):
+    question_base64: str  # question image, decoded with base64_to_image
+    answers: list[AnswerData]  # candidate answers, each carrying its captcha ID
+# Request body of the legacy POST /predict endpoint (answers identified by position).
+class Base64PredictionRequest(BaseModel):
+    question_base64: str  # question image, decoded with base64_to_image
+    answers_base64: list[str]  # answer images; list index becomes the captcha ID
+def base64_to_image(base64_string):
+    """Convert base64 string to PIL Image"""
+    try:
+        # Remove data URL prefix if present
+        if ',' in base64_string:
+            base64_string = base64_string.split(',')[1]
+        image_data = base64.b64decode(base64_string)
+        return Image.open(BytesIO(image_data))
+    except Exception as e:
+        print(f"Error decoding base64: {e}")
+        return None
+@app.post("/api/predict")
+async def api_predict_endpoint(request: CAPTCHAPredictionRequest):
+    """API endpoint that preserves captcha IDs"""
+    try:
+        print(f"📥 Received API request: {len(request.answers)} answers with captcha IDs")
+        # Convert base64 to images
+        question_img = base64_to_image(request.question_base64)
+        if not question_img:
+            return {"success": False, "error": "Failed to decode question image"}
+        answer_data = []
+        answer_imgs = []
+        for answer in request.answers:
+            img = base64_to_image(answer.image_base64)
+            if img:
+                answer_imgs.append(img)
+                answer_data.append({
+                    'captcha_id': answer.captcha_id,
+                    'image': img
+                })
+                print(f"✅ Decoded answer with captcha_id: {answer.captcha_id}")
+            else:
+                print(f"❌ Failed to decode answer with captcha_id: {answer.captcha_id}")
+        if len(answer_imgs) == 0:
+            return {"success": False, "error": "No answer images could be decoded"}
+        # Make prediction
+        results = ensemble_model.predict_detailed(question_img, answer_imgs)
+        # Map results back to captcha IDs
+        predictions_with_ids = []
+        for i, result in enumerate(results):
+            if i < len(answer_data):
+                predictions_with_ids.append({
+                    'captcha_id': answer_data[i]['captcha_id'],
+                    'ensemble_confidence': result['ensemble_confidence'],
+                    'ensemble_distance': result['ensemble_distance'],
+                    'ensemble_match': result['ensemble_match'],
+                    'model_predictions': result['model_predictions']
+                })
+        # Find best match
+        if predictions_with_ids:
+            best_prediction = max(predictions_with_ids, key=lambda x: x['ensemble_confidence'])
+            response_data = {
+                'success': True,
+                'predictions': predictions_with_ids,
+                'best_match': best_prediction['captcha_id'],
+                'best_confidence': best_prediction['ensemble_confidence'],
+                'best_distance': best_prediction['ensemble_distance'],
+                'models_loaded': ensemble_model.models_loaded
+            }
+            print(f"✅ API Prediction complete. Best match: captcha_id {best_prediction['captcha_id']} with {best_prediction['ensemble_confidence']:.2f}% confidence")
+            return response_data
+        else:
+            return {"success": False, "error": "No valid predictions could be made"}
+    except Exception as e:
+        print(f"❌ API error: {str(e)}")
+        return {"success": False, "error": str(e)}
+@app.post("/predict")
+async def predict_endpoint(request: Base64PredictionRequest):
+    """Alternative endpoint for backward compatibility"""
+    try:
+        print(f"📥 Received legacy API request: {len(request.answers_base64)} answers")
+        # Convert to new format
+        captcha_request = CAPTCHAPredictionRequest(
+            question_base64=request.question_base64,
+            answers=[AnswerData(captcha_id=str(i), image_base64=img_base64)
+                    for i, img_base64 in enumerate(request.answers_base64)]
+        )
+        return await api_predict_endpoint(captcha_request)
+    except Exception as e:
+        print(f"❌ Legacy API error: {str(e)}")
+        return {"success": False, "error": str(e)}
+@app.get("/api/health")
+async def health_check():
+    return {
+        "status": "healthy",
+        "models_loaded": ensemble_model.models_loaded,
+        "device": device,
+        "api_version": "1.0",
+        "keep_alive": "active",
+        "timestamp": time.time()
+    }
+@app.get("/health")
+async def health_check_alt():
+    return await health_check()
+@app.get("/")
+async def root():
+    return {
+        "message": "CAPTCHA Solver API is running!",
+        "version": "1.0",
+        "accuracy": "98.67%",
+        "models_loaded": ensemble_model.models_loaded,
+        "keep_alive": "enabled",
+        "endpoints": {
+            "api_predict": "POST /api/predict (recommended)",
+            "predict": "POST /predict (legacy)",
+            "health": "GET /api/health"
+        }
+    }
+# ==================== GRADIO INTERFACE ====================
+def format_detailed_results(results):
+    """Format results with detailed model breakdown"""
+    if not ensemble_model.models_loaded:
+        return "⚠️ **MODELS NOT PROPERLY LOADED**\n\nPlease check that all model files are uploaded:\n- ensemble_dualpath.pth\n- ensemble_resnet50.pth\n- ensemble_efficientnet.pth\n\nCurrently using fallback mode with random weights."
+    output = ""
+    # Find best match
+    valid_results = [r for r in results if r['ensemble_confidence'] > 0]
+    if not valid_results:
+        return "❌ No valid predictions could be made. Please check your images."
+    best_match = max(valid_results, key=lambda x: x['ensemble_confidence'])
+    best_index = best_match['answer_index']
+    output += "🎯 **FINAL PREDICTION RESULTS** 🎯\n\n"
+    output += f"**Best Match: Answer {best_index + 1}** \n"
+    output += f"**Overall Confidence: {best_match['ensemble_confidence']:.2f}%** \n"
+    output += f"**Distance: {best_match['ensemble_distance']:.4f}** \n"
+    output += f"**Match: {'✅ YES' if best_match['final_decision'] else '❌ NO'}** \n\n"
+    output += "---\n\n"
+    output += "**📊 DETAILED MODEL BREAKDOWN:**\n\n"
+    for result in results:
+        output += f"## **Answer {result['answer_index'] + 1}**\n"
+        output += f"**Ensemble:** {result['ensemble_confidence']:.2f}% | Distance: {result['ensemble_distance']:.4f} | {'✅ MATCH' if result['final_decision'] else '❌ NO MATCH'}\n\n"
+        for model_name, prediction in result['model_predictions'].items():
+            emoji = "🟢" if prediction['is_match'] else "🔴"
+            output += f"  - **{model_name.upper()}:** {emoji} {prediction['confidence']:.2f}% | Distance: {prediction['distance']:.4f}\n"
+        output += "\n"
+    # Model agreement analysis
+    output += "---\n\n"
+    output += "**🤝 MODEL AGREEMENT ANALYSIS:**\n\n"
+    for result in results:
+        matches = sum(1 for pred in result['model_predictions'].values() if pred['is_match'])
+        total_models = len(result['model_predictions'])
+        agreement = (matches / total_models) * 100
+        consensus_emoji = "🟢" if agreement > 66 else "🟡" if agreement > 33 else "🔴"
+        output += f"**Answer {result['answer_index'] + 1}:** {consensus_emoji} {matches}/{total_models} models agree ({agreement:.1f}% consensus)\n"
+    return output
+def predict_captcha_detailed(question_image, *answer_images):
+    """Gradio prediction function with detailed output"""
+    # Filter out None images
+    answer_imgs = [img for img in answer_images if img is not None]
+    if not question_image or len(answer_imgs) == 0:
+        return "❌ Please upload both question and answer images"
+    try:
+        print(f"🔍 Processing: 1 question + {len(answer_imgs)} answers")
+        # Get detailed predictions
+        results = ensemble_model.predict_detailed(question_image, answer_imgs)
+        # Format output
+        output = format_detailed_results(results)
+        # Add technical details
+        output += "\n---\n\n"
+        output += "**⚙️ TECHNICAL DETAILS:**\n\n"
+        output += f"- **Threshold:** 0.312 (optimized during training)\n"
+        output += f"- **Models:** DualPath (CNN + Handcrafted), ResNet50, EfficientNet-B3\n"
+        output += f"- **Ensemble Weights:** DualPath(34%), ResNet50(33%), EfficientNet(33%)\n"
+        output += f"- **Training Accuracy:** 98.67%\n"
+        output += f"- **Device:** {device.upper()}\n"
+        output += f"- **Models Loaded:** {'✅ YES' if ensemble_model.models_loaded else '❌ NO'}\n"
+        output += f"- **Keep-Alive:** ✅ Active (prevents sleeping)\n"
+        return output
+    except Exception as e:
+        return f"❌ Error during prediction: {str(e)}"
+# ==================== GRADIO UI ====================
+# Gradio UI: one question image, up to five answer images, and a Markdown
+# panel for the report. Components are created in the order they appear here.
+with gr.Blocks(title="CAPTCHA Solver - Ensemble AI", theme=gr.themes.Soft()) as demo:
+    gr.Markdown("""
+    # 🔍 CAPTCHA Solver - Ensemble Siamese Network
+    ### **Achieved 98.67% Accuracy during Training**
+    **🚀 Auto Keep-Alive Enabled** - Prevents Hugging Face from sleeping!
+    **⏱️ 60s Timeout** - Extended timeout for better reliability
+    **API Endpoints:**
+    - `POST /api/predict` - **Recommended** (with captcha ID support)
+    - `POST /predict` - Legacy (order-based)
+    - `GET /api/health` - Health check
+    """)
+    # Status indicator (evaluated once, when the UI is built at import time)
+    status_text = "✅ Models Loaded Successfully" if ensemble_model.models_loaded else "⚠️ Models Not Properly Loaded - Using Fallback Mode"
+    gr.Markdown(f"**Status:** {status_text} | **Keep-Alive:** ✅ Active")
+    with gr.Row():
+        # Left column: image uploads and the trigger button
+        with gr.Column(scale=1):
+            gr.Markdown("### 📸 Upload Images")
+            question = gr.Image(label="Question Image", type="pil", height=200)
+            gr.Markdown("### 🎯 Answer Images")
+            with gr.Row():
+                answer1 = gr.Image(label="Answer 1", type="pil", height=150)
+                answer2 = gr.Image(label="Answer 2", type="pil", height=150)
+            with gr.Row():
+                answer3 = gr.Image(label="Answer 3", type="pil", height=150)
+                answer4 = gr.Image(label="Answer 4", type="pil", height=150)
+            with gr.Row():
+                answer5 = gr.Image(label="Answer 5", type="pil", height=150)
+            predict_btn = gr.Button("🚀 Analyze CAPTCHA", variant="primary", size="lg")
+        # Right column: rendered Markdown report
+        with gr.Column(scale=2):
+            gr.Markdown("### 📊 Prediction Results")
+            output = gr.Markdown(
+                label="Detailed Analysis",
+                value="👆 Upload images and click 'Analyze CAPTCHA' to see predictions here..."
+            )
+    # Connect the prediction function: the question is the first positional
+    # argument, the five answer slots are collected by *answer_images
+    predict_btn.click(
+        fn=predict_captcha_detailed,
+        inputs=[question, answer1, answer2, answer3, answer4, answer5],
+        outputs=output
+    )
+# ==================== COMBINE GRADIO AND FASTAPI ====================
+@app.get("/api")
+async def api_info():
+    return {
+        "message": "CAPTCHA Solver API",
+        "version": "1.0",
+        "features": [
+            "captcha_id_based_matching",
+            "ensemble_ai_models",
+            "base64_image_support",
+            "auto_keep_alive",
+            "extended_timeouts"
+        ],
+        "endpoints": {
+            "/api/predict": "POST - Main prediction endpoint with captcha ID support",
+            "/predict": "POST - Legacy order-based endpoint",
+            "/api/health": "GET - Health check",
+            "/": "GET - API info"
+        }
+    }
+# Mount Gradio app
+# NOTE(review): a JSON route for GET "/" is registered earlier in this file;
+# routes added first presumably take precedence over this mount - confirm the
+# Gradio UI is actually reachable at "/".
+app = gr.mount_gradio_app(app, demo, path="/")
+# ==================== START KEEP-ALIVE & SERVER ====================
+# Start keep-alive service
+# This call sits outside the __main__ guard, so the ping thread also starts
+# when the module is merely imported.
+keep_alive_ping()
+if __name__ == "__main__":
+    print("🚀 Starting CAPTCHA Solver API Server...")
+    print("✅ Keep-Alive Service: ACTIVE (prevents sleeping)")
+    print("📍 API URL: https://huijio-zeracap2.hf.space/api/predict")
+    print("📍 Health Check: https://huijio-zeracap2.hf.space/api/health")
+    print("⏱️  Timeout: 60 seconds")
+    # NOTE(review): timeout_keep_alive is presumably uvicorn's idle keep-alive
+    # connection timeout, not a per-request timeout as the message above
+    # suggests - confirm against the uvicorn settings documentation.
+    uvicorn.run(app, host="0.0.0.0", port=7860, timeout_keep_alive=60)