Spaces:
Runtime error
Runtime error
Commit Β·
774e5c0
1
Parent(s): 81aa4c1
...
Browse files- __pycache__/app.cpython-313.pyc +0 -0
- app.py +78 -59
__pycache__/app.cpython-313.pyc
ADDED
|
Binary file (50.3 kB). View file
|
|
|
app.py
CHANGED
|
@@ -828,71 +828,90 @@ def predict_with_auto_categorization(control_image, variant_image):
|
|
| 828 |
@spaces.GPU(duration=180) # Extended duration for maximum concurrent load
|
| 829 |
def predict_single(control_image, variant_image, business_model, customer_type, conversion_type, industry, page_type):
|
| 830 |
"""Orchestrates the prediction for a single pair of images and features."""
|
| 831 |
-
if control_image is None or variant_image is None:
|
| 832 |
-
return {"Error": 1.0, "Please upload both images": 0.0}
|
| 833 |
-
|
| 834 |
-
start_time = time.time()
|
| 835 |
-
|
| 836 |
-
c_img = Image.fromarray(control_image).convert("RGB")
|
| 837 |
-
v_img = Image.fromarray(variant_image).convert("RGB")
|
| 838 |
-
|
| 839 |
-
# Extract OCR text from both images (this is crucial for model performance)
|
| 840 |
try:
|
| 841 |
-
|
| 842 |
-
|
| 843 |
-
|
| 844 |
-
|
| 845 |
-
print("
|
| 846 |
-
|
| 847 |
-
|
| 848 |
-
|
| 849 |
-
confidence_data = get_confidence_data(business_model, customer_type, conversion_type, industry, page_type)
|
| 850 |
|
| 851 |
-
|
| 852 |
-
|
| 853 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 854 |
|
| 855 |
-
|
| 856 |
-
c_text = tokenizer(c_text_str, padding='max_length', truncation=True, max_length=MAX_TEXT_LENGTH, return_tensors='pt').to(device)
|
| 857 |
-
v_text = tokenizer(v_text_str, padding='max_length', truncation=True, max_length=MAX_TEXT_LENGTH, return_tensors='pt').to(device)
|
| 858 |
|
| 859 |
-
|
| 860 |
-
|
| 861 |
-
|
| 862 |
-
|
| 863 |
-
|
| 864 |
-
|
| 865 |
-
c_pix=c_pix, v_pix=v_pix,
|
| 866 |
-
c_tok=c_text['input_ids'], c_attn=c_text['attention_mask'],
|
| 867 |
-
v_tok=v_text['input_ids'], v_attn=v_text['attention_mask'],
|
| 868 |
-
cat_feats=cat_feats
|
| 869 |
-
)
|
| 870 |
|
| 871 |
-
|
|
|
|
|
|
|
| 872 |
|
| 873 |
-
|
| 874 |
-
|
| 875 |
-
|
| 876 |
-
|
| 877 |
-
|
| 878 |
-
|
| 879 |
-
|
| 880 |
-
|
| 881 |
-
|
| 882 |
-
|
| 883 |
-
|
| 884 |
-
|
| 885 |
-
|
| 886 |
-
|
| 887 |
-
|
| 888 |
-
|
| 889 |
-
|
| 890 |
-
|
| 891 |
-
|
| 892 |
-
|
| 893 |
-
|
| 894 |
-
|
| 895 |
-
|
|
|
|
|
|
|
|
|
|
| 896 |
|
| 897 |
@spaces.GPU
|
| 898 |
def predict_batch(csv_path, control_img_dir, variant_img_dir, num_samples):
|
|
|
|
@spaces.GPU(duration=180)  # Extended duration for maximum concurrent load
def predict_single(control_image, variant_image, business_model, customer_type, conversion_type, industry, page_type):
    """Orchestrates the prediction for a single pair of images and features.

    Args:
        control_image: numpy array of the control screenshot (Gradio image input);
            ``None`` when the user has not uploaded one.
        variant_image: numpy array of the variant screenshot, or ``None``.
        business_model, customer_type, conversion_type, industry, page_type:
            categorical selections; each must appear in the corresponding
            ``category_mappings[name]['categories']`` list.

    Returns:
        dict mapping display labels to string/float values for the Gradio
        output; on any failure, a fallback dict describing the error.
    """
    # NOTE(review): the emoji in the log/label strings below appear
    # mojibake-garbled in this view ("π", "β") — confirm original encoding.
    try:
        if control_image is None or variant_image is None:
            return {"Error": 1.0, "Please upload both images": 0.0}

        start_time = time.time()
        print(f"π Starting prediction with categories: {business_model} | {customer_type} | {conversion_type} | {industry} | {page_type}")

        c_img = Image.fromarray(control_image).convert("RGB")
        v_img = Image.fromarray(variant_image).convert("RGB")

        # Extract OCR text from both images (this is crucial for model performance)
        try:
            c_text_str = pytesseract.image_to_string(c_img)
            v_text_str = pytesseract.image_to_string(v_img)
            print(f"π OCR extracted - Control: {len(c_text_str)} chars, Variant: {len(v_text_str)} chars")
        except pytesseract.TesseractNotFoundError:
            # OCR is best-effort: degrade gracefully to empty text rather than fail.
            print("π Tesseract is not installed or not in your PATH. Skipping OCR.")
            c_text_str, v_text_str = "", ""

        # Get confidence data for this combination of categorical features.
        confidence_data = get_confidence_data(business_model, customer_type, conversion_type, industry, page_type)
        print(f"π Confidence data loaded: {confidence_data}")

        with torch.no_grad():
            c_pix = image_processor(images=c_img, return_tensors="pt").pixel_values.to(device)
            v_pix = image_processor(images=v_img, return_tensors="pt").pixel_values.to(device)

            # Process OCR text through the text tokenizer (fixed-length padding).
            c_text = tokenizer(c_text_str, padding='max_length', truncation=True, max_length=MAX_TEXT_LENGTH, return_tensors='pt').to(device)
            v_text = tokenizer(v_text_str, padding='max_length', truncation=True, max_length=MAX_TEXT_LENGTH, return_tensors='pt').to(device)

            # Encode the categorical selections as integer codes, in the order
            # defined by CATEGORICAL_FEATURES; raises ValueError if a value is
            # not present in its category list (caught by the outer handler).
            cat_inputs = [business_model, customer_type, conversion_type, industry, page_type]
            cat_codes = [category_mappings[name]['categories'].index(val) for name, val in zip(CATEGORICAL_FEATURES, cat_inputs)]
            cat_feats = torch.tensor([cat_codes], dtype=torch.int64).to(device)

            # Run the multimodal model prediction
            logits = model(
                c_pix=c_pix, v_pix=v_pix,
                c_tok=c_text['input_ids'], c_attn=c_text['attention_mask'],
                v_tok=v_text['input_ids'], v_attn=v_text['attention_mask'],
                cat_feats=cat_feats
            )

            probability = torch.sigmoid(logits).item()

        processing_time = time.time() - start_time

        # Log GPU memory usage for monitoring
        if torch.cuda.is_available():
            gpu_memory = torch.cuda.memory_allocated() / 1024**3
            print(f"π Prediction completed in {processing_time:.2f}s | GPU Memory: {gpu_memory:.1f}GB")
        else:
            print(f"π Prediction completed in {processing_time:.2f}s")

        # Determine winner: probability is P(variant wins) per the sigmoid above.
        winner = "VARIANT WINS" if probability > 0.5 else "CONTROL WINS"
        confidence_percentage = confidence_data['accuracy'] * 100

        # Create enhanced output with confidence scores and training data info.
        # (Fixed: dropped f-prefixes on literals with no placeholders.)
        result = {
            f"π {winner}": f"{probability:.3f}",
            "π Model Confidence": f"{confidence_percentage:.1f}%",
            "π Training Data": f"{confidence_data['training_data_count']} samples",
            "β Historical Accuracy": f"{confidence_data['correct_predictions']}/{confidence_data['count']} correct",
            "π― Win/Loss Ratio": f"{confidence_data['actual_wins']} wins in {confidence_data['count']} tests"
        }

        print(f"π― Final result: {result}")
        return result

    except Exception as e:
        # Boundary handler: this is a Gradio callback, so surface the failure
        # in the UI rather than raising and killing the request.
        print(f"β ERROR in predict_single: {e}")
        print(f"π Error type: {type(e).__name__}")
        import traceback
        traceback.print_exc()

        # Return error result with fallback confidence data
        return {
            "β Error": f"Prediction failed: {str(e)}",
            "π Model Confidence": "50.0%",
            "π Training Data": "0 samples",
            "β Historical Accuracy": "0/0 correct",
            "π― Win/Loss Ratio": "0 wins in 0 tests"
        }
|
| 916 |
@spaces.GPU
|
| 917 |
def predict_batch(csv_path, control_img_dir, variant_img_dir, num_samples):
|