Spaces:

Aff77
/

captcha_stn

Sleeping

App Files Files Community

Aff77 committed on May 23, 2025

Commit

297f772

verified ·

1 Parent(s): d036b64

Update app.py

Browse files

Files changed (1) hide show

app.py +46 -13

app.py CHANGED Viewed

@@ -55,23 +55,28 @@ model = load_model()
 # --------------------------
 # Prediction Logic
 # --------------------------
 def decode_predictions(preds):
-    """Convert model output to text using CTC decoding"""
     preds = preds.permute(1, 0, 2)  # [B, W, C]
-    _, pred_indices = preds.max(2)
     texts = []
     for pred in pred_indices:
-        # CTC decoding: merge repeated and remove blank
         decoded = []
         prev_char = None
         for idx in pred:
-            char = idx_to_char.get(idx.item(), '')
-            if char != prev_char and char != '' and idx.item() != (VOCAB_SIZE - 1):
-                decoded.append(char)
-            prev_char = char
         texts.append(''.join(decoded))
-    return texts[0] if len(texts) == 1 else texts
 def preprocess_image(image):
     """Convert input to model-compatible format"""
@@ -85,22 +90,50 @@ def preprocess_image(image):
 def predict(image):
     try:
-        # Handle Gradio input types
         if isinstance(image, dict):
             image = image['image'] if 'image' in image else image['data']
         if not isinstance(image, Image.Image):
-            image = Image.fromarray(image)
-        # Process and predict
-        image_tensor = preprocess_image(image)
         with torch.no_grad():
             outputs = model(image_tensor)
             prediction = decode_predictions(outputs)
         return prediction
     except Exception as e:
-        return f"Error: {str(e)}"
 # --------------------------
 # Gradio Interface

 # --------------------------
 # Prediction Logic
 # --------------------------
 def decode_predictions(preds):
     """Greedy CTC decoding of raw model output into text.

     For every sample the most likely class is taken at each time step,
     consecutive repeats of the same class index are collapsed into one, and
     the blank class (index ``VOCAB_SIZE - 1``) is dropped.  A blank between
     two identical classes ends the run, so ``a, blank, a`` decodes to "aa".

     Args:
         preds: Model output tensor.  It is permuted with ``(1, 0, 2)`` so the
             batch dimension comes first; presumably ``[W, B, C]`` on input
             -- TODO confirm against the model definition.

     Returns:
         The decoded string when the batch holds exactly one sample,
         otherwise a list with one decoded string per sample.
     """
     preds = preds.permute(1, 0, 2)  # [B, W, C]
     # argmax is taken on the raw scores: softmax is monotonic, so applying it
     # first cannot change which class wins.
     pred_indices = torch.argmax(preds, dim=2)
     blank_idx = VOCAB_SIZE - 1

     texts = []
     for pred in pred_indices:
         decoded = []
         prev_idx = None
         for char_idx in pred.tolist():
             starts_new_run = char_idx != prev_idx
             # Track EVERY time step (blanks included); otherwise a blank
             # separating two equal characters would not split them.
             prev_idx = char_idx
             if not starts_new_run or char_idx == blank_idx:
                 continue
             # Indices beyond the character table are skipped.
             if char_idx < len(idx_to_char):
                 decoded.append(idx_to_char[char_idx])
         texts.append(''.join(decoded))
     return texts[0] if len(texts) == 1 else texts
 def preprocess_image(image):
     """Convert input to model-compatible format"""
 def predict(image):
     """Run one input through preprocessing, the model and decoding.

     A diagnostic line is printed at every stage.  Failures are never raised
     to the caller: each failure path returns a message string describing
     what went wrong instead of a prediction.
     """
     try:
         print("\n=== New Prediction ===")  # Debug separator
         print(f"Input type: {type(image)}")

         # Dict inputs hold the picture under 'image', falling back to 'data'.
         if isinstance(image, dict):
             print(f"Dict keys: {image.keys()}")
             payload_key = 'image' if 'image' in image else 'data'
             image = image[payload_key]

         # Anything that is not already a PIL image goes through fromarray.
         if not isinstance(image, Image.Image):
             print("Converting to PIL Image...")
             try:
                 image = Image.fromarray(image)
             except Exception as exc:
                 print(f"Conversion error: {exc}")
                 return f"Image conversion failed: {exc}"

         # Force single-channel ('L') mode before preprocessing.
         print(f"Image mode: {image.mode}, size: {image.size}")
         if image.mode != 'L':
             print("Converting to grayscale...")
             image = image.convert('L')

         try:
             image_tensor = preprocess_image(image)
             print(f"Tensor shape: {image_tensor.shape}")
         except Exception as exc:
             print(f"Preprocessing error: {exc}")
             return f"Preprocessing failed: {exc}"

         # Inference only, so gradient tracking is switched off.
         with torch.no_grad():
             raw_output = model(image_tensor)
             print(f"Raw model output shape: {raw_output.shape}")
             result = decode_predictions(raw_output)
             print(f"Final prediction: {result}")
         return result
     except Exception as exc:
         error_msg = f"Full error: {str(exc)}"
         print(error_msg)
         return error_msg
 # --------------------------
 # Gradio Interface