Spaces:

Oamitai
/

Set-Game-Solver

Sleeping

App Files Files Community

Oamitai committed on Feb 15, 2025

Commit

1f1a91e

verified ·

1 Parent(s): 9b24efb

Update app.py

Browse files

Files changed (1) hide show

app.py +272 -240

app.py CHANGED Viewed

@@ -1,240 +1,272 @@
-import cv2
-import numpy as np
-import tensorflow as tf
-import torch
-from ultralytics import YOLO
-from PIL import Image
-import gradio as gr
-import traceback
-import pandas as pd
-from itertools import combinations
-from huggingface_hub import hf_hub_download
-import spaces  # For ZeroGPU support
-# =============================================================================
-#                           MODEL LOADING (Keras Models on CPU)
-# =============================================================================
-# These models can be loaded globally.
-shape_classification_model = tf.keras.models.load_model(
-    hf_hub_download("Oamitai/shape-classification", "shape_model.keras")
-)
-fill_classification_model = tf.keras.models.load_model(
-    hf_hub_download("Oamitai/fill-classification", "fill_model.keras")
-)
-# Global YOLO models will be loaded lazily inside the GPU function.
-global_card_detection_model = None
-global_shape_detection_model = None
-# =============================================================================
-#                  ORIENTATION CORRECTION FUNCTIONS
-# =============================================================================
-def check_and_rotate_input_image(board_image, card_boxes):
-    if len(card_boxes) == 0:
-        return board_image, False
-    total_width = total_height = 0
-    for box in card_boxes:
-        x1, y1, x2, y2 = box
-        total_width += (x2 - x1)
-        total_height += (y2 - y1)
-    avg_width = total_width / len(card_boxes)
-    avg_height = total_height / len(card_boxes)
-    if avg_height > avg_width:
-        rotated_image = cv2.rotate(board_image, cv2.ROTATE_90_CLOCKWISE)
-        return rotated_image, True
-    else:
-        return board_image, False
-def restore_original_orientation(image, was_rotated):
-    if was_rotated:
-        return cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE)
-    return image
-# =============================================================================
-#                        PREDICTION FUNCTIONS
-# =============================================================================
-def predict_color(shape_image):
-    hsv_image = cv2.cvtColor(shape_image, cv2.COLOR_BGR2HSV)
-    green_mask = cv2.inRange(hsv_image, np.array([40, 50, 50]), np.array([80, 255, 255]))
-    purple_mask = cv2.inRange(hsv_image, np.array([120, 50, 50]), np.array([160, 255, 255]))
-    red_mask1 = cv2.inRange(hsv_image, np.array([0, 50, 50]), np.array([10, 255, 255]))
-    red_mask2 = cv2.inRange(hsv_image, np.array([170, 50, 50]), np.array([180, 255, 255]))
-    red_mask = cv2.bitwise_or(red_mask1, red_mask2)
-    color_counts = {
-        'green': cv2.countNonZero(green_mask),
-        'purple': cv2.countNonZero(purple_mask),
-        'red': cv2.countNonZero(red_mask)
-    }
-    return max(color_counts, key=color_counts.get)
-def predict_card_features(card_image, shape_detection_model, fill_model, shape_model, box):
-    shape_results = shape_detection_model(card_image)
-    card_height, card_width = card_image.shape[:2]
-    card_area = card_width * card_height
-    filtered_boxes = []
-    for detected_box in shape_results[0].boxes.xyxy.cpu().numpy():
-        x1, y1, x2, y2 = detected_box.astype(int)
-        shape_area = (x2 - x1) * (y2 - y1)
-        if shape_area > 0.03 * card_area:
-            filtered_boxes.append([x1, y1, x2, y2])
-    if len(filtered_boxes) == 0:
-        return {'count': 0, 'color': 'unknown', 'fill': 'unknown', 'shape': 'unknown', 'box': box}
-    fill_input_shape = fill_model.input_shape[1:3]
-    shape_input_shape = shape_model.input_shape[1:3]
-    fill_imgs = []
-    shape_imgs = []
-    color_list = []
-    for fb in filtered_boxes:
-        x1, y1, x2, y2 = fb
-        shape_img = card_image[y1:y2, x1:x2]
-        fill_img = cv2.resize(shape_img, tuple(fill_input_shape)) / 255.0
-        shape_img_resized = cv2.resize(shape_img, tuple(shape_input_shape)) / 255.0
-        fill_imgs.append(fill_img)
-        shape_imgs.append(shape_img_resized)
-        color_list.append(predict_color(shape_img))
-    fill_imgs = np.array(fill_imgs)
-    shape_imgs = np.array(shape_imgs)
-    fill_preds = fill_model.predict(fill_imgs, batch_size=len(fill_imgs))
-    shape_preds = shape_model.predict(shape_imgs, batch_size=len(shape_imgs))
-    fill_labels_list = ['empty', 'full', 'striped']
-    shape_labels_list = ['diamond', 'oval', 'squiggle']
-    predicted_fill = [fill_labels_list[np.argmax(pred)] for pred in fill_preds]
-    predicted_shape = [shape_labels_list[np.argmax(pred)] for pred in shape_preds]
-    count = min(len(filtered_boxes), 3)
-    final_color = max(set(color_list), key=color_list.count)
-    final_fill = max(set(predicted_fill), key=predicted_fill.count)
-    final_shape = max(set(predicted_shape), key=predicted_shape.count)
-    return {'count': count, 'color': final_color, 'fill': final_fill, 'shape': final_shape, 'box': box}
-def is_set(cards):
-    for feature in ['Count', 'Color', 'Fill', 'Shape']:
-        if len({card[feature] for card in cards}) not in [1, 3]:
-            return False
-    return True
-def find_sets(card_df):
-    sets_found = []
-    for combo in combinations(card_df.iterrows(), 3):
-        cards = [entry[1] for entry in combo]
-        if is_set(cards):
-            set_info = {
-                'set_indices': [entry[0] for entry in combo],
-                'cards': [{feature: card[feature] for feature in ['Count', 'Color', 'Fill', 'Shape', 'Coordinates']} for card in cards]
-            }
-            sets_found.append(set_info)
-    return sets_found
-def detect_cards_from_image(board_image, card_detection_model):
-    card_results = card_detection_model(board_image)
-    card_boxes = card_results[0].boxes.xyxy.cpu().numpy().astype(int)
-    cards = []
-    for box in card_boxes:
-        x1, y1, x2, y2 = box
-        card_img = board_image[y1:y2, x1:x2]
-        cards.append((card_img, box))
-    return cards, card_boxes
-def classify_cards_from_board_image(board_image, card_boxes, shape_detection_model, fill_model, shape_model):
-    card_data = []
-    for box in card_boxes:
-        x1, y1, x2, y2 = box
-        card_img = board_image[y1:y2, x1:x2]
-        features = predict_card_features(card_img, shape_detection_model, fill_model, shape_model, box)
-        card_data.append({
-            "Count": features['count'],
-            "Color": features['color'],
-            "Fill": features['fill'],
-            "Shape": features['shape'],
-            "Coordinates": f"{box[0]}, {box[1]}, {box[2]}, {box[3]}"
-        })
-    return pd.DataFrame(card_data)
-def classify_and_find_sets_from_array(board_image, card_detection_model, shape_detection_model, fill_model, shape_model):
-    _, card_boxes = detect_cards_from_image(board_image, card_detection_model)
-    board_image, was_rotated = check_and_rotate_input_image(board_image, card_boxes)
-    if was_rotated:
-        _, card_boxes = detect_cards_from_image(board_image, card_detection_model)
-    card_df = classify_cards_from_board_image(board_image, card_boxes, shape_detection_model, fill_model, shape_model)
-    sets_found = find_sets(card_df)
-    annotated_image = draw_sets_on_image(board_image.copy(), sets_found)
-    final_image = restore_original_orientation(annotated_image, was_rotated)
-    return sets_found, final_image
-# =============================================================================
-#                          DRAWING FUNCTIONS
-# =============================================================================
-def draw_sets_on_image(board_image, sets_info):
-    colors = [(255, 0, 0), (0, 255, 0), (0, 0, 255),
-              (255, 255, 0), (255, 0, 255), (0, 255, 255)]
-    base_thickness = 8
-    base_expansion = 5
-    for index, set_info in enumerate(sets_info):
-        color = colors[index % len(colors)]
-        thickness = base_thickness + 2 * index
-        expansion = base_expansion + 15 * index
-        for i, card in enumerate(set_info['cards']):
-            coordinates = list(map(int, card['Coordinates'].split(',')))
-            x1, y1, x2, y2 = coordinates
-            x1_expanded = max(0, x1 - expansion)
-            y1_expanded = max(0, y1 - expansion)
-            x2_expanded = min(board_image.shape[1], x2 + expansion)
-            y2_expanded = min(board_image.shape[0], y2 + expansion)
-            cv2.rectangle(board_image, (x1_expanded, y1_expanded),
-                          (x2_expanded, y2_expanded), color, thickness)
-            if i == 0:
-                cv2.putText(board_image, f"Set {index + 1}", (x1_expanded, y1_expanded - 10),
-                            cv2.FONT_HERSHEY_SIMPLEX, 0.9, color, thickness)
-    return board_image
-# =============================================================================
-#                     GRADIO INTERFACE FUNCTION (ZeroGPU)
-# =============================================================================
-@spaces.GPU(duration=280)
-def detect_and_display_sets_interface(input_image):
-    try:
-        image_cv = cv2.cvtColor(np.array(input_image), cv2.COLOR_RGB2BGR)
-        global global_card_detection_model, global_shape_detection_model
-        # Lazy load YOLO models on GPU after allocation.
-        if global_card_detection_model is None:
-            card_model_path = hf_hub_download("Oamitai/card-detection", "best.pt")
-            global_card_detection_model = YOLO(card_model_path)
-            global_card_detection_model.conf = 0.5
-            global_card_detection_model.to("cuda")
-        if global_shape_detection_model is None:
-            shape_model_path = hf_hub_download("Oamitai/shape-detection", "best.pt")
-            global_shape_detection_model = YOLO(shape_model_path)
-            global_shape_detection_model.conf = 0.5
-            global_shape_detection_model.to("cuda")
-        sets_found, final_image = classify_and_find_sets_from_array(
-            image_cv,
-            global_card_detection_model,
-            global_shape_detection_model,
-            fill_classification_model,
-            shape_classification_model
-        )
-        final_image_rgb = cv2.cvtColor(final_image, cv2.COLOR_BGR2RGB)
-        return Image.fromarray(final_image_rgb), "Sets detected successfully."
-    except Exception as e:
-        err = f"❌ Error: {str(e)}\n{traceback.format_exc()}"
-        return Image.fromarray(np.zeros((100, 100, 3), dtype=np.uint8)), err
-# =============================================================================
-#                             LAUNCH GRADIO
-# =============================================================================
-iface = gr.Interface(
-    fn=detect_and_display_sets_interface,
-    inputs=gr.Image(type="pil", label="Upload Board Image"),
-    outputs=[gr.Image(type="pil", label="Annotated Image"), gr.Textbox(label="Status")],
-    title="Set Game Detector",
-    description=("Upload an image of a Set game board to detect cards, "
-                 "classify their features, and highlight valid sets.")
-)
-if __name__ == "__main__":
-    iface.launch()

+import gradio as gr
+import spaces
+from huggingface_hub import hf_hub_download
+import cv2
+import numpy as np
+import tensorflow as tf
+from tensorflow.keras.models import load_model
+import torch
+from ultralytics import YOLO
+from PIL import Image
+import traceback
+import json
+import pandas as pd
+from itertools import combinations
+from pathlib import Path
+# =============================================================================
+#                           MODEL LOADING
+# =============================================================================
+# All four models below are downloaded and loaded at import time, so importing
+# this module performs network/disk I/O before any request is served.
+# Load YOLO Card Detection Model from HuggingFace Hub
+card_model_path = hf_hub_download("Oamitai/card-detection", "best.pt")
+card_detection_model = YOLO(card_model_path)
+# NOTE(review): recent ultralytics releases take the confidence threshold as a
+# per-call argument (e.g. `model(image, conf=0.5)`); confirm that assigning
+# `.conf` on the YOLO object actually changes the detection threshold.
+card_detection_model.conf = 0.5
+# Load YOLO Shape Detection Model from HuggingFace Hub
+shape_model_path = hf_hub_download("Oamitai/shape-detection", "best.pt")
+shape_detection_model = YOLO(shape_model_path)
+# NOTE(review): same caveat as above for the `.conf` assignment.
+shape_detection_model.conf = 0.5
+# Load Shape Classification Model (Keras) from HuggingFace Hub
+# (its three outputs are mapped to diamond / oval / squiggle in predict_card_features)
+shape_classification_model = load_model(
+    hf_hub_download("Oamitai/shape-classification", "shape_model.keras")
+)
+# Load Fill Classification Model (Keras) from HuggingFace Hub
+# (its three outputs are mapped to empty / full / striped in predict_card_features)
+fill_classification_model = load_model(
+    hf_hub_download("Oamitai/fill-classification", "fill_model.keras")
+)
+# =============================================================================
+#                     UTILITY & PROCESSING FUNCTIONS
+# =============================================================================
+def check_and_rotate_input_image(board_image: np.ndarray, detector) -> (np.ndarray, bool):
+    """
+    Rotate the board 90 degrees clockwise when its cards are, on average, taller than wide.
+
+    Runs `detector` on `board_image` and compares the mean height with the mean
+    width of the detected card boxes. Returns `(image, was_rotated)`; the input
+    image is handed back untouched when nothing is detected or when the cards
+    are not taller than they are wide.
+    """
+    detections = detector(board_image)
+    boxes = detections[0].boxes.xyxy.cpu().numpy().astype(int)
+    if boxes.size == 0:
+        return board_image, False
+    # Columns of an xyxy box array: left, top, right, bottom.
+    left, top, right, bottom = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
+    mean_width = np.mean(right - left)
+    mean_height = np.mean(bottom - top)
+    if not mean_height > mean_width:
+        return board_image, False
+    return cv2.rotate(board_image, cv2.ROTATE_90_CLOCKWISE), True
+def restore_original_orientation(image: np.ndarray, was_rotated: bool) -> np.ndarray:
+    """
+    Undo the 90-degree clockwise rotation applied during preprocessing.
+
+    When `was_rotated` is falsy the very same image object is returned;
+    otherwise the image is rotated 90 degrees counter-clockwise.
+    """
+    if not was_rotated:
+        return image
+    return cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE)
+def predict_color(shape_image: np.ndarray) -> str:
+    """
+    Name the Set colour ('green', 'purple' or 'red') covering the most pixels.
+
+    The image is converted from BGR to HSV and thresholded once per colour;
+    red uses two hue bands (0-10 and 170-180) that are OR-ed together. Ties,
+    including the case where no pixel matches at all, resolve to the first
+    colour in the order green, purple, red.
+    """
+    hsv = cv2.cvtColor(shape_image, cv2.COLOR_BGR2HSV)
+
+    def band(lower, upper):
+        # Binary mask of the pixels whose HSV value lies inside [lower, upper].
+        return cv2.inRange(hsv, np.array(lower), np.array(upper))
+
+    masks = {
+        'green': band([40, 50, 50], [80, 255, 255]),
+        'purple': band([120, 50, 50], [160, 255, 255]),
+        'red': cv2.bitwise_or(band([0, 50, 50], [10, 255, 255]),
+                              band([170, 50, 50], [180, 255, 255])),
+    }
+    pixel_counts = {name: cv2.countNonZero(mask) for name, mask in masks.items()}
+    return max(pixel_counts, key=pixel_counts.get)
+def predict_card_features(card_image: np.ndarray, shape_detector, fill_model, shape_model, box: list) -> dict:
+    """
+    Detect the shapes on one card crop and classify the card's features.
+
+    Args:
+        card_image: Crop of a single card (expected to be BGR, since
+            predict_color converts from BGR).
+        shape_detector: Callable whose result exposes the shape boxes as
+            `result[0].boxes.xyxy`.
+        fill_model: Classifier exposing `input_shape` and `predict`; its
+            outputs are read as empty / full / striped.
+        shape_model: Classifier exposing `input_shape` and `predict`; its
+            outputs are read as diamond / oval / squiggle.
+        box: Card coordinates, passed through unchanged in the result.
+
+    Returns:
+        Dict with keys 'count', 'color', 'fill', 'shape' and 'box'. When no
+        usable shape is found, 'count' is 0 and the three labels are 'unknown'.
+    """
+    def most_common(labels: list) -> str:
+        # Scan the list itself rather than a set: ties then go to the label seen
+        # first instead of depending on set iteration order, which changes
+        # between processes because string hashing is randomised.
+        return max(labels, key=labels.count)
+
+    def resize_target(model) -> tuple:
+        # input_shape[1:3] is (height, width) for a channels-last model, while
+        # cv2.resize expects dsize as (width, height).
+        # NOTE(review): assumes channels-last inputs - confirm for both models.
+        height, width = model.input_shape[1:3]
+        return (width, height)
+
+    shape_results = shape_detector(card_image)
+    card_h, card_w = card_image.shape[:2]
+    min_area = 0.03 * card_w * card_h
+    filtered_boxes = []
+    for x1, y1, x2, y2 in shape_results[0].boxes.xyxy.cpu().numpy():
+        # Ignore detections covering 3% of the card or less.
+        if (x2 - x1) * (y2 - y1) <= min_area:
+            continue
+        x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
+        # Integer truncation can collapse a thin box; an empty crop would make
+        # cv2.resize raise.
+        if x2 > x1 and y2 > y1:
+            filtered_boxes.append([x1, y1, x2, y2])
+    if not filtered_boxes:
+        return {'count': 0, 'color': 'unknown', 'fill': 'unknown', 'shape': 'unknown', 'box': box}
+    fill_dsize = resize_target(fill_model)
+    shape_dsize = resize_target(shape_model)
+    fill_imgs, shape_imgs, color_list = [], [], []
+    for x1, y1, x2, y2 in filtered_boxes:
+        shape_img = card_image[y1:y2, x1:x2]
+        # Both classifiers are fed pixel values scaled to [0, 1].
+        fill_imgs.append(cv2.resize(shape_img, fill_dsize) / 255.0)
+        shape_imgs.append(cv2.resize(shape_img, shape_dsize) / 255.0)
+        color_list.append(predict_color(shape_img))
+    fill_imgs = np.array(fill_imgs)
+    shape_imgs = np.array(shape_imgs)
+    # One batch per model: every shape on the card is classified in a single call.
+    fill_preds = fill_model.predict(fill_imgs, batch_size=len(fill_imgs))
+    shape_preds = shape_model.predict(shape_imgs, batch_size=len(shape_imgs))
+    fill_labels_list = ['empty', 'full', 'striped']
+    shape_labels_list = ['diamond', 'oval', 'squiggle']
+    predicted_fill = [fill_labels_list[np.argmax(pred)] for pred in fill_preds]
+    predicted_shape = [shape_labels_list[np.argmax(pred)] for pred in shape_preds]
+    # NOTE(review): the count is not capped, so over-detection can yield a count
+    # above 3 - confirm whether a cap is wanted.
+    return {'count': len(filtered_boxes), 'color': most_common(color_list),
+            'fill': most_common(predicted_fill), 'shape': most_common(predicted_shape), 'box': box}
+def is_set(cards: list) -> bool:
+    """
+    Check if a group of cards forms a valid set.
+
+    For each of 'Count', 'Color', 'Fill' and 'Shape' the values must be all
+    identical or all distinct across the cards. A card whose features could
+    not be detected (Count of 0, or an 'unknown' colour, fill or shape) never
+    belongs to a set; without this guard three undetected cards would match on
+    every feature and be reported as a set.
+
+    Args:
+        cards: Mappings (dicts or DataFrame rows) holding the four feature keys.
+
+    Returns:
+        True when the cards form a valid set, False otherwise.
+    """
+    for card in cards:
+        if card['Count'] == 0 or 'unknown' in (card['Color'], card['Fill'], card['Shape']):
+            return False
+    for feature in ['Count', 'Color', 'Fill', 'Shape']:
+        if len({card[feature] for card in cards}) not in [1, 3]:
+            return False
+    return True
+def find_sets(card_df: pd.DataFrame) -> list:
+    """
+    Test every three-card combination of `card_df` and collect the valid sets.
+
+    Each entry of the returned list is a dict with 'set_indices' (the row
+    labels of the three cards) and 'cards' (one dict per card carrying its
+    Count, Color, Fill, Shape and Coordinates).
+    """
+    reported_fields = ['Count', 'Color', 'Fill', 'Shape', 'Coordinates']
+    sets_found = []
+    for triple in combinations(card_df.iterrows(), 3):
+        row_labels = [label for label, _ in triple]
+        rows = [row for _, row in triple]
+        if not is_set(rows):
+            continue
+        sets_found.append({
+            'set_indices': row_labels,
+            'cards': [{field: row[field] for field in reported_fields} for row in rows],
+        })
+    return sets_found
+def detect_cards_from_image(board_image: np.ndarray, detector) -> list:
+    """
+    Run the card detector on the board and cut out every detected card.
+
+    Returns a list of `(card_crop, [x1, y1, x2, y2])` tuples, one per detected
+    box, with the coordinates truncated to integers.
+    """
+    detections = detector(board_image)
+    boxes = detections[0].boxes.xyxy.cpu().numpy().astype(int)
+    cards = []
+    for x1, y1, x2, y2 in boxes:
+        crop = board_image[y1:y2, x1:x2]
+        cards.append((crop, [x1, y1, x2, y2]))
+    return cards
+def classify_cards_from_board_image(board_image: np.ndarray, card_detector, shape_detector, fill_model, shape_model) -> pd.DataFrame:
+    """
+    Build a table with one row per card detected on the board.
+
+    Every card found by `card_detector` is classified with
+    predict_card_features; the resulting columns are Count, Color, Fill,
+    Shape and Coordinates (the box rendered as an "x1, y1, x2, y2" string).
+    """
+    def to_row(card_image, box):
+        # Translate the lower-case feature dict into the table's column names.
+        features = predict_card_features(card_image, shape_detector, fill_model, shape_model, box)
+        return {
+            "Count": features['count'],
+            "Color": features['color'],
+            "Fill": features['fill'],
+            "Shape": features['shape'],
+            "Coordinates": f"{box[0]}, {box[1]}, {box[2]}, {box[3]}"
+        }
+
+    detected = detect_cards_from_image(board_image, card_detector)
+    return pd.DataFrame([to_row(card_image, box) for card_image, box in detected])
+def draw_sets_on_image(board_image: np.ndarray, sets_info: list) -> np.ndarray:
+    """
+    Outline the cards of every set on `board_image` and label each set.
+
+    The image is drawn on in place and also returned. Each set gets its own
+    colour (cycling through six), and both the line thickness and the box
+    margin grow with the set number, so a card belonging to several sets gets
+    nested outlines. The "Set N" label is written above the first card of
+    each set.
+    """
+    palette = [(255, 0, 0), (0, 255, 0), (0, 0, 255),
+               (255, 255, 0), (255, 0, 255), (0, 255, 255)]
+    img_h, img_w = board_image.shape[0], board_image.shape[1]
+    for index, set_info in enumerate(sets_info):
+        color = palette[index % len(palette)]
+        thickness = 8 + 2 * index
+        margin = 5 + 15 * index
+        for position, card in enumerate(set_info['cards']):
+            x1, y1, x2, y2 = (int(value) for value in card['Coordinates'].split(','))
+            # Grow the box by the margin, clamped to the image bounds.
+            top_left = (max(0, x1 - margin), max(0, y1 - margin))
+            bottom_right = (min(img_w, x2 + margin), min(img_h, y2 + margin))
+            cv2.rectangle(board_image, top_left, bottom_right, color, thickness)
+            if position != 0:
+                continue
+            cv2.putText(board_image, f"Set {index + 1}", (top_left[0], top_left[1] - 10),
+                        cv2.FONT_HERSHEY_SIMPLEX, 0.9, color, thickness)
+    return board_image
+def classify_and_find_sets_from_array(board_image: np.ndarray, card_detector, shape_detector, fill_model, shape_model) -> (list, np.ndarray):
+    """
+    Run the whole pipeline on a board image and return `(sets_found, annotated_image)`.
+
+    Steps: fix the orientation, classify every card, search for sets, draw
+    them on a copy of the oriented image, then rotate the drawing back to the
+    input orientation. The card detector runs twice per call: once for the
+    orientation check and once for the classification.
+    """
+    upright_image, was_rotated = check_and_rotate_input_image(board_image, card_detector)
+    card_df = classify_cards_from_board_image(
+        upright_image, card_detector, shape_detector, fill_model, shape_model
+    )
+    sets_found = find_sets(card_df)
+    # Draw on a copy so the oriented input itself is left unmodified.
+    canvas = upright_image.copy()
+    annotated = draw_sets_on_image(canvas, sets_found)
+    return sets_found, restore_original_orientation(annotated, was_rotated)
+# =============================================================================
+#                      GRADIO INFERENCE FUNCTION
+# =============================================================================
+@spaces.GPU()
+def detect_sets(input_image: Image.Image):
+    """
+    Process an uploaded image and return the annotated image along with detected sets info.
+
+    Args:
+        input_image: The uploaded board as a PIL image, or None when nothing
+            was uploaded.
+
+    Returns:
+        `(annotated_rgb_image, sets_json)`. `sets_json` is always a JSON
+        document: the list of detected sets on success, or an object with a
+        single "error" key on failure (the image is then None). The second
+        output feeds a gr.JSON component, which parses string values as JSON,
+        so a free-text error message would itself fail to display.
+    """
+    if input_image is None:
+        return None, json.dumps({"error": "No image provided."}, indent=2)
+    try:
+        # Convert the PIL image to OpenCV BGR format
+        image_cv = cv2.cvtColor(np.array(input_image), cv2.COLOR_RGB2BGR)
+        # Run the detection pipeline
+        sets_info, annotated_image = classify_and_find_sets_from_array(
+            image_cv,
+            card_detection_model,
+            shape_detection_model,
+            fill_classification_model,
+            shape_classification_model
+        )
+        # Convert annotated image back to RGB for display
+        annotated_image_rgb = cv2.cvtColor(annotated_image, cv2.COLOR_BGR2RGB)
+        return annotated_image_rgb, json.dumps(sets_info, indent=2)
+    except Exception:
+        # Top-level UI boundary: report the traceback to the user instead of
+        # crashing, wrapped in JSON so the output component can render it.
+        return None, json.dumps({"error": f"Error occurred: {traceback.format_exc()}"}, indent=2)
+# =============================================================================
+#                           GRADIO INTERFACE
+# =============================================================================
+# Page layout: a heading, a row with the upload widget and the trigger button,
+# and a row with the two outputs. The CSS centres the element whose id is
+# "col-container" and limits its width to 800px.
+with gr.Blocks(css="#col-container { margin: 0 auto; max-width: 800px; }") as demo:
+    gr.Markdown("# Set Game Detector\nUpload an image of a Set game board to detect valid sets.")
+    with gr.Row(elem_id="col-container"):
+        # type="pil" makes detect_sets receive the upload as a PIL image.
+        image_input = gr.Image(label="Upload Set Game Board", type="pil")
+        detect_button = gr.Button("Detect Sets")
+    with gr.Row():
+        result_image = gr.Image(label="Annotated Image")
+        result_info = gr.JSON(label="Detected Sets Info")
+    # Clicking the button runs detect_sets on the uploaded image; its two
+    # return values fill the annotated image and the JSON panel, in that order.
+    detect_button.click(
+        detect_sets,
+        inputs=[image_input],
+        outputs=[result_image, result_info]
+    )
+# =============================================================================
+#                          LAUNCH THE APP
+# =============================================================================
+# Runs unconditionally (there is no `if __name__ == "__main__":` guard), so
+# importing this module also starts the app.
+demo.launch()