import os
import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import load_model
import torch
from ultralytics import YOLO
from itertools import combinations
import gradio as gr
import traceback
import time
from typing import Tuple, List, Dict
import logging

# Force CPU mode for TensorFlow (must run before TF initializes any device).
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
tf.config.set_visible_devices([], 'GPU')

# Import spaces for the ZeroGPU wrapper; fall back to a no-op shim so the
# app also runs outside Hugging Face Spaces (local development).
try:
    import spaces
except ImportError:
    class spaces:
        # Dummy stand-in: `@spaces.GPU` becomes a pass-through decorator.
        @staticmethod
        def GPU(func):
            return func

# =============================================================================
# LOGGING CONFIGURATION
# =============================================================================
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger("set_detector")

# =============================================================================
# MODEL LOADING
# =============================================================================
# Module-level cache so repeated requests reuse the already-loaded models.
_CARD_DETECTOR = None
_SHAPE_DETECTOR = None
_SHAPE_CLASSIFIER = None
_FILL_CLASSIFIER = None


def load_models():
    """
    Load all models needed for SET detection in CPU-only mode.

    Downloads model weights from the Hugging Face Hub on first use and caches
    the loaded models in module-level globals for subsequent calls.

    Returns:
        tuple: (card_detector, shape_detector, shape_classifier,
        fill_classifier) — two YOLO detectors and two Keras classifiers.

    Raises:
        ValueError: if any model download or deserialization fails.
    """
    global _CARD_DETECTOR, _SHAPE_DETECTOR, _SHAPE_CLASSIFIER, _FILL_CLASSIFIER

    # Return cached models if already loaded.  Compare against None
    # explicitly: the previous `all([model, ...])` relied on truthiness of
    # framework model objects, which is not a defined contract.
    cached = (_CARD_DETECTOR, _SHAPE_DETECTOR, _SHAPE_CLASSIFIER, _FILL_CLASSIFIER)
    if all(m is not None for m in cached):
        logger.info("Using cached models")
        return cached

    try:
        from huggingface_hub import hf_hub_download

        logger.info("Loading models from Hugging Face Hub...")

        # Load Shape Classification Model (TensorFlow)
        logger.info("Loading shape classification model...")
        shape_classifier = load_model(
            hf_hub_download("Oamitai/shape-classification", "shape_model.keras")
        )

        # Load Fill Classification Model (TensorFlow)
        logger.info("Loading fill classification model...")
        fill_classifier = load_model(
            hf_hub_download("Oamitai/fill-classification", "fill_model.keras")
        )

        # Load YOLO Card Detection Model (PyTorch)
        logger.info("Loading card detection model...")
        card_model_path = hf_hub_download("Oamitai/card-detection", "best.pt")
        card_detector = YOLO(card_model_path)
        # NOTE(review): recent ultralytics releases ignore a `.conf` attribute
        # set on the model object; confidence is normally passed per predict
        # call (model(img, conf=0.5)).  Verify against the pinned version.
        card_detector.conf = 0.5

        # Load YOLO Shape Detection Model (PyTorch)
        logger.info("Loading shape detection model...")
        shape_model_path = hf_hub_download("Oamitai/shape-detection", "best.pt")
        shape_detector = YOLO(shape_model_path)
        shape_detector.conf = 0.5

        # Explicitly move the torch-backed detectors to CPU.
        logger.info("Setting models to CPU mode...")
        card_detector.to("cpu")
        shape_detector.to("cpu")

        # Cache the models for later calls.
        _CARD_DETECTOR = card_detector
        _SHAPE_DETECTOR = shape_detector
        _SHAPE_CLASSIFIER = shape_classifier
        _FILL_CLASSIFIER = fill_classifier

        logger.info("All models loaded successfully in CPU mode!")
        return card_detector, shape_detector, shape_classifier, fill_classifier

    except Exception as e:
        error_msg = f"Error loading models: {str(e)}"
        logger.error(error_msg)
        logger.error(traceback.format_exc())
        # Preserve the original cause for debugging; callers catch ValueError.
        raise ValueError(error_msg) from e
# =============================================================================
# UTILITY & DETECTION FUNCTIONS
# =============================================================================

def verify_and_rotate_image(board_image: np.ndarray, card_detector: YOLO) -> Tuple[np.ndarray, bool]:
    """
    Checks if the detected cards are oriented primarily vertically or
    horizontally.  If they're vertical, rotates the board_image 90 degrees
    clockwise for consistent processing.

    Returns:
        (possibly_rotated_image, was_rotated_flag)
    """
    detection = card_detector(board_image)
    boxes = detection[0].boxes.xyxy.cpu().numpy().astype(int)
    if boxes.size == 0:
        # No cards detected: nothing to orient on, leave image unchanged.
        return board_image, False

    widths = boxes[:, 2] - boxes[:, 0]
    heights = boxes[:, 3] - boxes[:, 1]

    # Rotate if the average detected card box is taller than it is wide.
    if np.mean(heights) > np.mean(widths):
        return cv2.rotate(board_image, cv2.ROTATE_90_CLOCKWISE), True
    return board_image, False


def restore_orientation(img: np.ndarray, was_rotated: bool) -> np.ndarray:
    """
    Restores original orientation if the image was previously rotated.
    """
    if was_rotated:
        return cv2.rotate(img, cv2.ROTATE_90_COUNTERCLOCKWISE)
    return img


def predict_color(img_bgr: np.ndarray) -> str:
    """
    Rough color classification using HSV thresholds to differentiate
    'red', 'green', 'purple'.  Returns the color with the most mask pixels.
    """
    hsv = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2HSV)
    mask_green = cv2.inRange(hsv, np.array([40, 50, 50]), np.array([80, 255, 255]))
    mask_purple = cv2.inRange(hsv, np.array([120, 50, 50]), np.array([160, 255, 255]))
    # Red can wrap around hue=0, so we combine both ends of the hue range.
    mask_red1 = cv2.inRange(hsv, np.array([0, 50, 50]), np.array([10, 255, 255]))
    mask_red2 = cv2.inRange(hsv, np.array([170, 50, 50]), np.array([180, 255, 255]))
    mask_red = cv2.bitwise_or(mask_red1, mask_red2)

    counts = {
        "green": cv2.countNonZero(mask_green),
        "purple": cv2.countNonZero(mask_purple),
        "red": cv2.countNonZero(mask_red),
    }
    return max(counts, key=counts.get)


def detect_cards(board_img: np.ndarray, card_detector: YOLO) -> List[Tuple[np.ndarray, List[int]]]:
    """
    Runs YOLO on the board_img to detect card bounding boxes.

    Returns:
        List of (card_image, [x1, y1, x2, y2]) for each detected card.
    """
    result = card_detector(board_img)
    boxes = result[0].boxes.xyxy.cpu().numpy().astype(int)
    return [
        (board_img[y1:y2, x1:x2], [x1, y1, x2, y2])
        for x1, y1, x2, y2 in boxes
    ]


def predict_card_features(
    card_img: np.ndarray,
    shape_detector: YOLO,
    fill_model: tf.keras.Model,
    shape_model: tf.keras.Model,
    card_box: List[int]
) -> Dict:
    """
    Predicts the 'count', 'color', 'fill', 'shape' features for a single card.

    Uses shape_detector (YOLO) to locate shapes on the card, then classifies
    each shape crop with fill_model and shape_model and takes a majority vote
    across the detected shapes.

    Returns:
        dict with keys 'count', 'color', 'fill', 'shape', 'box'.
    """
    # Detect shapes on the card
    shape_detections = shape_detector(card_img)
    c_h, c_w = card_img.shape[:2]
    card_area = c_w * c_h

    # Filter out spurious detections: keep shapes covering > 3% of the card.
    shape_boxes = []
    for coords in shape_detections[0].boxes.xyxy.cpu().numpy():
        x1, y1, x2, y2 = coords.astype(int)
        if (x2 - x1) * (y2 - y1) > 0.03 * card_area:
            shape_boxes.append([x1, y1, x2, y2])

    if not shape_boxes:
        return {
            'count': 0,
            'color': 'unknown',
            'fill': 'unknown',
            'shape': 'unknown',
            'box': card_box
        }

    # BUG FIX: Keras input_shape is (batch, height, width, channels) while
    # cv2.resize expects dsize=(width, height).  The original passed (h, w)
    # directly, silently swapping dimensions for non-square model inputs.
    fill_input_size = tuple(fill_model.input_shape[1:3][::-1])
    shape_input_size = tuple(shape_model.input_shape[1:3][::-1])

    fill_imgs = []
    shape_imgs = []
    color_candidates = []

    # Prepare each detected shape region for classification (scale to [0, 1]).
    for sx1, sy1, sx2, sy2 in shape_boxes:
        shape_crop = card_img[sy1:sy2, sx1:sx2]
        fill_imgs.append(cv2.resize(shape_crop, fill_input_size) / 255.0)
        shape_imgs.append(cv2.resize(shape_crop, shape_input_size) / 255.0)
        color_candidates.append(predict_color(shape_crop))

    # Predict one image at a time — deliberate, to keep peak memory low on
    # small CPU instances rather than batching all crops at once.
    fill_preds = []
    shape_preds = []
    for img in fill_imgs:
        try:
            fill_preds.append(fill_model.predict(np.array([img]), verbose=0)[0])
        except Exception as e:
            logger.error(f"Fill prediction error: {e}")
            fill_preds.append(np.array([0.33, 0.33, 0.34]))  # Fallback
    for img in shape_imgs:
        try:
            shape_preds.append(shape_model.predict(np.array([img]), verbose=0)[0])
        except Exception as e:
            logger.error(f"Shape prediction error: {e}")
            shape_preds.append(np.array([0.33, 0.33, 0.34]))  # Fallback

    fill_labels = ['empty', 'full', 'striped']
    shape_labels = ['diamond', 'oval', 'squiggle']
    fill_result = [fill_labels[np.argmax(fp)] for fp in fill_preds]
    shape_result = [shape_labels[np.argmax(sp)] for sp in shape_preds]

    # Take the most common color/fill/shape across all shape detections.
    final_color = max(set(color_candidates), key=color_candidates.count) if color_candidates else "unknown"
    final_fill = max(set(fill_result), key=fill_result.count) if fill_result else "unknown"
    final_shape = max(set(shape_result), key=shape_result.count) if shape_result else "unknown"

    return {
        'count': len(shape_boxes),
        'color': final_color,
        'fill': final_fill,
        'shape': final_shape,
        'box': card_box
    }


def classify_cards_on_board(
    board_img: np.ndarray,
    card_detector: YOLO,
    shape_detector: YOLO,
    fill_model: tf.keras.Model,
    shape_model: tf.keras.Model
) -> pd.DataFrame:
    """
    Detects cards on the board, then classifies each card's features.

    Returns:
        DataFrame with columns 'Count', 'Color', 'Fill', 'Shape',
        'Coordinates'.
    """
    detected_cards = detect_cards(board_img, card_detector)
    card_rows = []
    for card_img, box in detected_cards:
        feats = predict_card_features(card_img, shape_detector, fill_model, shape_model, box)
        card_rows.append({
            "Count": feats['count'],
            "Color": feats['color'],
            "Fill": feats['fill'],
            "Shape": feats['shape'],
            "Coordinates": feats['box'],
        })
    return pd.DataFrame(card_rows)


def valid_set(cards: List[dict]) -> bool:
    """
    Checks if the given 3 cards collectively form a valid SET: each feature
    must be either all-same (1 distinct value) or all-different (3 distinct
    values) across the trio.
    """
    for feature in ["Count", "Color", "Fill", "Shape"]:
        if len({card[feature] for card in cards}) not in (1, 3):
            return False
    return True


def locate_all_sets(cards_df: pd.DataFrame) -> List[dict]:
    """
    Finds all possible SETs from the card DataFrame.

    Returns:
        List of dicts, each with 'set_indices' (DataFrame indices) and
        'cards' (feature dicts including 'Coordinates').
    """
    found_sets = []
    for combo in combinations(cards_df.iterrows(), 3):
        cards = [row for _, row in combo]
        if valid_set(cards):
            found_sets.append({
                'set_indices': [idx for idx, _ in combo],
                'cards': [
                    {f: card[f] for f in ['Count', 'Color', 'Fill', 'Shape', 'Coordinates']}
                    for card in cards
                ]
            })
    return found_sets


def draw_detected_sets(board_img: np.ndarray, sets_detected: List[dict]) -> np.ndarray:
    """
    Annotates the board image with bounding boxes for each detected SET.

    Each SET is drawn in a different color and offset (thickness & expansion)
    so that overlapping sets are visible.
    """
Each SET is drawn in a different color and offset (thickness & expansion) so that overlapping sets are visible. """ # Some distinct BGR colors colors = [ (255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (255, 0, 255), (0, 255, 255) ] base_thickness = 8 base_expansion = 5 for idx, single_set in enumerate(sets_detected): color = colors[idx % len(colors)] thickness = base_thickness + 2 * idx expansion = base_expansion + 15 * idx for i, card_info in enumerate(single_set["cards"]): x1, y1, x2, y2 = card_info["Coordinates"] # Expand the bounding box slightly x1e = max(0, x1 - expansion) y1e = max(0, y1 - expansion) x2e = min(board_img.shape[1], x2 + expansion) y2e = min(board_img.shape[0], y2 + expansion) cv2.rectangle(board_img, (x1e, y1e), (x2e, y2e), color, thickness) # Label only the first card's box with "Set " if i == 0: cv2.putText( board_img, f"Set {idx + 1}", (x1e, y1e - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, color, thickness ) return board_img def optimize_image_size(image_array: np.ndarray, max_dim=1200) -> np.ndarray: """ Resizes an image if its largest dimension exceeds max_dim, to reduce processing time. """ if image_array is None: return None height, width = image_array.shape[:2] if max(width, height) > max_dim: if width > height: new_width = max_dim new_height = int(height * (max_dim / width)) else: new_height = max_dim new_width = int(width * (max_dim / height)) return cv2.resize(image_array, (new_width, new_height), interpolation=cv2.INTER_AREA) return image_array def process_image(input_image): """ CPU-only processing function for SET detection. """ if input_image is None: return None, "Please upload an image." 
try: start_time = time.time() # Load models (CPU-only) card_detector, shape_detector, shape_model, fill_model = load_models() # Optimize image size optimized_img = optimize_image_size(input_image) # Convert to BGR (OpenCV format) if len(optimized_img.shape) == 3 and optimized_img.shape[2] == 4: # RGBA optimized_img = cv2.cvtColor(optimized_img, cv2.COLOR_RGBA2BGR) elif len(optimized_img.shape) == 3 and optimized_img.shape[2] == 3: # RGB to BGR optimized_img = cv2.cvtColor(optimized_img, cv2.COLOR_RGB2BGR) # Check and fix orientation processed_img, was_rotated = verify_and_rotate_image(optimized_img, card_detector) # Detect cards cards = detect_cards(processed_img, card_detector) if not cards: return cv2.cvtColor(optimized_img, cv2.COLOR_BGR2RGB), "No cards detected. Please check that it's a SET game board." # Classify cards and find sets df_cards = classify_cards_on_board(processed_img, card_detector, shape_detector, fill_model, shape_model) found_sets = locate_all_sets(df_cards) if not found_sets: return cv2.cvtColor(processed_img, cv2.COLOR_BGR2RGB), "Cards detected, but no valid SETs found!" # Draw sets on the image annotated = draw_detected_sets(processed_img.copy(), found_sets) # Restore original orientation if needed final_output = restore_orientation(annotated, was_rotated) # Convert back to RGB for display final_output_rgb = cv2.cvtColor(final_output, cv2.COLOR_BGR2RGB) process_time = time.time() - start_time return final_output_rgb, f"Found {len(found_sets)} SET(s) in {process_time:.2f} seconds." except Exception as e: error_message = f"Error processing image: {str(e)}" logger.error(error_message) logger.error(traceback.format_exc()) return input_image, error_message # Keep the spaces.GPU decorator for ZeroGPU API but use CPU internally @spaces.GPU def process_image_wrapper(input_image): """ Wrapper for process_image that uses the spaces.GPU decorator but internally works in CPU-only mode. 
""" return process_image(input_image) # ============================================================================= # SIMPLIFIED GRADIO INTERFACE # ============================================================================= with gr.Blocks(title="SET Game Detector") as demo: gr.HTML("""

🎴 SET Game Detector

Upload an image of a SET game board to find all valid sets

""") with gr.Row(): with gr.Column(): input_image = gr.Image( label="Upload SET Board Image", type="numpy" ) find_sets_btn = gr.Button( "🔎 Find Sets", variant="primary" ) with gr.Column(): output_image = gr.Image( label="Detected Sets" ) status = gr.Textbox( label="Status", value="Upload an image and click 'Find Sets'", interactive=False ) # Function bindings find_sets_btn.click( fn=process_image_wrapper, inputs=[input_image], outputs=[output_image, status] ) gr.HTML("""

SET Game Detector by omamitai | Gradio version adapted for Hugging Face Spaces

""") # ============================================================================= # MAIN EXECUTION # ============================================================================= if __name__ == "__main__": # Launch the app demo.queue().launch()