"""
Convert CVAT XML annotations to COCO format
Extracts annotations for a specific frame and converts to COCO JSON
"""
import xml.etree.ElementTree as ET
import json
from pathlib import Path
from typing import Dict, List, Tuple, Optional


def parse_cvat_xml(xml_path: str) -> ET.ElementTree:
    """
    Load a CVAT XML annotation file into an ElementTree

    Args:
        xml_path: Location of the CVAT XML file (handed straight to ET.parse)

    Returns:
        The parsed ElementTree
    """
    return ET.parse(xml_path)


def extract_frame_annotations(tree: ET.ElementTree, frame_id: int = 0) -> List[Dict]:
    """
    Extract annotations for a specific frame from CVAT XML

    Boxes flagged outside="1" are skipped: in CVAT video annotations that
    flag marks a frame where the tracked object is not visible, so such a
    box is not a real annotation for the frame.

    Args:
        tree: Parsed XML tree
        frame_id: Frame number to extract (default: 0)

    Returns:
        List of annotation dicts, one per visible box, with keys:
            track_id: the track's "id" attribute as a string (None if absent)
            label: the track's "label" attribute ('player' if absent)
            bbox: tuple of floats (xtl, ytl, xbr, ybr)
            confidence: float taken from the box's "confidence" attribute
                element; 1.0 when that element is missing or not numeric

    Raises:
        TypeError: If a matching box lacks one of xtl/ytl/xbr/ybr
        ValueError: If one of those coordinates is not a valid number
    """
    root = tree.getroot()
    annotations = []

    for track in root.findall('.//track'):
        track_id = track.get('id')
        label = track.get('label', 'player')

        # Only the boxes of this track that belong to the requested frame
        for box in track.findall(f'.//box[@frame="{frame_id}"]'):
            # An "outside" box only records that the object left the frame;
            # emitting it would create a phantom annotation.
            if box.get('outside', '0') == '1':
                continue

            xtl = float(box.get('xtl'))
            ytl = float(box.get('ytl'))
            xbr = float(box.get('xbr'))
            ybr = float(box.get('ybr'))

            # Confidence is optional; keep 1.0 when it is missing or when its
            # text is empty / non-numeric (best effort by design).
            confidence = 1.0
            conf_attr = box.find('.//attribute[@name="confidence"]')
            if conf_attr is not None:
                try:
                    confidence = float(conf_attr.text)
                except (ValueError, TypeError):
                    pass

            annotations.append({
                'track_id': track_id,
                'label': label,
                'bbox': (xtl, ytl, xbr, ybr),
                'confidence': confidence
            })

    return annotations


def cvat_bbox_to_coco(xtl: float, ytl: float, xbr: float, ybr: float) -> Tuple[float, float, float, float]:
    """
    Turn a CVAT corner box (xtl, ytl, xbr, ybr) into a COCO box (x, y, width, height)

    The top-left corner is passed through unchanged. Width and height are the
    corner differences, clamped so that neither is ever negative.

    Args:
        xtl: Top-left x coordinate
        ytl: Top-left y coordinate
        xbr: Bottom-right x coordinate
        ybr: Bottom-right y coordinate

    Returns:
        Tuple of (x, y, width, height)
    """
    # A bottom-right corner left of / above the top-left one collapses to 0
    return (xtl, ytl, max(0, xbr - xtl), max(0, ybr - ytl))


def label_to_category_id(label: str) -> int:
    """
    Look up the COCO category ID for a CVAT label

    Matching ignores case. Any label other than "player" or "ball" is
    treated as a player.

    Args:
        label: Label name ("player", "ball", etc.)

    Returns:
        Category ID (1=player, 2=ball)
    """
    known_ids = {'player': 1, 'ball': 2}
    # Unknown labels fall back to the player category
    return known_ids.get(label.lower(), 1)


def create_coco_json(
    image_path: str,
    image_id: int,
    width: int,
    height: int,
    annotations: List[Dict],
    output_path: Optional[str] = None
) -> Dict:
    """
    Build a single-image COCO dictionary from frame annotations

    Each input annotation becomes one COCO annotation whose ID is its 1-based
    position in the list. Only the 'bbox' and 'label' keys of the input dicts
    are read.

    Args:
        image_path: Path to image file (only its final component is stored)
        image_id: Unique image ID
        width: Image width
        height: Image height
        annotations: List of annotation dicts from extract_frame_annotations()
        output_path: Optional path to save JSON file

    Returns:
        COCO format dictionary
    """
    file_name = Path(image_path).name

    coco_annotations = []
    for ann_id, source_ann in enumerate(annotations, start=1):
        left, top, right, bottom = source_ann['bbox']
        box_x, box_y, box_w, box_h = cvat_bbox_to_coco(left, top, right, bottom)
        coco_annotations.append({
            "id": ann_id,
            "image_id": image_id,
            "category_id": label_to_category_id(source_ann['label']),
            "bbox": [box_x, box_y, box_w, box_h],
            "area": box_w * box_h,
            "iscrowd": 0
        })

    coco_data = {
        "info": {
            "description": "Single frame training dataset",
            "version": "1.0"
        },
        "licenses": [],
        "images": [
            {
                "id": image_id,
                "file_name": file_name,
                "width": width,
                "height": height
            }
        ],
        "annotations": coco_annotations,
        "categories": [
            {"id": 1, "name": "player", "supercategory": "object"},
            {"id": 2, "name": "ball", "supercategory": "object"}
        ]
    }

    # Nothing to persist when no (or an empty) output path was given
    if not output_path:
        return coco_data

    with open(output_path, 'w') as json_file:
        json.dump(coco_data, json_file, indent=2)
    return coco_data


def convert_frame_to_coco(
    xml_path: str,
    frame_id: int,
    image_path: str,
    image_width: int,
    image_height: int,
    output_json_path: str
) -> Dict:
    """
    Convert one frame of a CVAT XML file to COCO format and save it

    Runs the full pipeline: parse the XML, collect the annotations of the
    requested frame, and build the COCO dictionary, which is also written to
    output_json_path. The image always gets ID 1.

    Args:
        xml_path: Path to CVAT XML file
        frame_id: Frame number to extract
        image_path: Path to extracted frame image
        image_width: Image width
        image_height: Image height
        output_json_path: Path to save COCO JSON

    Returns:
        COCO format dictionary
    """
    frame_annotations = extract_frame_annotations(parse_cvat_xml(xml_path), frame_id)

    return create_coco_json(
        image_path=image_path,
        image_id=1,
        width=image_width,
        height=image_height,
        annotations=frame_annotations,
        output_path=output_json_path
    )