import os
import sys
import torch
import argparse
import logging
from pathlib import Path
from typing import List, Dict, Optional, Tuple
from collections import OrderedDict
import json
from datetime import datetime
from tqdm import tqdm
import numpy as np
import cv2  # OpenCV for video processing
import matplotlib.pyplot as plt  # Matplotlib for plotting
import io

# Image processing imports
from PIL import Image, ImageDraw, ImageFont
import torchvision.transforms as transforms

# Import your existing model components
# Ensure these files (models/, utils/) are in the same directory or accessible in PYTHONPATH
from models.MIQA_base import get_torch_model, get_timm_model
from models.RA_MIQA import RegionVisionTransformer
from models.hf_model_registry import HF_REPO_ID, HF_REVISION, MODEL_FILENAMES
from utils.hf_download_utils import ensure_checkpoint_from_hf
SUPPORTED_VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv'}


class MIQAInference:
    """
    MODIFIED Inference wrapper for MIQA models.
    Now includes a method to predict on PIL Image objects directly.
    """

    def __init__(self, task: str, model_name: str = 'ra_miqa',
                 metric_type: str = 'composite', device: Optional[str] = None):
        self.task = task.lower()
        self.model_name = model_name
        self.metric_type = metric_type
        self.logger = self._setup_logger()

        if device is None:
            self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        else:
            self.device = torch.device(device)

        self.logger.info(f"🚀 Initializing MIQA Inference System")
        self.logger.info(f"   Task: {self.task.upper()}")
        self.logger.info(f"   Model: {self.model_name}")
        self.logger.info(f"   Metric Type: {self.metric_type}")
        self.logger.info(f"   Device: {self.device}")

        self._validate_config()
        self.model = self._load_model()
        self.transforms1, self.transforms2 = self._get_transforms()
        self.logger.info("✅ System ready for inference\n")


    def _setup_logger(self) -> logging.Logger:
        """Configure logging with both file and console output."""
        logger = logging.getLogger('MIQA_Inference')
        logger.setLevel(logging.INFO)

        if logger.hasHandlers():
            return logger

        logger.propagate = False

        # Console handler with clean formatting
        console_handler = logging.StreamHandler(sys.stdout)
        console_handler.setLevel(logging.INFO)
        console_formatter = logging.Formatter('%(message)s')
        console_handler.setFormatter(console_formatter)
        logger.addHandler(console_handler)

        return logger

    def _validate_config(self) -> None:
        """Validate that the requested configuration is supported."""

        if self.metric_type not in ['composite', 'consistency', 'accuracy']:
            raise ValueError(
                f"Invalid metric_type '{self.metric_type}'. "
                f"Supported: ['composite', 'consistency', 'accuracy']"
            )

        if self.task not in MODEL_FILENAMES[self.metric_type]:
            raise ValueError(
                f"Invalid task '{self.task}'. "
                f"Supported tasks: {list(MODEL_FILENAMES[self.metric_type].keys())}"
            )

        if self.model_name not in MODEL_FILENAMES[self.metric_type][self.task]:
            available = list(MODEL_FILENAMES[self.metric_type][self.task].keys())
            raise ValueError(
                f"Model '{self.model_name}' not available for task '{self.task}'. "
                f"Available models: {available}"
            )

    def _get_checkpoint_path(self) -> str:
        """Generate the path where model checkpoint should be stored."""
        base_dir = Path('models') / 'checkpoints' / f'{self.metric_type}_metric'
        base_dir.mkdir(parents=True, exist_ok=True)

        filename = MODEL_FILENAMES[self.metric_type][self.task][self.model_name]
        return str(base_dir / filename)

    def _download_weights(self, checkpoint_path: str) -> bool:
        """
        Download model weights if not present locally.

        Returns:
            True if weights are available (already existed or successfully downloaded)
        """
        if os.path.exists(checkpoint_path):
            self.logger.info(f"✓ Found cached model weights")
            return True

        self.logger.info(
            f"⏬ Downloading from Hugging Face: repo={HF_REPO_ID}, "
            f"file={Path(checkpoint_path).name}, rev={HF_REVISION}"
        )
        try:
            ensure_checkpoint_from_hf(
                repo_id=HF_REPO_ID,
                filename=Path(checkpoint_path).name,
                local_dir=str(Path(checkpoint_path).parent),
                revision=HF_REVISION,
            )
            self.logger.info("✓ Successfully downloaded model weights")
            return True
        except Exception as e:
            self.logger.error(f"❌ Failed to download model weights from Hugging Face: {e}")
            return False

    def _create_model(self) -> torch.nn.Module:
        """Create the model architecture."""
        if self.model_name == 'ra_miqa':
            self.logger.info("Building Region-Aware Vision Transformer...")
            model = RegionVisionTransformer(
                base_model_name='vit_small_patch16_224',
                pretrained=False,  # We'll load our trained weights
                mmseg_config_path='models/model_configs/fcn_sere-small_finetuned_fp16_8x32_224x224_3600_imagenets919.py',
                checkpoint_path='models/checkpoints/sere_finetuned_vit_small_ep100.pth'
            )
        else:
            try:
                self.logger.info(f"Building {self.model_name} from PyTorch...")
                model = get_torch_model(model_name=self.model_name, pretrained=False, num_classes=1)
            except Exception:
                self.logger.info(f"Building {self.model_name} from timm library...")
                model = get_timm_model(model_name=self.model_name, pretrained=False, num_classes=1)

        return model

    def _load_model(self) -> torch.nn.Module:
        """Load model with pre-trained weights."""
        checkpoint_path = self._get_checkpoint_path()

        # Ensure weights are available
        if not self._download_weights(checkpoint_path):
            raise RuntimeError("Cannot proceed without model weights")

        # Create model architecture
        self.logger.info("🔧 Loading model...")
        model = self._create_model()

        # Load weights
        checkpoint = torch.load(checkpoint_path, map_location='cpu')
        state_dict = checkpoint.get('state_dict', checkpoint)

        # Remove 'module.' prefix if present (from DataParallel training)
        new_state_dict = OrderedDict()
        for k, v in state_dict.items():
            name = k.replace('module.', '') if k.startswith('module.') else k
            new_state_dict[name] = v

        model.load_state_dict(new_state_dict, strict=True)
        model = model.to(self.device)
        model.eval()  # Set to evaluation mode

        self.logger.info("✓ Model loaded successfully")

        return model

    def _get_transforms(self) -> Tuple[transforms.Compose, transforms.Compose | None]:
        """
        Return preprocessing transforms based on model type.
        """
        IMAGENET_MEAN = (0.485, 0.456, 0.406)
        IMAGENET_STD = (0.229, 0.224, 0.225)
        SIMPLE_MEAN = (0.5, 0.5, 0.5)
        SIMPLE_STD = (0.5, 0.5, 0.5)

        # Default (for single-input backbones)
        transform_imagenet = transforms.Compose([
            transforms.Resize(288),
            transforms.CenterCrop(size=224),
            transforms.ToTensor(),
            transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD)
        ])

        transform_simple = transforms.Compose([
            transforms.Resize(288),
            transforms.CenterCrop(size=224),
            transforms.ToTensor(),
            transforms.Normalize(mean=SIMPLE_MEAN, std=SIMPLE_STD)
        ])

        # 1️⃣ CNNs（ResNet / EfficientNet）
        if any(k in self.model_name for k in ['resnet', 'efficientnet']):
            return transform_imagenet, None

        # 2️⃣ ViT
        elif 'vit' in self.model_name:
            return transform_simple, None

        # 3️⃣ ra_miqa
        elif 'ra_miqa' in self.model_name:
            transform_1 = transforms.Compose([
                transforms.Resize(288),
                transforms.CenterCrop(size=224),
                transforms.ToTensor(),
                transforms.Normalize(mean=SIMPLE_MEAN, std=SIMPLE_STD)
            ])
            transform_2 = transforms.Compose([
                transforms.Resize(288),
                transforms.CenterCrop((288, 288)),
                transforms.ToTensor(),
                transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD)
            ])
            return transform_1, transform_2

        # fallback
        else:
            print(f"[Warning] Unknown model type '{self.model_name}', using ImageNet normalization.")
            return transform_imagenet, None

    @torch.no_grad()
    def predict_image_object(self, image: Image.Image) -> float:
        """
        NEW METHOD: Run inference on a PIL Image object.
        """
        # Preprocess the image
        img1 = self.transforms1(image).unsqueeze(0).to(self.device)
        img2 = self.transforms2(image).unsqueeze(0).to(self.device) if self.transforms2 else None

        # Run inference based on model input requirements
        if img2 is None:
            output = self.model(img1)
        else:
            output = self.model(img1, img2)

        score = output.item() if torch.is_tensor(output) else float(output)
        return score


class VideoMIQAProcessor:
    """
    A wrapper to process videos using the MIQAInference engine and create
    a visualized output video with scores and plots.
    """
    # --- Visualization Constants ---
    PANEL_WIDTH = 480
    FONT = cv2.FONT_HERSHEY_SIMPLEX
    FONT_SCALE_L = 1.0
    FONT_SCALE_M = 0.8
    FONT_COLOR = (255, 255, 255)  # White
    LINE_THICKNESS = 2

    # Plotting style
    plt.style.use('dark_background')

    def __init__(self, miqa_engine: MIQAInference):
        self.miqa_engine = miqa_engine
        self.logger = miqa_engine.logger

    def _create_score_plot(self, scores: List[float], width: int, height: int) -> np.ndarray:
        """
        Creates a line chart of scores using Matplotlib and returns it as an OpenCV image.
        """
        fig, ax = plt.subplots(figsize=(width / 100, height / 100), dpi=100)
        ax.plot(scores, color='#4287f5', linewidth=2)
        ax.set_xlim(0, max(1, len(scores)))
        ax.set_ylim(0, 1)
        ax.set_title("Quality Score Fluctuation", fontsize=10)
        ax.set_xlabel("Frame", fontsize=8)
        ax.set_ylabel("Score", fontsize=8)
        ax.grid(True, alpha=0.3)
        fig.tight_layout(pad=1.5)

        # Render plot to an in-memory buffer
        buf = io.BytesIO()
        fig.savefig(buf, format='png')
        buf.seek(0)
        plt.close(fig)

        # Convert buffer to a PIL Image and then to an OpenCV image
        plot_img_pil = Image.open(buf)
        plot_img_np = np.array(plot_img_pil)
        plot_img_bgr = cv2.cvtColor(plot_img_np, cv2.COLOR_RGBA2BGR)

        return plot_img_bgr

    def process_video(self, input_path: str, output_path: str):
        """
        Reads a video, analyzes each frame for quality, and writes an annotated output video.
        """
        self.logger.info(f"📹 Starting processing for: {Path(input_path).name}")
        cap = cv2.VideoCapture(input_path)
        if not cap.isOpened():
            self.logger.error(f"❌ Failed to open video: {input_path}")
            return

        # Video properties
        fps = int(cap.get(cv2.CAP_PROP_FPS))
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        orig_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        orig_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # New dimensions for output video (with side panel)
        output_width = orig_width + self.PANEL_WIDTH
        output_height = orig_height

        # Setup video writer
        fourcc = cv2.VideoWriter_fourcc(*'avc1')
        out = cv2.VideoWriter(output_path, fourcc, fps, (output_width, output_height))

        scores = []
        progress_bar = tqdm(range(frame_count), desc="Analyzing frames", ncols=100)

        for frame_idx in progress_bar:
            ret, frame = cap.read()
            if not ret:
                break

            # --- MIQA Inference ---
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            pil_image = Image.fromarray(frame_rgb)
            score = self.miqa_engine.predict_image_object(pil_image)
            scores.append(score)

            # --- Visualization Panel ---
            panel = np.zeros((orig_height, self.PANEL_WIDTH, 3), dtype=np.uint8)

            # 1. Task Info
            task_text = f"Task: {self.miqa_engine.task.upper()}"
            cv2.putText(panel, task_text, (20, 50), self.FONT, self.FONT_SCALE_M, self.FONT_COLOR, self.LINE_THICKNESS)

            # 2. Current Score
            score_text = f"Quality Score: {score:.3f}"
            # Color coding for score text
            norm_score = max(0, score)
            if norm_score < 0.5:
                color = (0, int(255 * (norm_score * 2)), 255)  # Red -> Yellow
            else:
                color = (0, 255, int(255 * (2 - norm_score * 2)))  # Yellow -> Green
            cv2.putText(panel, score_text, (20, 110), self.FONT, self.FONT_SCALE_L, color, self.LINE_THICKNESS + 1)

            # 3. Frame Info
            frame_text = f"Frame: {frame_idx + 1}/{frame_count}"
            cv2.putText(panel, frame_text, (20, orig_height - 30), self.FONT, self.FONT_SCALE_M, self.FONT_COLOR, 1)

            # 4. Score Plot
            if len(scores) > 1:
                plot_height = 300
                plot_width = self.PANEL_WIDTH - 40  # with margins
                plot_img = self._create_score_plot(scores, plot_width, plot_height)

                # Position the plot on the panel
                y_offset = 160
                panel[y_offset:y_offset + plot_img.shape[0], 20:20 + plot_img.shape[1]] = plot_img

            # --- Combine and Write Frame ---
            combined_frame = np.concatenate((frame, panel), axis=1)
            out.write(combined_frame)

        # Release resources
        cap.release()
        out.release()
        self.logger.info(f"✅ Finished processing. Annotated video saved to: {output_path}\n")


def main():
    """Command-line interface for Video MIQA inference."""
    parser = argparse.ArgumentParser(
        description='MIQA for Video: Machine-centric Image Quality Assessment on Video Frames',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
    Examples:
      # Analyze a single video and save the annotated output
      python video_annotator_inference.py --input my_video.mp4 --task cls --model ra_miqa

      # Analyze all videos in a directory
      python video_annotator_inference.py --input ./video_folder/ --task det --model resnet50
            """
    )

    parser.add_argument('--input', type=str, required=True,
                        help='Path to input video file or a directory containing videos.')
    parser.add_argument('--task', type=str, required=True,
                        choices=['cls', 'det', 'ins'],
                        help='Task type: cls (classification), det (detection), ins (instance).')
    parser.add_argument('--model', type=str, default='ra_miqa',
                        choices=['ra_miqa'],
                        help='Model architecture (default: ra_miqa; Hub weights are RA-MIQA only).')
    parser.add_argument('--metric-type', type=str, default='composite',
                        choices=['composite', 'consistency', 'accuracy'],
                        help='Training metric type (default: composite).')
    parser.add_argument('--device', type=str, default=None,
                        choices=['cuda', 'cpu'],
                        help='Device to run on (auto-detect if not specified).')
    parser.add_argument('--output-dir', type=str, default='inference_results',
                        help='Directory to save the output annotated videos.')

    args = parser.parse_args()

    try:
        # Initialize the core inference engine
        miqa_engine = MIQAInference(
            task=args.task,
            model_name=args.model,
            metric_type=args.metric_type,
            device=args.device
        )

        # Initialize the video processor
        video_processor = VideoMIQAProcessor(miqa_engine)

        # Find videos to process
        input_path = Path(args.input)
        videos_to_process = []
        if input_path.is_dir():
            for ext in SUPPORTED_VIDEO_EXTENSIONS:
                videos_to_process.extend(input_path.glob(f"*{ext}"))
        elif input_path.is_file() and input_path.suffix.lower() in SUPPORTED_VIDEO_EXTENSIONS:
            videos_to_process.append(input_path)

        if not videos_to_process:
            raise FileNotFoundError(f"No supported video files found in '{args.input}'")


        # Create output directory
        output_dir = Path(args.output_dir) / 'video' /args.task / args.metric_type
        output_dir.mkdir(parents=True, exist_ok=True)

        # Process each video
        for video_path in videos_to_process:
            output_filename = f"{video_path.stem}_miqa_{args.model}_{args.task}.mp4"
            output_filepath = str(output_dir / output_filename)
            video_processor.process_video(str(video_path), output_filepath)

    except Exception as e:
        # Use the logger if it exists, otherwise print
        try:
            miqa_engine.logger.error(f"\n❌ An error occurred: {str(e)}")
        except:
            print(f"\n❌ An error occurred: {str(e)}", file=sys.stderr)
        sys.exit(1)


if __name__ == '__main__':
    main()