import os
import json
import re  # NOTE(review): `re` appears unused in this view of the file — confirm before removing
from typing import Any, Optional, List, Union

import google.genai as genai


def _unwrap_tool_response(obj: Any) -> Any:
    """Return the payload of a single-key tool-response wrapper, else *obj*.

    Agent frameworks sometimes wrap tool output as
    ``{"video_summarizer_tool_response": {...}}`` or ``{"*_response": {...}}``;
    this strips that one layer. Non-dict and multi-key values pass through.
    """
    if isinstance(obj, dict) and len(obj) == 1:
        key = next(iter(obj))
        # "_tool_response" contains "_response", so a single substring test
        # covers both wrapper spellings the original code checked for.
        if "_response" in key.lower():
            return obj[key]
    return obj


def _parse_balanced_spans(text: str, open_ch: str, close_ch: str) -> list:
    """Decode every balanced top-level ``open_ch``...``close_ch`` span in *text*.

    Scans for spans where the bracket depth returns to zero and attempts
    ``json.loads`` on each; spans that fail to decode are silently skipped.

    Returns:
        list: the successfully decoded values, in order of appearance.
    """
    decoded: list = []
    depth = 0
    start = -1
    for i, ch in enumerate(text):
        if ch == open_ch:
            if depth == 0:
                start = i
            depth += 1
        elif ch == close_ch:
            depth -= 1
            if depth == 0 and start >= 0:
                try:
                    decoded.append(json.loads(text[start : i + 1]))
                except json.JSONDecodeError:
                    pass
                start = -1
    return decoded


def _extract_and_parse_json(text: str) -> Optional[Union[dict, list]]:
    """Extract and parse JSON from text that might contain extra content.

    Handles JSON wrapped in extra prose, multiple concatenated objects
    (``{}{}`` → list), and tool-response wrappers like
    ``{"video_summarizer_tool_response": {...}}`` (unwrapped, including
    per-element inside a top-level array).

    Returns:
        The parsed value (dict, list, or other JSON scalar), or ``None``
        when nothing parseable is found.
    """
    if not text or not isinstance(text, str):
        return None
    text = text.strip()

    # Fast path: the whole string is valid JSON.
    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        pass
    else:
        if isinstance(parsed, list):
            return [_unwrap_tool_response(item) for item in parsed]
        return _unwrap_tool_response(parsed)

    # Salvage path 1: first balanced [...] span that decodes.
    arrays = _parse_balanced_spans(text, "[", "]")
    if arrays:
        return arrays[0]

    # Salvage path 2: concatenated {...}{...} objects. A single object is
    # returned directly, several as a list. (The original code had a third,
    # identical single-object scan after this one; it was unreachable dead
    # code — this scan already tried every balanced brace span — and has
    # been removed.)
    objects = _parse_balanced_spans(text, "{", "}")
    if objects:
        return objects if len(objects) > 1 else objects[0]
    return None


def _coerce_summaries(video_summaries: Union[str, List[dict], List[str]]) -> list:
    """Normalise the accepted ``video_summaries`` shapes into a flat list.

    Accepts a JSON string (object or array), a list of dicts, or a list of
    JSON strings; tool-response wrappers are unwrapped.

    Raises:
        ValueError: on unparseable JSON or an unsupported element type.
    """
    if isinstance(video_summaries, str):
        parsed = _extract_and_parse_json(video_summaries)
        if parsed is None:
            raise ValueError(
                f"Invalid JSON format for video_summaries. "
                f"Could not parse: {video_summaries[:200]}..."
            )
        return parsed if isinstance(parsed, list) else [parsed]

    if isinstance(video_summaries, list):
        flat: list = []
        for summary in video_summaries:
            if isinstance(summary, str):
                parsed = _extract_and_parse_json(summary)
                if parsed is None:
                    raise ValueError(
                        f"Invalid JSON format in video_summaries: {summary[:200]}..."
                    )
                # A string may decode to several objects; flatten them.
                if isinstance(parsed, list):
                    flat.extend(parsed)
                else:
                    flat.append(parsed)
            elif isinstance(summary, dict):
                flat.append(_unwrap_tool_response(summary))
            else:
                raise ValueError(
                    f"Invalid summary type: {type(summary).__name__}. "
                    "Expected dict or JSON string."
                )
        return flat

    raise ValueError(
        f"Invalid video_summaries type: {type(video_summaries).__name__}. "
        "Expected str, list of dicts, or list of JSON strings."
    )


def _fallback_script(summary: Any, target_duration: float) -> dict:
    """Build a single-scene script from one summary, without the LLM.

    Used both when no API key is configured and when the model response is
    not valid JSON. Defensive against malformed summaries: non-dict input,
    non-str "summary" text and non-list "mood_tags" all degrade to defaults
    (fix: the no-API-key path previously lacked these guards).
    """
    if not isinstance(summary, dict):
        summary = {}
    duration = summary.get("duration", target_duration)
    clip_duration = min(duration, target_duration)

    mood_tags = summary.get("mood_tags", ["energetic"])
    mood = (
        mood_tags[0]
        if isinstance(mood_tags, list) and mood_tags
        else "energetic"
    )
    raw_text = summary.get("summary", "Video clip")
    description = raw_text[:100] if isinstance(raw_text, str) else "Video clip"

    return {
        "total_duration": clip_duration,
        "scenes": [
            {
                "scene_id": 1,
                "source_video": 0,
                "start_time": 0.0,
                "end_time": clip_duration,
                "duration": clip_duration,
                "description": description,
                "transition_in": "fade",
                "transition_out": "fade",
            }
        ],
        "music": {
            "mood": mood,
            "volume": 0.5,
        },
        "pacing": "moderate",
        "narrative_structure": "single scene",
    }


def _build_prompt(
    summaries_list: list,
    user_description: Optional[str],
    target_duration: float,
) -> str:
    """Assemble the Gemini prompt: task description, per-video summaries,
    the exact JSON schema to return, and the editing rules."""
    summaries_text = "\n\n".join(
        f"Video {i + 1}:\n{json.dumps(s, indent=2)}"
        for i, s in enumerate(summaries_list)
    )
    user_desc_text = (
        f"\n\nUser Description: {user_description}" if user_description else ""
    )
    return f"""You are a professional video editor creating a {target_duration}-second short-form video.

Here are the video summaries:

{summaries_text}
{user_desc_text}

Create a detailed video composition script that:
1. Selects the most engaging and relevant scenes from the videos
2. Creates a coherent narrative flow with a clear structure (hook -> build -> climax -> resolution)
3. Uses appropriate transitions (cut, fade, or crossfade) between scenes
4. Ensures the total duration is approximately {target_duration} seconds (within ±2 seconds)
5. Distributes scenes evenly across the duration, considering pacing
6. Identifies music mood, BPM, and sync points for rhythm matching
7. Provides visual style recommendations based on the content

Return ONLY a valid JSON object with this exact structure:
{{
  "total_duration": {target_duration},
  "scenes": [
    {{
      "scene_id": 1,
      "source_video": 0,
      "start_time": 0.0,
      "end_time": 5.0,
      "duration": 5.0,
      "description": "Brief description of what happens in this scene",
      "transition_in": "fade",
      "transition_out": "crossfade"
    }},
    {{
      "scene_id": 2,
      "source_video": 1,
      "start_time": 10.0,
      "end_time": 15.0,
      "duration": 5.0,
      "description": "Brief description of what happens in this scene",
      "transition_in": "crossfade",
      "transition_out": "fade"
    }}
  ],
  "music": {{
    "mood": "energetic",
    "bpm": 120,
    "sync_points": [0.0, 7.5, 15.0, 22.5, 30.0],
    "volume": 0.5
  }},
  "pacing": "fast",
  "narrative_structure": "hook -> build -> climax -> resolution",
  "visual_style": "bright, colorful, dynamic"
}}

Rules:
- source_video is 0-based index (0 for first video, 1 for second, etc.)
- Each scene must have start_time, end_time, and duration
- CRITICAL: start_time and end_time MUST be within the actual video duration. Check the "duration" field in each video summary to ensure timestamps don't exceed it.
- For example, if a video has duration 5.2 seconds, start_time must be < 5.2 and end_time must be <= 5.2
- Total of all scene durations should be approximately {target_duration} seconds (±2 seconds tolerance)
- Use transitions: "cut", "fade", or "crossfade"
- Extract mood tags from the video summaries for the music section
- sync_points should be evenly distributed or aligned to scene transitions
- pacing should be one of: "slow", "moderate", "fast", "very-fast"
- narrative_structure should describe the flow (e.g., "hook -> build -> climax -> resolution")
- visual_style should describe the aesthetic (e.g., "bright, colorful, dynamic" or "dark, moody, cinematic")
- Return ONLY the JSON, no other text or markdown formatting"""


def _strip_code_fences(text: str) -> str:
    """Return the JSON payload from *text*, unwrapping a markdown
    ``` / ```json code fence if the model added one."""
    if "```json" in text:
        return text.split("```json")[1].split("```")[0].strip()
    if "```" in text:
        return text.split("```")[1].split("```")[0].strip()
    return text


def _clamp_scene_bounds(scenes: list, summaries_list: list) -> None:
    """Clamp each scene's source index and timestamps to its video (in place).

    - ``source_video`` is clamped to [0, len(summaries) - 1]; missing → 0.
    - If ``start_time`` is past the end of the video, the last <=2 s of the
      video are used instead.
    - Otherwise start/end are clamped into the video, end is derived from
      ``duration`` when absent, and ``duration`` is recomputed.
    """
    video_durations = {
        i: (s.get("duration", 0.0) if isinstance(s, dict) else 0.0)
        for i, s in enumerate(summaries_list)
    }
    last_index = max(0, len(summaries_list) - 1)

    for scene in scenes:
        source_idx = scene.get("source_video")
        if isinstance(source_idx, int):
            scene["source_video"] = min(max(source_idx, 0), last_index)
        elif source_idx is None:
            # Missing index: default to the first video.
            scene["source_video"] = 0

        source_idx = scene.get("source_video")
        if not (isinstance(source_idx, int) and source_idx in video_durations):
            continue  # non-int index from the model: leave timestamps alone

        video_duration = video_durations[source_idx]
        start_time = scene.get("start_time", 0.0)
        scene_duration = scene.get("duration")

        if start_time >= video_duration:
            # Start is past the end: use the last portion of the video
            # (last 2 seconds, or the whole video if shorter). The original
            # code branched three ways here with identical bodies; collapsed.
            clip = min(2.0, video_duration)
            scene["start_time"] = max(0.0, video_duration - clip)
            scene["end_time"] = video_duration
            scene["duration"] = video_duration - scene["start_time"]
        else:
            scene["start_time"] = max(0.0, min(start_time, video_duration - 0.1))
            end_time = scene.get("end_time")
            if end_time is None:
                # Derive the end from duration when given, else run to the end.
                end_time = (
                    scene["start_time"] + scene_duration
                    if scene_duration
                    else video_duration
                )
            # Keep at least a 0.1 s clip, never past the end of the video.
            scene["end_time"] = max(
                scene["start_time"] + 0.1, min(end_time, video_duration)
            )
            scene["duration"] = scene["end_time"] - scene["start_time"]


def _rescale_durations(script: dict, target_duration: float) -> None:
    """Proportionally rescale scene durations (in place) when their sum is
    more than 5 s away from *target_duration*."""
    total = sum(scene.get("duration", 0) for scene in script["scenes"])
    if abs(total - target_duration) <= 5.0 or total <= 0:
        return
    scale = target_duration / total
    for scene in script["scenes"]:
        if "duration" in scene:
            scene["duration"] = round(scene["duration"] * scale, 2)
            if "start_time" in scene and "end_time" in scene:
                # Keep end_time consistent with the scaled duration.
                scene["end_time"] = round(
                    scene["start_time"] + scene["duration"], 2
                )
    script["total_duration"] = target_duration


def _apply_default_sections(script: dict, summaries_list: list) -> None:
    """Fill in missing optional sections of the script (in place):
    music (mood pooled from the summaries' mood_tags), pacing,
    narrative_structure and visual_style."""
    if "music" not in script:
        mood_tags: list = []
        for summary in summaries_list:
            tags = summary.get("mood_tags", []) if isinstance(summary, dict) else []
            if isinstance(tags, list):
                mood_tags.extend(tags)
        script["music"] = {
            "mood": mood_tags[0] if mood_tags else "energetic",
            "volume": 0.5,
        }
    script.setdefault("pacing", "moderate")
    script.setdefault("narrative_structure", "linear")
    script.setdefault("visual_style", "standard")


def video_script_generator(
    video_summaries: Union[str, List[dict], List[str]],
    user_description: Optional[str] = None,
    target_duration: float = 30.0,
) -> str:
    """Create a detailed script/storyboard for the final short-form video.

    Uses the Google Gemini API to generate a scene-by-scene composition
    script from video summaries. When no ``GOOGLE_API_KEY`` is configured,
    or the model's response is not valid JSON, a simple single-scene
    fallback script built from the first summary is returned instead.

    Args:
        video_summaries: Summaries from the video_summarizer tool. Accepts a
            JSON string (object or array), a list of dicts, or a list of
            JSON strings; ``{"*_tool_response": {...}}`` wrappers are
            unwrapped automatically.
        user_description: Optional description of the desired
            mood/style/content.
        target_duration: Target duration in seconds (default: 30.0).

    Returns:
        str: JSON string with ``total_duration``, ``scenes`` (each scene has
        scene_id, source_video, start_time, end_time, duration, description,
        transition_in, transition_out), ``music`` (mood, bpm, sync_points,
        volume), ``pacing``, ``narrative_structure`` and ``visual_style``.
        Scene timestamps are validated against each source video's duration
        and scene durations are rescaled toward ``target_duration``.

    Raises:
        Exception: wrapping any underlying failure, with the original
            exception chained as the cause.
    """
    summaries_list: list = []
    try:
        summaries_list = _coerce_summaries(video_summaries)
        if not summaries_list:
            raise ValueError("No video summaries provided")
        if target_duration <= 0:
            raise ValueError("target_duration must be greater than 0")

        api_key = os.getenv("GOOGLE_API_KEY")
        if not api_key:
            # No credentials: degrade gracefully to a deterministic
            # single-scene script from the first summary.
            return json.dumps(
                _fallback_script(summaries_list[0], target_duration), indent=2
            )

        client = genai.Client(api_key=api_key)
        prompt = _build_prompt(summaries_list, user_description, target_duration)
        response = client.models.generate_content(
            model="gemini-2.5-flash-lite",
            contents=[prompt],
        )

        # May raise json.JSONDecodeError → handled by the fallback below.
        script = json.loads(_strip_code_fences(response.text.strip()))

        # Validate the generated structure before post-processing.
        if not isinstance(script, dict):
            raise ValueError("Generated script is not a valid dictionary")
        if "scenes" not in script:
            raise ValueError("Generated script missing 'scenes' field")
        if not isinstance(script["scenes"], list) or not script["scenes"]:
            raise ValueError("Generated script must contain at least one scene")
        script.setdefault("total_duration", target_duration)

        _clamp_scene_bounds(script["scenes"], summaries_list)
        _rescale_durations(script, target_duration)
        _apply_default_sections(script, summaries_list)

        return json.dumps(script, indent=2)
    except json.JSONDecodeError:
        # The model returned unparseable text: fall back to a simple script.
        if not summaries_list:
            raise ValueError("No video summaries provided")
        return json.dumps(
            _fallback_script(summaries_list[0], target_duration), indent=2
        )
    except Exception as e:
        # Fix: chain the cause so the original traceback is preserved.
        raise Exception(f"Error generating video script: {str(e)}") from e