"""
Enhanced Hardware Abstraction Layer (HAL) for Virtual GPU
Integrates with DuckDB for state management and multi-GPU support
"""

import duckdb
import numpy as np
import json
from typing import Dict, List, Optional, Union, Tuple, Any
from enum import Enum
import logging
from config import get_db_url, get_hf_token_cached

class HardwareType(Enum):
    COMPUTE_UNIT = "compute_unit"
    TENSOR_CORE = "tensor_core"
    SHADER_UNIT = "shader_unit"
    MEMORY_CONTROLLER = "memory_controller"
    DMA_ENGINE = "dma_engine"
    OPTICAL_INTERCONNECT = "optical_interconnect"

class HardwareAbstractionLayer:
    DB_URL = "hf://datasets/Fred808/helium/storage.json"

    def __init__(self, db_url: Optional[str] = None):
        """Initialize HAL with remote database connection"""
        self.db_url = db_url or self.DB_URL
        self.max_retries = 3
        self._connect_with_retries()

    def _connect_with_retries(self):
        """Establish database connection with retry logic"""
        for attempt in range(self.max_retries):
            try:
                self._init_db_connection()
                self._setup_database()
                return
            except Exception as e:
                if attempt == self.max_retries - 1:
                    raise RuntimeError(f"Failed to initialize database after {self.max_retries} attempts: {str(e)}")
                logging.warning(f"Database connection attempt {attempt + 1} failed, retrying...")

    def _init_db_connection(self) -> duckdb.DuckDBPyConnection:
        """Initialize database connection with HuggingFace configuration"""
        # Convert HF URL to S3 path and connect directly
        _, _, owner, dataset, db_file = self.db_url.split('/', 4)
        db_path = f"s3://datasets-cached/{owner}/{dataset}/{db_file}"
        
        # Connect directly to remote database
        self.conn = duckdb.connect(db_path)
        self.conn.execute("""
            INSTALL httpfs;
            LOAD httpfs;
            SET s3_region='us-east-1';
            SET s3_endpoint='s3.us-east-1.amazonaws.com';
            SET s3_url_style='path';
            SET s3_access_key_id='none';
            SET s3_secret_access_key=?;
        """, [self.HF_TOKEN])

    def ensure_connection(self):
        """Ensure database connection is active and valid"""
        try:
            self.conn.execute("SELECT 1")
        except:
            logging.warning("Database connection lost, attempting to reconnect...")
            self._connect_with_retries()
        
    def _setup_database(self):
        """Initialize database tables for hardware state tracking"""
        self.ensure_connection()
        # GPU Chips table
        self.conn.execute("""
            CREATE TABLE IF NOT EXISTS gpu_chips (
                chip_id INTEGER PRIMARY KEY,
                sm_count INTEGER,
                clock_speed_mhz INTEGER,
                memory_size_gb FLOAT,
                state_json JSON
            )
        """)
        
        # Streaming Multiprocessors
        self.conn.execute("""
            CREATE TABLE IF NOT EXISTS streaming_multiprocessors (
                sm_id INTEGER,
                chip_id INTEGER,
                core_count INTEGER,
                tensor_core_count INTEGER,
                state_json JSON,
                PRIMARY KEY (sm_id, chip_id)
            )
        """)
        
        # Optical Interconnects
        self.conn.execute("""
            CREATE TABLE IF NOT EXISTS optical_interconnects (
                link_id VARCHAR PRIMARY KEY,
                chip_a_id INTEGER,
                chip_b_id INTEGER,
                bandwidth_tbps FLOAT,
                latency_ns FLOAT,
                state_json JSON
            )
        """)
        
        # Hardware Queues
        self.conn.execute("""
            CREATE TABLE IF NOT EXISTS hardware_queues (
                queue_id INTEGER PRIMARY KEY,
                hardware_type VARCHAR,
                chip_id INTEGER,
                sm_id INTEGER,
                instructions JSON
            )
        """)
        
        # Memory Map
        self.conn.execute("""
            CREATE TABLE IF NOT EXISTS memory_map (
                address INTEGER PRIMARY KEY,
                chip_id INTEGER,
                size INTEGER,
                allocation_type VARCHAR,
                metadata JSON
            )
        """)
        
        # Shader Units
        self.conn.execute("""
            CREATE TABLE IF NOT EXISTS shader_units (
                unit_id INTEGER,
                chip_id INTEGER,
                sm_id INTEGER,
                current_program_id VARCHAR,
                state_json JSON,
                PRIMARY KEY (unit_id, chip_id, sm_id)
            )
        """)
        
        self.conn.commit()

    def get_chip(self, chip_id: int) -> Dict:
        """Get or create a GPU chip"""
        self.ensure_connection()
        result = self.conn.execute("""
            SELECT * FROM gpu_chips WHERE chip_id = ?
        """, [chip_id]).fetchone()
        
        if not result:
            # Initialize new chip
            self.ensure_connection()
            self.conn.execute("""
                INSERT INTO gpu_chips (
                    chip_id, sm_count, clock_speed_mhz, 
                    memory_size_gb, state_json
                ) VALUES (?, 64, 1500, 24.0, ?)
            """, [chip_id, json.dumps({"power_state": "idle", "temperature": 30})])
            
            # Initialize SMs for the chip
            sm_values = [(i, chip_id, 128, 4, json.dumps({"active": False}))
                        for i in range(64)]
            self.ensure_connection()
            self.conn.executemany("""
                INSERT INTO streaming_multiprocessors (
                    sm_id, chip_id, core_count, 
                    tensor_core_count, state_json
                ) VALUES (?, ?, ?, ?, ?)
            """, sm_values)
            
            # Initialize shader units
            shader_values = [(i, chip_id, j, None, json.dumps({"active": False}))
                           for j in range(64) for i in range(16)]
            self.ensure_connection()
            self.conn.executemany("""
                INSERT INTO shader_units (
                    unit_id, chip_id, sm_id, 
                    current_program_id, state_json
                ) VALUES (?, ?, ?, ?, ?)
            """, shader_values)
            
            self.conn.commit()
            self.ensure_connection()
            result = self.conn.execute("""
                SELECT * FROM gpu_chips WHERE chip_id = ?
            """, [chip_id]).fetchone()
            
        return {
            "chip_id": result[0],
            "sm_count": result[1],
            "clock_speed_mhz": result[2],
            "memory_size_gb": result[3],
            "state": json.loads(result[4])
        }

    def connect_chips(self, chip_id_a: int, chip_id_b: int, bandwidth_tbps: float = 800, latency_ns: float = 1):
        """Connect two chips with an optical interconnect"""
        link_id = f"link_{chip_id_a}_{chip_id_b}"
        
        # Ensure both chips exist
        self.get_chip(chip_id_a)
        self.get_chip(chip_id_b)
        
        # Create interconnect
        self.ensure_connection()
        self.conn.execute("""
            INSERT INTO optical_interconnects (
                link_id, chip_a_id, chip_b_id,
                bandwidth_tbps, latency_ns, state_json
            ) VALUES (?, ?, ?, ?, ?, ?)
            ON CONFLICT (link_id) DO UPDATE SET
                bandwidth_tbps = excluded.bandwidth_tbps,
                latency_ns = excluded.latency_ns
        """, [link_id, chip_id_a, chip_id_b, bandwidth_tbps, latency_ns, 
              json.dumps({"active": True, "errors": 0})])
        
        self.conn.commit()

    def execute_tensor_core_matmul(self, chip_id: int, sm_id: int, A: np.ndarray, B: np.ndarray) -> Optional[np.ndarray]:
        """Execute matrix multiplication on tensor core"""
        # Verify SM exists and has tensor cores
        self.ensure_connection()
        result = self.conn.execute("""
            SELECT tensor_core_count, state_json
            FROM streaming_multiprocessors
            WHERE chip_id = ? AND sm_id = ?
        """, [chip_id, sm_id]).fetchone()
        
        if not result or result[0] == 0:
            return None
            
        # Update SM state
        sm_state = json.loads(result[1])
        sm_state["active"] = True
        sm_state["operation"] = "matmul"
        
        self.ensure_connection()
        self.conn.execute("""
            UPDATE streaming_multiprocessors
            SET state_json = ?
            WHERE chip_id = ? AND sm_id = ?
        """, [json.dumps(sm_state), chip_id, sm_id])
        
        # Execute matmul
        try:
            result = np.matmul(A, B)
            
            # Update state on completion
            sm_state["active"] = False
            sm_state["last_operation"] = "matmul"
            sm_state["last_operation_status"] = "success"
            
            self.conn.execute("""
                UPDATE streaming_multiprocessors
                SET state_json = ?
                WHERE chip_id = ? AND sm_id = ?
            """, [json.dumps(sm_state), chip_id, sm_id])
            
            return result
            
        except Exception as e:
            sm_state["active"] = False
            sm_state["last_operation"] = "matmul"
            sm_state["last_operation_status"] = "error"
            sm_state["last_error"] = str(e)
            
            self.conn.execute("""
                UPDATE streaming_multiprocessors
                SET state_json = ?
                WHERE chip_id = ? AND sm_id = ?
            """, [json.dumps(sm_state), chip_id, sm_id])
            
            return None
        """
        Run vertex shader using provided instructions.
        Supports AI/ML ops: matmul, activation, softmax, etc.
        """
        chip = self.get_chip(chip_id)
        if not chip.sms:
            return vertex_data
            
        sm = chip.sms[0]  # Use first SM for shader execution
        registers = list(vertex_data)
        
        for instr in shader_program.get('instructions', []):
            op = instr.get('opcode')
            args = instr.get('args', [])
            
            if op == 'load_vertex_data':
                continue
            elif op == 'transform_vertex':
                registers = [v * 2 for v in registers]
            elif op == 'matmul':
                A = args[0] if args else [[v] for v in registers]
                B = args[1] if len(args) > 1 else [[1.0] * len(registers)]
                result = sm.tensor_core_matmul(np.array(A), np.array(B))
                if result is not None:
                    registers = result.flatten().tolist()
            elif op == 'activation':
                registers = [max(0, v) for v in registers]  # ReLU
            elif op == 'softmax':
                import math
                exp_vals = [math.exp(v) for v in registers]
                s = sum(exp_vals)
                registers = [v / s for v in exp_vals]
                
        return registers
        
    def v2_fragment_shader(self, chip_id: int, fragment_data: Dict[str, Any], 
                          shader_program: Dict[str, Any]) -> Tuple[float, float, float, float]:
        """
        Run fragment shader using provided instructions.
        Supports AI/ML ops: matmul, activation, softmax, etc.
        """
        chip = self.get_chip(chip_id)
        if not chip.sms:
            return (1.0, 1.0, 1.0, 1.0)
            
        sm = chip.sms[0]  # Use first SM for shader execution
        color = [1.0, 1.0, 1.0, 1.0]  # Default white
        
        for instr in shader_program.get('instructions', []):
            op = instr.get('opcode')
            args = instr.get('args', [])
            
            if op == 'load_fragment_data':
                continue
            elif op == 'compute_color':
                x = fragment_data.get('x', 0)
                y = fragment_data.get('y', 0)
                color = [x % 256 / 255.0, y % 256 / 255.0, 0.5, 1.0]
            elif op == 'matmul':
                A = args[0] if args else [[c] for c in color]
                B = args[1] if len(args) > 1 else [[1.0] * len(color)]
                result = sm.tensor_core_matmul(np.array(A), np.array(B))
                if result is not None:
                    color = result.flatten().tolist()
            elif op == 'activation':
                color = [max(0, v) for v in color]  # ReLU
            elif op == 'softmax':
                import math
                exp_vals = [math.exp(v) for v in color]
                s = sum(exp_vals)
                color = [v / s for v in exp_vals]
                
        return tuple(color[:4])  # Ensure RGBA output
        
    def allocate_vram(self, chip_id: int, size_bytes: int) -> Optional[str]:
        """Allocate VRAM on specified chip"""
        chip = self.get_chip(chip_id)
        return chip.allocate_memory(size_bytes)
        
    def transfer_data(self, src_chip_id: int, dst_chip_id: int, size_bytes: int) -> float:
        """Transfer data between chips, returns transfer time"""
        src_chip = self.get_chip(src_chip_id)
        dst_chip = self.get_chip(dst_chip_id)
        return src_chip.transfer_data(dst_chip, size_bytes)