import os
import logging
import requests
import numpy as np
import pandas as pd
from typing import Dict, List, Tuple, Optional
from datetime import datetime
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import Session

from src.entity.match import Match, RawMatch
from src.entity.odds import Odds
from src.entity.player import Player
from src.repository import match_repo
from src.jobs.player import schedule_player_details

# Module-level logger, named after this module so that its level and handlers
# can be configured independently of the root logger.
logger = logging.getLogger(__name__)

def get_match(raw_match: Dict) -> Match:
    """
    Build a Match entity from the match-level fields of a raw match.

    Every attribute is read with ``dict.get``, so a key that is absent from
    ``raw_match`` leaves the corresponding attribute set to None.
    """
    # Match attribute name -> key in the raw record
    attribute_sources = {
        "date": "Date",
        "comment": "Comment",
        "winner_rank": "WRank",
        "winner_points": "WPts",
        "loser_rank": "LRank",
        "loser_points": "LPts",
        "tournament_name": "Tournament",
        "tournament_series": "Series",
        "tournament_surface": "Surface",
        "tournament_court": "Court",
        "tournament_round": "Round",
        "tournament_location": "Location",
    }

    parsed = Match()
    for attribute, source_key in attribute_sources.items():
        setattr(parsed, attribute, raw_match.get(source_key))

    return parsed

def get_all_odds(raw_match: Dict) -> List[Odds]:
    """
    Parse the odds data from the raw match.

    A key of the form ``<bookmaker>W`` is stored on ``Odds.winner`` and the
    matching ``<bookmaker>L`` key on ``Odds.loser``. Any string key that ends
    in "W" or "L" and has a non-empty prefix is treated as an odds column.

    Args:
        raw_match: Raw match record keyed by column name.

    Returns:
        One Odds object per bookmaker, in order of first appearance in
        ``raw_match``. When only one side is present for a bookmaker, the
        missing side is set to None instead of raising KeyError.
    """
    # dict.fromkeys de-duplicates while keeping first-seen order, so the result
    # is deterministic (a set would iterate in arbitrary order).
    bookmakers = dict.fromkeys(
        key[:-1]
        for key in raw_match
        # len(key) > 1 rejects "", "W" and "L", which carry no bookmaker name
        if isinstance(key, str) and len(key) > 1 and key.endswith(("W", "L"))
    )

    all_odds = []
    for bookmaker in bookmakers:
        odds = Odds()
        odds.bookmaker = bookmaker
        # .get: records built with exclude_unset may carry only one side
        odds.winner = raw_match.get(f"{bookmaker}W")
        odds.loser = raw_match.get(f"{bookmaker}L")
        all_odds.append(odds)

    return all_odds

def get_players(raw_match: Dict) -> Tuple[Player, Player]:
    """
    Build the two Player entities named in a raw match.

    Returns a ``(winner, loser)`` pair whose names come from the "Winner" and
    "Loser" keys; a missing key yields a Player whose name is None.
    """
    winner, loser = (
        Player(name=raw_match.get(name_key)) for name_key in ("Winner", "Loser")
    )
    return winner, loser

def parse_raw_match(raw_match: Dict) -> Match:
    """
    Turn one raw match record into a Match with its odds and players attached.

    The match-level fields, the list of odds, and the winner/loser players are
    each parsed by their dedicated helper and assembled on a single Match.
    """
    parsed = get_match(raw_match)
    parsed.odds = get_all_odds(raw_match)
    parsed.winner, parsed.loser = get_players(raw_match)

    return parsed

def parse_raw_matches(raw_matches: List[Dict]) -> List[Match]:
    """
    Parse a list of raw matches.

    Args:
        raw_matches: Raw match records, one dict per match.

    Returns:
        The parsed Match objects, in the same order as the input. An empty
        input yields an empty list.
    """
    return [parse_raw_match(raw_match) for raw_match in raw_matches]

def insert_new_match(db: Session, raw_match: Dict, on_conflict_do_nothing: bool = False) -> Match:
    """
    Insert a new match into the database.

    Args:
        db: Session used for the insert; it is rolled back on any failure.
        raw_match: Raw match record to parse and insert.
        on_conflict_do_nothing: When True, an IntegrityError raised by the
            insert is logged at debug level and swallowed instead of re-raised.

    Returns:
        The parsed Match. When an IntegrityError was swallowed, this is the
        parsed object that failed to insert, and no player-detail task is
        scheduled for it.

    Raises:
        IntegrityError: The insert conflicted and on_conflict_do_nothing is False.
        Exception: Any other error raised by the insert, after rollback.
    """
    match = parse_raw_match(raw_match)

    try:
        match_repo.insert_match(db, match)
    except IntegrityError as e:
        if on_conflict_do_nothing:
            logger.debug(f"Match already exists: {match.date}")
            db.rollback()
            return match

        # Log the error and re-raise
        logger.error(f"Error inserting match: {e}")
        db.rollback()
        raise
    except Exception as e:
        # Log the error and re-raise
        logger.error(f"Error inserting match: {e}")
        db.rollback()
        raise

    # Only reached after a successful insert: request player details for any
    # player that is still missing them.
    if _should_fetch_details(match.winner):
        schedule_player_details(match.winner.name)

    if _should_fetch_details(match.loser):
        schedule_player_details(match.loser.name)

    return match

def insert_batch_matches(db: Session, raw_matches: List[Dict], on_conflict_do_nothing: bool = False) -> Dict:
    """
    Insert several raw matches one by one and summarise the outcome.

    Each item may be a plain dict or a RawMatch; RawMatch items are dumped to
    a dict (unset fields excluded) before insertion. An IntegrityError for one
    item is counted and logged, and processing continues with the next item.

    Returns:
        A dict with 'matches' (the inserted matches whose id is not None) and
        'nb_errors' (the number of IntegrityErrors encountered).
    """
    inserted = []
    failures = 0

    for item in raw_matches:
        try:
            payload = item.model_dump(exclude_unset=True) if isinstance(item, RawMatch) else item
            result = insert_new_match(
                db=db,
                raw_match=payload,
                on_conflict_do_nothing=on_conflict_do_nothing,
            )

            # A match without an id was not persisted (e.g. a skipped conflict)
            if result.id is not None:
                inserted.append(result)
        except IntegrityError as e:
            failures += 1
            logger.error(f"Error inserting match: {e}")

    logger.info(f"Number of matches inserted: {len(inserted)}")

    if failures:
        logger.warning(f"Number of errors: {failures}")

    return {'matches': inserted, 'nb_errors': failures}

def _should_fetch_details(player: Player) -> bool:
    """
    Tell whether details still need to be fetched for this player.

    Returns True when the player's ``tennis_id`` or ``caracteristics`` is None.
    """
    # Guard clause keeps the short-circuit: caracteristics is only read when
    # tennis_id is already set.
    if player.tennis_id is None:
        return True

    return player.caracteristics is None

def fetch_raw_data(year: Optional[int] = None, timeout: float = 30.0) -> None:
    """
    Fetch the yearly results workbook from tennis-data.co.uk and save it to
    ``./data/atp/<year>.xlsx``.

    A file for a past year is downloaded only once; the current year's file is
    downloaded again on every call.

    Args:
        year (int, optional): Year to retrieve. If None, fetch current year data.
        timeout (float): Seconds to wait for the server before giving up, so a
            stalled connection cannot block the caller forever.

    Raises:
        requests.HTTPError: The server answered with an error status code.
        requests.RequestException: The request failed or timed out.
        OSError: The file could not be written.
    """
    current_year = datetime.now().year

    if not year:
        year = current_year

    filename = f"{year}.xlsx"
    file_path = f"./data/atp/{filename}"

    # Check if the file already exists
    if os.path.exists(file_path) and year != current_year:
        logger.info(f"File {file_path} already exists. Skipping download.")
        return

    logger.info(f"Fetching data from tennis-data.co.uk for year {year}")

    url = f"http://www.tennis-data.co.uk/{year}/{filename}"

    # Download to a temporary sibling file and move it into place only once it
    # is complete: a partial file at file_path would otherwise be mistaken for
    # a finished download by the existence check above.
    tmp_path = f"{file_path}.part"

    try:
        # The context manager releases the streamed connection in every case.
        with requests.get(url, stream=True, timeout=timeout) as response:
            # Check response status code
            response.raise_for_status()

            os.makedirs(os.path.dirname(file_path), exist_ok=True)

            with open(tmp_path, "wb") as file:
                for chunk in response.iter_content(chunk_size=8192):
                    file.write(chunk)

        os.replace(tmp_path, file_path)
    except Exception:
        # Do not leave a truncated download behind
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
        raise

    logger.info(f"Data fetched from {url} 👍 and saved to {file_path}")

def get_cleaned_data(year: Optional[int] = None) -> pd.DataFrame:
    """
    Load ``./data/atp/<year>.xlsx`` and return it as a cleaned DataFrame.

    Cleaning steps: rows without 'LRank' or 'WRank' are dropped, missing
    'Lsets'/'Wsets' become 0, 'Winner'/'Loser' are stripped of surrounding
    whitespace, and remaining NaN values are replaced with None.

    Args:
        year (int, optional): Year to load. If None, load current year data.

    Returns:
        The cleaned DataFrame.
    """
    if not year:
        year = datetime.now().year

    df = pd.read_excel(f'./data/atp/{year}.xlsx')

    # Remove rows where LRank or WRank is NaN; take an explicit copy so the
    # column assignments below act on an independent frame and cannot trigger
    # SettingWithCopyWarning.
    df = df.dropna(subset=['LRank', 'WRank']).copy()
    df['Lsets'] = df['Lsets'].fillna(0)
    df['Wsets'] = df['Wsets'].fillna(0)

    # Strip whitespace from 'Winner' and 'Loser' columns
    df['Winner'] = df['Winner'].str.strip()
    df['Loser'] = df['Loser'].str.strip()

    # Replace NaN values with None
    df = df.replace({np.nan: None})
    df = df.where(pd.notnull(df), None)

    return df