import logging
from datetime import datetime, timedelta

import pandas as pd

# Configure logging once at import time; every query helper shares this logger.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)


def get_evals_for_journey(
    journey_grn,
    runs_df,
    turns_df,
    ai_script_evals_df,
    journey_evals_df,
    master_data,
    aggregation_type='mean',
    filter_days=None,
    group_by_plots=True,
    runtime_only=False,
    group_by_thread=False
):
    """
    Query 2: Get all evals that have been run on a given Journey.

    Args:
        journey_grn (str): Journey GRN
        runs_df (pd.DataFrame): Runs data
        turns_df (pd.DataFrame): Turns data
        ai_script_evals_df (pd.DataFrame): AI script evaluations data
        journey_evals_df (pd.DataFrame): Journey evaluations data
        master_data (dict): Master configuration data (unused here; kept so all
            query helpers share a uniform signature)
        aggregation_type (str): How to aggregate scores: 'mean', 'median',
            'count', 'min' or 'max'
        filter_days (int, optional): Filter data to last N days
        group_by_plots (bool): Whether to group results by plots
        runtime_only (bool): Filter to runtime evaluations only
        group_by_thread (bool): Whether to group results by thread_id instead
            of plot_grn (only honored together with runtime_only)

    Returns:
        dict: Evaluation results, or {"message": ...} when no runs match.

    Raises:
        ValueError: If aggregation_type is not one of the supported values.
    """

    def aggregate_scores(scores):
        """Reduce a Series of scores according to aggregation_type."""
        if aggregation_type == 'mean':
            return scores.mean()
        if aggregation_type == 'median':
            return scores.median()
        if aggregation_type == 'count':
            return len(scores)
        if aggregation_type == 'min':
            return scores.min()
        if aggregation_type == 'max':
            return scores.max()
        # Fix: an unrecognized aggregation_type used to fall through and
        # silently fill the result with None values.
        raise ValueError(f"Unsupported aggregation_type: {aggregation_type!r}")

    def nested_scores(evals_df):
        """Return {eval_name: {metric_name: aggregated score}} for evals_df."""
        return {
            eval_name: {
                metric_name: aggregate_scores(metric_group['score'])
                for metric_name, metric_group in eval_group.groupby('metric_name')
            }
            for eval_name, eval_group in evals_df.groupby('eval_name')
        }

    # Filter runs by journey GRN (copy so later column selection is safe).
    filtered_runs = runs_df[runs_df['journey_grn'] == journey_grn].copy()

    # Apply runtime filter if specified.
    if runtime_only:
        filtered_runs = filtered_runs[filtered_runs['is_runtime'] == True]  # noqa: E712

    # Apply date filter if specified.
    if filter_days is not None and filter_days > 0:
        # NOTE(review): naive local datetime compared against 'start_time' —
        # assumes start_time is naive local time as well; confirm upstream.
        cutoff_date = datetime.now() - timedelta(days=filter_days)
        filtered_runs = filtered_runs[filtered_runs['start_time'] >= cutoff_date]

    if filtered_runs.empty:
        return {"message": f"No evaluations found for journey {journey_grn} with the specified filters"}

    run_ids = filtered_runs['run_id'].tolist()
    logger.info(f"Found {len(run_ids)} runs for journey {journey_grn}")

    # Evaluations and turns belonging to the matched runs.
    filtered_journey_evals = journey_evals_df[journey_evals_df['run_id'].isin(run_ids)]
    filtered_turns = turns_df[turns_df['run_id'].isin(run_ids)]

    # Fix: the thread branch used to select 'thread_id' unconditionally and
    # raised a KeyError when the column was absent; fall back to plot/flat
    # grouping instead (mirrors the guard in get_evals_for_ai_script).
    use_thread = (
        group_by_thread and runtime_only and 'thread_id' in filtered_runs.columns
    )
    key_col = 'thread_id' if use_thread else 'plot_grn'

    if use_thread:
        def display_name(key):
            # Handle null thread_ids.
            return "unknown_thread" if pd.isna(key) else key
    else:
        def display_name(key):
            # Plot GRNs look like "...:<short-name>"; report the short name.
            return "unknown_plot" if pd.isna(key) else key.split(':')[-1]

    # Propagate the grouping key from runs -> turns -> evaluations.
    turns_keyed = filtered_turns.merge(
        filtered_runs[['run_id', key_col]], on='run_id', how='left'
    )
    filtered_ai_script_evals = ai_script_evals_df[
        ai_script_evals_df['turn_id'].isin(turns_keyed['turn_id'])
    ]
    ai_evals_keyed = filtered_ai_script_evals.merge(
        turns_keyed[['turn_id', key_col]], on='turn_id', how='left'
    )
    journey_evals_keyed = filtered_journey_evals.merge(
        filtered_runs[['run_id', key_col]], on='run_id', how='left'
    )

    if use_thread or group_by_plots:
        # Grouped shape: {group_name: {"journeyEvals": ..., "aiScriptEvals": ...}}
        result = {}
        for source_df, bucket in (
            (journey_evals_keyed, "journeyEvals"),
            (ai_evals_keyed, "aiScriptEvals"),
        ):
            for key, key_group in source_df.groupby(key_col):
                entry = result.setdefault(
                    display_name(key), {"journeyEvals": {}, "aiScriptEvals": {}}
                )
                entry[bucket] = nested_scores(key_group)
    else:
        # No grouping: aggregate everything for the journey in one bucket.
        result = {
            "journeyEvals": nested_scores(filtered_journey_evals),
            "aiScriptEvals": nested_scores(filtered_ai_script_evals),
        }

    return result
def get_evals_for_ai_script(
    script_grn,
    runs_df,
    turns_df,
    ai_script_evals_df,
    journey_evals_df,
    master_data,
    aggregation_type='mean',
    filter_days=None,
    group_by_plots=True,
    runtime_only=False,
    group_by_thread=False
):
    """
    Query 3: Get all evals that have been run on an AIScript.

    Args:
        script_grn (str): AI Script GRN
        runs_df (pd.DataFrame): Runs data
        turns_df (pd.DataFrame): Turns data
        ai_script_evals_df (pd.DataFrame): AI script evaluations data
        journey_evals_df (pd.DataFrame): Journey evaluations data (unused here;
            kept so all query helpers share a uniform signature)
        master_data (dict): Master configuration data (unused here)
        aggregation_type (str): How to aggregate scores: 'mean', 'median',
            'count', 'min' or 'max'
        filter_days (int, optional): Filter data to last N days
        group_by_plots (bool): Whether to group results by plots
        runtime_only (bool): Filter to runtime evaluations only
        group_by_thread (bool): Whether to group results by thread_id instead
            of plot_grn (only honored together with runtime_only)

    Returns:
        dict: Evaluation results, or {"message": ...} when no turns match.

    Raises:
        ValueError: If aggregation_type is not one of the supported values.
    """
    # Same instance as the module-level logger (identical name).
    log = logging.getLogger(__name__)

    def aggregate_scores(scores):
        """Reduce a Series of scores according to aggregation_type."""
        if aggregation_type == 'mean':
            return scores.mean()
        if aggregation_type == 'median':
            return scores.median()
        if aggregation_type == 'count':
            return len(scores)
        if aggregation_type == 'min':
            return scores.min()
        if aggregation_type == 'max':
            return scores.max()
        # Fix: an unrecognized aggregation_type used to fall through and
        # silently fill the result with None values.
        raise ValueError(f"Unsupported aggregation_type: {aggregation_type!r}")

    def nested_scores(evals_df):
        """Return {eval_name: {metric_name: aggregated score}} for evals_df."""
        return {
            eval_name: {
                metric_name: aggregate_scores(metric_group['score'])
                for metric_name, metric_group in eval_group.groupby('metric_name')
            }
            for eval_name, eval_group in evals_df.groupby('eval_name')
        }

    # Turns produced by this AI script.
    filtered_turns = turns_df[turns_df['ai_script_grn'] == script_grn].copy()

    # The runs those turns belong to.
    runs_for_turns = runs_df[runs_df['run_id'].isin(filtered_turns['run_id'])].copy()

    # Fix: the old message counted turn rows (including duplicate run_ids) as
    # "runs"; report the number of distinct matching runs instead.
    log.info(f"Found {runs_for_turns['run_id'].nunique()} runs for AI script {script_grn}")

    # Apply runtime filter if specified, then re-restrict the turns.
    if runtime_only:
        runs_for_turns = runs_for_turns[runs_for_turns['is_runtime'] == True]  # noqa: E712
        filtered_turns = filtered_turns[
            filtered_turns['run_id'].isin(runs_for_turns['run_id'])
        ]

    # Apply date filter if specified (turn-level timestamp here, unlike the
    # run-level start_time used by the other queries).
    if filter_days is not None and filter_days > 0:
        cutoff_date = datetime.now() - timedelta(days=filter_days)
        filtered_turns = filtered_turns[filtered_turns['timestamp'] >= cutoff_date]

    if filtered_turns.empty:
        return {"message": f"No evaluations found for AI script {script_grn} with the specified filters"}

    filtered_ai_script_evals = ai_script_evals_df[
        ai_script_evals_df['turn_id'].isin(filtered_turns['turn_id'])
    ]

    if group_by_thread and runtime_only:
        if 'thread_id' in runs_for_turns.columns:
            # Propagate thread_id from runs -> turns -> evaluations.
            turns_with_thread = filtered_turns.merge(
                runs_for_turns[['run_id', 'thread_id']], on='run_id', how='left'
            )
            evals_with_thread = filtered_ai_script_evals.merge(
                turns_with_thread[['turn_id', 'thread_id']], on='turn_id', how='left'
            )
            result = {}
            for thread_id, thread_group in evals_with_thread.groupby('thread_id'):
                # Handle null thread_ids.
                thread_name = "unknown_thread" if pd.isna(thread_id) else thread_id
                result[thread_name] = nested_scores(thread_group)
        else:
            # No thread information available: fall back to ungrouped results.
            result = nested_scores(filtered_ai_script_evals)
    elif group_by_plots and 'plot_grn' in runs_for_turns.columns:
        # Propagate plot_grn from runs -> turns -> evaluations.
        turns_with_plot = filtered_turns.merge(
            runs_for_turns[['run_id', 'plot_grn']], on='run_id', how='left'
        )
        evals_with_plot = filtered_ai_script_evals.merge(
            turns_with_plot[['turn_id', 'plot_grn']], on='turn_id', how='left'
        )
        result = {}
        for plot_grn, plot_group in evals_with_plot.groupby('plot_grn'):
            # Handle null plot_grns; report the short name after the last ':'.
            plot_name = 'unknown_plot' if pd.isna(plot_grn) else plot_grn.split(':')[-1]
            result[plot_name] = nested_scores(plot_group)
    else:
        # Don't group by plots or threads.
        result = nested_scores(filtered_ai_script_evals)

    return result
def get_evals_for_plot(
    plot_grn,
    runs_df,
    turns_df,
    ai_script_evals_df,
    journey_evals_df,
    master_data,
    aggregation_type='mean',
    filter_days=None,
    runtime_only=False,
    group_by_plots=True
):
    """
    Query 1: Get all available evals that have been run on a given plot.

    Args:
        plot_grn (str): Plot GRN
        runs_df (pd.DataFrame): Runs data
        turns_df (pd.DataFrame): Turns data
        ai_script_evals_df (pd.DataFrame): AI script evaluations data
        journey_evals_df (pd.DataFrame): Journey evaluations data
        master_data (dict): Master configuration data (unused here; kept so all
            query helpers share a uniform signature)
        aggregation_type (str): How to aggregate scores: 'mean', 'median',
            'count', 'min' or 'max'
        filter_days (int, optional): Filter data to last N days
        runtime_only (bool): Filter to runtime evaluations only
        group_by_plots (bool): Unused; accepted only for signature
            compatibility with the other query helpers

    Returns:
        dict: {"journeyEvals": ..., "aiScriptEvals": ...} keyed by eval name
        then metric name, or {"message": ...} when no runs match.

    Raises:
        ValueError: If aggregation_type is not one of the supported values.
    """

    def aggregate_scores(scores):
        """Reduce a Series of scores according to aggregation_type."""
        if aggregation_type == 'mean':
            return scores.mean()
        if aggregation_type == 'median':
            return scores.median()
        if aggregation_type == 'count':
            return len(scores)
        if aggregation_type == 'min':
            return scores.min()
        if aggregation_type == 'max':
            return scores.max()
        # Fix: an unrecognized aggregation_type used to fall through and
        # silently fill the result with None values.
        raise ValueError(f"Unsupported aggregation_type: {aggregation_type!r}")

    def nested_scores(evals_df):
        """Return {eval_name: {metric_name: aggregated score}} for evals_df."""
        return {
            eval_name: {
                metric_name: aggregate_scores(metric_group['score'])
                for metric_name, metric_group in eval_group.groupby('metric_name')
            }
            for eval_name, eval_group in evals_df.groupby('eval_name')
        }

    # Filter runs by plot GRN.
    filtered_runs = runs_df[runs_df['plot_grn'] == plot_grn].copy()

    # Apply runtime filter if specified.
    if runtime_only:
        filtered_runs = filtered_runs[filtered_runs['is_runtime'] == True]  # noqa: E712

    # Apply date filter if specified.
    if filter_days is not None and filter_days > 0:
        cutoff_date = datetime.now() - timedelta(days=filter_days)
        filtered_runs = filtered_runs[filtered_runs['start_time'] >= cutoff_date]

    if filtered_runs.empty:
        return {"message": f"No evaluations found for plot {plot_grn} with the specified filters"}

    run_ids = filtered_runs['run_id'].tolist()

    # Journey evals attach at the run level; AI script evals at the turn level.
    filtered_journey_evals = journey_evals_df[journey_evals_df['run_id'].isin(run_ids)]
    filtered_turns = turns_df[turns_df['run_id'].isin(run_ids)]
    filtered_ai_script_evals = ai_script_evals_df[
        ai_script_evals_df['turn_id'].isin(filtered_turns['turn_id'])
    ]

    return {
        "journeyEvals": nested_scores(filtered_journey_evals),
        "aiScriptEvals": nested_scores(filtered_ai_script_evals),
    }
def get_shared_evals(
    runs_df,
    turns_df,
    ai_script_evals_df,
    journey_evals_df,
    master_data,
    aggregation_type='mean',
    filter_days=None,
    runtime_only=False
):
    """
    Query 4: Get all shared evals across all runs.

    Args:
        runs_df (pd.DataFrame): Runs data
        turns_df (pd.DataFrame): Turns data
        ai_script_evals_df (pd.DataFrame): AI script evaluations data
        journey_evals_df (pd.DataFrame): Journey evaluations data
        master_data (dict): Master configuration data; must contain
            'ai_script_evals' and 'journey_evals' DataFrames with 'name' and
            'is_shared_eval' columns
        aggregation_type (str): How to aggregate scores: 'mean', 'median',
            'count', 'min' or 'max'
        filter_days (int, optional): Filter data to last N days
        runtime_only (bool): Filter to runtime evaluations only

    Returns:
        dict: {"aiScriptEvals": ..., "journeyEvals": ...} keyed by eval name
        then metric name (only shared evals with data appear), or
        {"message": ...} when no runs match.

    Raises:
        ValueError: If aggregation_type is not one of the supported values.
    """

    def aggregate_scores(scores):
        """Reduce a Series of scores according to aggregation_type."""
        if aggregation_type == 'mean':
            return scores.mean()
        if aggregation_type == 'median':
            return scores.median()
        if aggregation_type == 'count':
            return len(scores)
        if aggregation_type == 'min':
            return scores.min()
        if aggregation_type == 'max':
            return scores.max()
        # Fix: an unrecognized aggregation_type used to fall through and
        # silently fill the result with None values.
        raise ValueError(f"Unsupported aggregation_type: {aggregation_type!r}")

    def metric_scores(eval_group):
        """Return {metric_name: aggregated score} for one eval's rows."""
        return {
            metric_name: aggregate_scores(metric_group['score'])
            for metric_name, metric_group in eval_group.groupby('metric_name')
        }

    # Names of evals flagged as shared in the master configuration.
    shared_ai_script_evals = master_data['ai_script_evals'][
        master_data['ai_script_evals']['is_shared_eval'] == True  # noqa: E712
    ]['name'].tolist()
    shared_journey_evals = master_data['journey_evals'][
        master_data['journey_evals']['is_shared_eval'] == True  # noqa: E712
    ]['name'].tolist()

    # Apply runtime filter and date filter if specified.
    filtered_runs = runs_df.copy()
    if runtime_only:
        filtered_runs = filtered_runs[filtered_runs['is_runtime'] == True]  # noqa: E712
    if filter_days is not None and filter_days > 0:
        cutoff_date = datetime.now() - timedelta(days=filter_days)
        filtered_runs = filtered_runs[filtered_runs['start_time'] >= cutoff_date]

    if filtered_runs.empty:
        return {"message": "No evaluations found with the specified filters"}

    run_ids = filtered_runs['run_id'].tolist()
    filtered_turns = turns_df[turns_df['run_id'].isin(run_ids)]
    turn_ids = filtered_turns['turn_id'].tolist()

    # Keep only shared evals attached to the surviving turns/runs.
    filtered_ai_script_evals = ai_script_evals_df[
        (ai_script_evals_df['turn_id'].isin(turn_ids))
        & (ai_script_evals_df['eval_name'].isin(shared_ai_script_evals))
    ]
    filtered_journey_evals = journey_evals_df[
        (journey_evals_df['run_id'].isin(run_ids))
        & (journey_evals_df['eval_name'].isin(shared_journey_evals))
    ]

    result = {
        "aiScriptEvals": {},
        "journeyEvals": {}
    }

    # Iterate in shared-list order so the output ordering stays stable;
    # shared evals with no matching rows are skipped.
    for eval_name in shared_ai_script_evals:
        eval_group = filtered_ai_script_evals[
            filtered_ai_script_evals['eval_name'] == eval_name
        ]
        if not eval_group.empty:
            result["aiScriptEvals"][eval_name] = metric_scores(eval_group)

    for eval_name in shared_journey_evals:
        eval_group = filtered_journey_evals[
            filtered_journey_evals['eval_name'] == eval_name
        ]
        if not eval_group.empty:
            result["journeyEvals"][eval_name] = metric_scores(eval_group)

    return result