import os
import pandas as pd
import sys
import logging
from typing import Optional, List
from pydantic import BaseModel, Field
from pydantic_settings import BaseSettings, SettingsConfigDict

# Ensure stdout encoding for Unicode
sys.stdout.reconfigure(encoding='utf-8')

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class Settings(BaseSettings):
    """Settings for Cash Flow Statement CSV extraction"""
    
    model_config = SettingsConfigDict(
        env_prefix='CFS_',
        env_file='.env',
        extra='ignore'
    )
    
    excel_file_path: str = Field(default="data/input/In Lakhs  BS_FY 23-24 V5 - Final.xlsx")
    output_folder: str = Field(default="data/csv_notes_cfs")
    note_16_23_sheet: str = Field(default="Note 16-23")
    note_2_8_sheet: str = Field(default="Note 2 - 8")
    note_9_sheet: str = Field(default="Note 9")
    note_10_15_sheet: str = Field(default="Note 10-15")
    note_24_30_sheet: str = Field(default="Note 24-30")
    skiprows: int = Field(default=3)

settings = Settings()

class NoteCSVInfo(BaseModel):
    name: str
    rows: int

def get_available_sheets(xls: pd.ExcelFile) -> List[str]:
    """Get list of available sheet names"""
    return xls.sheet_names

def find_trial_balance_sheet(xls: pd.ExcelFile) -> Optional[str]:
    """Find the main trial balance sheet"""
    sheet_names = get_available_sheets(xls)
    
    # Common trial balance sheet name patterns
    trial_balance_patterns = [
        'trial balance',
        'tb',
        'trialbalance',
        'trial_balance',
        'sheet1',
        'sheet 1'
    ]
    
    for sheet in sheet_names:
        sheet_lower = sheet.lower()
        for pattern in trial_balance_patterns:
            if pattern in sheet_lower:
                logger.info(f"Found trial balance sheet: {sheet}")
                return sheet
    
    # If no match, return the first sheet
    logger.info(f"No trial balance pattern matched, using first sheet: {sheet_names[0]}")
    return sheet_names[0] if sheet_names else None

def extract_from_trial_balance(xls: pd.ExcelFile, output_folder: str) -> List[NoteCSVInfo]:
    """
    Extract data from a standard trial balance file (fallback method)
    """
    tb_sheet = find_trial_balance_sheet(xls)
    
    if not tb_sheet:
        raise ValueError("No suitable sheet found in Excel file")
    
    logger.info(f"Using sheet '{tb_sheet}' as trial balance")
    
    # Try different skiprows values to find the header
    for skiprows in [0, 1, 2, 3, 4, 5]:
        try:
            df = xls.parse(tb_sheet, skiprows=skiprows)
            df = df.dropna(how='all').dropna(axis=1, how='all').reset_index(drop=True)
            
            # Check if we have reasonable data
            if len(df) > 5 and len(df.columns) >= 2:
                logger.info(f"Successfully parsed with skiprows={skiprows}, rows={len(df)}, cols={len(df.columns)}")
                
                # Save the entire trial balance
                os.makedirs(output_folder, exist_ok=True)
                output_path = os.path.join(output_folder, "Trial_Balance_Full.csv")
                df.to_csv(output_path, index=False)
                logger.info(f"Saved trial balance to: {output_path}")
                
                return [NoteCSVInfo(name="Trial_Balance_Full.csv", rows=df.shape[0])]
        except Exception as e:
            logger.debug(f"Failed with skiprows={skiprows}: {e}")
            continue
    
    raise ValueError("Could not parse trial balance sheet with any skiprows configuration")

def clean_note(xls: pd.ExcelFile, sheet_name: str, skiprows: int = 3) -> pd.DataFrame:
    """
    Parse and clean a sheet from the Excel file.
    """
    if sheet_name not in xls.sheet_names:
        raise ValueError(f"Sheet '{sheet_name}' not found. Available sheets: {', '.join(xls.sheet_names)}")
    
    df = xls.parse(sheet_name, skiprows=skiprows)
    df = df.dropna(how='all').dropna(axis=1, how='all').reset_index(drop=True)
    return df

def export_note_to_csv(df: pd.DataFrame, filename: str, output_folder: str) -> NoteCSVInfo:
    """
    Export DataFrame to CSV and return info.
    """
    output_path = os.path.join(output_folder, filename)
    df.to_csv(output_path, index=False)
    logger.info(f"Exported: {output_path}")
    return NoteCSVInfo(name=filename, rows=df.shape[0])

def main() -> None:
    """
    Main function to extract notes from Excel and export as CSVs.
    Handles both structured note files and standard trial balance files.
    """
    # Use command-line argument for Excel file path if provided
    excel_path = settings.excel_file_path
    if len(sys.argv) > 1:
        excel_path = sys.argv[1]
        logger.info(f"Excel file path from argument: {excel_path}")
    else:
        logger.info(f"Excel file path from settings: {excel_path}")
    
    if not os.path.exists(excel_path):
        logger.error(f"Excel file not found: {excel_path}")
        sys.exit(1)
    
    try:
        xls = pd.ExcelFile(excel_path)
    except Exception as e:
        logger.error(f"Failed to open Excel file: {e}")
        sys.exit(1)
    
    available_sheets = get_available_sheets(xls)
    logger.info(f"Available sheets in file: {', '.join(available_sheets)}")
    
    # Ensure output folder exists
    os.makedirs(settings.output_folder, exist_ok=True)
    
    # Try to extract from structured notes first
    expected_sheets = [
        settings.note_16_23_sheet,
        settings.note_2_8_sheet,
        settings.note_9_sheet,
        settings.note_10_15_sheet,
        settings.note_24_30_sheet
    ]
    
    # Check if this is a structured notes file
    has_structured_notes = any(sheet in available_sheets for sheet in expected_sheets)
    
    if has_structured_notes:
        logger.info("="*60)
        logger.info("Detected structured notes file - extracting from individual note sheets")
        logger.info("="*60)
        
        try:
            info_16_23 = export_note_to_csv(
                clean_note(xls, settings.note_16_23_sheet, settings.skiprows),
                "Note_16_to_23_Full.csv",
                settings.output_folder
            )
            info_2_8 = export_note_to_csv(
                clean_note(xls, settings.note_2_8_sheet, settings.skiprows),
                "Note_2_to_8_Full.csv",
                settings.output_folder
            )
            info_9 = export_note_to_csv(
                clean_note(xls, settings.note_9_sheet, settings.skiprows),
                "Note_9_Full.csv",
                settings.output_folder
            )
            info_10_15 = export_note_to_csv(
                clean_note(xls, settings.note_10_15_sheet, settings.skiprows),
                "Note_10_to_15_Full.csv",
                settings.output_folder
            )
            info_24_30 = export_note_to_csv(
                clean_note(xls, settings.note_24_30_sheet, settings.skiprows),
                "Note_24_to_30_Full.csv",
                settings.output_folder
            )
            
            logger.info("="*60)
            logger.info("EXTRACTION SUMMARY")
            logger.info("="*60)
            logger.info(f"Note 16-23: {info_16_23.rows} rows")
            logger.info(f"Note 2-8:   {info_2_8.rows} rows")
            logger.info(f"Note 9:     {info_9.rows} rows")
            logger.info(f"Note 10-15: {info_10_15.rows} rows")
            logger.info(f"Note 24-30: {info_24_30.rows} rows")
            logger.info("="*60)
            
        except ValueError as e:
            logger.error(f"Error extracting from structured notes: {e}")
            sys.exit(1)
    else:
        logger.info("="*60)
        logger.info("Standard trial balance file detected - using fallback extraction")
        logger.info("="*60)
        logger.warning("⚠️  Cash Flow Statement generation may be limited with trial balance data only")
        logger.info("")
        logger.info("For complete Cash Flow Statement, please provide a file with these sheets:")
        logger.info("  • Note 16-23 (Revenue details)")
        logger.info("  • Note 2-8 (Share capital, reserves, liabilities)")
        logger.info("  • Note 9 (Fixed assets)")
        logger.info("  • Note 10-15 (Current assets, loans)")
        logger.info("  • Note 24-30 (Additional disclosures)")
        logger.info("="*60)
        
        try:
            csv_infos = extract_from_trial_balance(xls, settings.output_folder)
            
            logger.info("="*60)
            logger.info("EXTRACTION SUMMARY")
            logger.info("="*60)
            for info in csv_infos:
                logger.info(f"{info.name}: {info.rows} rows")
            logger.info("="*60)
            logger.info("✓ Trial balance data extracted successfully")
            logger.info("Note: Cash Flow generation will proceed with available data")
            
        except Exception as e:
            logger.error(f"Error extracting from trial balance: {e}")
            logger.error("Please check if the file contains valid financial data")
            sys.exit(1)

if __name__ == "__main__":
    main()