import traceback
from fastapi import FastAPI
import pickle
import numpy as np
import pandas as pd
import joblib
import re
from datetime import datetime, time
import pytz

# Load the trained XGBoost model once at startup.
model = joblib.load("xgb_model.pkl")
# Feature names the model was trained with — used to align inference columns.
model_features = model.get_booster().feature_names

app = FastAPI(root_path="/")


@app.get("/")
def home():
    """Health-check endpoint confirming the service is up."""
    return {"message": "Lead Scoring Model is Live!"}


def should_send_to_ai_caller(estimated_volume, country):
    """Decide whether a lead should be routed to the AI caller.

    The decision is based on the lead's country, the current local time in
    that country, the weekday, and the (normalized) estimated yearly
    transaction volume (TPV).

    Args:
        estimated_volume: Raw TPV string from the lead payload (may be None).
        country: Country name from the lead payload (may be None).

    Returns:
        dict with keys "send" (bool) and "max_attempts" (int, always 3).
        Any internal error results in {"send": False, "max_attempts": 3}.
    """
    try:
        max_attempts = 3  # Always fixed at 3

        # --- Country to timezone mapping ---
        # NOTE(review): Italy is mapped to Europe/Paris — same UTC offset as
        # Europe/Rome, so local-time window checks still behave correctly.
        country_tz = {
            "France": "Europe/Paris",
            "Italy": "Europe/Paris",
            "United Kingdom": "Europe/London",
            "Spain": "Europe/Madrid",
            "Germany": "Europe/Berlin",
        }

        # --- Only countries present in the mapping are eligible ---
        if country not in country_tz:
            print(f"Country not supported for AI routing: {country}")
            return {"send": False, "max_attempts": max_attempts}

        # Default timezone (fallback)
        timezone_str = country_tz.get(country, "UTC")
        tz = pytz.timezone(timezone_str)
        now = datetime.now(tz)
        current_time = now.time()
        weekday = now.weekday()  # 0 = Monday, 6 = Sunday

        # --- Time Windows ---
        early_morning = time(7, 0)
        late_morning = time(9, 0)
        evening_start = time(18, 0)
        evening_end = time(21, 0)
        business_start = time(9, 0)
        business_end = time(18, 0)
        weekend_end = time(21, 0)

        is_weekend = weekday >= 5  # Saturday or Sunday

        # Normalize TPV into a canonical "low-high" range string, or 'None'.
        def parse_tpv_range(value):
            if not value or str(value).strip().lower() in {'none', ''}:
                return 'None'
            # Non-string input raises here and is caught by the outer handler.
            value = value.upper().replace('K', '000').replace('M', '000000')
            value = re.sub(r'[£$€¥]', '', value)
            value = value.replace(' – ', '-').replace(' ', '').strip()
            if '-' in value:
                low, high = map(int, re.findall(r'\d+', value))
                return f"{low}-{high}"
            elif value.isdigit():
                return f"{value}-{value}"
            return 'None'

        parsed_tpv = parse_tpv_range(estimated_volume)

        # --- Rule 1: all weekday leads during 7-9 AM or 6-9 PM windows ---
        if not is_weekend and (
            early_morning <= current_time <= late_morning
            or evening_start <= current_time <= evening_end
        ):
            return {"send": True, "max_attempts": max_attempts}

        # --- Rule 2: weekend leads between 7 AM and 9 PM ---
        if early_morning <= current_time <= weekend_end:
            if weekday == 5:  # Saturday
                if country in {"France", "United Kingdom", "Italy", "Spain", "Germany"}:
                    return {"send": True, "max_attempts": max_attempts}
            elif weekday == 6:  # Sunday — Germany excluded on Sundays
                if country in {"France", "United Kingdom", "Italy", "Spain"}:
                    return {"send": True, "max_attempts": max_attempts}

        # --- Rule 3: Weekday 9 AM–6 PM only if low TPV or None ---
        if not is_weekend and business_start <= current_time <= business_end:
            if country in {"France", "United Kingdom"}:
                if parsed_tpv in {'None', '0-35000'}:
                    return {"send": True, "max_attempts": max_attempts}
            elif country in {"Italy", "Spain"}:
                if parsed_tpv == '0-35000':
                    return {"send": True, "max_attempts": max_attempts}

        # Otherwise, don't send
        return {"send": False, "max_attempts": max_attempts}
    except Exception as e:
        # Best-effort: any unexpected failure defaults to "do not send".
        print(f"[AI Caller Decision Error] {str(e)}")
        return {"send": False, "max_attempts": 3}


# Function to preprocess a single lead into the model's feature matrix.
def preprocess_single_lead(data):
    """Convert a raw lead dict into a one-row DataFrame matching model_features.

    Performs column cleanup, availability-flag feature engineering, TPV/turnover
    range normalization and combination, binning, and one-hot encoding, then
    aligns columns to the trained model's expected feature order.
    """
    df = pd.DataFrame([data])  # Convert dict to DataFrame
    df.columns = df.columns.str.strip().str.replace('"', '').str.replace(';', ',')

    # Handle missing values (normalize NaN to pd.NA for both dtype groups).
    cat_cols = df.select_dtypes(include=['object']).columns
    df[cat_cols] = df[cat_cols].fillna(pd.NA)
    num_cols = df.select_dtypes(include=['float64', 'int64']).columns
    df[num_cols] = df[num_cols].fillna(pd.NA)

    # Guarantee every column the pipeline touches exists, even if absent
    # from the incoming payload.
    expected_columns = ['Email', 'Phone', 'GA Campaign', 'LP: Campaign',
                        'Lead Source', 'GA Source', 'Prospect product interest',
                        'Estimated Yearly Transaction Volume', 'Estimated Turnover']
    for col in expected_columns:
        if col not in df.columns:
            df[col] = pd.NA

    # Feature engineering: binary availability flags per contact/source field.
    df['Email Available'] = df['Email'].apply(lambda x: 1 if pd.notna(x) else 0)
    df['Phone Available'] = df['Phone'].apply(lambda x: 1 if pd.notna(x) else 0)
    df['GA Campaign Available'] = df['GA Campaign'].apply(lambda x: 1 if pd.notna(x) else 0)
    df['LP: Campaign Available'] = df['LP: Campaign'].apply(lambda x: 1 if pd.notna(x) else 0)
    df['Lead Source Available'] = df['Lead Source'].apply(lambda x: 1 if pd.notna(x) else 0)
    df['GA Source Available'] = df['GA Source'].apply(lambda x: 1 if pd.notna(x) else 0)

    # Assign default product interest based on estimated yearly transaction volume.
    def assign_product_interest(row):
        if pd.isna(row['Prospect product interest']) or row['Prospect product interest'] == '':
            if pd.notna(row['Estimated Yearly Transaction Volume']):
                try:
                    value = row['Estimated Yearly Transaction Volume']
                    if '-' in value:
                        low, high = map(int, value.split('-'))
                        midpoint = (low + high) / 2
                    else:
                        midpoint = int(value)
                    return 'Payment, POS Lite' if midpoint < 60000 else 'POS Pro, Kiosk'
                except Exception:
                    return 'Payment, POS Pro'  # Fallback value when transaction volume is invalid
            else:
                return ''  # Default when transaction volume is missing
        return row['Prospect product interest']

    df['Prospect product interest'] = df.apply(assign_product_interest, axis=1)
    df['POS Pro Available'] = df['Prospect product interest'].apply(
        lambda x: 1 if 'POS Pro' in str(x) else 0)
    df['Payment Available'] = df['Prospect product interest'].apply(
        lambda x: 1 if 'Payment' in str(x) else 0)
    df['Product Interest Available'] = df['Prospect product interest'].apply(
        lambda x: 1 if pd.notna(x) and x != '' else 0)

    # Aggregate availability counts.
    df['Contacts Available'] = df[['Email Available', 'Phone Available']].sum(axis=1)
    df['Sources Available'] = df[['Lead Source Available', 'GA Source Available']].sum(axis=1)
    df['Campaigns Available'] = df[['GA Campaign Available', 'LP: Campaign Available']].sum(axis=1)

    # Drop the raw columns and per-field flags now folded into aggregates.
    df = df.drop(columns=['Email', 'Phone', 'GA Campaign', 'LP: Campaign',
                          'Lead Source', 'GA Source', 'Email Available',
                          'Phone Available', 'GA Campaign Available',
                          'LP: Campaign Available', 'Lead Source Available',
                          'GA Source Available', 'Prospect product interest'])

    # Convert free-form volume/turnover strings to canonical "low-high" ranges
    # (same normalization used during model training).
    def convert_to_numeric_range(value):
        if pd.isna(value) or value == '':
            return "60000-100000"  # Default value when missing
        value = value.replace('\xa0', '').replace("'", '').replace(',', '').strip()  # Clean unwanted characters
        value = re.sub(r'[£$€¥]', '', value)  # Remove currency symbols
        value = value.replace(' – ', ' - ')  # Fix en dash to regular dash
        value = value.replace('K', '000').replace('M', '000000')
        if '+' in value:
            value = value.replace('+', '')  # Remove '+'
            if value.isdigit():
                return f"{value}-{int(value) * 2}"  # Convert "5M+" to "5000000-10000000"
        if '-' in value:
            low, high = value.split('-')
            low = ''.join(re.findall(r'\d+', low))  # Keep only numeric characters
            high = ''.join(re.findall(r'\d+', high))  # Keep only numeric characters
            return f"{low}-{high}" if low and high else None
        value = ''.join(re.findall(r'\d+', value))  # Keep only numeric characters
        return value if value else "60000-100000"  # Default when not interpretable

    df['Estimated Yearly Transaction Volume'] = \
        df['Estimated Yearly Transaction Volume'].apply(convert_to_numeric_range)
    df['Estimated Turnover'] = df['Estimated Turnover'].apply(convert_to_numeric_range)

    # Snap each turnover range onto the closest transaction-volume range
    # (by midpoint), mirroring the training-time alignment logic.
    def align_turnover_to_transaction(value, transaction_values):
        if pd.isna(value):
            return value  # Handle missing values
        transaction_midpoints = []
        for transaction_range in transaction_values:
            if '-' in transaction_range:
                low, high = map(int, transaction_range.split('-'))
                transaction_midpoints.append((low + high) // 2)
            else:
                transaction_midpoints.append(int(transaction_range))
        if '-' in value:
            low, high = map(int, value.split('-'))
            turnover_midpoint = (low + high) // 2
        else:
            turnover_midpoint = int(value)
        closest_index = min(range(len(transaction_midpoints)),
                            key=lambda i: abs(transaction_midpoints[i] - turnover_midpoint))
        return transaction_values[closest_index]

    unique_transaction_values = df['Estimated Yearly Transaction Volume'].dropna().unique()
    df['Estimated Turnover'] = df['Estimated Turnover'].apply(
        lambda x: align_turnover_to_transaction(x, unique_transaction_values))

    # Prefer transaction volume; fall back to turnover; "0-0" when both missing.
    def combine_transaction_and_turnover(row):
        if pd.notna(row["Estimated Yearly Transaction Volume"]) and \
                row["Estimated Yearly Transaction Volume"] != "0-0":
            return row["Estimated Yearly Transaction Volume"]
        elif pd.notna(row["Estimated Turnover"]) and row["Estimated Turnover"] != "0-0":
            return row["Estimated Turnover"]
        return "0-0"  # Default if both are missing

    df["Combined Volume and Turnover"] = df.apply(combine_transaction_and_turnover, axis=1)

    # Midpoint of the combined "low-high" range; None when not a range.
    def calculate_midpoint(value):
        if pd.isna(value) or '-' not in value:
            return None
        try:
            start, end = map(int, value.split('-'))
            return (start + end) / 2
        except ValueError:
            return None

    df['Combined Midpoint'] = df['Combined Volume and Turnover'].apply(calculate_midpoint)

    # Fixed volume bins matching the training pipeline.
    def generate_bins_and_labels():
        bins = [0, 35000, 60000, 100000, 200000, 400000, 600000,
                1000000, 2000000, 5000000, float('inf')]
        labels = [f'Bin {i+1}' for i in range(len(bins) - 1)]
        return bins, labels

    bins_combined, labels_combined = generate_bins_and_labels()
    df['Combined Volume Category'] = pd.cut(df['Combined Midpoint'],
                                            bins=bins_combined,
                                            labels=labels_combined,
                                            include_lowest=True)

    # One-hot encode categorical columns.
    # NOTE(review): 'Pos Pro Segmentation Level 3' and 'Sourcing Direction' are
    # assumed present in the payload; if absent, get_dummies raises and the
    # caller's fallback path handles it — confirm this is intended.
    cat_columns = ['Pos Pro Segmentation Level 3', 'Combined Volume Category',
                   'Sourcing Direction']
    df_encoded = pd.get_dummies(df, columns=cat_columns, drop_first=True)

    # Drop intermediate string columns not fed to the model.
    drop_columns = ['Estimated Yearly Transaction Volume', 'Estimated Turnover',
                    'Combined Volume and Turnover']
    df_encoded = df_encoded.drop(columns=drop_columns)

    # Align to the exact feature set and ordering the model expects.
    for col in model_features:
        if col not in df_encoded.columns:
            df_encoded[col] = 0  # Assign 0 to missing columns
    df_encoded = df_encoded[model_features]  # Ensure the correct order of features

    return df_encoded


# Per-process counters; currently declared but never updated anywhere in this
# module — retained for interface stability.
low_score_counter = {"POS Pro": 0, "Payment": 0, "Payment or POS Lite": 0,
                     "global_count": 0, "score_1_count": 0, "score_2_count": 0,
                     "score_3_count": 0}


@app.post("/api/predict")
async def predict(lead: dict):
    """Score a lead and decide AI-caller routing.

    Returns a dict with "score" (1-10, pushed away from the 4-8 middle band),
    "conversion_probability" (category string: Hot/Warm/Cold),
    "send_to_ai_caller", and "max_attempts". On any error, returns a default
    score of 8 plus the error message and stack trace.
    """
    try:
        global low_score_counter
        # Preprocess the lead data
        processed_lead = preprocess_single_lead(lead)
        # Predict probability of conversion (class-1 probability), cast from
        # numpy.float32 to a plain Python float.
        probability = float(model.predict_proba(processed_lead)[0, 1])

        # Map probability to score (1 to 10), then push mid-range scores up
        # so results cluster toward the extremes.
        score = int(np.ceil(probability * 10))
        score = score + 2 if 4 <= score < 9 else score

        estimated_volume = lead.get("Estimated Yearly Transaction Volume")
        product_interest = str(lead.get("Prospect product interest", "")).strip()
        sourcing_direction = str(lead.get("Sourcing Direction", "")).strip()
        lp_campaign = str(lead.get("LP: Campaign", "")).strip()
        Leads_source_website = str(lead.get("Leads_source_Website__c", "")).strip()

        # Parsed but currently unused downstream — kept for parity with the
        # original flow.
        if product_interest:
            product_interest = set(map(str.strip, product_interest.split(";")))
        else:
            product_interest = set()

        # Determine if it should be sent to AI caller
        country = lead.get("Country")
        if "SAP-" in Leads_source_website:
            send_to_ai_caller = False
            max_attempts = 3
        elif sourcing_direction.lower() == 'inbound' and lp_campaign != 'London-Coffee-Festival-2025':
            ai_caller_decision = should_send_to_ai_caller(estimated_volume, country)
            send_to_ai_caller = ai_caller_decision["send"]
            max_attempts = ai_caller_decision["max_attempts"]
        else:
            send_to_ai_caller = False
            max_attempts = 3

        # Map score to conversion category
        if score >= 7:
            conversion_category = "Hot"
        elif 4 <= score <= 6:
            conversion_category = "Warm"
        else:
            conversion_category = "Cold"

        return {
            "score": score,
            "conversion_probability": conversion_category,  # Return as string category
            # BUG FIX: was hard-coded to False, silently discarding the
            # routing decision computed above.
            "send_to_ai_caller": send_to_ai_caller,
            "max_attempts": max_attempts
        }
    except Exception as e:
        # Capture and return detailed error information
        error_message = str(e)
        stack_trace = traceback.format_exc()
        return {
            "score": 8,
            "conversion_probability": "Hot",  # score 8 falls in the "Hot" band (>= 7)
            "error": f"An error occurred, defaulting score to 8. {error_message}",
            "stack_trace": stack_trace
        }