"""Debug-oriented Streamlit app for a no-code text-classification workflow.

Provides a sidebar debug log, a resilient CSV-upload path that tries several
read strategies/encodings, pickle-based artifact persistence, and a simple
prediction pipeline (clean -> vectorize -> predict -> decode label).
"""

import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
import pickle
import io
import traceback
import sys
from datetime import datetime

# Import ML libraries with error handling so a missing dependency is surfaced
# in the UI instead of crashing the app at import time.
try:
    from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
    from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
    from sklearn.preprocessing import LabelEncoder
    st.success("โœ… Sklearn imported successfully")
except ImportError as e:
    st.error(f"โŒ Sklearn import error: {e}")

# Import custom modules with error handling (same rationale as above).
try:
    from NoCodeTextClassifier.EDA import Informations, Visualizations
    from NoCodeTextClassifier.preprocessing import process, TextCleaner, Vectorization
    from NoCodeTextClassifier.models import Models
    st.success("โœ… NoCodeTextClassifier imported successfully")
except ImportError as e:
    st.error(f"โŒ NoCodeTextClassifier import error: {e}")
    st.info("Please ensure NoCodeTextClassifier package is installed")

# Set page config
st.set_page_config(page_title="Debug Text Classification", page_icon="๐Ÿ”", layout="wide")

# Debug section: the checkbox gates every debug_log() call below.
st.sidebar.header("๐Ÿ” Debug Information")
debug_mode = st.sidebar.checkbox("Enable Debug Mode", value=True)


def debug_log(message, level="INFO"):
    """Write a timestamped message to the sidebar when debug mode is on.

    Args:
        message: Text to display.
        level: Severity tag shown next to the timestamp (e.g. INFO/WARNING/ERROR).
    """
    if debug_mode:
        timestamp = datetime.now().strftime("%H:%M:%S")
        st.sidebar.write(f"**{timestamp} [{level}]:** {message}")


def detailed_error_info(e):
    """Return a dict describing an exception: type name, message, and traceback.

    Must be called from inside an ``except`` block so ``traceback.format_exc()``
    captures the active exception.
    """
    return {
        'type': type(e).__name__,
        'message': str(e),
        'traceback': traceback.format_exc(),
    }


def inspect_uploaded_file(uploaded_file):
    """Log basic properties of a Streamlit UploadedFile and peek at its bytes.

    Leaves the file pointer rewound to 0 on success. Returns a dict with
    name/type/size/file_id, or None if inspection raised.
    """
    debug_log("๐Ÿ” Inspecting uploaded file...")
    try:
        file_info = {
            'name': uploaded_file.name,
            'type': uploaded_file.type,
            'size': uploaded_file.size,
            # file_id is not present on every Streamlit version, hence getattr.
            'file_id': getattr(uploaded_file, 'file_id', 'Not available'),
        }
        debug_log(f"File name: {file_info['name']}")
        debug_log(f"File type: {file_info['type']}")
        debug_log(f"File size: {file_info['size']} bytes")
        debug_log(f"File ID: {file_info['file_id']}")

        # Try to read first few bytes to confirm the buffer is readable.
        uploaded_file.seek(0)
        first_bytes = uploaded_file.read(100)
        debug_log(f"First 100 bytes type: {type(first_bytes)}")
        debug_log(f"First 100 bytes preview: {first_bytes[:50]}...")

        # Reset file pointer so subsequent readers start from the beginning.
        uploaded_file.seek(0)
        return file_info
    except Exception as e:
        error_info = detailed_error_info(e)
        debug_log(f"โŒ Error inspecting file: {error_info['type']}: {error_info['message']}", "ERROR")
        st.sidebar.error(f"File inspection error: {error_info['message']}")
        return None


def safe_read_csv_debug(uploaded_file, encoding_options=('utf-8', 'latin1', 'iso-8859-1', 'cp1252')):
    """Read an uploaded CSV, trying several read strategies and encodings.

    Tries each strategy (direct pandas read, BytesIO round-trip, StringIO
    decode) against each encoding in order, logging every attempt. Returns the
    first successfully parsed DataFrame, or None if everything failed.

    Note: the default is a tuple (not a list) to avoid the mutable-default
    pitfall; callers may still pass any iterable of encoding names.
    """
    debug_log("๐Ÿ”„ Starting CSV read process...")

    # Inspect file first; bail out if the buffer itself is unreadable.
    file_info = inspect_uploaded_file(uploaded_file)
    if file_info is None:
        return None

    # Each reader receives a rewound file object and an encoding name.
    def _read_direct(f, encoding):
        return pd.read_csv(f, encoding=encoding)

    def _read_bytesio(f, encoding):
        content = f.read()
        return pd.read_csv(io.BytesIO(content), encoding=encoding)

    def _read_stringio(f, encoding):
        content = f.read()
        if isinstance(content, bytes):
            content = content.decode(encoding)
        return pd.read_csv(io.StringIO(content))

    methods = [
        ("Direct pandas read", _read_direct),
        ("BytesIO method", _read_bytesio),
        ("StringIO method", _read_stringio),
    ]

    for method_name, read_func in methods:
        debug_log(f"๐Ÿ”„ Trying method: {method_name}")
        for encoding in encoding_options:
            try:
                debug_log(f" - Attempting encoding: {encoding}")
                uploaded_file.seek(0)
                df = read_func(uploaded_file, encoding)
                debug_log(f"โœ… Success with {method_name} + {encoding}")
                debug_log(f"DataFrame shape: {df.shape}")
                debug_log(f"Columns: {list(df.columns)}")
                st.success(f"File loaded successfully using {method_name} with {encoding} encoding")
                return df
            except UnicodeDecodeError as e:
                debug_log(f" - Unicode error with {encoding}: {str(e)}", "WARNING")
                continue
            except Exception as e:
                error_info = detailed_error_info(e)
                debug_log(f" - Error with {method_name} + {encoding}: {error_info['type']}: {error_info['message']}", "ERROR")
                # Surface permission/403 failures loudly; they indicate an
                # environment problem rather than a bad CSV.
                if "403" in str(e) or "permission" in str(e).lower():
                    st.error("๐Ÿšจ PERMISSION ERROR DETECTED!")
                    st.error(f"Method: {method_name}, Encoding: {encoding}")
                    st.error(f"Error type: {error_info['type']}")
                    st.error(f"Error message: {error_info['message']}")
                    st.code(error_info['traceback'])
                continue

    debug_log("โŒ All reading methods failed", "ERROR")
    st.error("All CSV reading methods failed. Check debug log for details.")
    return None


# Utility functions with debugging
def save_artifacts(obj, folder_name, file_name):
    """Pickle ``obj`` to ``folder_name/file_name``; return True on success."""
    debug_log(f"๐Ÿ’พ Saving {file_name} to {folder_name}")
    try:
        os.makedirs(folder_name, exist_ok=True)
        full_path = os.path.join(folder_name, file_name)
        with open(full_path, 'wb') as f:
            pickle.dump(obj, f)
        debug_log(f"โœ… Successfully saved {file_name}")
        return True
    except Exception as e:
        error_info = detailed_error_info(e)
        debug_log(f"โŒ Error saving {file_name}: {error_info['message']}", "ERROR")
        st.error(f"Save error: {error_info['message']}")
        return False


def load_artifacts(folder_name, file_name):
    """Unpickle and return ``folder_name/file_name``, or None on any failure.

    NOTE(review): pickle.load is only safe because these artifacts are written
    locally by save_artifacts; never point this at untrusted files.
    """
    debug_log(f"๐Ÿ“‚ Loading {file_name} from {folder_name}")
    try:
        full_path = os.path.join(folder_name, file_name)
        if not os.path.exists(full_path):
            debug_log(f"โŒ File not found: {full_path}", "ERROR")
            return None
        with open(full_path, 'rb') as f:
            obj = pickle.load(f)
        debug_log(f"โœ… Successfully loaded {file_name}")
        return obj
    except Exception as e:
        error_info = detailed_error_info(e)
        debug_log(f"โŒ Error loading {file_name}: {error_info['message']}", "ERROR")
        st.error(f"Load error: {error_info['message']}")
        return None


def load_model(model_name):
    """Load a pickled model from the ``models`` directory (None on failure)."""
    debug_log(f"๐Ÿค– Loading model: {model_name}")
    return load_artifacts("models", model_name)


def predict_text(model_name, text, vectorizer_type="tfidf"):
    """Run the full prediction pipeline for a single text.

    Loads the model, vectorizer, and label encoder; cleans and vectorizes the
    text; returns ``(predicted_label, prediction_proba)``. Either element (or
    both) is None when a step fails; probabilities are None when the model
    does not support ``predict_proba``.
    """
    debug_log(f"๐Ÿ”ฎ Starting prediction with {model_name}")
    try:
        # Load components; each loader reports its own failure, so we can
        # simply bail out on None.
        model = load_model(model_name)
        if model is None:
            return None, None

        vectorizer_file = f"{vectorizer_type}_vectorizer.pkl"
        vectorizer = load_artifacts("artifacts", vectorizer_file)
        if vectorizer is None:
            return None, None

        encoder = load_artifacts("artifacts", "encoder.pkl")
        if encoder is None:
            return None, None

        debug_log("๐Ÿงน Cleaning text...")
        text_cleaner = TextCleaner()
        clean_text = text_cleaner.clean_text(text)
        debug_log(f"Cleaned text preview: {clean_text[:50]}...")

        debug_log("๐Ÿ”ข Vectorizing text...")
        text_vector = vectorizer.transform([clean_text])
        debug_log(f"Vector shape: {text_vector.shape}")

        debug_log("๐ŸŽฏ Making prediction...")
        prediction = model.predict(text_vector)

        prediction_proba = None
        if hasattr(model, 'predict_proba'):
            try:
                prediction_proba = model.predict_proba(text_vector)[0]
                debug_log(f"Prediction probabilities: {prediction_proba}")
            except Exception:  # narrowed from bare except: don't swallow SystemExit/KeyboardInterrupt
                debug_log("No prediction probabilities available", "WARNING")

        predicted_label = encoder.inverse_transform(prediction)[0]
        debug_log(f"โœ… Prediction complete: {predicted_label}")
        return predicted_label, prediction_proba
    except Exception as e:
        error_info = detailed_error_info(e)
        debug_log(f"โŒ Prediction error: {error_info['message']}", "ERROR")
        st.error(f"Prediction error: {error_info['message']}")
        if debug_mode:
            st.code(error_info['traceback'])
        return None, None


# Main App
st.title('๐Ÿ” Debug Text Classification App')
st.write('Debug version to identify and fix issues')

# Environment info
if debug_mode:
    st.sidebar.subheader("๐Ÿ–ฅ๏ธ Environment Info")
    st.sidebar.write(f"Python version: {sys.version}")
    st.sidebar.write(f"Streamlit version: {st.__version__}")
    st.sidebar.write(f"Pandas version: {pd.__version__}")
    st.sidebar.write(f"Current working directory: {os.getcwd()}")

    # Check directory permissions by round-tripping a throwaway file.
    try:
        test_dir = "test_permissions"
        os.makedirs(test_dir, exist_ok=True)
        test_file = os.path.join(test_dir, "test.txt")
        with open(test_file, 'w') as f:
            f.write("test")
        os.remove(test_file)
        os.rmdir(test_dir)
        st.sidebar.success("โœ… File system permissions OK")
    except Exception as e:
        st.sidebar.error(f"โŒ File system permission issue: {e}")

# Sidebar navigation
section = st.sidebar.radio("Choose Section", ["File Upload Debug", "Data Analysis", "Train Model", "Predictions"])

# Session state initialization
if 'vectorizer_type' not in st.session_state:
    st.session_state.vectorizer_type = "tfidf"
if 'train_df' not in st.session_state:
    st.session_state.train_df = None

# File Upload Debug Section
if section == "File Upload Debug":
    st.subheader("๐Ÿ” File Upload Debugging")
    st.info("This section helps debug file upload issues. Upload your file and see detailed error information.")

    train_data = st.file_uploader("Upload training data (DEBUG MODE)", type=["csv"], key="debug_upload")

    if train_data is not None:
        st.write("### File Upload Detected!")

        # Show raw file info
        st.write("**Raw File Information:**")
        st.json({
            "name": train_data.name,
            "type": train_data.type if hasattr(train_data, 'type') else "Unknown",
            "size": train_data.size if hasattr(train_data, 'size') else "Unknown",
        })

        # Try to read the file
        st.write("### Attempting to Read File...")
        with st.spinner("Reading file with debug mode..."):
            df = safe_read_csv_debug(train_data)

        if df is not None:
            st.success("๐ŸŽ‰ File successfully loaded!")
            st.write("**Data Preview:**")
            st.dataframe(df.head())
            st.write(f"**Shape:** {df.shape}")
            st.write(f"**Columns:** {list(df.columns)}")
            st.write("**Data Types:**")
            st.write(df.dtypes)

            # Store in session state so other sections can use it.
            st.session_state.train_df = df
        else:
            st.error("โŒ Failed to load file. Check the debug log for details.")

            # Additional troubleshooting
            st.write("### ๐Ÿ”ง Troubleshooting Steps:")
            st.write("1. Check if your file is a valid CSV")
            st.write("2. Try saving your CSV with different encoding (UTF-8 recommended)")
            st.write("3. Check if file size is within limits")
            st.write("4. Ensure no special characters in filename")
            st.write("5. Try uploading from a different location")

# Other sections (simplified for debugging)
elif section == "Data Analysis":
    st.subheader("๐Ÿ“Š Data Analysis")
    if st.session_state.train_df is not None:
        df = st.session_state.train_df
        st.write("Using loaded data from debug session:")
        st.dataframe(df.head())

        # Basic analysis without custom modules if they fail
        st.write(f"**Shape:** {df.shape}")
        st.write(f"**Columns:** {list(df.columns)}")
        st.write(f"**Missing values:**")
        st.write(df.isnull().sum())
    else:
        st.warning("No data loaded. Please use 'File Upload Debug' section first.")

elif section == "Train Model":
    st.subheader("๐Ÿค– Train Model")
    st.info("Use this section after successfully loading data in debug mode.")
    if st.session_state.train_df is not None:
        st.success("Data available for training!")
        # Add your training logic here
    else:
        st.warning("No data loaded. Please use 'File Upload Debug' section first.")

elif section == "Predictions":
    st.subheader("๐Ÿ”ฎ Predictions")
    st.info("Use this section after training a model.")

    # Check for trained models
    if os.path.exists("models"):
        models = [f for f in os.listdir("models") if f.endswith('.pkl')]
        if models:
            st.write(f"Available models: {models}")
        else:
            st.info("No trained models found.")
    else:
        st.info("Models directory not found.")

# Debug summary
if debug_mode:
    st.sidebar.markdown("---")
    st.sidebar.subheader("๐Ÿ“‹ Debug Summary")
    if st.session_state.train_df is not None:
        st.sidebar.success("โœ… Data loaded successfully")
    else:
        st.sidebar.warning("โš ๏ธ No data loaded")