"""Interactive Streamlit demo: train a small ANN to predict fertilizer type.

Loads ``f2.csv``, preprocesses numeric + categorical features, trains a
3-layer Keras classifier on demand, visualizes training curves, and offers
a live prediction form driven by the trained model kept in session state.
"""

import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense


@st.cache_data
def load_data():
    """Read the fertilizer dataset; cached by Streamlit across reruns."""
    return pd.read_csv("f2.csv")


def _build_model(n_inputs, n_classes):
    """Construct and compile the ANN: 16 -> 32 -> softmax(n_classes)."""
    ann = Sequential()
    ann.add(Input(shape=(n_inputs,)))
    ann.add(Dense(16, activation='relu'))
    ann.add(Dense(32, activation='relu'))
    ann.add(Dense(n_classes, activation='softmax'))
    # Integer labels from LabelEncoder -> sparse categorical crossentropy.
    ann.compile(loss='sparse_categorical_crossentropy',
                optimizer='adam', metrics=['accuracy'])
    return ann


def _parse_summary_rows(summary_lines):
    """Best-effort extraction of (layer, output shape, param #) rows from a
    captured ``model.summary()`` text dump.

    Handles both the legacy ASCII table and the Keras 3 box-drawing table
    (which uses '│' rather than '|' as the column separator), falling back
    to whitespace splitting when no separator is found.
    """
    rows = []
    in_table = False
    for line in summary_lines:
        # The header row marks the start of the layer table.
        if 'Layer' in line and 'Output' in line and 'Param' in line:
            in_table = True
            continue
        if not in_table:
            continue
        stripped = line.strip()
        # Skip rule lines ('====') and blank lines (all() is True on '').
        if all(ch == '=' for ch in stripped):
            continue
        # The totals footer ends the table.
        if ('Total params' in line or 'Trainable params' in line
                or 'Non-trainable params' in line):
            break
        # Prefer explicit column separators; Keras 3 emits U+2502 '│'.
        sep = '│' if '│' in line else '|'
        parts = [p.strip() for p in line.split(sep) if p.strip()]
        if len(parts) == 3:
            rows.append(parts)
            continue
        # Fallback: collapse runs of whitespace and take the first 3 fields.
        parts = [p.strip() for p in line.split(' ') if p.strip()]
        if len(parts) >= 3:
            rows.append(parts[:3])
    return rows


df = load_data()
st.title("Fertilizer Prediction with ANN - Interactive Demo")

# ---- Feature/target definitions ----------------------------------------
# NOTE: 'Temparature' matches the dataset's own (misspelled) column name.
target = 'Fertilizer'
cat_features = ['Soil_Type', 'Crop_Type']
num_features = ['Temparature', 'Humidity', 'Moisture',
                'Nitrogen', 'Potassium', 'Phosphorous']

X = df.drop(columns=[target])
y = df[target]

# Scale numerics, one-hot categoricals; ignore unseen categories at predict.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), num_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), cat_features),
    ]
)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=27)
X_train = preprocessor.fit_transform(X_train)
X_test = preprocessor.transform(X_test)

le = LabelEncoder()
y_train = le.fit_transform(y_train)
y_test = le.transform(y_test)

# ---- Sidebar hyper-parameters -------------------------------------------
st.sidebar.header("Model Parameters")
epochs = st.sidebar.slider("Epochs", 5, 100, 30)
batch_size = st.sidebar.selectbox("Batch Size", [8, 16, 32, 64], index=1)

model = _build_model(X_train.shape[1], len(np.unique(y_train)))

# ---- Training -----------------------------------------------------------
if st.button("Train Model"):
    history = model.fit(X_train, y_train, validation_split=0.2,
                        epochs=epochs, batch_size=batch_size, verbose=0)
    st.success("Model training complete.")

    st.subheader("Model Summary")
    # Capture the textual summary via print_fn instead of stdout.
    model_summary = []
    model.summary(print_fn=lambda line: model_summary.append(line))
    st.write("Model Summary")
    st.code("\n".join(model_summary))

    table_data = _parse_summary_rows(model_summary)
    if table_data:
        if len(table_data[0]) == 3:
            model_df = pd.DataFrame(
                table_data,
                columns=["Layer (type)", "Output Shape", "Param #"])
            st.table(model_df)
        else:
            st.warning("Could not create table: Inconsistent number of "
                       "columns found.")
            st.write("Extracted Data (for debugging):")
            st.write(table_data)
    else:
        # Fix: the original emitted an empty (blank) warning here.
        st.warning("Could not parse layer rows from the model summary.")

    # ---- Training curves ------------------------------------------------
    st.subheader("Training and Validation Metrics")
    fig, ax = plt.subplots(2, 1, figsize=(8, 6))
    ax[0].plot(history.history['loss'], label='Loss')
    ax[0].plot(history.history['val_loss'], label='Val Loss')
    ax[0].legend()
    ax[0].set_title("Loss vs Validation Loss")
    ax[1].plot(history.history['accuracy'], label='Accuracy')
    ax[1].plot(history.history['val_accuracy'], label='Val Accuracy')
    ax[1].legend()
    ax[1].set_title("Accuracy vs Validation Accuracy")
    st.pyplot(fig)

    st.write(f"**Minimum Validation Loss:** "
             f"{min(history.history['val_loss']):.4f}")
    st.write(f"**Best Validation Accuracy:** "
             f"{max(history.history['val_accuracy']):.4f}")

    # Persist fitted artifacts so the prediction UI survives reruns.
    st.session_state['model'] = model
    st.session_state['preprocessor'] = preprocessor
    st.session_state['label_encoder'] = le

# ---- Prediction UI (only after a model has been trained) ----------------
if 'model' in st.session_state:
    st.subheader("🌾 Predict Fertilizer Type Based on Input Features")

    input_soil = st.selectbox("Select Soil Type", df["Soil_Type"].unique())
    input_crop = st.selectbox("Select Crop Type", df["Crop_Type"].unique())
    input_temp = st.slider("Temperature (°C)",
                           float(df["Temparature"].min()),
                           float(df["Temparature"].max()),
                           float(df["Temparature"].mean()))
    input_humidity = st.slider("Humidity (%)",
                               float(df["Humidity"].min()),
                               float(df["Humidity"].max()),
                               float(df["Humidity"].mean()))
    input_moisture = st.slider("Moisture (%)",
                               float(df["Moisture"].min()),
                               float(df["Moisture"].max()),
                               float(df["Moisture"].mean()))
    input_nitrogen = st.slider("Nitrogen Level",
                               float(df["Nitrogen"].min()),
                               float(df["Nitrogen"].max()),
                               float(df["Nitrogen"].mean()))
    input_potassium = st.slider("Potassium Level",
                                float(df["Potassium"].min()),
                                float(df["Potassium"].max()),
                                float(df["Potassium"].mean()))
    input_phosphorous = st.slider("Phosphorous Level",
                                  float(df["Phosphorous"].min()),
                                  float(df["Phosphorous"].max()),
                                  float(df["Phosphorous"].mean()))

    # Single-row frame with the exact training column names/spellings.
    input_dict = {
        "Soil_Type": input_soil,
        "Crop_Type": input_crop,
        "Temparature": input_temp,
        "Humidity": input_humidity,
        "Moisture": input_moisture,
        "Nitrogen": input_nitrogen,
        "Potassium": input_potassium,
        "Phosphorous": input_phosphorous,
    }
    input_df = pd.DataFrame([input_dict])
    X_input = st.session_state['preprocessor'].transform(input_df)

    probs = st.session_state['model'].predict(X_input)[0]
    pred_class_idx = np.argmax(probs)
    pred_label = st.session_state['label_encoder'].inverse_transform(
        [pred_class_idx])[0]
    st.success(f"🌟 Predicted Fertilizer: **{pred_label}**")

    # Per-class probability bar chart.
    fig, ax = plt.subplots()
    fertilizer_classes = st.session_state['label_encoder'].classes_
    ax.bar(fertilizer_classes, probs)
    ax.set_ylabel("Prediction Probability")
    ax.set_title("Fertilizer Prediction Confidence")
    plt.xticks(rotation=45)
    st.pyplot(fig)