import os
import tempfile

import gradio as gr
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, precision_score

# Shared application state. Every slot stays None until the handler that
# owns it has run successfully.

# Training side: the uploaded sheet, the fitted classifier and vectorizer,
# and the text of the most recent evaluation summary.
df_train = model = vectorizer = test_metrics = None

# Batch-prediction side: the uploaded sheet and the scored copy kept for export.
df_predict = df_predict_results = None

def load_training_file(file):
    """Read the uploaded training workbook into the module-level ``df_train``.

    Args:
        file: Value delivered by the upload component, or ``None`` when
            nothing was uploaded. An object exposing the path as ``.name``
            and a plain path string are both accepted.

    Returns:
        A 3-tuple ``(status message, text-column dropdown update,
        target-column dropdown update)``. On success the dropdowns list the
        sheet's columns, preselecting the first column as text and the last
        as target. When the file cannot be read or has no columns, an error
        message is returned and both dropdowns are left untouched so they
        keep matching whatever data was loaded before.
    """
    global df_train
    if file is None:
        return "❌ Please upload a file.", gr.update(choices=[], value=None), gr.update(choices=[], value=None)

    # Fall back to the value itself when it is already a path string.
    path = getattr(file, "name", file)

    try:
        loaded = pd.read_excel(path)
    except Exception as exc:  # UI boundary: report the failure instead of crashing the handler
        return f"❌ Could not read the file: {exc}", gr.update(), gr.update()

    col_names = list(loaded.columns)
    if not col_names:
        # Indexing col_names[0] below would raise IndexError on an empty sheet.
        return "❌ The file contains no columns.", gr.update(), gr.update()

    # Publish the new frame only once it is known to be usable.
    df_train = loaded

    return (
        f"✅ Loaded training file with {len(df_train)} rows",
        gr.update(choices=col_names, value=col_names[0]),
        gr.update(choices=col_names, value=col_names[-1]),
    )

def interpret_score(score):
    """Translate an accuracy value into a one-line qualitative verdict.

    Scores below 0.6 are reported as low, scores from 0.6 up to (but not
    including) 0.8 as moderate, and anything else as strong.
    """
    # Exclusive upper bounds, checked in ascending order.
    bands = (
        (0.6, "🔴 The model performance is LOW. Consider improving your data or features."),
        (0.8, "🟠 The model performance is MODERATE. It may work but could be improved."),
    )
    for upper_bound, verdict in bands:
        if score < upper_bound:
            return verdict
    return "🟢 The model performance is STRONG. The model is reliable."

def train_model(text_column, target_column):
    """Fit a TF-IDF + logistic-regression classifier on the loaded training data.

    Rows missing either selected column are dropped, the remainder is split
    80/20 (fixed ``random_state=42``), the model is fitted on the larger part
    and evaluated on the held-out part.

    Args:
        text_column: Name of the ``df_train`` column holding the input text.
        target_column: Name of the ``df_train`` column holding the labels.

    Returns:
        A status string: either an error message starting with "❌", or a
        success message containing accuracy, weighted precision, a
        qualitative verdict and the classification report.

    Side effects:
        On success only, replaces the module-level ``model``, ``vectorizer``
        and ``test_metrics``. A failed run leaves the previous ones in place.
    """
    global model, vectorizer, test_metrics

    if df_train is None:
        return "❌ No training data loaded."

    if text_column not in df_train.columns or target_column not in df_train.columns:
        return "❌ Invalid column selection."

    df_filtered = df_train.dropna(subset=[text_column, target_column])

    if len(df_filtered) < 10:
        return "❌ Not enough data after filtering for training. Need at least 10 samples."

    # The vectorizer expects strings; spreadsheet cells holding numbers or
    # dates would otherwise make it fail.
    texts = df_filtered[text_column].astype(str)
    labels = df_filtered[target_column]

    X_train, X_test, y_train, y_test = train_test_split(
        texts, labels, test_size=0.2, random_state=42
    )

    # Checked on the training part because that is what the classifier sees.
    if y_train.nunique() < 2:
        return "❌ Training data must contain at least 2 different labels in the target column."

    # Work on local objects so a failed fit cannot leave the module-level
    # model/vectorizer pair half-updated.
    new_vectorizer = TfidfVectorizer()
    new_model = LogisticRegression(max_iter=1000)
    try:
        X_train_vec = new_vectorizer.fit_transform(X_train)
        new_model.fit(X_train_vec, y_train)
    except ValueError as exc:
        # e.g. no usable tokens in the text column.
        return f"❌ Training failed: {exc}"

    X_test_vec = new_vectorizer.transform(X_test)
    y_pred = new_model.predict(X_test_vec)

    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
    report = classification_report(y_test, y_pred, zero_division=0)

    performance_msg = interpret_score(accuracy)

    model, vectorizer = new_model, new_vectorizer
    test_metrics = (
        f"Accuracy: {accuracy:.2%}\n"
        f"Precision (weighted): {precision:.2%}\n\n"
        f"{performance_msg}\n\n"
        f"Classification Report:\n{report}"
    )

    return f"✅ Model trained on {len(df_filtered)} examples.\n\nTest set evaluation:\n{test_metrics}"

def predict_label(text_input):
    """Classify a single piece of text with the trained model.

    Args:
        text_input: The text to classify.

    Returns:
        A status string: the predicted label with the highest class
        probability as "confidence", or an error message when no model has
        been trained or the input is empty.
    """
    if model is None or vectorizer is None:
        return "❌ Model is not trained yet."

    # An empty input becomes an all-zero feature vector, so any "prediction"
    # for it would be meaningless; ask for text instead.
    if text_input is None or not str(text_input).strip():
        return "❌ Please enter some text to classify."

    X = vectorizer.transform([str(text_input)])
    prediction = model.predict(X)[0]
    proba = model.predict_proba(X).max()

    return f"🔮 Prediction: {prediction} (confidence: {proba:.2%})"

def load_prediction_file(file):
    """Read the uploaded batch-prediction workbook into the module-level ``df_predict``.

    Args:
        file: Value delivered by the upload component, or ``None`` when
            nothing was uploaded. An object exposing the path as ``.name``
            and a plain path string are both accepted.

    Returns:
        A 2-tuple ``(status message, text-column dropdown update)``. On
        success the dropdown lists the sheet's columns with the first one
        preselected. When the file cannot be read or has no columns, an
        error message is returned and the dropdown is left untouched.
    """
    global df_predict
    if file is None:
        return "❌ Please upload a prediction file.", gr.update(choices=[], value=None)

    # Fall back to the value itself when it is already a path string.
    path = getattr(file, "name", file)

    try:
        loaded = pd.read_excel(path)
    except Exception as exc:  # UI boundary: report the failure instead of crashing the handler
        return f"❌ Could not read the file: {exc}", gr.update()

    col_names = list(loaded.columns)
    if not col_names:
        # Indexing col_names[0] below would raise IndexError on an empty sheet.
        return "❌ The file contains no columns.", gr.update()

    # Publish the new frame only once it is known to be usable.
    df_predict = loaded
    return f"✅ Loaded prediction file with {len(df_predict)} rows", gr.update(choices=col_names, value=col_names[0])

def run_batch_prediction(text_column):
    """Score every row of the loaded prediction file with the trained model.

    Rows whose text cell is missing are dropped. The remaining rows get two
    extra columns: "Prediction" (the predicted label) and "Confidence" (the
    highest class probability for that row).

    Args:
        text_column: Name of the ``df_predict`` column holding the text.

    Returns:
        A 2-tuple ``(status message, preview)`` where ``preview`` is the
        first 10 scored rows, or ``None`` when nothing could be scored.

    Side effects:
        On success, stores the full scored frame in the module-level
        ``df_predict_results`` for later export.
    """
    global df_predict_results
    if model is None or vectorizer is None:
        return "❌ Model is not trained yet.", None
    if df_predict is None:
        return "❌ No prediction file loaded.", None
    if text_column not in df_predict.columns:
        return "❌ Invalid text column selected.", None

    df_filtered = df_predict.dropna(subset=[text_column]).copy()
    if df_filtered.empty:
        # The classifier rejects an input with zero samples.
        return "❌ No rows with text to predict in the selected column.", None

    # The vectorizer expects strings; cast so numeric or date cells do not
    # abort the whole batch.
    X = vectorizer.transform(df_filtered[text_column].astype(str))
    preds = model.predict(X)
    probs = model.predict_proba(X).max(axis=1)

    df_filtered["Prediction"] = preds
    df_filtered["Confidence"] = probs

    df_predict_results = df_filtered  # save for export

    # Show preview of first 10 rows
    return f"✅ Batch prediction completed on {len(df_filtered)} rows.", df_filtered.head(10)

def export_predictions():
    """Write the latest batch-prediction results to an Excel file.

    Returns:
        The path of the written ``predictions_output.xlsx`` file, or ``None``
        when no batch prediction has been run yet.
    """
    if df_predict_results is None:
        return None
    # Use the system temp directory rather than a hard-coded absolute path:
    # it exists and is writable on every platform. Each export overwrites
    # the previous file, as before.
    export_path = os.path.join(tempfile.gettempdir(), "predictions_output.xlsx")
    df_predict_results.to_excel(export_path, index=False)
    return export_path

# Declares the whole interface (title, explainer, the three workflow steps and
# the export control) and then wires each button to its handler function.
with gr.Blocks() as demo:
    gr.Markdown("# 🧠 Text Classification App")

    # Static explainer describing the TF-IDF + logistic-regression pipeline.
    gr.Markdown(
        """
        ### How does this model work?
        This app uses a **Logistic Regression** model trained on your text data.
        - Text data is transformed into numbers using **TF-IDF vectorization**, which converts text into features based on word importance.
        - The model learns patterns from labeled examples you provide.
        - After training, it can predict the label/category of new text inputs.
        \n
        **Note:** Model performance depends heavily on quality and quantity of your data.
        """
    )

    # --- Step 1: upload training data, pick columns, train -----------------
    gr.Markdown(
        "### Step 1: Upload your training data\n"
        "Upload an Excel file (`.xlsx`) containing your texts and corresponding labels."
    )

    with gr.Row():
        file_input = gr.File(label="Upload Training Excel File (.xlsx)", file_types=[".xlsx"],
                             interactive=True)
        load_button = gr.Button("📂 Load Training File")

    # Receives the status message returned by load_training_file.
    status_output = gr.Markdown()

    gr.Markdown(
        "After loading, select the text and target columns for training."
    )

    # Both dropdowns start without choices; load_training_file fills them with
    # the column names of the uploaded sheet.
    with gr.Row():
        text_column_dropdown = gr.Dropdown(label="Text column",
                                           interactive=True,
                                           info="Select the column that contains the text data.")
        target_column_dropdown = gr.Dropdown(label="Target column",
                                             interactive=True,
                                             info="Select the column that contains the labels to predict.")

    train_button = gr.Button("🚀 Train Model")
    # Receives the training/evaluation summary returned by train_model.
    training_status = gr.Markdown()

    # --- Step 2: single-text prediction ------------------------------------
    gr.Markdown(
        "### Step 2: Predict on single texts\n"
        "Enter a text below to get the model's predicted label."
    )

    with gr.Row():
        input_text = gr.Textbox(label="Enter text to classify", placeholder="Type some text here...")
        predict_button = gr.Button("🔍 Predict Single")

    # Receives the prediction string returned by predict_label.
    prediction_output = gr.Markdown()

    # --- Step 3: batch prediction and export -------------------------------
    gr.Markdown(
        "### Step 3: Batch prediction\n"
        "Upload a new Excel file with texts to predict multiple labels at once."
    )

    with gr.Row():
        pred_file_input = gr.File(label="Upload Prediction Excel File (.xlsx)", file_types=[".xlsx"])
        load_pred_button = gr.Button("📂 Load Prediction File")

    # Receives the status message returned by load_prediction_file.
    pred_status = gr.Markdown()

    # Filled by load_prediction_file with the prediction sheet's column names.
    pred_text_column_dropdown = gr.Dropdown(label="Text column for Prediction",
                                            info="Select the column in your prediction file containing text to classify.")

    batch_pred_button = gr.Button("⚡ Run Batch Prediction")
    batch_pred_status = gr.Markdown()
    # Read-only preview; run_batch_prediction returns the first 10 scored rows.
    batch_pred_preview = gr.Dataframe(headers=None, interactive=False)

    export_button = gr.Button("⬇️ Export Predictions")
    gr.Markdown(
        "Click **Export Predictions** to download the batch prediction results as an Excel file."
    )

    # Button connections
    # Load training sheet -> status text plus both column dropdowns.
    load_button.click(
        fn=load_training_file,
        inputs=file_input,
        outputs=[status_output, text_column_dropdown, target_column_dropdown]
    )

    # Train on the selected columns -> evaluation summary.
    train_button.click(
        fn=train_model,
        inputs=[text_column_dropdown, target_column_dropdown],
        outputs=training_status
    )

    # Classify the text box content -> prediction string.
    predict_button.click(
        fn=predict_label,
        inputs=input_text,
        outputs=prediction_output
    )

    # Load prediction sheet -> status text plus the text-column dropdown.
    load_pred_button.click(
        fn=load_prediction_file,
        inputs=pred_file_input,
        outputs=[pred_status, pred_text_column_dropdown]
    )

    # Score the prediction sheet -> status text plus the preview table.
    batch_pred_button.click(
        fn=run_batch_prediction,
        inputs=pred_text_column_dropdown,
        outputs=[batch_pred_status, batch_pred_preview]
    )

    # export_predictions takes no inputs and returns a file path (or None).
    # NOTE(review): the output gr.File is instantiated inline here instead of
    # being declared with the other components above — confirm it renders
    # where intended.
    export_button.click(
        fn=export_predictions,
        inputs=[],
        outputs=gr.File(file_types=[".xlsx"])
    )

# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()