import os
import joblib
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

def train_model(*, test_size=0.2, random_state=42, n_estimators=100, model_path=None):
    """Train a random forest classifier on the Iris dataset and save it.

    Loads Iris, holds out a test split, fits a ``RandomForestClassifier``,
    prints the accuracy on the held-out split, and serializes the fitted
    model with ``joblib.dump``. Called with no arguments it uses a 20% test
    split, seed 42, 100 trees, and writes the model to
    ``<directory of this file>/../app/model_data/model.joblib``.

    Args:
        test_size: Forwarded to ``train_test_split`` as ``test_size``.
        random_state: Seed used for both the train/test split and the
            classifier, so runs are reproducible.
        n_estimators: Number of trees in the forest.
        model_path: Destination file for the serialized model. ``None``
            selects the default location described above.

    Returns:
        The accuracy score measured on the held-out test split.
    """
    print("Loading Iris dataset...")
    iris = load_iris()
    # Keep the feature names on the frame so the model is fitted with
    # named columns rather than a bare array.
    X = pd.DataFrame(iris.data, columns=iris.feature_names)
    y = iris.target

    print("Splitting dataset...")
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    print("Training Random Forest Classifier...")
    model = RandomForestClassifier(n_estimators=n_estimators, random_state=random_state)
    model.fit(X_train, y_train)

    print("Evaluating model...")
    predictions = model.predict(X_test)
    accuracy = accuracy_score(y_test, predictions)
    print(f"Model Accuracy: {accuracy:.4f}")

    if model_path is None:
        # Default target is the sibling ``app`` package; per the original
        # note, the deployed API is expected to read the model from there.
        model_dir = os.path.join(os.path.dirname(__file__), '..', 'app', 'model_data')
        model_path = os.path.join(model_dir, 'model.joblib')
    else:
        model_dir = os.path.dirname(model_path)

    # A bare filename has an empty dirname, and os.makedirs('') raises, so
    # only create the directory when there is one to create.
    if model_dir:
        os.makedirs(model_dir, exist_ok=True)

    print(f"Saving model to {model_path}...")
    joblib.dump(model, model_path)
    print("Training complete and model saved.")
    return accuracy

# Run training only when this file is executed as a script, so importing the
# module does not trigger a training run or write a model file.
if __name__ == "__main__":
    train_model()