import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import pickle

# Train a logistic-regression classifier on the iris dataset and pickle it.
# The script runs top to bottom: load -> tabulate -> split -> fit -> score -> save.

# --- Load -------------------------------------------------------------
# The object returned by load_iris() supplies the feature matrix (.data),
# the label vector (.target) and the column names (.feature_names).
iris = load_iris()
X, y = iris.data, iris.target

# --- Tabulate (illustrative) -------------------------------------------
# Same data as a DataFrame, with the labels appended as a 'target' column.
# It is not read again in the script as shown; kept for inspection.
df = pd.DataFrame(X, columns=iris.feature_names)
df['target'] = y

# --- Split --------------------------------------------------------------
# Hold out 20% of the samples for evaluation; random_state is pinned so the
# same split is produced on every run.
split_options = {'test_size': 0.2, 'random_state': 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

# --- Fit ----------------------------------------------------------------
# fit() hands back the estimator itself, so construction and training can
# be chained into one expression.
model = LogisticRegression(max_iter=200).fit(X_train, y_train)

# --- Evaluate -----------------------------------------------------------
# Score on the held-out portion only, purely as a sanity check.
accuracy = model.score(X_test, y_test)
print(f"Model accuracy: {accuracy:.2f}")

# --- Persist ------------------------------------------------------------
# Binary mode is required for pickle output; the context manager closes the
# file even if serialization raises.
with open('model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

print("Model has been trained and saved as model.pkl")