"""Gradio app that trains one XGBoost classifier per card attribute.

The uploaded CSV is expected to contain the columns ``Dragon Suit``,
``Dragon Rank``, ``Tiger Suit``, ``Tiger Rank``, ``Lion Suit`` and
``Lion Rank``.  Each game is described by the cards of the previous
``n_games`` games, and six models (suit and rank for each hand) are trained,
evaluated and offered for download as a ZIP archive.
"""
import io
import tempfile
import warnings
import zipfile
from pathlib import Path

import gradio as gr
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from imblearn.over_sampling import SMOTE
from PIL import Image
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier

warnings.filterwarnings('ignore')

# Numerical encodings for the textual card labels found in the CSV.
SUIT_ORDER = {'spades': 0, 'hearts': 1, 'clubs': 2, 'diamonds': 3}
RANK_ORDER = {
    'ace': 0, '2': 1, '3': 2, '4': 3, '5': 4, '6': 5, '7': 6,
    '8': 7, '9': 8, '10': 9, 'jack': 10, 'queen': 11, 'king': 12,
}
# Hands and attributes, in the order used for feature and target columns.
HANDS = ('Dragon', 'Tiger', 'Lion')
ATTRIBUTES = ('Suit', 'Rank')


# Function to load and preprocess data
def load_and_preprocess_data(file):
    """Read the uploaded CSV and add a numeric ``'<Hand> <Attr> Num'`` column per card field.

    Args:
        file: Either an object exposing the CSV path as ``.name`` (what the
            original code relied on) or a plain path string.

    Returns:
        ``(data, None)`` on success, or ``(None, message)`` when the file is
        missing, unreadable, lacks a required column, or contains a suit/rank
        label that is not in the encoding tables.
    """
    if file is None:
        return None, "Error loading data: no file was uploaded"
    try:
        path = getattr(file, 'name', file)
        data = pd.read_csv(path)

        encodings = {'Suit': SUIT_ORDER, 'Rank': RANK_ORDER}
        for hand in HANDS:
            for attr in ATTRIBUTES:
                source = f'{hand} {attr}'
                # Normalise case/whitespace so "Hearts" or " ace" still map;
                # astype(str) also lets purely numeric rank columns map.
                labels = data[source].astype(str).str.strip().str.lower()
                encoded = labels.map(encodings[attr])
                unknown = encoded.isna()
                if unknown.any():
                    # Fail here with a precise message instead of letting NaN
                    # values surface later as an obscure training error.
                    examples = sorted(set(data.loc[unknown, source].astype(str)))[:5]
                    raise ValueError(
                        f"unrecognised values in column '{source}': {examples}"
                    )
                data[f'{source} Num'] = encoded.astype(int)
        return data, None
    except Exception as e:
        return None, f"Error loading data: {str(e)}"


# Feature engineering
def create_features(data, n_games=3):
    """Build lag features describing the ``n_games`` games preceding each game.

    For every row ``i >= n_games`` the feature vector holds, in this order:
    the six encoded card values of games ``i-1 .. i-n_games``; the
    suit*rank product per hand for the same games; and the mean and
    (population) standard deviation of all suits and of all ranks in that
    window.

    Args:
        data: DataFrame containing the six ``'<Hand> <Attr> Num'`` columns.
        n_games: Number of preceding games used as context.

    Returns:
        DataFrame with ``len(data) - n_games`` rows (empty when there are not
        enough games), positionally aligned with ``data.iloc[n_games:]``.
    """
    card_columns = [f'{hand} {attr} Num' for hand in HANDS for attr in ATTRIBUTES]
    # Pull the values into one array up front; per-element .iloc lookups
    # inside the loop are far slower and yield the same numbers.
    cards = data[card_columns].to_numpy()

    rows = []
    for i in range(n_games, len(cards)):
        window = cards[i - n_games:i]
        newest_first = window[::-1]  # t-1 first, then t-2, ...
        # Even positions are suits, odd positions are ranks (see card_columns).
        interactions = newest_first[:, 0::2] * newest_first[:, 1::2]
        suits = window[:, 0::2].ravel()
        ranks = window[:, 1::2].ravel()
        rows.append([
            *newest_first.ravel(),
            *interactions.ravel(),
            np.mean(suits),
            np.std(suits),
            np.mean(ranks),
            np.std(ranks),
        ])

    columns = ([f'{hand}_{attr}_t-{j}' for j in range(1, n_games + 1)
                for hand in ['Dragon', 'Tiger', 'Lion'] for attr in ['Suit', 'Rank']] +
               [f'{hand}_suit_rank_inter_t-{j}' for j in range(1, n_games + 1)
                for hand in ['Dragon', 'Tiger', 'Lion']] +
               ['suit_mean', 'suit_std', 'rank_mean', 'rank_std'])
    return pd.DataFrame(rows, columns=columns)


def _figure_to_image(fig):
    """Render ``fig`` to PNG in memory, close it, and return it as a PIL image."""
    buf = io.BytesIO()
    fig.savefig(buf, format='png')
    plt.close(fig)
    buf.seek(0)
    img = Image.open(buf)
    img.load()  # decode now so the image does not depend on the buffer later
    return img


# Function to plot confusion matrix
def plot_confusion_matrix(y_true, y_pred, title):
    """Return a PIL image of the confusion-matrix heatmap for the given labels."""
    cm = confusion_matrix(y_true, y_pred)
    fig, ax = plt.subplots(figsize=(6, 4))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
    ax.set_title(title)
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')
    return _figure_to_image(fig)


# Function to plot accuracy bar chart
def plot_accuracy_chart(accuracies):
    """Return a PIL image of a bar chart built from a ``{model name: accuracy}`` dict."""
    fig, ax = plt.subplots(figsize=(8, 5))
    ax.bar(list(accuracies.keys()), list(accuracies.values()), color='skyblue')
    ax.set_title('Model Accuracy Comparison')
    ax.set_ylabel('Accuracy')
    ax.tick_params(axis='x', rotation=45)
    ax.set_ylim(0, 1)
    return _figure_to_image(fig)


# Function to create a ZIP file of models
def create_model_zip(models, extra_artifacts=None):
    """Serialise objects with joblib into an in-memory ZIP archive.

    Args:
        models: Mapping of model name to model; each entry is stored as
            ``'<name>_model.pkl'``.
        extra_artifacts: Optional mapping of archive filename to object for
            anything else that should ship with the models (for example the
            fitted feature scaler).

    Returns:
        ``io.BytesIO`` positioned at the start of the archive.
    """
    artifacts = {f"{model_name}_model.pkl": model for model_name, model in models.items()}
    artifacts.update(extra_artifacts or {})

    zip_buffer = io.BytesIO()
    with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
        for filename, artifact in artifacts.items():
            payload = io.BytesIO()
            joblib.dump(artifact, payload)
            zip_file.writestr(filename, payload.getvalue())
    zip_buffer.seek(0)
    return zip_buffer


def _balance_training_set(X_train, y_train):
    """Oversample minority classes with SMOTE when the class sizes allow it."""
    smallest_class = int(y_train.value_counts().min())
    if smallest_class < 2:
        # A class with a single sample has no same-class neighbour to
        # interpolate with, so train on the data as it is.
        return X_train, y_train
    # SMOTE needs more samples per class than k_neighbors (default 5).
    smote = SMOTE(random_state=42, k_neighbors=min(5, smallest_class - 1))
    return smote.fit_resample(X_train, y_train)


# Training function with progress tracking and model saving
def train_model(file, n_estimators, learning_rate, max_depth, subsample,
                progress=gr.Progress()):
    """Train and evaluate one XGBoost classifier per card attribute.

    Args:
        file: Uploaded CSV as delivered by the ``gr.File`` input.
        n_estimators: Maximum number of boosting rounds.
        learning_rate: XGBoost learning rate.
        max_depth: Maximum tree depth.
        subsample: Row subsampling ratio per tree.
        progress: Gradio progress tracker.

    Returns:
        Tuple ``(report_text, accuracy_image, confusion_images, zip_path)``
        matching the four UI outputs.  On failure the first element is an
        error message and the remaining three are ``None``.
    """
    progress(0, desc="Starting...")
    results = []

    try:
        # Load and preprocess data
        progress(0.1, desc="Loading and preprocessing data...")
        data, error = load_and_preprocess_data(file)
        if error:
            return error, None, None, None

        # Create features
        progress(0.2, desc="Engineering features...")
        n_games = 3
        features = create_features(data, n_games)
        if features.empty:
            return (
                f"Error during training: the dataset needs more than {n_games} rows",
                None, None, None,
            )

        # Targets start at row n_games so they line up with the feature rows.
        targets = {
            f'{hand.lower()}_{attr.lower()}':
                data[f'{hand} {attr} Num'].iloc[n_games:].reset_index(drop=True)
            for hand in HANDS
            for attr in ATTRIBUTES
        }

        # Scale features
        # NOTE: the scaler is fitted on all rows before the train/test split,
        # so test rows influence the scaling statistics.
        progress(0.3, desc="Scaling features...")
        scaler = StandardScaler()
        features_scaled = pd.DataFrame(
            scaler.fit_transform(features), columns=features.columns
        )

        accuracies = {}
        confusion_matrices = []
        trained_models = {}

        # Train models
        for i, (target_name, target) in enumerate(targets.items()):
            progress(0.4 + (i / len(targets)) * 0.4, desc=f"Training {target_name} model...")

            # Split data
            X_train, X_test, y_train, y_test = train_test_split(
                features_scaled, target, test_size=0.2, random_state=42
            )

            # Apply SMOTE
            X_train_res, y_train_res = _balance_training_set(X_train, y_train)

            # Train model. early_stopping_rounds is a constructor argument;
            # XGBoost 2.0 removed it from fit().
            model = XGBClassifier(
                random_state=42,
                eval_metric='mlogloss',
                n_estimators=int(n_estimators),
                learning_rate=float(learning_rate),
                max_depth=int(max_depth),
                subsample=float(subsample),
                early_stopping_rounds=10,
            )
            model.fit(
                X_train_res, y_train_res,
                eval_set=[(X_test, y_test)],
                verbose=False,
            )

            # Save model
            trained_models[target_name] = model

            # Evaluate
            y_pred = model.predict(X_test)
            accuracy = accuracy_score(y_test, y_pred)
            report = classification_report(y_test, y_pred, zero_division=0)

            accuracies[target_name] = accuracy
            results.append(f"**{target_name} Results**\n")
            results.append(f"Accuracy: {accuracy:.2f}\n")
            results.append(f"Classification Report:\n{report}\n")

            # Generate confusion matrix plot
            cm_plot = plot_confusion_matrix(y_test, y_pred, f"Confusion Matrix - {target_name}")
            confusion_matrices.append(cm_plot)

        progress(0.9, desc="Generating visualizations and model archive...")

        # Generate accuracy bar chart
        accuracy_plot = plot_accuracy_chart(accuracies)

        # Create ZIP file of models. The scaler is included because the models
        # were trained on scaled features and need it at prediction time.
        archive = create_model_zip(trained_models, {'feature_scaler.pkl': scaler})
        # The gr.File output takes a file path, not an in-memory buffer.
        zip_path = Path(tempfile.mkdtemp()) / 'trained_models.zip'
        zip_path.write_bytes(archive.getvalue())

        progress(1.0, desc="Completed!")
        return "\n".join(results), accuracy_plot, confusion_matrices, str(zip_path)

    except Exception as e:
        return f"Error during training: {str(e)}", None, None, None


# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# Card Game Prediction Model Training")
    gr.Markdown("Upload the training dataset and configure hyperparameters to train the model. Track progress, view results, and download trained models.")

    file_input = gr.File(label="Upload TRAINING_CARD_DATA.csv")
    n_estimators = gr.Slider(50, 300, value=100, step=10, label="Number of Estimators")
    learning_rate = gr.Slider(0.01, 0.3, value=0.1, step=0.01, label="Learning Rate")
    max_depth = gr.Slider(3, 10, value=5, step=1, label="Max Depth")
    subsample = gr.Slider(0.5, 1.0, value=0.8, step=0.1, label="Subsample")
    train_button = gr.Button("Train Model")

    output_text = gr.Textbox(label="Training Results")
    accuracy_plot = gr.Image(label="Accuracy Comparison")
    confusion_plots = gr.Gallery(label="Confusion Matrices")
    model_download = gr.File(label="Download Trained Models (ZIP)")

    train_button.click(
        fn=train_model,
        inputs=[file_input, n_estimators, learning_rate, max_depth, subsample],
        outputs=[output_text, accuracy_plot, confusion_plots, model_download]
    )

# Launch only when run as a script so importing this module has no side effect.
if __name__ == "__main__":
    demo.launch()