Bachstelze commited on
Commit
a639edc
Β·
1 Parent(s): 2a23fe1

add time bench and viz

Browse files
A6/adaboost_classes.py ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Helper module to import AdaBoost classes without running module-level code.
4
+
5
+ This module re-exports the AdaBoostEnsemble and WeightedDecisionTree classes
6
+ from classification_adaboost.py, but without triggering the module-level
7
+ data loading and training code.
8
+ """
9
+ import numpy as np
10
+ from sklearn.base import BaseEstimator, ClassifierMixin
11
+ from sklearn.tree import DecisionTreeClassifier
12
+ from typing import List
13
+
14
+
15
class WeightedDecisionTree(DecisionTreeClassifier):
    """
    Thin wrapper over ``DecisionTreeClassifier`` whose sole purpose is to
    make the sample-weight pathway explicit: trees are grown against
    weighted training errors, which is what the AdaBoost loop requires.
    """

    def __init__(self, max_depth: int = 5, min_samples_split: int = 2,
                 min_samples_leaf: int = 1, random_state: int = 42):
        # Forward only the hyper-parameters the ensemble actually tunes;
        # everything else keeps the sklearn defaults.
        super().__init__(
            max_depth=max_depth,
            min_samples_split=min_samples_split,
            min_samples_leaf=min_samples_leaf,
            random_state=random_state,
        )

    def fit(self, X, y, sample_weight=None):
        """Fit the tree; ``sample_weight`` (if given) weights each row of X."""
        return super().fit(X, y, sample_weight=sample_weight)
32
+
33
+
34
class AdaBoostEnsemble(BaseEstimator, ClassifierMixin):
    """
    AdaBoost ensemble of decision trees where each tree is grown based on
    weighted training errors. Weights are updated based on the error of
    previous trees.

    The algorithm:
    1. Initialize equal weights for all training samples
    2. For each tree in the ensemble:
       - Train a decision tree on weighted data
       - Calculate weighted error rate
       - Compute tree weight (alpha)
       - Update sample weights (increase for misclassified, decrease for correct)
       - Normalize weights
    3. Make predictions using weighted voting

    Attributes set by ``fit``:
        trees: the fitted ``WeightedDecisionTree`` instances.
        tree_weights: per-tree voting weights (alpha values), parallel to ``trees``.
        classes_: sorted unique class labels observed during fit.
        n_classes: ``len(classes_)``.
    """

    def __init__(
        self,
        n_estimators: int = 50,
        max_depth: int = 5,
        min_samples_split: int = 2,
        min_samples_leaf: int = 1,
        random_state: int = 42
    ):
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.min_samples_leaf = min_samples_leaf
        self.random_state = random_state
        # Fitted state; re-initialized at the start of every fit() call.
        self.trees: List[WeightedDecisionTree] = []
        self.tree_weights: List[float] = []
        self.n_classes: int = 0
        self.classes_: np.ndarray = None

    def _initialize_weights(self, n_samples: int) -> np.ndarray:
        """Return uniform sample weights that sum to 1."""
        return np.ones(n_samples) / n_samples

    def _update_weights(
        self,
        weights: np.ndarray,
        y_true: np.ndarray,
        y_pred: np.ndarray,
        alpha: float
    ) -> np.ndarray:
        """
        Update sample weights based on prediction errors.

        Misclassified samples are multiplied by exp(alpha); correctly
        classified samples keep their weight. After renormalization this is
        equivalent to the textbook exp(+alpha)/exp(-alpha) update, since only
        the ratio between the two groups matters.
        """
        misclassified = y_true != y_pred
        updated_weights = weights * np.exp(alpha * misclassified.astype(float))
        # Normalize so the weights remain a probability distribution.
        return updated_weights / updated_weights.sum()

    def _compute_weighted_error(
        self,
        weights: np.ndarray,
        y_true: np.ndarray,
        y_pred: np.ndarray
    ) -> float:
        """Compute the weighted misclassification rate in [0, 1]."""
        misclassified = (y_true != y_pred).astype(float)
        return np.sum(weights * misclassified) / np.sum(weights)

    def _compute_alpha(self, error: float) -> float:
        """
        Compute the voting weight (alpha) of a classifier from its weighted
        error, clamping the degenerate cases to avoid log(0)/division by zero.
        """
        if error <= 0:
            return 10.0  # Very high weight for a perfect classifier
        if error >= 1:
            return -10.0  # Very negative weight for a completely wrong classifier
        return 0.5 * np.log((1 - error) / error)

    def fit(self, X: np.ndarray, y: np.ndarray) -> 'AdaBoostEnsemble':
        """
        Fit the AdaBoost ensemble on (X, y) and return ``self``.

        Each estimator gets a distinct but deterministic random_state
        (``random_state + i``) so runs are reproducible.
        """
        n_samples = X.shape[0]
        self.classes_ = np.unique(y)
        self.n_classes = len(self.classes_)

        # BUG FIX: reset fitted state so a second fit() call retrains from
        # scratch instead of appending new trees to the previous ensemble.
        self.trees = []
        self.tree_weights = []

        # Initialize sample weights uniformly.
        weights = self._initialize_weights(n_samples)

        for i in range(self.n_estimators):
            # Train a decision tree on the current weight distribution.
            tree = WeightedDecisionTree(
                max_depth=self.max_depth,
                min_samples_split=self.min_samples_split,
                min_samples_leaf=self.min_samples_leaf,
                random_state=self.random_state + i
            )
            tree.fit(X, y, sample_weight=weights)

            # Evaluate on the training set to drive the boosting update.
            y_pred = tree.predict(X)
            error = self._compute_weighted_error(weights, y, y_pred)
            alpha = self._compute_alpha(error)

            # Re-weight samples: misclassified points gain influence.
            weights = self._update_weights(weights, y, y_pred, alpha)

            self.trees.append(tree)
            self.tree_weights.append(alpha)

            print(f"Tree {i+1}/{self.n_estimators}: Error={error:.4f}, Alpha={alpha:.4f}")

        return self

    def predict(self, X: np.ndarray) -> np.ndarray:
        """
        Predict class labels for X using alpha-weighted majority voting.

        Raises:
            RuntimeError: if called before ``fit``.
        """
        if not self.trees:
            # Previously this crashed opaquely on classes_ being None.
            raise RuntimeError("AdaBoostEnsemble is not fitted; call fit() first.")

        all_predictions = np.array([tree.predict(X) for tree in self.trees])
        classes = self.classes_

        n_samples = X.shape[0]
        weighted_votes = np.zeros((n_samples, len(classes)))

        # Accumulate each tree's alpha into the column of its predicted class.
        for tree_idx in range(len(self.trees)):
            alpha = self.tree_weights[tree_idx]
            predictions = all_predictions[tree_idx]
            for class_idx, class_label in enumerate(classes):
                weighted_votes[:, class_idx] += alpha * (predictions == class_label)

        # Return the class with the highest weighted vote per sample.
        return classes[np.argmax(weighted_votes, axis=1)]

    def predict_proba(self, X: np.ndarray) -> np.ndarray:
        """
        Return pseudo-probabilities of shape (n_samples, n_classes): each
        tree's |alpha| vote share per class, normalized by the total |alpha|.

        Raises:
            RuntimeError: if called before ``fit``.
        """
        if not self.trees:
            raise RuntimeError("AdaBoostEnsemble is not fitted; call fit() first.")

        all_predictions = np.array([tree.predict(X) for tree in self.trees])
        classes = self.classes_

        n_samples = X.shape[0]
        weighted_votes = np.zeros((n_samples, len(classes)))

        total_weight = sum(abs(w) for w in self.tree_weights)
        if total_weight == 0:
            # Degenerate ensemble (all alphas zero): no information, so
            # return uniform probabilities instead of dividing by zero.
            return np.full((n_samples, len(classes)), 1.0 / len(classes))

        for tree_idx in range(len(self.trees)):
            alpha = self.tree_weights[tree_idx]
            predictions = all_predictions[tree_idx]
            for class_idx, class_label in enumerate(classes):
                weighted_votes[:, class_idx] += abs(alpha) * (predictions == class_label)

        # Normalize so each row sums to 1.
        return weighted_votes / total_weight
A6/all_classification.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
# Relative paths (from the A6 directory) to the serialized champion models
# of the earlier assignments, used for cross-assignment benchmarking.
a4_rf = "../A4/models/weaklink_classifier_rf.pkl"
# NOTE(review): "ensemnble" is a typo, but the name is part of this module's
# public interface — renaming would break importers; kept as-is.
a5_ensemnble = "../A5/models/ensemble_classification_champion.pkl"
a5b_adaboost = "../A5b/models/adaboost_classification.pkl"
a5b_bagging_tree = "../A5b/models/bagging_trees_champion.pkl"
a6_svm = "models/champion_svm.pkl"
A6/benchmark_results/benchmark_20260310_090052.json ADDED
@@ -0,0 +1,247 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "timestamp": "2026-03-10T09:00:50.070144",
3
+ "num_samples": 100,
4
+ "num_repeats": 10,
5
+ "models": {
6
+ "A4 Random Forest": {
7
+ "model_name": "A4 Random Forest",
8
+ "model_path": "../A4/models/weaklink_classifier_rf.pkl",
9
+ "inference_time_mean": 0.06072263170499355,
10
+ "inference_time_std": 0.0030473875509894866,
11
+ "inference_time_min": 0.058138252003118396,
12
+ "inference_time_max": 0.06896431901259348,
13
+ "inference_time_p50": 0.060211887990590185,
14
+ "inference_time_p95": 0.06896431901259348,
15
+ "inference_time_p99": 0.06896431901259348,
16
+ "memory_usage_mean": 360134.1,
17
+ "memory_usage_std": 67634.63257081308,
18
+ "memory_usage_peak": 512177,
19
+ "accuracy": 0.89,
20
+ "predictions_correct": 89,
21
+ "predictions_total": 100,
22
+ "model_size_bytes": 16381898,
23
+ "num_features": 41,
24
+ "num_parameters": 0,
25
+ "model_type": "RandomForestClassifier",
26
+ "feature_extraction_time_mean": 0.0,
27
+ "timing_samples": [
28
+ 0.060492476040963084,
29
+ 0.05959970800904557,
30
+ 0.05881448305444792,
31
+ 0.058138252003118396,
32
+ 0.06896431901259348,
33
+ 0.060211887990590185,
34
+ 0.05942972801858559,
35
+ 0.061595859995577484,
36
+ 0.0596357659669593,
37
+ 0.06034383695805445
38
+ ],
39
+ "memory_samples": [
40
+ 512177,
41
+ 377303,
42
+ 302127,
43
+ 358391,
44
+ 379313,
45
+ 354423,
46
+ 380515,
47
+ 281588,
48
+ 379268,
49
+ 276236
50
+ ],
51
+ "status": "SUCCESS",
52
+ "error_message": ""
53
+ },
54
+ "A5 Ensemble": {
55
+ "model_name": "A5 Ensemble",
56
+ "model_path": "../A5/models/ensemble_classification_champion.pkl",
57
+ "inference_time_mean": 0.08792474841466173,
58
+ "inference_time_std": 0.019674506115526187,
59
+ "inference_time_min": 0.067903274029959,
60
+ "inference_time_max": 0.13867365900659934,
61
+ "inference_time_p50": 0.08352956402814016,
62
+ "inference_time_p95": 0.13867365900659934,
63
+ "inference_time_p99": 0.13867365900659934,
64
+ "memory_usage_mean": 404756.5,
65
+ "memory_usage_std": 288156.9877403828,
66
+ "memory_usage_peak": 1210671,
67
+ "accuracy": 0.67,
68
+ "predictions_correct": 67,
69
+ "predictions_total": 100,
70
+ "model_size_bytes": 26660056,
71
+ "num_features": 36,
72
+ "num_parameters": 0,
73
+ "model_type": "VotingClassifier",
74
+ "feature_extraction_time_mean": 0.0,
75
+ "timing_samples": [
76
+ 0.13867365900659934,
77
+ 0.08352956402814016,
78
+ 0.067903274029959,
79
+ 0.08235391502967104,
80
+ 0.09512816503411159,
81
+ 0.09174130897736177,
82
+ 0.07728461700025946,
83
+ 0.07468455104390159,
84
+ 0.07801902701612562,
85
+ 0.0899294029804878
86
+ ],
87
+ "memory_samples": [
88
+ 1210671,
89
+ 276078,
90
+ 257244,
91
+ 374860,
92
+ 258411,
93
+ 374702,
94
+ 277252,
95
+ 270064,
96
+ 372458,
97
+ 375825
98
+ ],
99
+ "status": "SUCCESS",
100
+ "error_message": ""
101
+ },
102
+ "A5b Adaboost": {
103
+ "model_name": "A5b Adaboost",
104
+ "model_path": "../A5b/models/adaboost_classification.pkl",
105
+ "inference_time_mean": 0.03466975499759428,
106
+ "inference_time_std": 0.006925241966045739,
107
+ "inference_time_min": 0.030500065011437982,
108
+ "inference_time_max": 0.048356816987507045,
109
+ "inference_time_p50": 0.032038366014603525,
110
+ "inference_time_p95": 0.048356816987507045,
111
+ "inference_time_p99": 0.048356816987507045,
112
+ "memory_usage_mean": 204768.4,
113
+ "memory_usage_std": 311.91138342662504,
114
+ "memory_usage_peak": 205656,
115
+ "accuracy": 0.52,
116
+ "predictions_correct": 52,
117
+ "predictions_total": 100,
118
+ "model_size_bytes": 725059,
119
+ "num_features": 0,
120
+ "num_parameters": 0,
121
+ "model_type": "AdaBoostEnsemble",
122
+ "feature_extraction_time_mean": 0.0,
123
+ "timing_samples": [
124
+ 0.048356816987507045,
125
+ 0.047088092018384486,
126
+ 0.03258101601386443,
127
+ 0.03238268301356584,
128
+ 0.03146621095947921,
129
+ 0.032038366014603525,
130
+ 0.030500065011437982,
131
+ 0.03090687998337671,
132
+ 0.03052046400262043,
133
+ 0.03085695597110316
134
+ ],
135
+ "memory_samples": [
136
+ 205656,
137
+ 204684,
138
+ 204668,
139
+ 204668,
140
+ 204668,
141
+ 204668,
142
+ 204668,
143
+ 204668,
144
+ 204668,
145
+ 204668
146
+ ],
147
+ "status": "SUCCESS",
148
+ "error_message": ""
149
+ },
150
+ "A5b Bagging Trees": {
151
+ "model_name": "A5b Bagging Trees",
152
+ "model_path": "../A5b/models/bagging_trees_champion.pkl",
153
+ "inference_time_mean": 0.006075771508039907,
154
+ "inference_time_std": 0.0017926972777932554,
155
+ "inference_time_min": 0.0038332950207404792,
156
+ "inference_time_max": 0.00979096203809604,
157
+ "inference_time_p50": 0.006550171005073935,
158
+ "inference_time_p95": 0.00979096203809604,
159
+ "inference_time_p99": 0.00979096203809604,
160
+ "memory_usage_mean": 59716.6,
161
+ "memory_usage_std": 68.09176814335848,
162
+ "memory_usage_peak": 59866,
163
+ "accuracy": 0.0,
164
+ "predictions_correct": 0,
165
+ "predictions_total": 100,
166
+ "model_size_bytes": 6506123,
167
+ "num_features": 36,
168
+ "num_parameters": 0,
169
+ "model_type": "LGBMClassifier",
170
+ "feature_extraction_time_mean": 0.0,
171
+ "timing_samples": [
172
+ 0.006550171005073935,
173
+ 0.0061910360236652195,
174
+ 0.0068354670074768364,
175
+ 0.006988314969930798,
176
+ 0.004823405994102359,
177
+ 0.006920185987837613,
178
+ 0.00979096203809604,
179
+ 0.0038514090119861066,
180
+ 0.0038332950207404792,
181
+ 0.00497346802148968
182
+ ],
183
+ "memory_samples": [
184
+ 59866,
185
+ 59746,
186
+ 59746,
187
+ 59746,
188
+ 59746,
189
+ 59700,
190
+ 59654,
191
+ 59654,
192
+ 59654,
193
+ 59654
194
+ ],
195
+ "status": "SUCCESS",
196
+ "error_message": ""
197
+ },
198
+ "A6 SVM": {
199
+ "model_name": "A6 SVM",
200
+ "model_path": "models/champion_svm.pkl",
201
+ "inference_time_mean": 0.009102203900692985,
202
+ "inference_time_std": 0.0003233410993925297,
203
+ "inference_time_min": 0.008689811977092177,
204
+ "inference_time_max": 0.009627135004848242,
205
+ "inference_time_p50": 0.009107397985644639,
206
+ "inference_time_p95": 0.009627135004848242,
207
+ "inference_time_p99": 0.009627135004848242,
208
+ "memory_usage_mean": 62088.6,
209
+ "memory_usage_std": 193.42021036535397,
210
+ "memory_usage_peak": 62631,
211
+ "accuracy": 0.83,
212
+ "predictions_correct": 83,
213
+ "predictions_total": 100,
214
+ "model_size_bytes": 700346,
215
+ "num_features": 36,
216
+ "num_parameters": 0,
217
+ "model_type": "Pipeline",
218
+ "feature_extraction_time_mean": 0.0,
219
+ "timing_samples": [
220
+ 0.009627135004848242,
221
+ 0.009057053015567362,
222
+ 0.009107397985644639,
223
+ 0.008771255961619318,
224
+ 0.00915416597854346,
225
+ 0.008994235016871244,
226
+ 0.00961044302675873,
227
+ 0.00879047199850902,
228
+ 0.009220069041475654,
229
+ 0.008689811977092177
230
+ ],
231
+ "memory_samples": [
232
+ 62631,
233
+ 62063,
234
+ 62047,
235
+ 62047,
236
+ 61955,
237
+ 62047,
238
+ 62047,
239
+ 62047,
240
+ 62001,
241
+ 62001
242
+ ],
243
+ "status": "SUCCESS",
244
+ "error_message": ""
245
+ }
246
+ }
247
+ }
A6/benchmark_results/single_benchmark_20260310_090011.json ADDED
The diff for this file is too large to render. See raw diff
 
A6/benchmark_results/visualizations/accuracy_vs_inference_time.png ADDED

Git LFS Details

  • SHA256: d9645f6a0c93cdecb0ba32f3466ea37f4230fbd203a3dd9e6e255b4e2aedb449
  • Pointer size: 131 Bytes
  • Size of remote file: 198 kB
A6/benchmark_results/visualizations/compare_benchmarks.py ADDED
@@ -0,0 +1,503 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Script to compare response times (inference times) from two benchmark JSON files.
4
+ Generates a visualization comparing the models from both benchmarks.
5
+ """
6
+
7
+ import json
8
+ import matplotlib.pyplot as plt
9
+ import numpy as np
10
+ from pathlib import Path
11
+
12
+ # File paths
13
+ benchmark_path = Path(__file__).parent / "../benchmark_20260310_090052.json"
14
+ single_benchmark_path = Path(__file__).parent / "../single_benchmark_20260310_090011.json"
15
+
16
+ # Load benchmark data
17
+ with open(benchmark_path, 'r') as f:
18
+ benchmark_data = json.load(f)
19
+
20
+ with open(single_benchmark_path, 'r') as f:
21
+ single_benchmark_data = json.load(f)
22
+
23
+ # Extract model data
24
def extract_model_data(data_dict):
    """
    Collect per-model inference statistics from a benchmark JSON dict.

    Returns a mapping of model name -> dict with short stat keys
    ('mean', 'std', 'min', 'max', 'p50', 'p95', 'p99', 'accuracy',
    'timing_samples'); missing numeric fields default to 0 and missing
    timing samples to an empty list.
    """
    # Short key -> full JSON field name for the timing statistics.
    stat_fields = {
        'mean': 'inference_time_mean',
        'std': 'inference_time_std',
        'min': 'inference_time_min',
        'max': 'inference_time_max',
        'p50': 'inference_time_p50',
        'p95': 'inference_time_p95',
        'p99': 'inference_time_p99',
    }

    extracted = {}
    for name, info in data_dict.get('models', {}).items():
        entry = {short: info.get(full, 0) for short, full in stat_fields.items()}
        entry['accuracy'] = info.get('accuracy', 0)
        entry['timing_samples'] = info.get('timing_samples', [])
        extracted[name] = entry
    return extracted
39
+
40
+ benchmark_models = extract_model_data(benchmark_data)
41
+ single_benchmark_models = extract_model_data(single_benchmark_data)
42
+
43
+ # Get all model names (should be the same in both)
44
+ all_model_names = sorted(benchmark_models.keys())
45
+
46
+ # Create figure with subplots
47
+ fig = plt.figure(figsize=(16, 10))
48
+
49
+ # 1. Bar chart comparing mean inference times
50
+ ax1 = fig.add_subplot(2, 3, 1)
51
+ x = np.arange(len(all_model_names))
52
+ width = 0.35
53
+
54
+ benchmark_means = [benchmark_models[m]['mean'] * 1000 for m in all_model_names] # Convert to ms
55
+ single_means = [single_benchmark_models[m]['mean'] * 1000 for m in all_model_names] # Convert to ms
56
+
57
+ bars1 = ax1.bar(x - width/2, benchmark_means, width, label='Multi-benchmark (100 samples)', alpha=0.8)
58
+ bars2 = ax1.bar(x + width/2, single_means, width, label='Single-benchmark (10 samples)', alpha=0.8)
59
+
60
+ ax1.set_xlabel('Model')
61
+ ax1.set_ylabel('Mean Inference Time (ms)')
62
+ ax1.set_title('Comparison of Mean Inference Times')
63
+ ax1.set_xticks(x)
64
+ ax1.set_xticklabels(all_model_names, rotation=45, ha='right')
65
+ ax1.legend()
66
+ ax1.grid(axis='y', alpha=0.3)
67
+
68
+ # Add value labels on bars
69
+ for bar in bars1:
70
+ height = bar.get_height()
71
+ ax1.annotate(f'{height:.3f}',
72
+ xy=(bar.get_x() + bar.get_width() / 2, height),
73
+ xytext=(0, 3),
74
+ textcoords="offset points",
75
+ ha='center', va='bottom', fontsize=8)
76
+
77
+ for bar in bars2:
78
+ height = bar.get_height()
79
+ ax1.annotate(f'{height:.3f}',
80
+ xy=(bar.get_x() + bar.get_width() / 2, height),
81
+ xytext=(0, 3),
82
+ textcoords="offset points",
83
+ ha='center', va='bottom', fontsize=8)
84
+
85
+ # 2. Box plot comparing timing distributions
86
+ ax2 = fig.add_subplot(2, 3, 2)
87
+
88
+ # Prepare data for box plot
89
+ all_data = []
90
+ labels = []
91
+ colors = []
92
+
93
+ for i, model_name in enumerate(all_model_names):
94
+ benchmark_samples = benchmark_models[model_name]['timing_samples'][:10] # Use first 10 for comparison
95
+ single_samples = single_benchmark_models[model_name]['timing_samples'][:10] # Use first 10 for comparison
96
+
97
+ # Convert to ms
98
+ benchmark_ms = [s * 1000 for s in benchmark_samples]
99
+ single_ms = [s * 1000 for s in single_samples]
100
+
101
+ all_data.append(benchmark_ms)
102
+ all_data.append(single_ms)
103
+ labels.append(f'{model_name}\nMulti')
104
+ labels.append(f'{model_name}\nSingle')
105
+ colors.extend([f'C{i}', f'C{i}'])
106
+
107
+ bp = ax2.boxplot(all_data, labels=labels, patch_artist=True, vert=True)
108
+ for patch, color in zip(bp['boxes'], colors):
109
+ patch.set_facecolor(color)
110
+ patch.set_alpha(0.6)
111
+
112
+ ax2.set_xlabel('Model (Benchmark Type)')
113
+ ax2.set_ylabel('Inference Time (ms)')
114
+ ax2.set_title('Distribution of Inference Times (Box Plot)')
115
+ ax2.tick_params(axis='x', rotation=45)
116
+ ax2.grid(axis='y', alpha=0.3)
117
+
118
+ # 3. Comparison scatter plot with accuracy
119
+ ax3 = fig.add_subplot(2, 3, 3)
120
+
121
+ benchmark_accs = [benchmark_models[m]['accuracy'] * 100 for m in all_model_names]
122
+ single_accs = [single_benchmark_models[m]['accuracy'] * 100 for m in all_model_names]
123
+ benchmark_times = [benchmark_models[m]['mean'] * 1000 for m in all_model_names]
124
+ single_times = [single_benchmark_models[m]['mean'] * 1000 for m in all_model_names]
125
+
126
+ # Create scatter plot
127
+ for i, model_name in enumerate(all_model_names):
128
+ ax3.scatter([benchmark_times[i]], [benchmark_accs[i]], marker='o', s=100,
129
+ label=f'{model_name} (Multi)', alpha=0.8, color=f'C{i}')
130
+ ax3.scatter([single_times[i]], [single_accs[i]], marker='s', s=100,
131
+ label=f'{model_name} (Single)', alpha=0.8, color=f'C{i}')
132
+
133
+ ax3.set_xlabel('Mean Inference Time (ms)')
134
+ ax3.set_ylabel('Accuracy (%)')
135
+ ax3.set_title('Accuracy vs Inference Time Comparison')
136
+ ax3.legend(bbox_to_anchor=(1.05, 1), loc='upper left', fontsize='small')
137
+ ax3.grid(True, alpha=0.3)
138
+
139
+ # 4. Percentile comparison
140
+ ax4 = fig.add_subplot(2, 3, 4)
141
+
142
+ x = np.arange(len(all_model_names))
143
+ width = 0.25
144
+
145
+ benchmark_p50 = [benchmark_models[m]['p50'] * 1000 for m in all_model_names]
146
+ benchmark_p95 = [benchmark_models[m]['p95'] * 1000 for m in all_model_names]
147
+ benchmark_p99 = [benchmark_models[m]['p99'] * 1000 for m in all_model_names]
148
+
149
+ single_p50 = [single_benchmark_models[m]['p50'] * 1000 for m in all_model_names]
150
+ single_p95 = [single_benchmark_models[m]['p95'] * 1000 for m in all_model_names]
151
+ single_p99 = [single_benchmark_models[m]['p99'] * 1000 for m in all_model_names]
152
+
153
+ bars_p50 = ax4.bar(x - width, benchmark_p50, width, label='P50 (Multi)', alpha=0.8)
154
+ bars_p95 = ax4.bar(x, benchmark_p95, width, label='P95 (Multi)', alpha=0.8)
155
+ bars_p99 = ax4.bar(x + width, benchmark_p99, width, label='P99 (Multi)', alpha=0.8)
156
+
157
+ # Single benchmark percentiles (offset)
158
+ ax4.bar(x - width + 0.05, single_p50, width*0.8, label='P50 (Single)', alpha=0.6, hatch='//')
159
+ ax4.bar(x + 0.05, single_p95, width*0.8, label='P95 (Single)', alpha=0.6, hatch='//')
160
+ ax4.bar(x + width + 0.05, single_p99, width*0.8, label='P99 (Single)', alpha=0.6, hatch='//')
161
+
162
+ ax4.set_xlabel('Model')
163
+ ax4.set_ylabel('Inference Time (ms)')
164
+ ax4.set_title('Percentile Comparison (P50, P95, P99)')
165
+ ax4.set_xticks(x)
166
+ ax4.set_xticklabels(all_model_names, rotation=45, ha='right')
167
+ ax4.legend(fontsize='small')
168
+ ax4.grid(axis='y', alpha=0.3)
169
+
170
+ # 5. Standard deviation comparison
171
+ ax5 = fig.add_subplot(2, 3, 5)
172
+
173
+ benchmark_std = [benchmark_models[m]['std'] * 1000 for m in all_model_names]
174
+ single_std = [single_benchmark_models[m]['std'] * 1000 for m in all_model_names]
175
+
176
+ x = np.arange(len(all_model_names))
177
+ width = 0.35
178
+
179
+ bars_std1 = ax5.bar(x - width/2, benchmark_std, width, label='Multi-benchmark', alpha=0.8)
180
+ bars_std2 = ax5.bar(x + width/2, single_std, width, label='Single-benchmark', alpha=0.8)
181
+
182
+ ax5.set_xlabel('Model')
183
+ ax5.set_ylabel('Standard Deviation (ms)')
184
+ ax5.set_title('Standard Deviation of Inference Times')
185
+ ax5.set_xticks(x)
186
+ ax5.set_xticklabels(all_model_names, rotation=45, ha='right')
187
+ ax5.legend()
188
+ ax5.grid(axis='y', alpha=0.3)
189
+
190
+ # Add value labels
191
+ for bar in bars_std1:
192
+ height = bar.get_height()
193
+ ax5.annotate(f'{height:.4f}',
194
+ xy=(bar.get_x() + bar.get_width() / 2, height),
195
+ xytext=(0, 3),
196
+ textcoords="offset points",
197
+ ha='center', va='bottom', fontsize=7)
198
+
199
+ for bar in bars_std2:
200
+ height = bar.get_height()
201
+ ax5.annotate(f'{height:.4f}',
202
+ xy=(bar.get_x() + bar.get_width() / 2, height),
203
+ xytext=(0, 3),
204
+ textcoords="offset points",
205
+ ha='center', va='bottom', fontsize=7)
206
+
207
+ # 6. Summary statistics table
208
+ ax6 = fig.add_subplot(2, 3, 6)
209
+ ax6.axis('off')
210
+
211
+ # Create table data
212
+ table_data = []
213
+ for model_name in all_model_names:
214
+ row = [
215
+ model_name,
216
+ f"{benchmark_models[model_name]['mean']*1000:.3f} Β± {benchmark_models[model_name]['std']*1000:.3f}",
217
+ f"{benchmark_models[model_name]['min']*1000:.3f}",
218
+ f"{benchmark_models[model_name]['max']*1000:.3f}",
219
+ f"{benchmark_models[model_name]['accuracy']*100:.1f}%",
220
+ f"{single_benchmark_models[model_name]['mean']*1000:.3f} Β± {single_benchmark_models[model_name]['std']*1000:.3f}",
221
+ f"{single_benchmark_models[model_name]['min']*1000:.3f}",
222
+ f"{single_benchmark_models[model_name]['max']*1000:.3f}",
223
+ f"{single_benchmark_models[model_name]['accuracy']*100:.1f}%"
224
+ ]
225
+ table_data.append(row)
226
+
227
+ columns = ['Model', 'Mean Β± Std (ms)', 'Min (ms)', 'Max (ms)', 'Acc (%)',
228
+ 'Mean Β± Std (ms)', 'Min (ms)', 'Max (ms)', 'Acc (%)']
229
+ row_labels = ['Multi', 'Single'] * len(all_model_names)
230
+
231
+ # Create table
232
+ table = ax6.table(cellText=table_data, colLabels=columns, cellLoc='center', loc='center')
233
+ table.auto_set_font_size(False)
234
+ table.set_fontsize(9)
235
+ table.scale(1.1, 1.8)
236
+
237
+ # Style the table
238
+ for i in range(len(all_model_names)):
239
+ for j in range(len(columns)):
240
+ cell = table[(i+1, j)]
241
+ cell.set_height(0.4)
242
+ if j < 5:
243
+ cell.set_facecolor('#f0f0f0') # Light gray for multi-benchmark columns
244
+ else:
245
+ cell.set_facecolor('#e0e0f0') # Light blue for single-benchmark columns
246
+
247
+ ax6.set_title('Summary Statistics Comparison', fontsize=12, pad=20)
248
+
249
+ # Save each subplot as a separate PNG image
250
+ output_dir = Path(__file__).parent
251
+
252
+ # 1. Bar chart comparing mean inference times
253
+ fig1, ax1_single = plt.subplots(figsize=(10, 6))
254
+ x = np.arange(len(all_model_names))
255
+ width = 0.35
256
+ benchmark_means = [benchmark_models[m]['mean'] * 1000 for m in all_model_names]
257
+ single_means = [single_benchmark_models[m]['mean'] * 1000 for m in all_model_names]
258
+ bars1 = ax1_single.bar(x - width/2, benchmark_means, width, label='Multi-benchmark (100 samples)', alpha=0.8)
259
+ bars2 = ax1_single.bar(x + width/2, single_means, width, label='Single-benchmark (10 samples)', alpha=0.8)
260
+ ax1_single.set_xlabel('Model')
261
+ ax1_single.set_ylabel('Mean Inference Time (ms)')
262
+ ax1_single.set_title('Comparison of Mean Inference Times')
263
+ ax1_single.set_xticks(x)
264
+ ax1_single.set_xticklabels(all_model_names, rotation=45, ha='right')
265
+ ax1_single.legend()
266
+ ax1_single.grid(axis='y', alpha=0.3)
267
+ for bar in bars1:
268
+ height = bar.get_height()
269
+ ax1_single.annotate(f'{height:.3f}', xy=(bar.get_x() + bar.get_width() / 2, height), xytext=(0, 3), textcoords="offset points", ha='center', va='bottom', fontsize=8)
270
+ for bar in bars2:
271
+ height = bar.get_height()
272
+ ax1_single.annotate(f'{height:.3f}', xy=(bar.get_x() + bar.get_width() / 2, height), xytext=(0, 3), textcoords="offset points", ha='center', va='bottom', fontsize=8)
273
+ plt.tight_layout()
274
+ plt.savefig(output_dir / "mean_inference_times.png", dpi=300, bbox_inches='tight')
275
+ plt.close(fig1)
276
+ print(f"Saved: mean_inference_times.png")
277
+
278
+ # 2. Box plot comparing timing distributions
279
+ fig2, ax2_single = plt.subplots(figsize=(12, 6))
280
+ all_data = []
281
+ labels = []
282
+ colors = []
283
+ for i, model_name in enumerate(all_model_names):
284
+ benchmark_samples = benchmark_models[model_name]['timing_samples'][:10]
285
+ single_samples = single_benchmark_models[model_name]['timing_samples'][:10]
286
+ benchmark_ms = [s * 1000 for s in benchmark_samples]
287
+ single_ms = [s * 1000 for s in single_samples]
288
+ all_data.append(benchmark_ms)
289
+ all_data.append(single_ms)
290
+ labels.append(f'{model_name}\nMulti')
291
+ labels.append(f'{model_name}\nSingle')
292
+ colors.extend([f'C{i}', f'C{i}'])
293
+ bp = ax2_single.boxplot(all_data, labels=labels, patch_artist=True, vert=True)
294
+ for patch, color in zip(bp['boxes'], colors):
295
+ patch.set_facecolor(color)
296
+ patch.set_alpha(0.6)
297
+ ax2_single.set_xlabel('Model (Benchmark Type)')
298
+ ax2_single.set_ylabel('Inference Time (ms)')
299
+ ax2_single.set_title('Distribution of Inference Times (Box Plot)')
300
+ ax2_single.tick_params(axis='x', rotation=45)
301
+ ax2_single.grid(axis='y', alpha=0.3)
302
+ plt.tight_layout()
303
+ plt.savefig(output_dir / "inference_time_distribution.png", dpi=300, bbox_inches='tight')
304
+ plt.close(fig2)
305
+ print(f"Saved: inference_time_distribution.png")
306
+
307
+ # 3. Comparison scatter plot with accuracy
308
+ fig3, ax3_single = plt.subplots(figsize=(10, 6))
309
+ benchmark_accs = [benchmark_models[m]['accuracy'] * 100 for m in all_model_names]
310
+ single_accs = [single_benchmark_models[m]['accuracy'] * 100 for m in all_model_names]
311
+ benchmark_times = [benchmark_models[m]['mean'] * 1000 for m in all_model_names]
312
+ single_times = [single_benchmark_models[m]['mean'] * 1000 for m in all_model_names]
313
+ for i, model_name in enumerate(all_model_names):
314
+ ax3_single.scatter([benchmark_times[i]], [benchmark_accs[i]], marker='o', s=100, label=f'{model_name} (Multi)', alpha=0.8, color=f'C{i}')
315
+ ax3_single.scatter([single_times[i]], [single_accs[i]], marker='s', s=100, label=f'{model_name} (Single)', alpha=0.8, color=f'C{i}')
316
+ ax3_single.set_xlabel('Mean Inference Time (ms)')
317
+ ax3_single.set_ylabel('Accuracy (%)')
318
+ ax3_single.set_title('Accuracy vs Inference Time Comparison')
319
+ ax3_single.legend(bbox_to_anchor=(1.05, 1), loc='upper left', fontsize='small')
320
+ ax3_single.grid(True, alpha=0.3)
321
+ plt.tight_layout()
322
+ plt.savefig(output_dir / "accuracy_vs_inference_time.png", dpi=300, bbox_inches='tight')
323
+ plt.close(fig3)
324
+ print(f"Saved: accuracy_vs_inference_time.png")
325
+
326
+ # 4. Percentile comparison
327
+ fig4, ax4_single = plt.subplots(figsize=(12, 6))
328
+ x = np.arange(len(all_model_names))
329
+ width = 0.25
330
+ benchmark_p50 = [benchmark_models[m]['p50'] * 1000 for m in all_model_names]
331
+ benchmark_p95 = [benchmark_models[m]['p95'] * 1000 for m in all_model_names]
332
+ benchmark_p99 = [benchmark_models[m]['p99'] * 1000 for m in all_model_names]
333
+ single_p50 = [single_benchmark_models[m]['p50'] * 1000 for m in all_model_names]
334
+ single_p95 = [single_benchmark_models[m]['p95'] * 1000 for m in all_model_names]
335
+ single_p99 = [single_benchmark_models[m]['p99'] * 1000 for m in all_model_names]
336
+ bars_p50 = ax4_single.bar(x - width, benchmark_p50, width, label='P50 (Multi)', alpha=0.8)
337
+ bars_p95 = ax4_single.bar(x, benchmark_p95, width, label='P95 (Multi)', alpha=0.8)
338
+ bars_p99 = ax4_single.bar(x + width, benchmark_p99, width, label='P99 (Multi)', alpha=0.8)
339
+ ax4_single.bar(x - width + 0.05, single_p50, width*0.8, label='P50 (Single)', alpha=0.6, hatch='//')
340
+ ax4_single.bar(x + 0.05, single_p95, width*0.8, label='P95 (Single)', alpha=0.6, hatch='//')
341
+ ax4_single.bar(x + width + 0.05, single_p99, width*0.8, label='P99 (Single)', alpha=0.6, hatch='//')
342
+ ax4_single.set_xlabel('Model')
343
+ ax4_single.set_ylabel('Inference Time (ms)')
344
+ ax4_single.set_title('Percentile Comparison (P50, P95, P99)')
345
+ ax4_single.set_xticks(x)
346
+ ax4_single.set_xticklabels(all_model_names, rotation=45, ha='right')
347
+ ax4_single.legend(fontsize='small')
348
+ ax4_single.grid(axis='y', alpha=0.3)
349
+ plt.tight_layout()
350
+ plt.savefig(output_dir / "percentile_comparison.png", dpi=300, bbox_inches='tight')
351
+ plt.close(fig4)
352
+ print(f"Saved: percentile_comparison.png")
353
+
354
+ # 5. Standard deviation comparison
355
+ fig5, ax5_single = plt.subplots(figsize=(10, 6))
356
+ benchmark_std = [benchmark_models[m]['std'] * 1000 for m in all_model_names]
357
+ single_std = [single_benchmark_models[m]['std'] * 1000 for m in all_model_names]
358
+ x = np.arange(len(all_model_names))
359
+ width = 0.35
360
+ bars_std1 = ax5_single.bar(x - width/2, benchmark_std, width, label='Multi-benchmark', alpha=0.8)
361
+ bars_std2 = ax5_single.bar(x + width/2, single_std, width, label='Single-benchmark', alpha=0.8)
362
+ ax5_single.set_xlabel('Model')
363
+ ax5_single.set_ylabel('Standard Deviation (ms)')
364
+ ax5_single.set_title('Standard Deviation of Inference Times')
365
+ ax5_single.set_xticks(x)
366
+ ax5_single.set_xticklabels(all_model_names, rotation=45, ha='right')
367
+ ax5_single.legend()
368
+ ax5_single.grid(axis='y', alpha=0.3)
369
+ for bar in bars_std1:
370
+ height = bar.get_height()
371
+ ax5_single.annotate(f'{height:.4f}', xy=(bar.get_x() + bar.get_width() / 2, height), xytext=(0, 3), textcoords="offset points", ha='center', va='bottom', fontsize=7)
372
+ for bar in bars_std2:
373
+ height = bar.get_height()
374
+ ax5_single.annotate(f'{height:.4f}', xy=(bar.get_x() + bar.get_width() / 2, height), xytext=(0, 3), textcoords="offset points", ha='center', va='bottom', fontsize=7)
375
+ plt.tight_layout()
376
+ plt.savefig(output_dir / "standard_deviation_comparison.png", dpi=300, bbox_inches='tight')
377
+ plt.close(fig5)
378
+ print(f"Saved: standard_deviation_comparison.png")
379
+
380
+ # 6. Summary statistics table
381
+ fig6, ax6_single = plt.subplots(figsize=(14, 6))
382
+ ax6_single.axis('off')
383
+ table_data = []
384
+ for model_name in all_model_names:
385
+ row = [
386
+ model_name,
387
+ f"{benchmark_models[model_name]['mean']*1000:.3f} Β± {benchmark_models[model_name]['std']*1000:.3f}",
388
+ f"{benchmark_models[model_name]['min']*1000:.3f}",
389
+ f"{benchmark_models[model_name]['max']*1000:.3f}",
390
+ f"{benchmark_models[model_name]['accuracy']*100:.1f}%",
391
+ f"{single_benchmark_models[model_name]['mean']*1000:.3f} Β± {single_benchmark_models[model_name]['std']*1000:.3f}",
392
+ f"{single_benchmark_models[model_name]['min']*1000:.3f}",
393
+ f"{single_benchmark_models[model_name]['max']*1000:.3f}",
394
+ f"{single_benchmark_models[model_name]['accuracy']*100:.1f}%"
395
+ ]
396
+ table_data.append(row)
397
+ columns = ['Model', 'Mean Β± Std (ms)', 'Min (ms)', 'Max (ms)', 'Acc (%)',
398
+ 'Mean Β± Std (ms)', 'Min (ms)', 'Max (ms)', 'Acc (%)']
399
+ table = ax6_single.table(cellText=table_data, colLabels=columns, cellLoc='center', loc='center')
400
+ table.auto_set_font_size(False)
401
+ table.set_fontsize(9)
402
+ table.scale(1.1, 1.8)
403
+ for i in range(len(all_model_names)):
404
+ for j in range(len(columns)):
405
+ cell = table[(i+1, j)]
406
+ cell.set_height(0.4)
407
+ if j < 5:
408
+ cell.set_facecolor('#f0f0f0')
409
+ else:
410
+ cell.set_facecolor('#e0e0f0')
411
+ ax6_single.set_title('Summary Statistics Comparison', fontsize=12, pad=20)
412
+ plt.tight_layout()
413
+ plt.savefig(output_dir / "summary_statistics.png", dpi=300, bbox_inches='tight')
414
+ plt.close(fig6)
415
+ print(f"Saved: summary_statistics.png")
416
+
417
+ print(f"\nAll individual visualizations saved to: {output_dir}")
418
+
419
+ # Also save as interactive HTML
420
+ html_output = Path(__file__).parent / "response_time_comparison.html"
421
+ with open(html_output, 'w') as f:
422
+ f.write(f"""<!DOCTYPE html>
423
+ <html>
424
+ <head>
425
+ <title>Benchmark Response Time Comparison</title>
426
+ <style>
427
+ body {{ font-family: Arial, sans-serif; margin: 20px; }}
428
+ h1 {{ text-align: center; }}
429
+ .chart {{ max-width: 1200px; margin: 0 auto; }}
430
+ .model-section {{ margin: 20px 0; padding: 15px; border: 1px solid #ddd; border-radius: 5px; }}
431
+ .model-title {{ font-weight: bold; font-size: 1.2em; margin-bottom: 10px; }}
432
+ table {{ width: 100%; border-collapse: collapse; }}
433
+ th, td {{ border: 1px solid #ddd; padding: 8px; text-align: left; }}
434
+ th {{ background-color: #f4f4f4; }}
435
+ </style>
436
+ </head>
437
+ <body>
438
+ <h1>Benchmark Response Time Comparison</h1>
439
+ <p><strong>Multi-benchmark:</strong> {benchmark_data['num_samples']} samples, {benchmark_data['num_repeats']} repeats</p>
440
+ <p><strong>Single-benchmark:</strong> {single_benchmark_data['num_samples']} samples, {single_benchmark_data['num_repeats']} repeats</p>
441
+ <p><img src="response_time_comparison.png" alt="Comparison Chart" class="chart"></p>
442
+ <h2>Detailed Statistics</h2>
443
+ """)
444
+ for model_name in all_model_names:
445
+ f.write(f"""
446
+ <div class="model-section">
447
+ <div class="model-title">{model_name}</div>
448
+ <table>
449
+ <tr>
450
+ <th>Metric</th>
451
+ <th>Multi-benchmark</th>
452
+ <th>Single-benchmark</th>
453
+ <th>Change</th>
454
+ </tr>
455
+ <tr>
456
+ <td>Mean (ms)</td>
457
+ <td>{benchmark_models[model_name]['mean']*1000:.4f}</td>
458
+ <td>{single_benchmark_models[model_name]['mean']*1000:.4f}</td>
459
+ <td>{((single_benchmark_models[model_name]['mean'] - benchmark_models[model_name]['mean']) / benchmark_models[model_name]['mean'] * 100):.1f}%</td>
460
+ </tr>
461
+ <tr>
462
+ <td>Std (ms)</td>
463
+ <td>{benchmark_models[model_name]['std']*1000:.4f}</td>
464
+ <td>{single_benchmark_models[model_name]['std']*1000:.4f}</td>
465
+ <td>{((single_benchmark_models[model_name]['std'] - benchmark_models[model_name]['std']) / benchmark_models[model_name]['std'] * 100):.1f}%</td>
466
+ </tr>
467
+ <tr>
468
+ <td>Min (ms)</td>
469
+ <td>{benchmark_models[model_name]['min']*1000:.4f}</td>
470
+ <td>{single_benchmark_models[model_name]['min']*1000:.4f}</td>
471
+ <td>{((single_benchmark_models[model_name]['min'] - benchmark_models[model_name]['min']) / benchmark_models[model_name]['min'] * 100):.1f}%</td>
472
+ </tr>
473
+ <tr>
474
+ <td>Max (ms)</td>
475
+ <td>{benchmark_models[model_name]['max']*1000:.4f}</td>
476
+ <td>{single_benchmark_models[model_name]['max']*1000:.4f}</td>
477
+ <td>{((single_benchmark_models[model_name]['max'] - benchmark_models[model_name]['max']) / benchmark_models[model_name]['max'] * 100):.1f}%</td>
478
+ </tr>
479
+ <tr>
480
+ <td>Accuracy</td>
481
+ <td>{benchmark_models[model_name]['accuracy']*100:.1f}%</td>
482
+ <td>{single_benchmark_models[model_name]['accuracy']*100:.1f}%</td>
483
+ <td>{(single_benchmark_models[model_name]['accuracy'] - benchmark_models[model_name]['accuracy']) * 100:.1f}pp</td>
484
+ </tr>
485
+ </table>
486
+ </div>
487
+ """)
488
+ f.write("""
489
+ </body>
490
+ </html>""")
491
+ print(f"HTML report saved to: {html_output}")
492
+
493
+ # Print summary to console
494
+ print("\n=== Summary ===")
495
+ print(f"Multi-benchmark: {benchmark_data['num_samples']} samples, {benchmark_data['num_repeats']} repeats")
496
+ print(f"Single-benchmark: {single_benchmark_data['num_samples']} samples, {single_benchmark_data['num_repeats']} repeats")
497
+ print("\nModel Comparison:")
498
+ print("-" * 80)
499
+ for model_name in all_model_names:
500
+ b_mean = benchmark_models[model_name]['mean'] * 1000
501
+ s_mean = single_benchmark_models[model_name]['mean'] * 1000
502
+ change = ((s_mean - b_mean) / b_mean * 100)
503
+ print(f"{model_name:20s} | Multi: {b_mean:6.3f}ms | Single: {s_mean:6.3f}ms | Change: {change:+6.1f}%")
A6/benchmark_results/visualizations/inference_time_distribution.png ADDED

Git LFS Details

  • SHA256: b7198c081720eb9d8266ccd15f6209879e4819c1ed121e01daa1925bfc1dd1dd
  • Pointer size: 131 Bytes
  • Size of remote file: 206 kB
A6/benchmark_results/visualizations/mean_inference_times.png ADDED

Git LFS Details

  • SHA256: eade239b378cec9616bca5cecb80b61b16f9dcac3faf55b1aa6b634a5f19df8f
  • Pointer size: 131 Bytes
  • Size of remote file: 207 kB
A6/benchmark_results/visualizations/percentile_comparison.png ADDED

Git LFS Details

  • SHA256: 2f8e98332885b80b4d6c62a3ba4af9211fd1dff4881fbb144b6d91e7b8894db4
  • Pointer size: 131 Bytes
  • Size of remote file: 238 kB
A6/benchmark_results/visualizations/response_time_comparison.html ADDED
@@ -0,0 +1,234 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <title>Benchmark Response Time Comparison</title>
5
+ <style>
6
+ body { font-family: Arial, sans-serif; margin: 20px; }
7
+ h1 { text-align: center; }
8
+ .chart { max-width: 1200px; margin: 0 auto; }
9
+ .model-section { margin: 20px 0; padding: 15px; border: 1px solid #ddd; border-radius: 5px; }
10
+ .model-title { font-weight: bold; font-size: 1.2em; margin-bottom: 10px; }
11
+ table { width: 100%; border-collapse: collapse; }
12
+ th, td { border: 1px solid #ddd; padding: 8px; text-align: left; }
13
+ th { background-color: #f4f4f4; }
14
+ </style>
15
+ </head>
16
+ <body>
17
+ <h1>Benchmark Response Time Comparison</h1>
18
+ <p><strong>Multi-benchmark:</strong> 100 samples, 10 repeats</p>
19
+ <p><strong>Single-benchmark:</strong> 100 samples, 10 repeats</p>
20
+ <p><img src="response_time_comparison.png" alt="Comparison Chart" class="chart"></p>
21
+ <h2>Detailed Statistics</h2>
22
+
23
+ <div class="model-section">
24
+ <div class="model-title">A4 Random Forest</div>
25
+ <table>
26
+ <tr>
27
+ <th>Metric</th>
28
+ <th>Multi-benchmark</th>
29
+ <th>Single-benchmark</th>
30
+ <th>Change</th>
31
+ </tr>
32
+ <tr>
33
+ <td>Mean (ms)</td>
34
+ <td>60.7226</td>
35
+ <td>54.1178</td>
36
+ <td>-10.9%</td>
37
+ </tr>
38
+ <tr>
39
+ <td>Std (ms)</td>
40
+ <td>3.0474</td>
41
+ <td>8.3909</td>
42
+ <td>175.3%</td>
43
+ </tr>
44
+ <tr>
45
+ <td>Min (ms)</td>
46
+ <td>58.1383</td>
47
+ <td>41.5801</td>
48
+ <td>-28.5%</td>
49
+ </tr>
50
+ <tr>
51
+ <td>Max (ms)</td>
52
+ <td>68.9643</td>
53
+ <td>139.2800</td>
54
+ <td>102.0%</td>
55
+ </tr>
56
+ <tr>
57
+ <td>Accuracy</td>
58
+ <td>89.0%</td>
59
+ <td>89.0%</td>
60
+ <td>0.0pp</td>
61
+ </tr>
62
+ </table>
63
+ </div>
64
+
65
+ <div class="model-section">
66
+ <div class="model-title">A5 Ensemble</div>
67
+ <table>
68
+ <tr>
69
+ <th>Metric</th>
70
+ <th>Multi-benchmark</th>
71
+ <th>Single-benchmark</th>
72
+ <th>Change</th>
73
+ </tr>
74
+ <tr>
75
+ <td>Mean (ms)</td>
76
+ <td>87.9247</td>
77
+ <td>88.4395</td>
78
+ <td>0.6%</td>
79
+ </tr>
80
+ <tr>
81
+ <td>Std (ms)</td>
82
+ <td>19.6745</td>
83
+ <td>15.3584</td>
84
+ <td>-21.9%</td>
85
+ </tr>
86
+ <tr>
87
+ <td>Min (ms)</td>
88
+ <td>67.9033</td>
89
+ <td>60.6458</td>
90
+ <td>-10.7%</td>
91
+ </tr>
92
+ <tr>
93
+ <td>Max (ms)</td>
94
+ <td>138.6737</td>
95
+ <td>213.1680</td>
96
+ <td>53.7%</td>
97
+ </tr>
98
+ <tr>
99
+ <td>Accuracy</td>
100
+ <td>67.0%</td>
101
+ <td>67.0%</td>
102
+ <td>0.0pp</td>
103
+ </tr>
104
+ </table>
105
+ </div>
106
+
107
+ <div class="model-section">
108
+ <div class="model-title">A5b Adaboost</div>
109
+ <table>
110
+ <tr>
111
+ <th>Metric</th>
112
+ <th>Multi-benchmark</th>
113
+ <th>Single-benchmark</th>
114
+ <th>Change</th>
115
+ </tr>
116
+ <tr>
117
+ <td>Mean (ms)</td>
118
+ <td>34.6698</td>
119
+ <td>33.1184</td>
120
+ <td>-4.5%</td>
121
+ </tr>
122
+ <tr>
123
+ <td>Std (ms)</td>
124
+ <td>6.9252</td>
125
+ <td>3.6793</td>
126
+ <td>-46.9%</td>
127
+ </tr>
128
+ <tr>
129
+ <td>Min (ms)</td>
130
+ <td>30.5001</td>
131
+ <td>30.1910</td>
132
+ <td>-1.0%</td>
133
+ </tr>
134
+ <tr>
135
+ <td>Max (ms)</td>
136
+ <td>48.3568</td>
137
+ <td>67.5596</td>
138
+ <td>39.7%</td>
139
+ </tr>
140
+ <tr>
141
+ <td>Accuracy</td>
142
+ <td>52.0%</td>
143
+ <td>52.0%</td>
144
+ <td>0.0pp</td>
145
+ </tr>
146
+ </table>
147
+ </div>
148
+
149
+ <div class="model-section">
150
+ <div class="model-title">A5b Bagging Trees</div>
151
+ <table>
152
+ <tr>
153
+ <th>Metric</th>
154
+ <th>Multi-benchmark</th>
155
+ <th>Single-benchmark</th>
156
+ <th>Change</th>
157
+ </tr>
158
+ <tr>
159
+ <td>Mean (ms)</td>
160
+ <td>6.0758</td>
161
+ <td>3.0341</td>
162
+ <td>-50.1%</td>
163
+ </tr>
164
+ <tr>
165
+ <td>Std (ms)</td>
166
+ <td>1.7927</td>
167
+ <td>1.2043</td>
168
+ <td>-32.8%</td>
169
+ </tr>
170
+ <tr>
171
+ <td>Min (ms)</td>
172
+ <td>3.8333</td>
173
+ <td>2.4478</td>
174
+ <td>-36.1%</td>
175
+ </tr>
176
+ <tr>
177
+ <td>Max (ms)</td>
178
+ <td>9.7910</td>
179
+ <td>17.5220</td>
180
+ <td>79.0%</td>
181
+ </tr>
182
+ <tr>
183
+ <td>Accuracy</td>
184
+ <td>0.0%</td>
185
+ <td>0.0%</td>
186
+ <td>0.0pp</td>
187
+ </tr>
188
+ </table>
189
+ </div>
190
+
191
+ <div class="model-section">
192
+ <div class="model-title">A6 SVM</div>
193
+ <table>
194
+ <tr>
195
+ <th>Metric</th>
196
+ <th>Multi-benchmark</th>
197
+ <th>Single-benchmark</th>
198
+ <th>Change</th>
199
+ </tr>
200
+ <tr>
201
+ <td>Mean (ms)</td>
202
+ <td>9.1022</td>
203
+ <td>0.6455</td>
204
+ <td>-92.9%</td>
205
+ </tr>
206
+ <tr>
207
+ <td>Std (ms)</td>
208
+ <td>0.3233</td>
209
+ <td>0.0336</td>
210
+ <td>-89.6%</td>
211
+ </tr>
212
+ <tr>
213
+ <td>Min (ms)</td>
214
+ <td>8.6898</td>
215
+ <td>0.6043</td>
216
+ <td>-93.0%</td>
217
+ </tr>
218
+ <tr>
219
+ <td>Max (ms)</td>
220
+ <td>9.6271</td>
221
+ <td>1.1998</td>
222
+ <td>-87.5%</td>
223
+ </tr>
224
+ <tr>
225
+ <td>Accuracy</td>
226
+ <td>83.0%</td>
227
+ <td>83.0%</td>
228
+ <td>0.0pp</td>
229
+ </tr>
230
+ </table>
231
+ </div>
232
+
233
+ </body>
234
+ </html>
A6/benchmark_results/visualizations/response_time_comparison.png ADDED

Git LFS Details

  • SHA256: 0ec363043b93e8d327675bec5ae0946b321bd1cea2a28bcccad4ce98c63fcfe2
  • Pointer size: 131 Bytes
  • Size of remote file: 960 kB
A6/benchmark_results/visualizations/standard_deviation_comparison.png ADDED

Git LFS Details

  • SHA256: 1a9479a4d33aa87e5c7e8ae7aba6cdae32b3d794f2e8e456bd9a415923a325a8
  • Pointer size: 131 Bytes
  • Size of remote file: 200 kB
A6/benchmark_results/visualizations/summary_statistics.png ADDED

Git LFS Details

  • SHA256: 2e6904ca54eed3c36235aede9e33c43db26a03a495a2c766c0900ce1cf0acd99
  • Pointer size: 131 Bytes
  • Size of remote file: 212 kB
A6/benchmark_timing.md ADDED
@@ -0,0 +1,335 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Standardized Timing Benchmarking Framework
2
+
3
+ A comprehensive benchmarking framework for fair and consistent comparison of classification models (A4, A5, A5b, A6).
4
+
5
+ ## Features
6
+
7
+ This framework provides standardized metrics for model comparison:
8
+
9
+ - **Inference Time**: Mean, standard deviation, min, max, and percentiles (P50, P95, P99)
10
+ - **Memory Usage**: Mean, standard deviation, and peak memory consumption
11
+ - **Prediction Accuracy**: Correct predictions and accuracy percentage
12
+ - **Model Characteristics**: Model size, number of features, model type
13
+ - **Consistent Data Pipeline**: Uses the same data processing for all models
14
+
15
+ ## Installation
16
+
17
+ No additional dependencies required. Uses existing project dependencies:
18
+ - `numpy`
19
+ - `pandas`
20
+ - `scikit-learn`
21
+ - `pickle` (standard library)
22
+
23
+ ## Usage
24
+
25
+ ### Basic Usage
26
+
27
+ ```bash
28
+ python benchmark_timing.py
29
+ ```
30
+
31
+ ### Advanced Usage
32
+
33
+ ```bash
34
+ # Specify number of samples and repeats
35
+ python benchmark_timing.py --samples 200 --repeats 20
36
+
37
+ # Save results to specific file
38
+ python benchmark_timing.py --output results/my_benchmark.json
39
+
40
+ # Print comparison table
41
+ python benchmark_timing.py --compare
42
+
43
+ # Print model recommendations
44
+ python benchmark_timing.py --recommend
45
+
46
+ # All options combined
47
+ python benchmark_timing.py -n 150 -r 15 -o results/benchmark.json -c -R
48
+ ```
49
+
50
+ ### Command Line Arguments
51
+
52
+ | Argument | Short | Description | Default |
53
+ |----------|-------|-------------|---------|
54
+ | `--samples` | `-n` | Number of test samples | 100 |
55
+ | `--repeats` | `-r` | Number of repetitions per sample | 10 |
56
+ | `--output` | `-o` | Output file path for JSON results | Auto-generated |
57
+ | `--compare` | `-c` | Print comparison table | False |
58
+ | `--recommend` | `-R` | Print model recommendations | False |
59
+
60
+ ## Output
61
+
62
+ ### Console Output
63
+
64
+ The framework prints real-time progress and results:
65
+
66
+ ```
67
+ ======================================================================
68
+ STANDARDIZED TIMING BENCHMARKING FRAMEWORK
69
+ ======================================================================
70
+
71
+ Configuration:
72
+ Number of samples: 100
73
+ Number of repeats per sample: 10
74
+ Total predictions per model: 1000
75
+
76
+ Loading data...
77
+ Movement features shape: (1000, 150)
78
+ Weak link scores shape: (1000, 20)
79
+ Merged dataset shape: (1000, 165)
80
+ Feature matrix shape: (1000, 160)
81
+ Number of features: 160
82
+ Number of classes: 14
83
+
84
+ ======================================================================
85
+ Running Benchmarks
86
+ ======================================================================
87
+
88
+ Benchmarking A4 Random Forest...
89
+
90
+ A4 Random Forest Results:
91
+ Status: SUCCESS
92
+ Inference Time:
93
+ Mean: 1.234 ms
94
+ Std: 0.123 ms
95
+ P50: 1.200 ms
96
+ P95: 1.500 ms
97
+ P99: 1.800 ms
98
+ Memory Usage:
99
+ Mean: 256.5 KB
100
+ Peak: 512.0 KB
101
+ Accuracy: 78.5% (78/100)
102
+ Model Size: 1250.0 KB
103
+ Features: 160
104
+ ```
105
+
106
+ ### JSON Results
107
+
108
+ Results are saved to JSON format with all metrics:
109
+
110
+ ```json
111
+ {
112
+ "timestamp": "2024-01-15T10:30:45.123456",
113
+ "num_samples": 100,
114
+ "num_repeats": 10,
115
+ "models": {
116
+ "A4 Random Forest": {
117
+ "model_name": "A4 Random Forest",
118
+ "model_path": "../A4/models/weaklink_classifier_rf.pkl",
119
+ "inference_time_mean": 0.001234,
120
+ "inference_time_std": 0.000123,
121
+ "inference_time_min": 0.001000,
122
+ "inference_time_max": 0.001800,
123
+ "inference_time_p50": 0.001200,
124
+ "inference_time_p95": 0.001500,
125
+ "inference_time_p99": 0.001800,
126
+ "memory_usage_mean": 262656.0,
127
+ "memory_usage_std": 10240.0,
128
+ "memory_usage_peak": 524288.0,
129
+ "accuracy": 0.785,
130
+ "predictions_correct": 78,
131
+ "predictions_total": 100,
132
+ "model_size_bytes": 1280000,
133
+ "num_features": 160,
134
+ "num_parameters": 10,
135
+ "model_type": "RandomForestClassifier",
136
+ "timing_samples": [0.0012, 0.0013, ...],
137
+ "memory_samples": [262144, 266240, ...],
138
+ "status": "SUCCESS",
139
+ "error_message": ""
140
+ }
141
+ }
142
+ }
143
+ ```
144
+
145
+ ## Model Comparison Table
146
+
147
+ With `--compare` flag, prints a formatted comparison:
148
+
149
+ ```
150
+ ==========================================================================
151
+ MODEL COMPARISON SUMMARY
152
+ ==========================================================================
153
+ Model Time (ms) Std P95 Acc (%) Mem (KB) Size (KB)
154
+ --------------------------------------------------------------------------
155
+ A5b Adaboost 0.850 0.050 1.100 75.2 128.5 512.0
156
+ A5 Ensemble 1.100 0.080 1.350 79.8 256.3 768.0
157
+ A4 Random Forest 1.234 0.123 1.500 78.5 256.5 1250.0
158
+ A5b Bagging Trees 1.450 0.150 1.800 77.1 384.2 1024.0
159
+ A6 SVM 2.100 0.200 2.500 81.2 512.0 2048.0
160
+ ==========================================================================
161
+ ```
162
+
163
+ ## Model Recommendations
164
+
165
+ With `--recommend` flag, provides optimal model suggestions:
166
+
167
+ ```
168
+ ======================================================================
169
+ MODEL RECOMMENDATIONS
170
+ ======================================================================
171
+
172
+ Fastest Inference:
173
+ Model: A5b Adaboost
174
+ Inference Time: 0.850 ms
175
+
176
+ Highest Accuracy:
177
+ Model: A6 SVM
178
+ Accuracy: 81.2%
179
+
180
+ Lowest Memory Usage:
181
+ Model: A5b Adaboost
182
+ Memory Usage: 128.5 KB
183
+
184
+ Best Balanced Performance:
185
+ Model: A5 Ensemble
186
+ Inference Time: 1.100 ms
187
+ Accuracy: 79.8%
188
+ Memory Usage: 256.3 KB
189
+ ```
190
+
191
+ ## Benchmarking Metrics Explained
192
+
193
+ ### Inference Time Metrics
194
+
195
+ | Metric | Description |
196
+ |--------|-------------|
197
+ | **Mean** | Average inference time across all repetitions |
198
+ | **Std** | Standard deviation (variability) |
199
+ | **Min/Max** | Fastest and slowest inference times |
200
+ | **P50** | Median (50th percentile) |
201
+ | **P95** | 95th percentile (95% of predictions are faster) |
202
+ | **P99** | 99th percentile (99% of predictions are faster) |
203
+
204
+ ### Memory Metrics
205
+
206
+ | Metric | Description |
207
+ |--------|-------------|
208
+ | **Mean** | Average memory usage |
209
+ | **Std** | Standard deviation of memory usage |
210
+ | **Peak** | Maximum memory consumed |
211
+
212
+ ### Accuracy Metrics
213
+
214
+ | Metric | Description |
215
+ |--------|-------------|
216
+ | **Accuracy** | Percentage of correct predictions |
217
+ | **Predictions Correct/Total** | Raw counts |
218
+
219
+ ## Implementation Details
220
+
221
+ ### Data Pipeline
222
+
223
+ All models use the same data loading and preprocessing pipeline:
224
+ 1. Load movement features and weaklink scores
225
+ 2. Create WeakestLink target column
226
+ 3. Merge datasets
227
+ 4. Extract features (excluding ID, WeakestLink, EstimatedScore)
228
+ 5. Train/test split (80/20, stratified, random_state=42)
229
+ 6. StandardScaler fitted on training data
230
+
231
+ ### Feature Handling
232
+
233
+ - A4 Random Forest model was trained WITH duplicate NASM columns
234
+ - Other models (A5, A5b, A6) were trained WITHOUT duplicate NASM columns
235
+ - The framework automatically filters features based on each model's expectations
236
+
237
+ ### Memory Tracking
238
+
239
+ Uses Python's `tracemalloc` module for accurate memory measurement:
240
+ - Tracks memory before and after each prediction
241
+ - Records both current and peak memory usage
242
+
243
+ ### Timing Precision
244
+
245
+ Uses `time.perf_counter()` for high-resolution timing measurements.
246
+
247
+ ## Extending the Framework
248
+
249
+ ### Adding New Models
250
+
251
+ 1. Add model path to `all_classification.py`:
252
+ ```python
253
+ a7_new_model = "../A7/models/new_model.pkl"
254
+ ```
255
+
256
+ 2. Import in `benchmark_timing.py`:
257
+ ```python
258
+ from all_classification import (
259
+ a4_rf,
260
+ a5_ensemnble,
261
+ a5b_adaboost,
262
+ a5b_bagging_tree,
263
+ a6_svm,
264
+ a7_new_model, # Add here
265
+ )
266
+ ```
267
+
268
+ 3. Add to `models_to_benchmark` list in `run_benchmark()`:
269
+ ```python
270
+ models_to_benchmark = [
271
+ ('A4 Random Forest', a4_rf),
272
+ ('A5 Ensemble', a5_ensemnble),
273
+ ('A5b Adaboost', a5b_adaboost),
274
+ ('A5b Bagging Trees', a5b_bagging_tree),
275
+ ('A6 SVM', a6_svm),
276
+ ('A7 New Model', a7_new_model), # Add here
277
+ ]
278
+ ```
279
+
280
+ ### Adding New Metrics
281
+
282
+ Extend the `ModelMetrics` dataclass:
283
+
284
+ ```python
285
+ @dataclass
286
+ class ModelMetrics:
287
+ # ... existing fields ...
288
+ new_metric: float = 0.0
289
+ ```
290
+
291
+ Then calculate it in `benchmark_single_model()`.
292
+
293
+ ## Troubleshooting
294
+
295
+ ### Model Loading Errors
296
+
297
+ If a model fails to load:
298
+ 1. Verify the model file exists at the specified path
299
+ 2. Check that all required dependencies are installed
300
+ 3. Ensure custom classes (like `AdaBoostEnsemble`) are importable
301
+
302
+ ### Inconsistent Results
303
+
304
+ If results vary significantly between runs:
305
+ 1. Increase `--repeats` for better averaging
306
+ 2. Ensure no other processes are running during benchmarking
307
+ 3. Check system resource utilization
308
+
309
+ ### Memory Issues
310
+
311
+ If running out of memory:
312
+ 1. Reduce `--samples` count
313
+ 2. Close other applications
314
+ 3. Check available system memory
315
+
316
+ ## License
317
+
318
+ Part of the Data Intensive Systems project.
319
+ ```
320
+
321
+ Now let me also create a simple comparison script for quick analysis:
322
+
323
+ <tool_call>
324
+ <function=edit_file>
325
+ <parameter=display_description>
326
+ Create simple comparison script for quick analysis
327
+ </parameter>
328
+ <parameter=path>
329
+ /home/cyclonaut/Dokumente/Studium VΓ€xjΓΆ/data intensive project/repo_march/Data-intensive-systems/A6/compare_benchmarks.py
330
+ </parameter>
331
+ <parameter=mode>
332
+ create
333
+ </parameter>
334
+ </function>
335
+ </tool_call>
A6/benchmark_timing.py ADDED
@@ -0,0 +1,960 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Standardized Timing Benchmarking Framework for Classification Models
4
+
5
+ This framework provides fair and consistent timing benchmarks for comparing
6
+ classification models (A4, A5, A5b, A6) with metrics for:
7
+ - Inference time (mean, std, min, max, percentiles)
8
+ - Memory usage
9
+ - Prediction accuracy
10
+ - Model size
11
+ - Feature extraction time
12
+
13
+ Usage:
14
+ python benchmark_timing.py [--samples N] [--repeats M] [--output FILE]
15
+
16
+ Author: Benchmark Framework v1.0
17
+ """
18
+
19
+ import os
20
+ import sys
21
+ import pickle
22
+ import time
23
+ import tracemalloc
24
+ import warnings
25
+ import json
26
+ import numpy as np
27
+ import pandas as pd
28
+ from pathlib import Path
29
+ from datetime import datetime
30
+ from sklearn.preprocessing import StandardScaler
31
+ from sklearn.model_selection import train_test_split
32
+ from typing import Dict, List, Tuple, Optional, Any
33
+ from dataclasses import dataclass, field, asdict
34
+ from collections import defaultdict
35
+ import statistics
36
+
37
+ # Suppress warnings for cleaner output
38
+ warnings.filterwarnings('ignore')
39
+
40
+ # Add project root to path
41
+ project_root = os.path.abspath(os.path.dirname(__file__))
42
+ sys.path.insert(0, project_root)
43
+
44
+ # Import model paths
45
+ from all_classification import (
46
+ a4_rf,
47
+ a5_ensemnble,
48
+ a5b_adaboost,
49
+ a5b_bagging_tree,
50
+ a6_svm
51
+ )
52
+
53
+ # Import custom classes for unpickling
54
+ from adaboost_classes import (
55
+ AdaBoostEnsemble,
56
+ WeightedDecisionTree
57
+ )
58
+
59
+ # ============================================================================
60
+ # Configuration
61
+ # ============================================================================
62
+
63
+ REPO_ROOT = os.path.abspath(os.path.join(project_root, '..'))
64
+ DATA_DIR = os.path.join(REPO_ROOT, 'Datasets_all')
65
+ OUTPUT_DIR = os.path.join(project_root, 'benchmark_results')
66
+
67
+ # Weaklink categories (14 classes)
68
+ WEAKLINK_CATEGORIES = [
69
+ 'ExcessiveForwardLean', 'ForwardHead', 'LeftArmFallForward',
70
+ 'LeftAsymmetricalWeightShift', 'LeftHeelRises', 'LeftKneeMovesInward',
71
+ 'LeftKneeMovesOutward', 'LeftShoulderElevation', 'RightArmFallForward',
72
+ 'RightAsymmetricalWeightShift', 'RightHeelRises', 'RightKneeMovesInward',
73
+ 'RightKneeMovesOutward', 'RightShoulderElevation'
74
+ ]
75
+
76
+ # Duplicate NASM columns
77
+ DUPLICATE_NASM_COLS = [
78
+ 'No_1_NASM_Deviation',
79
+ 'No_2_NASM_Deviation',
80
+ 'No_3_NASM_Deviation',
81
+ 'No_4_NASM_Deviation',
82
+ 'No_5_NASM_Deviation',
83
+ ]
84
+
85
+ EXCLUDE_COLS = ['ID', 'WeakestLink', 'EstimatedScore']
86
+ EXPECTED_CLASSES = WEAKLINK_CATEGORIES.copy()
87
+
88
+ # Benchmark parameters
89
+ DEFAULT_NUM_SAMPLES = 100
90
+ DEFAULT_NUM_REPEATES = 10
91
+ DEFAULT_OUTPUT_FILE = None
92
+
93
+
94
+ # ============================================================================
95
+ # Data Classes for Results
96
+ # ============================================================================
97
+
98
@dataclass
class ModelMetrics:
    """Metrics for a single model benchmark.

    The raw sample lists hold one entry per timed prediction; the summary
    statistics are derived from them in benchmark_single_model().
    """
    model_name: str  # human-readable name, e.g. 'A4 Random Forest'
    model_path: str  # pickle path relative to the project root

    # Timing metrics (seconds). In batch mode each sample times one
    # whole-batch predict(); in single-sample mode each times one row.
    inference_time_mean: float = 0.0
    inference_time_std: float = 0.0
    inference_time_min: float = 0.0
    inference_time_max: float = 0.0
    inference_time_p50: float = 0.0  # median
    inference_time_p95: float = 0.0
    inference_time_p99: float = 0.0

    # Memory metrics (bytes): tracemalloc peak per prediction, i.e.
    # Python-level allocations only.
    memory_usage_mean: float = 0.0
    memory_usage_std: float = 0.0
    memory_usage_peak: float = 0.0

    # Prediction metrics (accuracy over the benchmark samples)
    accuracy: float = 0.0
    predictions_correct: int = 0
    predictions_total: int = 0

    # Model characteristics
    model_size_bytes: int = 0  # pickle size on disk
    num_features: int = 0
    num_parameters: int = 0    # coarse estimator count, see get_model_info()
    model_type: str = ""       # class name of the loaded estimator

    # Feature extraction time (seconds)
    # NOTE(review): not populated anywhere in the benchmarking code visible
    # in this module — confirm whether it is still needed.
    feature_extraction_time_mean: float = 0.0

    # Raw timing/memory samples, one entry per timed prediction
    timing_samples: List[float] = field(default_factory=list)
    memory_samples: List[float] = field(default_factory=list)

    # Status: "SUCCESS", "LOAD_ERROR" or "INFERENCE_ERROR"
    status: str = "SUCCESS"
    error_message: str = ""
139
+
140
+
141
@dataclass
class BenchmarkResults:
    """Complete benchmark results for all models in one run."""
    timestamp: str    # ISO-8601 timestamp taken when the run starts
    num_samples: int  # rows benchmarked per model
    num_repeats: int  # repetitions per prediction
    models: Dict[str, ModelMetrics] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for JSON serialization."""
        serialized_models = {}
        for name, metrics in self.models.items():
            entry = asdict(metrics)
            # Force plain lists so the payload stays JSON-friendly.
            entry['timing_samples'] = list(metrics.timing_samples)
            entry['memory_samples'] = list(metrics.memory_samples)
            serialized_models[name] = entry
        return {
            'timestamp': self.timestamp,
            'num_samples': self.num_samples,
            'num_repeats': self.num_repeats,
            'models': serialized_models,
        }

    def to_json(self, filepath: Optional[str] = None) -> str:
        """Serialize to a JSON string, optionally writing it to *filepath*."""
        json_str = json.dumps(self.to_dict(), indent=2, default=str)
        if filepath:
            # Make sure the destination directory exists before writing.
            os.makedirs(os.path.dirname(filepath) or '.', exist_ok=True)
            with open(filepath, 'w') as handle:
                handle.write(json_str)
        return json_str
176
+
177
+
178
+ # ============================================================================
179
+ # Data Loading Functions
180
+ # ============================================================================
181
+
182
def load_and_prepare_data() -> Dict[str, Any]:
    """Load and prepare data following the same pipeline as classification_baseline.py.

    Reads two CSVs from DATA_DIR, derives the WeakestLink target, merges on
    ID, and produces a stratified 80/20 train/test split plus a fitted scaler.

    Returns:
        Dictionary containing:
        - feature_columns: List of feature column names
        - scaler: StandardScaler fitted on the training split only
        - X_train, X_test: Feature matrices (unscaled)
        - X_train_scaled, X_test_scaled: Scaled feature matrices
        - y_train, y_test: Target arrays
        - merged_df: Merged dataframe
    """
    # Load datasets
    movement_features_df = pd.read_csv(os.path.join(DATA_DIR, 'aimoscores.csv'))
    weaklink_scores_df = pd.read_csv(os.path.join(DATA_DIR, 'scores_and_weaklink.csv'))

    print(f' Movement features shape: {movement_features_df.shape}')
    print(f' Weak link scores shape: {weaklink_scores_df.shape}')

    # Target = name of the weak-link category with the highest score per row
    weaklink_scores_df['WeakestLink'] = (
        weaklink_scores_df[WEAKLINK_CATEGORIES].idxmax(axis=1)
    )

    # Merge datasets (inner join keeps only IDs present in both files)
    target_df = weaklink_scores_df[['ID', 'WeakestLink']].copy()
    merged_df = movement_features_df.merge(target_df, on='ID', how='inner')
    print(f' Merged dataset shape: {merged_df.shape}')

    # Extract feature columns - include ALL columns except EXCLUDE_COLS
    # (duplicate NASM columns are intentionally kept; models trained without
    # them get those columns filtered out later)
    feature_columns = [c for c in merged_df.columns if c not in EXCLUDE_COLS]

    X = merged_df[feature_columns].values
    y = merged_df['WeakestLink'].values

    print(f' Feature matrix shape: {X.shape}')
    print(f' Number of features: {len(feature_columns)}')
    print(f' Number of classes: {len(np.unique(y))}')

    # Create train/test split (stratified for stable class proportions)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    # Fit scaler on training data only (avoids test-set leakage)
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    return {
        'feature_columns': feature_columns,
        'scaler': scaler,
        'X_train': X_train,
        'X_train_scaled': X_train_scaled,
        'y_train': y_train,
        'X_test': X_test,
        'X_test_scaled': X_test_scaled,
        'y_test': y_test,
        'merged_df': merged_df,
    }
241
+
242
+
243
def create_samples_from_test_data(
    data: Dict[str, Any],
    num_samples: int
) -> Tuple[np.ndarray, np.ndarray]:
    """Select a benchmarking subset from the held-out test split.

    Args:
        data: Dictionary returned by load_and_prepare_data()
        num_samples: Upper bound on how many samples to take

    Returns:
        Tuple of (sample_features, true_labels) — the first rows of the test
        split, truncated to at most num_samples entries.
    """
    features = data['X_test']
    labels = data['y_test']

    # Never request more rows than the test split actually holds.
    take = min(num_samples, len(features))
    return features[:take], labels[:take]
266
+
267
+
268
+ # ============================================================================
269
+ # Model Loading Functions
270
+ # ============================================================================
271
+
272
def load_model(model_path: str, model_name: str) -> Tuple[Any, Optional[Any], Optional[List[str]], Any]:
    """Load a model from a pickle file.

    Two artifact layouts are supported:
      * a dict with 'model' / 'scaler' / 'feature_columns' entries, and
      * a bare sklearn Pipeline (the A6 SVM), in which case the scaler and
        feature names are recovered by inspecting the pipeline steps.

    Args:
        model_path: Path to the pickle file, relative to project_root
        model_name: Name of the model for logging

    Returns:
        Tuple of (model, scaler, feature_columns, artifact); all elements are
        None when the file is missing or cannot be unpickled.
    """
    import re  # hoisted: was re-imported inside the step loop on every call

    full_path = os.path.join(project_root, model_path)

    if not os.path.exists(full_path):
        print(f" ⚠️ Model file not found: {full_path}")
        return None, None, None, None

    try:
        # NOTE: pickle.load executes arbitrary code — only load trusted artifacts.
        with open(full_path, 'rb') as f:
            artifact = pickle.load(f)

        # Extract model and scaler based on artifact structure
        if isinstance(artifact, dict):
            model = artifact.get('model')
            scaler = artifact.get('scaler')
            feature_columns = artifact.get('feature_columns')
        else:
            # A6 SVM is a Pipeline object
            model = artifact
            scaler = None
            feature_columns = None

            # Extract scaler from pipeline if it exists: the first fitted
            # transformer step that is not itself a predictor.
            if hasattr(model, 'steps') and len(model.steps) >= 1:
                for step_name, step_obj in model.steps:
                    if hasattr(step_obj, 'transform'):
                        if hasattr(step_obj, 'n_features_in_') and not hasattr(step_obj, 'predict'):
                            scaler = step_obj
                            break

            # Extract feature columns from the first step, unless they are
            # sklearn's generic placeholder names (x0, x1, ...).
            if hasattr(model, 'steps') and len(model.steps) > 0:
                first_step = model.steps[0][1]
                if hasattr(first_step, 'get_feature_names_out'):
                    try:
                        names = first_step.get_feature_names_out()
                        if not all(re.fullmatch(r'x\d+', n) for n in names):
                            feature_columns = names
                    except Exception:
                        # Best effort only; feature_columns stays None.
                        # (Was a bare `except:`, which would also swallow
                        # KeyboardInterrupt/SystemExit.)
                        pass

        print(f" βœ“ Loaded {model_name}")
        return model, scaler, feature_columns, artifact
    except Exception as e:
        print(f" βœ— Error loading {model_name}: {e}")
        return None, None, None, None
328
+
329
+
330
def get_model_info(model: Any) -> Dict[str, Any]:
    """Summarize a fitted model's characteristics for the benchmark report.

    Args:
        model: The trained estimator (plain model, ensemble or pipeline)

    Returns:
        Dict with 'model_type', 'num_parameters', 'num_features' and, when
        the model exposes classes_, a 'num_classes' entry.
    """
    details: Dict[str, Any] = {
        'model_type': type(model).__name__,
        'num_parameters': 0,
        'num_features': 0,
    }

    # "Parameters" is a coarse proxy: the declared estimator count plus the
    # number of fitted sub-estimators, whenever either attribute is present.
    if hasattr(model, 'n_estimators'):
        details['num_parameters'] += getattr(model, 'n_estimators', 0)
    if hasattr(model, 'estimators_'):
        details['num_parameters'] += len(getattr(model, 'estimators_', []))

    if hasattr(model, 'n_features_in_'):
        details['num_features'] = model.n_features_in_
    if hasattr(model, 'classes_'):
        details['num_classes'] = len(model.classes_)

    # Ensembles: prefer the feature count reported by the first fitted base
    # estimator over the top-level value.
    for member in getattr(model, 'estimators_', []):
        if hasattr(member, 'n_features_in_'):
            details['num_features'] = member.n_features_in_
            break

    return details
366
+
367
+
368
+ # ============================================================================
369
+ # Benchmarking Functions
370
+ # ============================================================================
371
+
372
def measure_inference_time(
    model: Any,
    scaler: Optional[Any],
    sample_features: np.ndarray,
    model_feature_columns: Optional[List[str]],
    feature_columns: List[str],
    num_repeats: int,
    single_sample_mode: bool = False
) -> Tuple[List[float], List[float], Optional[str]]:
    """Measure inference time for a model.

    The timed region includes any scaler.transform() preprocessing, so the
    figures reflect end-to-end prediction latency, not model.predict() alone.
    Memory is tracked per prediction with tracemalloc, which sees Python-level
    allocations only (native/C allocations are not captured).

    Args:
        model: The trained model
        scaler: Scaler for feature preprocessing
        sample_features: Input features
        model_feature_columns: Expected feature columns for the model
        feature_columns: All available feature columns
        num_repeats: Number of repetitions for averaging
        single_sample_mode: If True, measure each sample individually (for single sample latency)

    Returns:
        Tuple of (timing_samples, memory_samples, error_message); on failure
        the sample lists are empty and error_message holds str(exception).
    """
    timing_samples = []
    memory_samples = []

    try:
        # Filter features if needed
        if model_feature_columns is not None:
            available_features = [f for f in model_feature_columns if f in feature_columns]
            if len(available_features) > 0:
                # Convert column names to indices for numpy array
                feature_indices = [feature_columns.index(f) for f in available_features]
                test_features = sample_features[:, feature_indices]
            else:
                test_features = sample_features
        else:
            # model_feature_columns is None - likely A6 SVM pipeline
            # Check if we need to drop duplicate NASM columns
            if hasattr(model, 'steps') and len(model.steps) > 0:
                first_step = model.steps[0][1]
                n_expected = getattr(first_step, 'n_features_in_', None)
                if n_expected is not None:
                    # Identify indices of duplicate NASM columns
                    dup_indices = [i for i, c in enumerate(feature_columns) if c in DUPLICATE_NASM_COLS]
                    # Get all indices except duplicate NASM columns
                    valid_indices = [i for i in range(len(feature_columns)) if i not in dup_indices]
                    if len(valid_indices) == n_expected:
                        # Select only the columns that match expected features
                        test_features = sample_features[:, valid_indices]
                    else:
                        # Fallback: slice to expected number of features
                        test_features = sample_features[:, :n_expected]
                else:
                    test_features = sample_features
            else:
                test_features = sample_features

        # Handle A6 SVM pipeline (scaler already in pipeline — avoid scaling twice)
        if model_feature_columns is None and hasattr(model, 'steps'):
            scaler_to_use = None
        else:
            scaler_to_use = scaler

        # Determine how many predictions to make
        if single_sample_mode:
            # For single sample mode: repeat each sample individually
            num_predictions = num_repeats * len(test_features)
        else:
            # For batch mode: num_repeats on all samples
            num_predictions = num_repeats

        for i in range(num_predictions):
            # Start memory tracking just before the timed region
            tracemalloc.start()
            start_time = time.perf_counter()

            # Make prediction
            if single_sample_mode:
                # Single sample prediction: cycle through rows one at a time
                single_sample = test_features[i % len(test_features)].reshape(1, -1)
                if scaler_to_use is not None:
                    features = scaler_to_use.transform(single_sample)
                else:
                    features = single_sample
            else:
                # Batch prediction: use all samples
                if scaler_to_use is not None:
                    features = scaler_to_use.transform(test_features)
                else:
                    features = test_features

            prediction = model.predict(features)

            end_time = time.perf_counter()
            current, peak = tracemalloc.get_traced_memory()
            tracemalloc.stop()

            # Record measurements (peak = max Python allocation during the call)
            timing_samples.append(end_time - start_time)
            memory_samples.append(peak)

        return timing_samples, memory_samples, None

    except Exception as e:
        # Any failure (shape mismatch, missing attribute, ...) is reported
        # back to the caller instead of raising.
        return [], [], str(e)
478
+
479
+
480
def calculate_percentiles(values: List[float]) -> Dict[str, float]:
    """Calculate the 50th/95th/99th percentiles of a list of values.

    Uses linear interpolation between the two nearest ranks (the same
    convention as numpy.percentile's default), which fixes the bias of the
    previous truncation-based nearest-rank lookup — e.g. the median of
    [1, 2, 3, 4] is now 2.5 instead of 3.

    Args:
        values: List of numeric values (may be empty)

    Returns:
        Dictionary with 'p50', 'p95' and 'p99' entries; all 0.0 when the
        input is empty.
    """
    if not values:
        return {
            'p50': 0.0,
            'p95': 0.0,
            'p99': 0.0
        }

    ordered = sorted(values)
    last = len(ordered) - 1

    def interpolate(q: float) -> float:
        # Fractional rank into the sorted sample; blend neighbours linearly.
        rank = q * last
        lo = int(rank)
        hi = min(lo + 1, last)
        frac = rank - lo
        return ordered[lo] + (ordered[hi] - ordered[lo]) * frac

    return {
        'p50': interpolate(0.50),
        'p95': interpolate(0.95),
        'p99': interpolate(0.99)
    }
504
+
505
+
506
def benchmark_single_model(
    model_name: str,
    model_path: str,
    sample_features: np.ndarray,
    true_labels: np.ndarray,
    feature_columns: List[str],
    num_repeats: int,
    single_sample_mode: bool = False
) -> ModelMetrics:
    """Benchmark a single model.

    Loads the model, times its predictions via measure_inference_time(), and
    evaluates accuracy on the same samples. The feature-selection logic below
    duplicates the one in measure_inference_time() so accuracy is computed on
    exactly the features the timing run used.

    Args:
        model_name: Name of the model
        model_path: Path to the model file
        sample_features: Input features for benchmarking
        true_labels: Ground truth labels
        feature_columns: All available feature columns
        num_repeats: Number of repetitions
        single_sample_mode: If True, measure each sample individually (for single sample latency)

    Returns:
        ModelMetrics object with benchmark results; status is one of
        "SUCCESS", "LOAD_ERROR" or "INFERENCE_ERROR".
    """
    metrics = ModelMetrics(model_name=model_name, model_path=model_path)

    print(f"\n Benchmarking {model_name}...")

    # Load model
    model, scaler, model_feature_columns, artifact = load_model(model_path, model_name)

    if model is None:
        metrics.status = "LOAD_ERROR"
        metrics.error_message = "Failed to load model"
        return metrics

    # Get model info
    model_info = get_model_info(model)
    metrics.model_type = model_info.get('model_type', type(model).__name__)
    metrics.num_features = model_info.get('num_features', 0)

    # Get model size on disk
    # NOTE(review): bare `except:` also swallows KeyboardInterrupt/SystemExit;
    # `except OSError:` would be the precise exception here.
    try:
        model_size = os.path.getsize(os.path.join(project_root, model_path))
        metrics.model_size_bytes = model_size
    except:
        metrics.model_size_bytes = 0

    # Run inference benchmarks
    timing_samples, memory_samples, error = measure_inference_time(
        model, scaler, sample_features, model_feature_columns,
        feature_columns, num_repeats, single_sample_mode=single_sample_mode
    )

    if error:
        metrics.status = "INFERENCE_ERROR"
        metrics.error_message = error
        return metrics

    # Store raw samples
    metrics.timing_samples = timing_samples
    metrics.memory_samples = memory_samples

    # Calculate timing statistics (stdev needs at least two samples)
    if timing_samples:
        metrics.inference_time_mean = statistics.mean(timing_samples)
        metrics.inference_time_std = statistics.stdev(timing_samples) if len(timing_samples) > 1 else 0.0
        metrics.inference_time_min = min(timing_samples)
        metrics.inference_time_max = max(timing_samples)

        percentiles = calculate_percentiles(timing_samples)
        metrics.inference_time_p50 = percentiles['p50']
        metrics.inference_time_p95 = percentiles['p95']
        metrics.inference_time_p99 = percentiles['p99']

    # Calculate memory statistics
    if memory_samples:
        metrics.memory_usage_mean = statistics.mean(memory_samples)
        metrics.memory_usage_std = statistics.stdev(memory_samples) if len(memory_samples) > 1 else 0.0
        metrics.memory_usage_peak = max(memory_samples)

    # Test accuracy on the same samples
    try:
        # Filter features for prediction (mirrors measure_inference_time)
        if model_feature_columns is not None:
            available_features = [f for f in model_feature_columns if f in feature_columns]
            if len(available_features) > 0:
                # Convert column names to indices for numpy array
                feature_indices = [feature_columns.index(f) for f in available_features]
                test_features = sample_features[:, feature_indices]
            else:
                test_features = sample_features
        else:
            # model_feature_columns is None - likely A6 SVM pipeline
            # Check if we need to drop duplicate NASM columns
            if hasattr(model, 'steps') and len(model.steps) > 0:
                first_step = model.steps[0][1]
                n_expected = getattr(first_step, 'n_features_in_', None)
                if n_expected is not None:
                    # Identify indices of duplicate NASM columns
                    dup_indices = [i for i, c in enumerate(feature_columns) if c in DUPLICATE_NASM_COLS]
                    # Get all indices except duplicate NASM columns
                    valid_indices = [i for i in range(len(feature_columns)) if i not in dup_indices]
                    if len(valid_indices) == n_expected:
                        # Select only the columns that match expected features
                        test_features = sample_features[:, valid_indices]
                    else:
                        # Fallback: slice to expected number of features
                        test_features = sample_features[:, :n_expected]
                else:
                    test_features = sample_features
            else:
                test_features = sample_features

        # Handle A6 SVM pipeline (scaling happens inside the pipeline)
        if model_feature_columns is None and hasattr(model, 'steps'):
            scaler_to_use = None
        else:
            scaler_to_use = scaler

        if scaler_to_use is not None:
            features = scaler_to_use.transform(test_features)
        else:
            features = test_features

        predictions = model.predict(features)

        # Calculate accuracy
        correct = np.sum(predictions == true_labels)
        metrics.predictions_correct = int(correct)
        metrics.predictions_total = len(true_labels)
        metrics.accuracy = correct / len(true_labels)

    except Exception as e:
        # Accuracy is best-effort; timing results are still reported.
        print(f" ⚠️ Accuracy calculation failed: {e}")

    metrics.status = "SUCCESS"
    return metrics
643
+
644
+
645
def run_benchmark(
    num_samples: int = DEFAULT_NUM_SAMPLES,
    num_repeats: int = DEFAULT_NUM_REPEATES,
    output_file: Optional[str] = None,
    single_sample_mode: bool = False
) -> BenchmarkResults:
    """Run complete benchmark on all models.

    Args:
        num_samples: Number of samples to benchmark
        num_repeats: Number of repetitions per sample
        output_file: Optional output file path for results
        single_sample_mode: If True, measure each sample individually (for single sample latency)

    Returns:
        BenchmarkResults object with all results (also written to JSON)
    """
    print("=" * 70)
    print("STANDARDIZED TIMING BENCHMARKING FRAMEWORK")
    print("=" * 70)
    print(f"\nConfiguration:")
    print(f" Number of samples: {num_samples}")
    print(f" Number of repeats per sample: {num_repeats}")
    # NOTE(review): this count only matches single-sample mode; in batch mode
    # measure_inference_time performs num_repeats predictions in total.
    print(f" Total predictions per model: {num_samples * num_repeats}")
    print()

    # Load data
    print("Loading data...")
    data = load_and_prepare_data()
    print()

    # Create samples (may be fewer than num_samples if the test split is small)
    sample_features, true_labels = create_samples_from_test_data(data, num_samples)
    print(f"Created {num_samples} test samples for benchmarking")
    print()

    # Define models to benchmark. The path values (a4_rf, a5_ensemnble, ...)
    # are module-level constants defined earlier in this file.
    models_to_benchmark = [
        ('A4 Random Forest', a4_rf),
        ('A5 Ensemble', a5_ensemnble),
        ('A5b Adaboost', a5b_adaboost),
        ('A5b Bagging Trees', a5b_bagging_tree),
        ('A6 SVM', a6_svm),
    ]

    # Initialize results container
    results = BenchmarkResults(
        timestamp=datetime.now().isoformat(),
        num_samples=num_samples,
        num_repeats=num_repeats
    )

    # Benchmark each model
    print("=" * 70)
    print("Running Benchmarks")
    print("=" * 70)

    for model_name, model_path in models_to_benchmark:
        metrics = benchmark_single_model(
            model_name=model_name,
            model_path=model_path,
            sample_features=sample_features,
            true_labels=true_labels,
            feature_columns=data['feature_columns'],
            num_repeats=num_repeats,
            single_sample_mode=single_sample_mode
        )
        results.models[model_name] = metrics

        # Print summary for this model
        print(f"\n {model_name} Results:")
        print(f" Status: {metrics.status}")

        if metrics.status == "SUCCESS":
            print(f" Inference Time:")
            print(f" Mean: {metrics.inference_time_mean*1000:.3f} ms")
            print(f" Std: {metrics.inference_time_std*1000:.3f} ms")
            print(f" P50: {metrics.inference_time_p50*1000:.3f} ms")
            print(f" P95: {metrics.inference_time_p95*1000:.3f} ms")
            print(f" P99: {metrics.inference_time_p99*1000:.3f} ms")
            print(f" Memory Usage:")
            print(f" Mean: {metrics.memory_usage_mean/1024:.1f} KB")
            print(f" Peak: {metrics.memory_usage_peak/1024:.1f} KB")
            print(f" Accuracy: {metrics.accuracy*100:.1f}% ({metrics.predictions_correct}/{metrics.predictions_total})")
            print(f" Model Size: {metrics.model_size_bytes/1024:.1f} KB")
            print(f" Features: {metrics.num_features}")
        else:
            print(f" Error: {metrics.error_message}")
        print()

    # Save results (default: timestamped JSON under OUTPUT_DIR)
    if output_file is None:
        output_file = os.path.join(OUTPUT_DIR, f"benchmark_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json")

    json_output = results.to_json(output_file)
    print(f"Results saved to: {output_file}")

    return results
743
+
744
+
745
def run_single_sample_benchmark(
    num_samples: int = DEFAULT_NUM_SAMPLES,
    num_repeats: int = DEFAULT_NUM_REPEATES,
    output_file: Optional[str] = None
) -> BenchmarkResults:
    """Run the benchmark in per-row latency mode.

    Identical to run_benchmark() except that single_sample_mode is forced on,
    so every timed prediction covers exactly one sample — a more realistic
    view of online/serving latency than batch throughput.

    Args:
        num_samples: Number of samples to benchmark
        num_repeats: Number of repetitions per sample
        output_file: Optional output file path for results

    Returns:
        BenchmarkResults object with all results
    """
    return run_benchmark(num_samples, num_repeats, output_file, single_sample_mode=True)
769
+
770
+
771
+ # ============================================================================
772
+ # Comparison and Analysis Functions
773
+ # ============================================================================
774
+
775
def print_comparison_table(results: BenchmarkResults):
    """Print a formatted comparison table of all models, fastest first."""
    rule = "=" * 90
    print("\n" + rule)
    print("MODEL COMPARISON SUMMARY")
    print(rule)

    # Header row
    print(f"{'Model':<20} {'Time (ms)':<15} {'Std':<10} {'P95':<10} {'Acc (%)':<10} {'Mem (KB)':<12} {'Size (KB)':<12}")
    print("-" * 90)

    # Failed models sort to the bottom; successful ones by mean latency.
    def sort_key(entry):
        metrics = entry[1]
        return metrics.inference_time_mean if metrics.status == "SUCCESS" else float('inf')

    for model_name, metrics in sorted(results.models.items(), key=sort_key):
        if metrics.status != "SUCCESS":
            print(f"{model_name:<20} {'ERROR':<15} {'-':<10} {'-':<10} {'-':<10} {'-':<12} {'-':<12}")
            continue
        row = (
            f"{model_name:<20} {metrics.inference_time_mean * 1000:<15.3f} "
            f"{metrics.inference_time_std * 1000:<10.3f} "
            f"{metrics.inference_time_p95 * 1000:<10.3f} "
            f"{metrics.accuracy * 100:<10.1f} "
            f"{metrics.memory_usage_mean / 1024:<12.1f} "
            f"{metrics.model_size_bytes / 1024:<12.1f}"
        )
        print(row)

    print(rule)
805
+
806
+
807
def find_optimal_model(results: BenchmarkResults, priority: str = "speed"):
    """Pick the best successfully-benchmarked model for a given criterion.

    Args:
        results: BenchmarkResults object
        priority: One of "speed", "accuracy", "memory" or "balanced";
            any other value falls back to the "speed" behavior

    Returns:
        Tuple of (best_model_name, best_metrics), or (None, None) when no
        model completed successfully.
    """
    candidates = [
        (name, metrics) for name, metrics in results.models.items()
        if metrics.status == "SUCCESS"
    ]

    if not candidates:
        return None, None

    def weighted(entry):
        # Lower is better: mix latency, error rate and (scaled-down) memory.
        # Weights are a tunable heuristic.
        m = entry[1]
        return (0.5 * m.inference_time_mean
                + 0.3 * (1 - m.accuracy)
                + 0.2 * (m.memory_usage_mean / 1000000))

    if priority == "accuracy":
        return max(candidates, key=lambda e: e[1].accuracy)
    if priority == "memory":
        return min(candidates, key=lambda e: e[1].memory_usage_mean)
    if priority == "balanced":
        return min(candidates, key=weighted)
    # "speed" and any unrecognized priority: fastest mean inference time.
    return min(candidates, key=lambda e: e[1].inference_time_mean)
851
+
852
+
853
def print_recommendations(results: BenchmarkResults):
    """Print the winning model for each optimization criterion."""
    print("\n" + "=" * 70)
    print("MODEL RECOMMENDATIONS")
    print("=" * 70)

    # (label, find_optimal_model priority) pairs, reported in this order.
    for description, priority in (
        ("Fastest Inference", "speed"),
        ("Highest Accuracy", "accuracy"),
        ("Lowest Memory Usage", "memory"),
        ("Best Balanced Performance", "balanced"),
    ):
        model_name, metrics = find_optimal_model(results, priority)
        print(f"\n{description}:")
        if not model_name:
            print(" No valid models found")
            continue
        print(f" Model: {model_name}")
        # "balanced" reports all three figures; the others report one each.
        if priority in ("speed", "balanced"):
            print(f" Inference Time: {metrics.inference_time_mean*1000:.3f} ms")
        if priority in ("accuracy", "balanced"):
            print(f" Accuracy: {metrics.accuracy*100:.1f}%")
        if priority in ("memory", "balanced"):
            print(f" Memory Usage: {metrics.memory_usage_mean/1024:.1f} KB")
884
+
885
+
886
+ # ============================================================================
887
+ # Main Entry Point
888
+ # ============================================================================
889
+
890
def main():
    """CLI entry point: parse arguments, run the benchmark, report results."""
    import argparse

    cli = argparse.ArgumentParser(
        description='Standardized Timing Benchmarking Framework for Classification Models'
    )
    cli.add_argument(
        '--samples', '-n',
        type=int,
        default=DEFAULT_NUM_SAMPLES,
        help=f'Number of samples to benchmark (default: {DEFAULT_NUM_SAMPLES})'
    )
    cli.add_argument(
        '--repeats', '-r',
        type=int,
        default=DEFAULT_NUM_REPEATES,
        help=f'Number of repeats per sample (default: {DEFAULT_NUM_REPEATES})'
    )
    cli.add_argument(
        '--output', '-o',
        type=str,
        default=DEFAULT_OUTPUT_FILE,
        help='Output file for results (default: benchmark_results/timestamp.json)'
    )
    cli.add_argument(
        '--compare', '-c',
        action='store_true',
        help='Print comparison table after benchmarking'
    )
    cli.add_argument(
        '--recommend', '-R',
        action='store_true',
        help='Print model recommendations after benchmarking'
    )
    cli.add_argument(
        '--single-sample', '-s',
        action='store_true',
        help='Measure single sample prediction latency (default: batch mode)'
    )
    opts = cli.parse_args()

    # Both runners share the same keyword signature; pick one via the flag.
    runner = run_single_sample_benchmark if opts.single_sample else run_benchmark
    results = runner(
        num_samples=opts.samples,
        num_repeats=opts.repeats,
        output_file=opts.output
    )

    # Optional post-run reports
    if opts.compare:
        print_comparison_table(results)
    if opts.recommend:
        print_recommendations(results)

    # Return results for programmatic use
    return results
957
+
958
+
959
# Script entry point; the result object is bound at module level so it stays
# available for interactive inspection (e.g. `python -i <script>`).
if __name__ == "__main__":
    results = main()
A6/check_svm_model.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Quick inspection script: print the structure of the pickled A6 SVM model."""
import pickle
import os  # NOTE(review): unused in this script

# Check A6 SVM model (path is relative, so run this from the A6 directory)
a6_path = './models/champion_svm.pkl'
with open(a6_path, 'rb') as f:
    artifact = pickle.load(f)

print('A6 SVM Model Structure:')
print(f' Type: {type(artifact)}')
print(f' Class name: {type(artifact).__name__}')

# Pipeline artifact: list every step and any feature-name metadata it carries.
if hasattr(artifact, 'steps'):
    print(f' Steps: {[step[0] for step in artifact.steps]}')
    for step_name, step in artifact.steps:
        print(f' {step_name}: {type(step).__name__}')
        if hasattr(step, 'feature_names_in_'):
            print(f' feature_names_in_: {step.feature_names_in_}')
        if hasattr(step, 'get_feature_names_out'):
            try:
                fnames = step.get_feature_names_out()
                print(f' get_feature_names_out(): {fnames}')
            except Exception as e:
                print(f' get_feature_names_out() error: {e}')

# Dict artifact: show the keys and any stored feature-column list.
if isinstance(artifact, dict):
    print(f' Keys: {artifact.keys()}')
    if 'feature_columns' in artifact:
        print(f' feature_columns: {artifact["feature_columns"]}')
A6/test_classification_loading.py ADDED
@@ -0,0 +1,380 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Script to load and execute all classification models with one sample.
4
+ Tests models from A4, A5, A5b, and A6.
5
+
6
+ Data loading adapted from classification_baseline.py to use the same
7
+ data processing pipeline for consistent feature extraction.
8
+
9
+ NOTE: A4 Random Forest model was trained WITH the 5 duplicate NASM columns
10
+ (No_1_NASM_Deviation through No_5_NASM_Deviation), while other models (A5, A5b, A6)
11
+ were trained WITHOUT them. This script loads data WITH the duplicate columns
12
+ to support the A4 model, and filters them out for other models as needed.
13
+ """
14
+
15
+ import os
16
+ import sys
17
+ import pickle
18
+ import warnings
19
+ import numpy as np
20
+ import pandas as pd
21
+ from pathlib import Path
22
+ from sklearn.preprocessing import StandardScaler
23
+ from sklearn.model_selection import train_test_split
24
+
25
+ # Suppress warnings for cleaner output
26
+ warnings.filterwarnings('ignore')
27
+
28
+ # Add project root to path
29
+ project_root = os.path.abspath(os.path.dirname(__file__))
30
+ sys.path.insert(0, project_root)
31
+
32
+ # Import model paths from all_classification.py
33
+ sys.path.insert(0, project_root)
34
+ from all_classification import (
35
+ a4_rf,
36
+ a5_ensemnble,
37
+ a5b_adaboost,
38
+ a5b_bagging_tree,
39
+ a6_svm
40
+ )
41
+
42
+ # Import custom classes from A5b classification_adaboost.py
43
+ # These are needed for unpickling the AdaBoost model
44
+ #sys.path.insert(0, os.path.join(project_root, '..', 'A5b'))
45
+ from adaboost_classes import (
46
+ AdaBoostEnsemble,
47
+ WeightedDecisionTree
48
+ )
49
+
50
# Data paths: the shared datasets live one directory above this script.
REPO_ROOT = os.path.abspath(os.path.join(project_root, '..'))
DATA_DIR = os.path.join(REPO_ROOT, 'Datasets_all')

# Weaklink categories (14 classes). Used both to derive the WeakestLink
# target (argmax over these score columns) and as the label vocabulary.
WEAKLINK_CATEGORIES = [
    'ExcessiveForwardLean', 'ForwardHead', 'LeftArmFallForward',
    'LeftAsymmetricalWeightShift', 'LeftHeelRises', 'LeftKneeMovesInward',
    'LeftKneeMovesOutward', 'LeftShoulderElevation', 'RightArmFallForward',
    'RightAsymmetricalWeightShift', 'RightHeelRises', 'RightKneeMovesInward',
    'RightKneeMovesOutward', 'RightShoulderElevation'
]

# Duplicate NASM columns to remove (as in classification_baseline.py)
# NOTE: A4 Random Forest model was trained WITH these 5 duplicate columns,
# so they must be kept in the data for A4 to work correctly; the other
# models drop them via their saved feature_columns or pipeline width.
DUPLICATE_NASM_COLS = [
    'No_1_NASM_Deviation',
    'No_2_NASM_Deviation',
    'No_3_NASM_Deviation',
    'No_4_NASM_Deviation',
    'No_5_NASM_Deviation',
]

# Columns to exclude when extracting features (identifier and targets).
EXCLUDE_COLS = ['ID', 'WeakestLink', 'EstimatedScore']
76
+
77
# Expected classification classes (14 weaklink categories).
# This was a byte-for-byte duplicate of WEAKLINK_CATEGORIES; alias the
# single source of truth instead of repeating the literal so the two
# lists cannot silently drift apart. `list(...)` keeps it an independent
# list object, preserving the original's behavior under mutation.
EXPECTED_CLASSES = list(WEAKLINK_CATEGORIES)
85
+
86
+
87
def load_and_prepare_data():
    """Build the train/test matrices used to exercise every model.

    Mirrors the data pipeline of classification_baseline.py: load the raw
    CSVs, derive the WeakestLink target as the argmax over the weaklink
    score columns, merge on ID, split 80/20 with a fixed seed, and fit a
    StandardScaler on the training portion only.

    NOTE: the 5 duplicate NASM columns are intentionally KEPT because the
    A4 Random Forest model was trained with them; the other models
    (A5, A5b, A6) filter them out via their stored feature_columns.

    Returns a dict with feature_columns, the fitted scaler, and the raw
    and scaled train/test splits plus the merged DataFrame.
    """
    features_df = pd.read_csv(os.path.join(DATA_DIR, 'aimoscores.csv'))
    scores_df = pd.read_csv(os.path.join(DATA_DIR, 'scores_and_weaklink.csv'))

    print('Movement features shape:', features_df.shape)
    print('Weak link scores shape:', scores_df.shape)
    print('NOTE: Keeping duplicate NASM columns for A4 Random Forest model compatibility')

    # Target = category with the highest weak-link score in each row.
    scores_df['WeakestLink'] = scores_df[WEAKLINK_CATEGORIES].idxmax(axis=1)
    print('Weakest Link class distribution:')
    print(scores_df['WeakestLink'].value_counts())

    merged_df = features_df.merge(
        scores_df[['ID', 'WeakestLink']].copy(), on='ID', how='inner'
    )
    print('Merged dataset shape:', merged_df.shape)

    # Keep every column except identifier/target ones, duplicates included.
    feature_columns = [c for c in merged_df.columns if c not in EXCLUDE_COLS]
    X = merged_df[feature_columns].values
    y = merged_df['WeakestLink'].values

    print(f'Feature matrix shape : {X.shape}')
    print(f'Number of features : {len(feature_columns)}')
    print(f'Number of classes : {len(np.unique(y))}')

    # Same split parameters as the baseline so results are comparable.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    # Fit the scaler on training data only to avoid test-set leakage.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    return {
        'feature_columns': feature_columns,
        'scaler': scaler,
        'X_train': X_train,
        'X_train_scaled': X_train_scaled,
        'y_train': y_train,
        'X_test': X_test,
        'X_test_scaled': X_test_scaled,
        'y_test': y_test,
        'merged_df': merged_df,
    }
150
+
151
+
152
def load_model(model_path, model_name):
    """Load a pickled model artifact and extract model/scaler/feature metadata.

    Handles two artifact layouts: dicts with 'model'/'scaler'/'feature_columns'
    keys (A4, A5, A5b) and bare sklearn Pipeline objects (A6 SVM), from which
    the scaler and feature names are recovered by inspecting the steps.

    Parameters
    ----------
    model_path : str
        Pickle path relative to this script's directory (project_root).
    model_name : str
        Human-readable name used only for log messages.

    Returns
    -------
    tuple
        (model, scaler, feature_columns, artifact); all four are None when
        the file is missing or cannot be loaded.
    """
    import re

    full_path = os.path.join(project_root, model_path)

    if not os.path.exists(full_path):
        print(f"  ⚠️ Model file not found: {full_path}")
        return None, None, None, None

    try:
        with open(full_path, 'rb') as f:
            artifact = pickle.load(f)

        # Extract model and scaler based on artifact structure
        if isinstance(artifact, dict):
            model = artifact.get('model')
            scaler = artifact.get('scaler')
            feature_columns = artifact.get('feature_columns')
        else:
            # A6 SVM is a Pipeline object
            model = artifact
            # BUGFIX: initialize scaler up front so a bare (non-pipeline)
            # estimator no longer raises NameError at the return below,
            # which the outer handler mis-reported as a load error.
            scaler = None
            if hasattr(model, 'steps') and len(model.steps) >= 1:
                # A scaler step is a fitted transformer (n_features_in_)
                # that is not the final predictor.
                for step_name, step_obj in model.steps:
                    if hasattr(step_obj, 'transform'):
                        if hasattr(step_obj, 'n_features_in_') and not hasattr(step_obj, 'predict'):
                            scaler = step_obj
                            break
                # If no scaler found, fall back to the first step.
                if scaler is None and len(model.steps) > 0:
                    first_step = model.steps[0][1]
                    if hasattr(first_step, 'transform') and hasattr(first_step, 'n_features_in_'):
                        scaler = first_step
            # For pipelines, try to recover real column names from step 0.
            feature_columns = None
            if hasattr(model, 'steps') and len(model.steps) > 0:
                first_step = model.steps[0][1]
                if hasattr(first_step, 'get_feature_names_out'):
                    try:
                        names = first_step.get_feature_names_out()
                        # Only use feature names if they are real column names,
                        # not generic placeholder names like x0, x1, ...
                        if not all(re.fullmatch(r'x\d+', n) for n in names):
                            feature_columns = names
                        # else: leave feature_columns = None; handled by caller
                    except Exception:
                        # BUGFIX: narrowed the former bare `except:` so that
                        # KeyboardInterrupt/SystemExit are no longer swallowed.
                        pass

        print(f"  βœ“ Loaded {model_name}")
        return model, scaler, feature_columns, artifact
    except Exception as e:
        print(f"  βœ— Error loading {model_name}: {e}")
        return None, None, None, None
210
+
211
+
212
def predict_with_model(model, scaler, sample_features, model_name):
    """Run `model` on a copy of `sample_features`, optionally scaling first.

    Returns a (prediction, probabilities, error) triple: `probabilities`
    is None when the model lacks predict_proba; on any failure the first
    two entries are None and `error` holds the exception message. The
    `model_name` parameter is accepted for interface symmetry with the
    other helpers and is not used here.
    """
    try:
        inputs = sample_features.copy()

        # Standalone scaler (dict-style artifacts); pipelines scale internally.
        inputs = scaler.transform(inputs) if scaler is not None else inputs

        labels = model.predict(inputs)
        probabilities = (
            model.predict_proba(inputs)
            if hasattr(model, 'predict_proba')
            else None
        )
        return labels, probabilities, None
    except Exception as exc:
        return None, None, str(exc)
232
+
233
+
234
def create_sample_from_training_data(training_data, feature_columns, scaler):
    """Take the first training row as a single-sample DataFrame.

    Returns (raw_df, scaled): `scaled` is the scaler-transformed version
    when a scaler is supplied, otherwise the raw DataFrame itself.
    """
    first_row = training_data['X_train'][:1].copy()
    frame = pd.DataFrame(first_row, columns=feature_columns)

    if scaler is None:
        return frame, frame
    return frame, scaler.transform(frame)
245
+
246
+
247
def filter_features_for_model(sample_df, model_feature_columns):
    """Select only the columns the model was trained on, in model order.

    Falls back to every available column (with a warning) when none of
    the requested feature names are present in `sample_df`.
    """
    wanted = [name for name in model_feature_columns if name in sample_df.columns]

    if not wanted:
        print(f"  ⚠️ No matching features found, using all available")
        wanted = sample_df.columns.tolist()

    return sample_df[wanted]
256
+
257
+
258
def main():
    """Load every classification model (A4, A5, A5b, A6) and run each on
    one training sample, printing per-model predictions and a summary.

    Returns nothing; all output goes to stdout and a per-model status is
    collected in `results` for the final summary table.
    """
    print("=" * 60)
    print("Testing All Classification Models with One Sample")
    print("=" * 60)
    print()

    # Load and prepare data using the same pipeline as classification_baseline.py
    # NOTE: Data is loaded WITH the 5 duplicate NASM columns for A4 compatibility
    print("Loading data...")
    data = load_and_prepare_data()
    print()

    # Create sample from training data.
    # NOTE(review): sample_features_scaled is never used below — scaling is
    # applied per-model inside predict_with_model instead; confirm and drop?
    sample_features, sample_features_scaled = create_sample_from_training_data(
        data, data['feature_columns'], data['scaler']
    )
    print(f"Sample data shape: {sample_features.shape}")
    print(f"Number of features (including duplicates): {len(data['feature_columns'])}")
    print()

    # Define models to test: (display name, pickle path) pairs.
    models_to_test = [
        ('A4 Random Forest', a4_rf),
        ('A5 Ensemble', a5_ensemnble),
        ('A5b Adaboost', a5b_adaboost),
        ('A5b Bagging Trees', a5b_bagging_tree),
        ('A6 SVM', a6_svm),
    ]

    # Collected as (name, status, prediction, probabilities, error) tuples.
    results = []

    for model_name, model_path in models_to_test:
        print(f"Testing {model_name}...")

        # Load model
        model, scaler, model_feature_columns, artifact = load_model(model_path, model_name)

        if model is None:
            print(f"  Skipping {model_name} due to load error")
            results.append((model_name, 'LOAD_ERROR', None, None, None))
            print()
            continue

        # Determine feature columns to use
        if model_feature_columns is not None:
            # Filter sample data to only include features the model expects
            test_features = filter_features_for_model(sample_features, model_feature_columns)
            print(f"  Model expects {len(model_feature_columns)} features, using {len(test_features.columns)} available")
        elif hasattr(model, 'steps'):
            # Pipeline with generic/unknown feature names (e.g. A6 SVM trained without
            # the 5 duplicate NASM columns). Drop those duplicate columns so the number
            # of features matches what the pipeline's scaler expects.
            first_step = model.steps[0][1]
            n_expected = getattr(first_step, 'n_features_in_', None)
            cols_without_dupes = [c for c in sample_features.columns if c not in DUPLICATE_NASM_COLS]
            if n_expected is not None and len(cols_without_dupes) == n_expected:
                test_features = sample_features[cols_without_dupes]
                print(f"  Pipeline expects {n_expected} features β€” dropped duplicate NASM cols, using {len(test_features.columns)} features")
            else:
                # Fallback: just take the first n_expected columns
                test_features = sample_features.iloc[:, :n_expected] if n_expected else sample_features
                print(f"  Pipeline expects {n_expected} features, sliced sample to {len(test_features.columns)} features")
        else:
            test_features = sample_features
            print(f"  Using all {len(sample_features.columns)} available features")

        # Make prediction
        # For A6 SVM pipeline, don't pass the scaler separately since it's already in the pipeline
        # For other models, pass the scaler if available
        if model_feature_columns is None and hasattr(model, 'steps'):
            # This is likely the A6 SVM pipeline - don't apply scaler separately
            scaler_to_use = None
        else:
            scaler_to_use = scaler

        prediction, prediction_proba, error = predict_with_model(
            model, scaler_to_use, test_features, model_name
        )

        if error:
            print(f"  βœ— Prediction error: {error}")
            results.append((model_name, 'PREDICTION_ERROR', None, None, error))
            print()
            continue

        # Display results
        print(f"  βœ“ Prediction: {prediction[0]}")

        if prediction_proba is not None:
            print(f"  βœ“ Prediction probabilities shape: {prediction_proba.shape}")
            # Top-3 classes by descending probability for the single sample.
            # NOTE(review): indexes EXPECTED_CLASSES by position — assumes the
            # model's class order matches that list; model.classes_ would be safer.
            top_classes_idx = np.argsort(prediction_proba[0])[-3:][::-1]
            top_classes = [EXPECTED_CLASSES[i] for i in top_classes_idx]
            top_probs = [prediction_proba[0][i] for i in top_classes_idx]
            print(f"  βœ“ Top 3 classes: {list(zip(top_classes, [f'{p:.3f}' for p in top_probs]))}")

        print(f"  βœ“ Model type: {type(model).__name__}")

        # Check if model has classes attribute
        if hasattr(model, 'classes_'):
            print(f"  βœ“ Model classes: {list(model.classes_)}")

        results.append((model_name, 'SUCCESS', prediction, prediction_proba, None))
        print()

    # Summary
    print("=" * 60)
    print("Summary")
    print("=" * 60)

    for model_name, status, prediction, proba, error in results:
        if status == 'SUCCESS':
            pred_str = prediction[0] if prediction is not None else 'N/A'
            print(f"  {model_name}: βœ“ SUCCESS - Prediction: {pred_str}")
        else:
            print(f"  {model_name}: βœ— {status} - {error}")

    print()
    print("All models tested!")
378
+
379
# Script entry point.
if __name__ == "__main__":
    main()
A6/time_specification.md ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Hardware Specifications
2
+
3
+ | Component | Specification |
4
+ |-----------|---------------|
5
+ | **CPU** | AMD Ryzen 5 5600U with Radeon Graphics |
6
+ | **CPU Cores/Threads** | 6 cores, 12 threads (2 threads per core) |
7
+ | **CPU Frequency** | 400 MHz - 4289 MHz (max boost) |
8
+ | **Architecture** | x86_64 |
9
+ | **RAM** | 30 GiB (15 GiB available currently) |
10
+ | **Swap** | 31 GiB |
11
+ | **Integrated GPU** | AMD Radeon Vega Mobile Series (Cezanne) |
12
+ | **Storage** | 469 GB NVMe SSD |
13
+ | **Operating System** | Linux (Ubuntu-based, kernel 6.8.0-101-lowlatency) |
14
+
15
+ ---
16
+
17
+ ## Software Environment
18
+
19
+ | Component | Version/Details |
20
+ |-----------|-----------------|
21
+ | **Python** | 3.12.3 |
22
+ | **Key Packages** | numpy 2.4.2, scikit-learn 1.8.0, pandas 2.2.3 |