Upload 3 files

Browse files

Files changed (3) hide show

monitoring/evaluator.py +765 -0
monitoring/metrics.py +609 -0
monitoring/profiler.py +579 -0

monitoring/evaluator.py ADDED Viewed

	@@ -0,0 +1,765 @@

+"""
+Model Evaluator for Mamba Swarm
+Comprehensive evaluation system for model performance and quality
+"""
+import time
+import json
+import logging
+import torch
+import numpy as np
+from typing import Dict, List, Any, Optional, Tuple, Callable, Union
+from dataclasses import dataclass, field
+from collections import defaultdict
+import math
+import re
+from datetime import datetime
+from pathlib import Path
+import asyncio
+import concurrent.futures
+# Evaluation metrics
+@dataclass
+class EvaluationResult:
+    """A single scored measurement produced by one evaluation step."""
+    # Metric identifier, e.g. "perplexity" or "bleu_score".
+    metric_name: str
+    # Raw metric value; this class performs no normalization.
+    score: float
+    # Free-form context for the measurement; each instance gets its own dict.
+    details: Dict[str, Any] = field(default_factory=dict)
+    # Creation time, taken from time.time() when the instance is built.
+    timestamp: float = field(default_factory=time.time)
+@dataclass
+class BenchmarkResult:
+    """Aggregate outcome of one benchmark run."""
+    # Name identifying the benchmark.
+    benchmark_name: str
+    # Single summary score for the whole run.
+    overall_score: float
+    # Every individual measurement collected during the run.
+    individual_metrics: List[EvaluationResult]
+    # Duration of the run (MambaSwarmEvaluator fills this with a time.time() difference, i.e. seconds).
+    execution_time: float
+    # Description of the evaluated model as supplied by the caller.
+    model_info: Dict[str, Any]
+    # Creation time, taken from time.time() when the instance is built.
+    timestamp: float = field(default_factory=time.time)
+"""
+Model Evaluator for Mamba Swarm
+Comprehensive evaluation system for model performance and quality
+"""
+import time
+import json
+import logging
+import torch
+import numpy as np
+from typing import Dict, List, Any, Optional, Tuple, Callable, Union
+from dataclasses import dataclass, field
+from collections import defaultdict
+import math
+import re
+from datetime import datetime
+from pathlib import Path
+import asyncio
+import concurrent.futures
+# Evaluation metrics
+@dataclass
+class EvaluationResult:
+    """A single scored measurement produced by one evaluation step."""
+    # NOTE(review): an identical EvaluationResult definition appears earlier in
+    # this listing; being later, this one is the binding that stays in effect.
+    # Metric identifier, e.g. "perplexity" or "bleu_score".
+    metric_name: str
+    # Raw metric value; this class performs no normalization.
+    score: float
+    # Free-form context for the measurement; each instance gets its own dict.
+    details: Dict[str, Any] = field(default_factory=dict)
+    # Creation time, taken from time.time() when the instance is built.
+    timestamp: float = field(default_factory=time.time)
+@dataclass
+class BenchmarkResult:
+    """Aggregate outcome of one benchmark run."""
+    # NOTE(review): an identical BenchmarkResult definition appears earlier in
+    # this listing; being later, this one is the binding that stays in effect.
+    # Name identifying the benchmark.
+    benchmark_name: str
+    # Single summary score for the whole run.
+    overall_score: float
+    # Every individual measurement collected during the run.
+    individual_metrics: List[EvaluationResult]
+    # Duration of the run (MambaSwarmEvaluator fills this with a time.time() difference, i.e. seconds).
+    execution_time: float
+    # Description of the evaluated model as supplied by the caller.
+    model_info: Dict[str, Any]
+    # Creation time, taken from time.time() when the instance is built.
+    timestamp: float = field(default_factory=time.time)
+class PerplexityCalculator:
+    """Calculate perplexity for language models"""
+    def __init__(self, model, tokenizer):
+        self.model = model
+        self.tokenizer = tokenizer
+        self.device = next(model.parameters()).device
+    def calculate_perplexity(self, text: str, max_length: int = 512) -> float:
+        """Calculate perplexity for given text"""
+        # Tokenize text
+        tokens = self.tokenizer.encode(text, return_tensors="pt", max_length=max_length, truncation=True)
+        tokens = tokens.to(self.device)
+        with torch.no_grad():
+            # Get model outputs
+            outputs = self.model(tokens)
+            logits = outputs.logits if hasattr(outputs, 'logits') else outputs
+            # Calculate cross-entropy loss
+            shift_logits = logits[..., :-1, :].contiguous()
+            shift_labels = tokens[..., 1:].contiguous()
+            loss_fn = torch.nn.CrossEntropyLoss()
+            loss = loss_fn(shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1))
+            # Convert to perplexity
+            perplexity = torch.exp(loss)
+        return perplexity.item()
+class BLEUScore:
+    """BLEU score calculator for text generation"""
+    def __init__(self, n_grams: int = 4):
+        self.n_grams = n_grams
+    def calculate_bleu(self, reference: str, candidate: str) -> float:
+        """Calculate BLEU score between reference and candidate"""
+        ref_tokens = self._tokenize(reference)
+        cand_tokens = self._tokenize(candidate)
+        if len(cand_tokens) == 0:
+            return 0.0
+        # Calculate n-gram precisions
+        precisions = []
+        for n in range(1, self.n_grams + 1):
+            precision = self._calculate_n_gram_precision(ref_tokens, cand_tokens, n)
+            precisions.append(precision)
+        # Brevity penalty
+        bp = self._brevity_penalty(len(ref_tokens), len(cand_tokens))
+        # Calculate BLEU score
+        if 0 in precisions:
+            return 0.0
+        log_precisions = [math.log(p) for p in precisions]
+        bleu = bp * math.exp(sum(log_precisions) / len(log_precisions))
+        return bleu
+    def _tokenize(self, text: str) -> List[str]:
+        """Simple tokenization"""
+        return text.lower().split()
+    def _calculate_n_gram_precision(self, ref_tokens: List[str], cand_tokens: List[str], n: int) -> float:
+        """Calculate n-gram precision"""
+        if len(cand_tokens) < n:
+            return 0.0
+        # Get n-grams
+        ref_ngrams = self._get_ngrams(ref_tokens, n)
+        cand_ngrams = self._get_ngrams(cand_tokens, n)
+        if len(cand_ngrams) == 0:
+            return 0.0
+        # Count matches
+        matches = 0
+        for ngram in cand_ngrams:
+            if ngram in ref_ngrams:
+                matches += min(cand_ngrams[ngram], ref_ngrams[ngram])
+        return matches / sum(cand_ngrams.values())
+    def _get_ngrams(self, tokens: List[str], n: int) -> Dict[Tuple[str, ...], int]:
+        """Get n-gram counts"""
+        ngrams = defaultdict(int)
+        for i in range(len(tokens) - n + 1):
+            ngram = tuple(tokens[i:i+n])
+            ngrams[ngram] += 1
+        return ngrams
+    def _brevity_penalty(self, ref_len: int, cand_len: int) -> float:
+        """Calculate brevity penalty"""
+        if cand_len > ref_len:
+            return 1.0
+        elif cand_len == 0:
+            return 0.0
+        else:
+            return math.exp(1 - ref_len / cand_len)
+class ROUGEScore:
+    """ROUGE score calculator"""
+    def __init__(self):
+        pass
+    def calculate_rouge_l(self, reference: str, candidate: str) -> float:
+        """Calculate ROUGE-L score"""
+        ref_tokens = reference.lower().split()
+        cand_tokens = candidate.lower().split()
+        if not ref_tokens or not cand_tokens:
+            return 0.0
+        # Calculate LCS
+        lcs_length = self._lcs_length(ref_tokens, cand_tokens)
+        if lcs_length == 0:
+            return 0.0
+        # Calculate precision and recall
+        precision = lcs_length / len(cand_tokens)
+        recall = lcs_length / len(ref_tokens)
+        # Calculate F1 score
+        if precision + recall == 0:
+            return 0.0
+        f1 = 2 * precision * recall / (precision + recall)
+        return f1
+    def _lcs_length(self, seq1: List[str], seq2: List[str]) -> int:
+        """Calculate length of longest common subsequence"""
+        m, n = len(seq1), len(seq2)
+        dp = [[0] * (n + 1) for _ in range(m + 1)]
+        for i in range(1, m + 1):
+            for j in range(1, n + 1):
+                if seq1[i-1] == seq2[j-1]:
+                    dp[i][j] = dp[i-1][j-1] + 1
+                else:
+                    dp[i][j] = max(dp[i-1][j], dp[i][j-1])
+        return dp[m][n]
+class CoherenceAnalyzer:
+    """Analyze text coherence and quality"""
+    def __init__(self):
+        pass
+    def analyze_coherence(self, text: str) -> Dict[str, float]:
+        """Analyze text coherence"""
+        sentences = self._split_sentences(text)
+        if len(sentences) < 2:
+            return {"coherence_score": 1.0, "repetition_score": 1.0, "diversity_score": 0.5}
+        # Calculate coherence metrics
+        coherence_score = self._calculate_coherence(sentences)
+        repetition_score = self._calculate_repetition(text)
+        diversity_score = self._calculate_diversity(text)
+        return {
+            "coherence_score": coherence_score,
+            "repetition_score": repetition_score,
+            "diversity_score": diversity_score
+        }
+    def _split_sentences(self, text: str) -> List[str]:
+        """Split text into sentences"""
+        # Simple sentence splitting
+        sentences = re.split(r'[.!?]+', text)
+        return [s.strip() for s in sentences if s.strip()]
+    def _calculate_coherence(self, sentences: List[str]) -> float:
+        """Calculate coherence score based on sentence similarity"""
+        if len(sentences) < 2:
+            return 1.0
+        similarities = []
+        for i in range(len(sentences) - 1):
+            sim = self._sentence_similarity(sentences[i], sentences[i+1])
+            similarities.append(sim)
+        return sum(similarities) / len(similarities)
+    def _sentence_similarity(self, sent1: str, sent2: str) -> float:
+        """Calculate similarity between two sentences"""
+        words1 = set(sent1.lower().split())
+        words2 = set(sent2.lower().split())
+        if not words1 or not words2:
+            return 0.0
+        intersection = words1.intersection(words2)
+        union = words1.union(words2)
+        return len(intersection) / len(union)
+    def _calculate_repetition(self, text: str) -> float:
+        """Calculate repetition score (lower is better)"""
+        words = text.lower().split()
+        if len(words) < 2:
+            return 1.0
+        unique_words = set(words)
+        repetition_ratio = len(words) / len(unique_words)
+        # Normalize to 0-1 scale (1 is best, no repetition)
+        return 1.0 / repetition_ratio
+    def _calculate_diversity(self, text: str) -> float:
+        """Calculate lexical diversity"""
+        words = text.lower().split()
+        if len(words) == 0:
+            return 0.0
+        unique_words = set(words)
+        return len(unique_words) / len(words)
+class LatencyBenchmark:
+    """Benchmark model latency and throughput"""
+    def __init__(self, model, tokenizer):
+        self.model = model
+        self.tokenizer = tokenizer
+        self.device = next(model.parameters()).device
+    def benchmark_inference_speed(self, prompts: List[str], max_length: int = 100, num_runs: int = 5) -> Dict[str, float]:
+        """Benchmark inference speed"""
+        latencies = []
+        token_counts = []
+        for _ in range(num_runs):
+            for prompt in prompts:
+                start_time = time.time()
+                # Tokenize input
+                inputs = self.tokenizer.encode(prompt, return_tensors="pt").to(self.device)
+                # Generate
+                with torch.no_grad():
+                    outputs = self.model.generate(
+                        inputs,
+                        max_length=max_length,
+                        do_sample=False,
+                        pad_token_id=self.tokenizer.eos_token_id
+                    )
+                end_time = time.time()
+                # Calculate metrics
+                latency = end_time - start_time
+                generated_tokens = outputs.shape[1] - inputs.shape[1]
+                latencies.append(latency)
+                token_counts.append(generated_tokens)
+        # Calculate statistics
+        avg_latency = np.mean(latencies)
+        p95_latency = np.percentile(latencies, 95)
+        total_tokens = sum(token_counts)
+        total_time = sum(latencies)
+        throughput = total_tokens / total_time if total_time > 0 else 0
+        return {
+            "avg_latency_ms": avg_latency * 1000,
+            "p95_latency_ms": p95_latency * 1000,
+            "throughput_tokens_per_sec": throughput,
+            "total_runs": len(latencies)
+        }
+class QualityEvaluator:
+    """Comprehensive quality evaluation"""
+    def __init__(self, model, tokenizer):
+        self.model = model
+        self.tokenizer = tokenizer
+        self.perplexity_calc = PerplexityCalculator(model, tokenizer)
+        self.bleu_calc = BLEUScore()
+        self.rouge_calc = ROUGEScore()
+        self.coherence_analyzer = CoherenceAnalyzer()
+        self.latency_benchmark = LatencyBenchmark(model, tokenizer)
+    def evaluate_generation_quality(self, prompts: List[str], references: Optional[List[str]] = None, max_length: int = 100) -> List[EvaluationResult]:
+        """Evaluate generation quality"""
+        results = []
+        for i, prompt in enumerate(prompts):
+            # Generate text
+            generated_text = self._generate_text(prompt, max_length)
+            # Calculate perplexity
+            try:
+                perplexity = self.perplexity_calc.calculate_perplexity(generated_text)
+                results.append(EvaluationResult(
+                    metric_name="perplexity",
+                    score=perplexity,
+                    details={"prompt_index": i, "generated_text": generated_text[:100]}
+                ))
+            except Exception as e:
+                logging.warning(f"Failed to calculate perplexity: {e}")
+            # Calculate coherence metrics
+            coherence_metrics = self.coherence_analyzer.analyze_coherence(generated_text)
+            for metric_name, score in coherence_metrics.items():
+                results.append(EvaluationResult(
+                    metric_name=metric_name,
+                    score=score,
+                    details={"prompt_index": i}
+                ))
+            # Calculate BLEU and ROUGE if references are provided
+            if references and i < len(references):
+                reference = references[i]
+                bleu_score = self.bleu_calc.calculate_bleu(reference, generated_text)
+                results.append(EvaluationResult(
+                    metric_name="bleu_score",
+                    score=bleu_score,
+                    details={"prompt_index": i, "reference": reference[:100]}
+                ))
+                rouge_score = self.rouge_calc.calculate_rouge_l(reference, generated_text)
+                results.append(EvaluationResult(
+                    metric_name="rouge_l",
+                    score=rouge_score,
+                    details={"prompt_index": i}
+                ))
+        return results
+    def _generate_text(self, prompt: str, max_length: int) -> str:
+        """Generate text from prompt"""
+        inputs = self.tokenizer.encode(prompt, return_tensors="pt").to(next(self.model.parameters()).device)
+        with torch.no_grad():
+            outputs = self.model.generate(
+                inputs,
+                max_length=max_length,
+                do_sample=True,
+                temperature=0.7,
+                pad_token_id=self.tokenizer.eos_token_id
+            )
+        generated_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+        # Remove the original prompt
+        generated_text = generated_text[len(prompt):].strip()
+        return generated_text
+class MambaSwarmEvaluator:
+    """Main evaluator for Mamba Swarm models"""
+    def __init__(self, swarm_engine, config: Optional[Dict[str, Any]] = None):
+        self.swarm_engine = swarm_engine
+        self.config = config or {}
+        self.logger = logging.getLogger(__name__)
+        # Initialize evaluators
+        self.quality_evaluator = None
+        self._initialize_evaluators()
+        # Benchmark datasets
+        self.benchmark_prompts = [
+            "The future of artificial intelligence is",
+            "In a world where technology advances rapidly,",
+            "The most important challenge facing humanity today is",
+            "Scientific discoveries have always been driven by",
+            "The relationship between humans and machines will"
+        ]
+    def _initialize_evaluators(self):
+        """Initialize quality evaluators"""
+        try:
+            # Get model and tokenizer from swarm engine
+            model = self.swarm_engine.get_model()
+            tokenizer = self.swarm_engine.get_tokenizer()
+            if model and tokenizer:
+                self.quality_evaluator = QualityEvaluator(model, tokenizer)
+        except Exception as e:
+            self.logger.warning(f"Failed to initialize evaluators: {e}")
+    def run_comprehensive_evaluation(self) -> BenchmarkResult:
+        """Run comprehensive evaluation of the Mamba Swarm"""
+        start_time = time.time()
+        all_results = []
+        # Performance benchmarks
+        performance_results = self._evaluate_performance()
+        all_results.extend(performance_results)
+        # Quality benchmarks
+        if self.quality_evaluator:
+            quality_results = self._evaluate_quality()
+            all_results.extend(quality_results)
+        # Scalability benchmarks
+        scalability_results = self._evaluate_scalability()
+        all_results.extend(scalability_results)
+        # Resource utilization
+        resource_results = self._evaluate_resource_utilization()
+        all_results.extend(resource_results)
+        # Calculate overall score
+        overall_score = self._calculate_overall_score(all_results)
+        execution_time = time.time() - start_time
+        # Get model info
+        model_info = self.swarm_engine.get_model_info()
+        return BenchmarkResult(
+            benchmark_name="comprehensive_evaluation",
+            overall_score=overall_score,
+            individual_metrics=all_results,
+            execution_time=execution_time,
+            model_info=model_info
+        )
+    def _evaluate_performance(self) -> List[EvaluationResult]:
+        """Evaluate performance metrics"""
+        results = []
+        try:
+            # Latency benchmark
+            if self.quality_evaluator:
+                latency_metrics = self.quality_evaluator.latency_benchmark.benchmark_inference_speed(
+                    self.benchmark_prompts[:3]  # Use subset for speed
+                )
+                for metric_name, score in latency_metrics.items():
+                    results.append(EvaluationResult(
+                        metric_name=f"performance_{metric_name}",
+                        score=score,
+                        details={"category": "performance"}
+                    ))
+            # Throughput test
+            throughput = self._measure_throughput()
+            results.append(EvaluationResult(
+                metric_name="throughput_requests_per_sec",
+                score=throughput,
+                details={"category": "performance"}
+            ))
+        except Exception as e:
+            self.logger.error(f"Performance evaluation failed: {e}")
+        return results
+    def _evaluate_quality(self) -> List[EvaluationResult]:
+        """Evaluate generation quality"""
+        results = []
+        try:
+            # Quality evaluation
+            quality_results = self.quality_evaluator.evaluate_generation_quality(
+                self.benchmark_prompts
+            )
+            # Add category to results
+            for result in quality_results:
+                result.details["category"] = "quality"
+                results.append(result)
+        except Exception as e:
+            self.logger.error(f"Quality evaluation failed: {e}")
+        return results
+    def _evaluate_scalability(self) -> List[EvaluationResult]:
+        """Evaluate scalability metrics"""
+        results = []
+        try:
+            # Test with different loads
+            load_levels = [1, 5, 10]
+            for load in load_levels:
+                start_time = time.time()
+                # Simulate concurrent requests
+                tasks = []
+                for _ in range(load):
+                    task = self._simulate_inference_request()
+                    tasks.append(task)
+                # Wait for completion
+                success_count = sum(1 for task in tasks if task)
+                total_time = time.time() - start_time
+                # Calculate metrics
+                success_rate = success_count / load
+                avg_response_time = total_time / load
+                results.append(EvaluationResult(
+                    metric_name=f"scalability_success_rate_load_{load}",
+                    score=success_rate,
+                    details={"category": "scalability", "load_level": load}
+                ))
+                results.append(EvaluationResult(
+                    metric_name=f"scalability_avg_response_time_load_{load}",
+                    score=avg_response_time,
+                    details={"category": "scalability", "load_level": load}
+                ))
+        except Exception as e:
+            self.logger.error(f"Scalability evaluation failed: {e}")
+        return results
+    def _evaluate_resource_utilization(self) -> List[EvaluationResult]:
+        """Evaluate resource utilization"""
+        results = []
+        try:
+            # Get memory stats
+            memory_stats = self.swarm_engine.memory_manager.get_memory_stats()
+            results.append(EvaluationResult(
+                metric_name="memory_utilization_gb",
+                score=memory_stats.used_memory,
+                details={"category": "resources", "type": "memory"}
+            ))
+            results.append(EvaluationResult(
+                metric_name="gpu_memory_utilization_gb",
+                score=memory_stats.gpu_memory,
+                details={"category": "resources", "type": "gpu_memory"}
+            ))
+            # Encoder utilization
+            active_encoders = len(self.swarm_engine.get_active_encoders())
+            total_encoders = 100  # As specified in requirements
+            results.append(EvaluationResult(
+                metric_name="encoder_utilization_ratio",
+                score=active_encoders / total_encoders,
+                details={"category": "resources", "active": active_encoders, "total": total_encoders}
+            ))
+        except Exception as e:
+            self.logger.error(f"Resource evaluation failed: {e}")
+        return results
+    def _measure_throughput(self) -> float:
+        """Measure system throughput"""
+        try:
+            num_requests = 10
+            start_time = time.time()
+            for _ in range(num_requests):
+                self._simulate_inference_request()
+            total_time = time.time() - start_time
+            throughput = num_requests / total_time
+            return throughput
+        except Exception as e:
+            self.logger.error(f"Throughput measurement failed: {e}")
+            return 0.0
+    def _simulate_inference_request(self) -> bool:
+        """Simulate an inference request"""
+        try:
+            prompt = "This is a test prompt for evaluation."
+            result = self.swarm_engine.generate(prompt, max_length=50)
+            return result is not None
+        except Exception as e:
+            self.logger.error(f"Simulated request failed: {e}")
+            return False
+    def _calculate_overall_score(self, results: List[EvaluationResult]) -> float:
+        """Calculate overall benchmark score"""
+        if not results:
+            return 0.0
+        # Weight different categories
+        weights = {
+            "performance": 0.3,
+            "quality": 0.4,
+            "scalability": 0.2,
+            "resources": 0.1
+        }
+        category_scores = defaultdict(list)
+        for result in results:
+            category = result.details.get("category", "other")
+            # Normalize scores based on metric type
+            normalized_score = self._normalize_score(result)
+            category_scores[category].append(normalized_score)
+        # Calculate weighted average
+        total_score = 0.0
+        total_weight = 0.0
+        for category, scores in category_scores.items():
+            if category in weights and scores:
+                avg_score = sum(scores) / len(scores)
+                weight = weights[category]
+                total_score += avg_score * weight
+                total_weight += weight
+        return total_score / total_weight if total_weight > 0 else 0.0
+    def _normalize_score(self, result: EvaluationResult) -> float:
+        """Normalize score to 0-1 range"""
+        metric_name = result.metric_name
+        score = result.score
+        # Define normalization rules for different metrics
+        if "perplexity" in metric_name:
+            # Lower is better, normalize to 0-1 where 1 is best
+            return max(0.0, 1.0 - min(score / 100.0, 1.0))
+        elif "latency" in metric_name or "response_time" in metric_name:
+            # Lower is better, normalize based on reasonable thresholds
+            return max(0.0, 1.0 - min(score / 1000.0, 1.0))  # 1 second threshold
+        elif "throughput" in metric_name:
+            # Higher is better, normalize based on expected range
+            return min(score / 100.0, 1.0)  # 100 requests/sec as max
+        elif "success_rate" in metric_name or "utilization" in metric_name:
+            # Already in 0-1 range
+            return score
+        else:
+            # Default: assume higher is better and clamp to 0-1
+            return min(max(score, 0.0), 1.0)
+    def export_evaluation_report(self, result: BenchmarkResult, filename: Optional[str] = None) -> str:
+        """Export evaluation report to file"""
+        if not filename:
+            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+            filename = f"mamba_swarm_evaluation_{timestamp}.json"
+        # Convert to serializable format
+        report = {
+            "benchmark_name": result.benchmark_name,
+            "overall_score": result.overall_score,
+            "execution_time": result.execution_time,
+            "timestamp": result.timestamp,
+            "model_info": result.model_info,
+            "metrics": [
+                {
+                    "name": metric.metric_name,
+                    "score": metric.score,
+                    "details": metric.details,
+                    "timestamp": metric.timestamp
+                }
+                for metric in result.individual_metrics
+            ]
+        }
+        with open(filename, 'w') as f:
+            json.dump(report, f, indent=2, default=str)
+        self.logger.info(f"Evaluation report saved to {filename}")
+        return filename
+# Example usage
+if __name__ == "__main__":
+    # This would be used with actual SwarmEngine instance
+    # evaluator = MambaSwarmEvaluator(swarm_engine)
+    # result = evaluator.run_comprehensive_evaluation()
+    # report_file = evaluator.export_evaluation_report(result)
+    # Demo of individual components
+    print("Mamba Swarm Evaluator components initialized successfully")
+    # Example BLEU calculation
+    bleu_calc = BLEUScore()
+    reference = "The quick brown fox jumps over the lazy dog"
+    candidate = "The fast brown fox leaps over the sleepy dog"
+    bleu_score = bleu_calc.calculate_bleu(reference, candidate)
+    print(f"BLEU score: {bleu_score:.3f}")
+    # Example coherence analysis
+    coherence_analyzer = CoherenceAnalyzer()
+    text = "This is a coherent text. It flows well from sentence to sentence. The ideas are connected logically."
+    coherence_metrics = coherence_analyzer.analyze_coherence(text)
+    print(f"Coherence metrics: {coherence_metrics}")

monitoring/metrics.py ADDED Viewed

	@@ -0,0 +1,609 @@

+"""
+Metrics Collection and Monitoring System for Mamba Swarm
+Tracks performance, resource usage, and model behavior
+"""
+import time
+import threading
+import json
+import logging
+from typing import Dict, List, Any, Optional, Callable
+from dataclasses import dataclass, field, asdict
+from collections import defaultdict, deque
+from enum import Enum
+import torch
+import psutil
+import numpy as np
+from datetime import datetime, timedelta
+class MetricType(Enum):
+    """Kinds of metric; each member's value is its lowercase name as a string."""
+    COUNTER = "counter"
+    GAUGE = "gauge"
+    HISTOGRAM = "histogram"
+    SUMMARY = "summary"
+@dataclass
+class MetricPoint:
+    """One timestamped metric sample with optional labels."""
+    # Time of the sample; presumably a time.time() epoch value - TODO confirm with producers.
+    timestamp: float
+    # Sampled value.
+    value: float
+    # Label name -> label value; each instance gets its own dict.
+    labels: Dict[str, str] = field(default_factory=dict)
+@dataclass
+class HistogramBucket:
+    """A histogram bucket: an upper bound together with an observation count."""
+    # Upper bound of the bucket.
+    upper_bound: float
+    # Number of observations recorded for the bucket; starts at 0.
+    count: int = 0
+class Metric:
+    """Base metric class"""
+    def __init__(self, name: str, description: str, labels: Optional[List[str]] = None):
+        self.name = name
+        self.description = description
+        self.labels = labels or []
+        self.lock = threading.Lock()
+        self.created_at = time.time()
+class Counter(Metric):
+    """Counter metric - monotonically increasing"""
+    def __init__(self, name: str, description: str, labels: Optional[List[str]] = None):
+        super().__init__(name, description, labels)
+        self.values = defaultdict(float)
+    def inc(self, value: float = 1.0, **label_values):
+        """Increment counter"""
+        label_key = self._make_label_key(label_values)
+        with self.lock:
+            self.values[label_key] += value
+    def get(self, **label_values) -> float:
+        """Get counter value"""
+        label_key = self._make_label_key(label_values)
+        return self.values.get(label_key, 0.0)
+    def _make_label_key(self, label_values: Dict[str, str]) -> str:
+        """Create key from label values"""
+        return "|".join(f"{k}={v}" for k, v in sorted(label_values.items()))
+class Gauge(Metric):
+    """Gauge metric - can go up and down"""
+    def __init__(self, name: str, description: str, labels: Optional[List[str]] = None):
+        super().__init__(name, description, labels)
+        self.values = defaultdict(float)
+    def set(self, value: float, **label_values):
+        """Set gauge value"""
+        label_key = self._make_label_key(label_values)
+        with self.lock:
+            self.values[label_key] = value
+    def inc(self, value: float = 1.0, **label_values):
+        """Increment gauge"""
+        label_key = self._make_label_key(label_values)
+        with self.lock:
+            self.values[label_key] += value
+    def dec(self, value: float = 1.0, **label_values):
+        """Decrement gauge"""
+        self.inc(-value, **label_values)
+    def get(self, **label_values) -> float:
+        """Get gauge value"""
+        label_key = self._make_label_key(label_values)
+        return self.values.get(label_key, 0.0)
+    def _make_label_key(self, label_values: Dict[str, str]) -> str:
+        return "|".join(f"{k}={v}" for k, v in sorted(label_values.items()))
+class Histogram(Metric):
+    """Histogram metric - tracks distribution of values"""
+    def __init__(self, name: str, description: str, buckets: Optional[List[float]] = None, labels: Optional[List[str]] = None):
+        super().__init__(name, description, labels)
+        self.buckets = buckets or [0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, float('inf')]
+        self.bucket_counts = defaultdict(lambda: defaultdict(int))
+        self.sums = defaultdict(float)
+        self.counts = defaultdict(int)
+    def observe(self, value: float, **label_values):
+        """Observe a value"""
+        label_key = self._make_label_key(label_values)
+        with self.lock:
+            self.sums[label_key] += value
+            self.counts[label_key] += 1
+            for bucket in self.buckets:
+                if value <= bucket:
+                    self.bucket_counts[label_key][bucket] += 1
+    def get_buckets(self, **label_values) -> Dict[float, int]:
+        """Get bucket counts"""
+        label_key = self._make_label_key(label_values)
+        return dict(self.bucket_counts[label_key])
+    def get_sum(self, **label_values) -> float:
+        """Get sum of observed values"""
+        label_key = self._make_label_key(label_values)
+        return self.sums[label_key]
+    def get_count(self, **label_values) -> int:
+        """Get count of observations"""
+        label_key = self._make_label_key(label_values)
+        return self.counts[label_key]
+    def _make_label_key(self, label_values: Dict[str, str]) -> str:
+        return "|".join(f"{k}={v}" for k, v in sorted(label_values.items()))
+class Summary(Metric):
+    """Summary metric - tracks quantiles"""
+    def __init__(self, name: str, description: str, quantiles: Optional[List[float]] = None, labels: Optional[List[str]] = None, max_age: float = 300.0):
+        super().__init__(name, description, labels)
+        self.quantiles = quantiles or [0.5, 0.9, 0.95, 0.99]
+        self.max_age = max_age
+        self.observations = defaultdict(lambda: deque())
+        self.sums = defaultdict(float)
+        self.counts = defaultdict(int)
+    def observe(self, value: float, **label_values):
+        """Observe a value"""
+        label_key = self._make_label_key(label_values)
+        timestamp = time.time()
+        with self.lock:
+            self.observations[label_key].append((timestamp, value))
+            self.sums[label_key] += value
+            self.counts[label_key] += 1
+            # Clean old observations
+            self._clean_old_observations(label_key, timestamp)
+    def get_quantile(self, quantile: float, **label_values) -> float:
+        """Get quantile value"""
+        label_key = self._make_label_key(label_values)
+        with self.lock:
+            obs = self.observations[label_key]
+            if not obs:
+                return 0.0
+            values = [v for _, v in obs]
+            values.sort()
+            index = int(quantile * len(values))
+            return values[min(index, len(values) - 1)]
+    def get_sum(self, **label_values) -> float:
+        """Get sum of observed values"""
+        label_key = self._make_label_key(label_values)
+        return self.sums[label_key]
+    def get_count(self, **label_values) -> int:
+        """Get count of observations"""
+        label_key = self._make_label_key(label_values)
+        return self.counts[label_key]
+    def _clean_old_observations(self, label_key: str, current_time: float):
+        """Remove old observations"""
+        cutoff_time = current_time - self.max_age
+        obs = self.observations[label_key]
+        while obs and obs[0][0] < cutoff_time:
+            _, value = obs.popleft()
+            self.sums[label_key] -= value
+            self.counts[label_key] -= 1
+    def _make_label_key(self, label_values: Dict[str, str]) -> str:
+        return "|".join(f"{k}={v}" for k, v in sorted(label_values.items()))
+class MetricsRegistry:
+    """Registry for all metrics"""
+    def __init__(self):
+        self.metrics: Dict[str, Metric] = {}
+        self.lock = threading.Lock()
+    def register(self, metric: Metric):
+        """Register a metric"""
+        with self.lock:
+            if metric.name in self.metrics:
+                raise ValueError(f"Metric {metric.name} already registered")
+            self.metrics[metric.name] = metric
+    def get_metric(self, name: str) -> Optional[Metric]:
+        """Get metric by name"""
+        return self.metrics.get(name)
+    def get_all_metrics(self) -> Dict[str, Metric]:
+        """Get all metrics"""
+        return self.metrics.copy()
+class MambaSwarmMetrics:
+    """Metrics collector for Mamba Swarm"""
+    def __init__(self):
+        self.registry = MetricsRegistry()
+        self.logger = logging.getLogger(__name__)
+        self._setup_default_metrics()
+        # System monitoring
+        self.monitoring_thread = None
+        self.monitoring_interval = 10.0  # seconds
+        self.should_monitor = False
+    def _setup_default_metrics(self):
+        """Setup default metrics"""
+        # Request metrics
+        self.requests_total = Counter("requests_total", "Total number of requests", ["method", "endpoint", "status"])
+        self.request_duration = Histogram("request_duration_seconds", "Request duration in seconds", labels=["method", "endpoint"])
+        # Model metrics
+        self.inference_duration = Histogram("inference_duration_seconds", "Inference duration in seconds", labels=["model_unit"])
+        self.tokens_generated = Counter("tokens_generated_total", "Total tokens generated", ["model_unit"])
+        self.model_load = Gauge("model_load", "Current model load", ["model_unit"])
+        # System metrics
+        self.memory_usage = Gauge("memory_usage_bytes", "Memory usage in bytes", ["type"])
+        self.gpu_utilization = Gauge("gpu_utilization_percent", "GPU utilization percentage", ["device"])
+        self.active_connections = Gauge("active_connections", "Number of active connections")
+        # Swarm metrics
+        self.encoder_utilization = Gauge("encoder_utilization", "Encoder utilization", ["encoder_id"])
+        self.routing_decisions = Counter("routing_decisions_total", "Routing decisions", ["strategy", "target"])
+        self.load_balancing_decisions = Counter("load_balancing_decisions_total", "Load balancing decisions", ["algorithm"])
+        # Error metrics
+        self.errors_total = Counter("errors_total", "Total number of errors", ["type", "component"])
+        # Register all metrics
+        for attr_name in dir(self):
+            attr = getattr(self, attr_name)
+            if isinstance(attr, Metric):
+                self.registry.register(attr)
+    def start_monitoring(self):
+        """Start system monitoring"""
+        if self.monitoring_thread is not None:
+            return
+        self.should_monitor = True
+        self.monitoring_thread = threading.Thread(target=self._monitoring_loop, daemon=True)
+        self.monitoring_thread.start()
+        self.logger.info("Started metrics monitoring")
+    def stop_monitoring(self):
+        """Stop system monitoring"""
+        self.should_monitor = False
+        if self.monitoring_thread:
+            self.monitoring_thread.join(timeout=5.0)
+            self.monitoring_thread = None
+        self.logger.info("Stopped metrics monitoring")
+    def _monitoring_loop(self):
+        """System monitoring loop"""
+        while self.should_monitor:
+            try:
+                self._collect_system_metrics()
+                time.sleep(self.monitoring_interval)
+            except Exception as e:
+                self.logger.error(f"Error in monitoring loop: {e}")
+    def _collect_system_metrics(self):
+        """Collect system metrics"""
+        # Memory metrics
+        memory = psutil.virtual_memory()
+        self.memory_usage.set(memory.used, type="system")
+        self.memory_usage.set(memory.available, type="available")
+        # GPU metrics
+        if torch.cuda.is_available():
+            for i in range(torch.cuda.device_count()):
+                # GPU memory
+                gpu_memory = torch.cuda.memory_allocated(i)
+                self.memory_usage.set(gpu_memory, type=f"gpu_{i}")
+                # GPU utilization (simplified)
+                # In practice, you might use nvidia-ml-py for more detailed metrics
+                utilization = min(100.0, (gpu_memory / torch.cuda.max_memory_allocated(i)) * 100) if torch.cuda.max_memory_allocated(i) > 0 else 0.0
+                self.gpu_utilization.set(utilization, device=f"cuda:{i}")
+    def record_request(self, method: str, endpoint: str, status_code: int, duration: float):
+        """Record request metrics"""
+        self.requests_total.inc(method=method, endpoint=endpoint, status=str(status_code))
+        self.request_duration.observe(duration, method=method, endpoint=endpoint)
+    def record_inference(self, model_unit: str, duration: float, tokens: int):
+        """Record inference metrics"""
+        self.inference_duration.observe(duration, model_unit=model_unit)
+        self.tokens_generated.inc(tokens, model_unit=model_unit)
+    def record_error(self, error_type: str, component: str):
+        """Record error metrics"""
+        self.errors_total.inc(type=error_type, component=component)
+    def update_model_load(self, model_unit: str, load: float):
+        """Update model load"""
+        self.model_load.set(load, model_unit=model_unit)
+    def update_encoder_utilization(self, encoder_id: str, utilization: float):
+        """Update encoder utilization"""
+        self.encoder_utilization.set(utilization, encoder_id=encoder_id)
+    def record_routing_decision(self, strategy: str, target: str):
+        """Record routing decision"""
+        self.routing_decisions.inc(strategy=strategy, target=target)
+    def get_metrics_summary(self) -> Dict[str, Any]:
+        """Get metrics summary"""
+        summary = {}
+        for name, metric in self.registry.get_all_metrics().items():
+            if isinstance(metric, Counter):
+                summary[name] = {
+                    "type": "counter",
+                    "values": dict(metric.values)
+                }
+            elif isinstance(metric, Gauge):
+                summary[name] = {
+                    "type": "gauge",
+                    "values": dict(metric.values)
+                }
+            elif isinstance(metric, Histogram):
+                summary[name] = {
+                    "type": "histogram",
+                    "buckets": {k: dict(v) for k, v in metric.bucket_counts.items()},
+                    "sums": dict(metric.sums),
+                    "counts": dict(metric.counts)
+                }
+            elif isinstance(metric, Summary):
+                summary[name] = {
+                    "type": "summary",
+                    "sums": dict(metric.sums),
+                    "counts": dict(metric.counts),
+                    "quantiles": {
+                        q: {k: metric.get_quantile(q, **self._parse_label_key(k)) for k in metric.observations.keys()}
+                        for q in metric.quantiles
+                    }
+                }
+        return summary
+    def _parse_label_key(self, label_key: str) -> Dict[str, str]:
+        """Parse label key back to dictionary"""
+        if not label_key:
+            return {}
+        labels = {}
+        for pair in label_key.split("|"):
+            if "=" in pair:
+                k, v = pair.split("=", 1)
+                labels[k] = v
+        return labels
+    def export_prometheus_format(self) -> str:
+        """Export metrics in Prometheus format"""
+        output = []
+        for name, metric in self.registry.get_all_metrics().items():
+            # Help text
+            output.append(f"# HELP {name} {metric.description}")
+            if isinstance(metric, Counter):
+                output.append(f"# TYPE {name} counter")
+                for label_key, value in metric.values.items():
+                    labels = self._format_prometheus_labels(label_key)
+                    output.append(f"{name}{labels} {value}")
+            elif isinstance(metric, Gauge):
+                output.append(f"# TYPE {name} gauge")
+                for label_key, value in metric.values.items():
+                    labels = self._format_prometheus_labels(label_key)
+                    output.append(f"{name}{labels} {value}")
+            elif isinstance(metric, Histogram):
+                output.append(f"# TYPE {name} histogram")
+                for label_key in metric.bucket_counts.keys():
+                    labels_dict = self._parse_label_key(label_key)
+                    # Buckets
+                    for bucket, count in metric.bucket_counts[label_key].items():
+                        bucket_labels = {**labels_dict, "le": str(bucket)}
+                        bucket_label_str = self._format_prometheus_labels_dict(bucket_labels)
+                        output.append(f"{name}_bucket{bucket_label_str} {count}")
+                    # Sum and count
+                    base_labels = self._format_prometheus_labels(label_key)
+                    output.append(f"{name}_sum{base_labels} {metric.sums[label_key]}")
+                    output.append(f"{name}_count{base_labels} {metric.counts[label_key]}")
+            elif isinstance(metric, Summary):
+                output.append(f"# TYPE {name} summary")
+                for label_key in metric.observations.keys():
+                    labels_dict = self._parse_label_key(label_key)
+                    # Quantiles
+                    for quantile in metric.quantiles:
+                        quantile_labels = {**labels_dict, "quantile": str(quantile)}
+                        quantile_label_str = self._format_prometheus_labels_dict(quantile_labels)
+                        quantile_value = metric.get_quantile(quantile, **labels_dict)
+                        output.append(f"{name}{quantile_label_str} {quantile_value}")
+                    # Sum and count
+                    base_labels = self._format_prometheus_labels(label_key)
+                    output.append(f"{name}_sum{base_labels} {metric.sums[label_key]}")
+                    output.append(f"{name}_count{base_labels} {metric.counts[label_key]}")
+            output.append("")  # Empty line between metrics
+        return "\n".join(output)
+    def _format_prometheus_labels(self, label_key: str) -> str:
+        """Format labels for Prometheus"""
+        if not label_key:
+            return ""
+        labels = self._parse_label_key(label_key)
+        return self._format_prometheus_labels_dict(labels)
+    def _format_prometheus_labels_dict(self, labels: Dict[str, str]) -> str:
+        """Format label dictionary for Prometheus"""
+        if not labels:
+            return ""
+        formatted_labels = []
+        for k, v in sorted(labels.items()):
+            # Escape quotes and backslashes
+            escaped_value = v.replace("\\", "\\\\").replace('"', '\\"')
+            formatted_labels.append(f'{k}="{escaped_value}"')
+        return "{" + ",".join(formatted_labels) + "}"
+# Context managers for timing
+class timer:
+    """Context manager for timing operations"""
+    def __init__(self, metric: Histogram, **labels):
+        self.metric = metric
+        self.labels = labels
+        self.start_time = None
+    def __enter__(self):
+        self.start_time = time.time()
+        return self
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        if self.start_time is not None:
+            duration = time.time() - self.start_time
+            self.metric.observe(duration, **self.labels)
+class request_timer:
+    """Context manager for timing requests"""
+    def __init__(self, metrics: MambaSwarmMetrics, method: str, endpoint: str):
+        self.metrics = metrics
+        self.method = method
+        self.endpoint = endpoint
+        self.start_time = None
+        self.status_code = 200
+    def __enter__(self):
+        self.start_time = time.time()
+        return self
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        if exc_type is not None:
+            self.status_code = 500
+        if self.start_time is not None:
+            duration = time.time() - self.start_time
+            self.metrics.record_request(self.method, self.endpoint, self.status_code, duration)
+    def set_status(self, status_code: int):
+        """Set the response status code"""
+        self.status_code = status_code
+# Decorator for automatic metrics collection
+def measure_time(metric_name: str, **labels):
+    """Decorator to measure function execution time"""
+    def decorator(func):
+        def wrapper(*args, **kwargs):
+            # Assume first argument is self and has metrics attribute
+            if args and hasattr(args[0], 'metrics'):
+                metrics = args[0].metrics
+                metric = metrics.registry.get_metric(metric_name)
+                if metric and isinstance(metric, Histogram):
+                    with timer(metric, **labels):
+                        return func(*args, **kwargs)
+            return func(*args, **kwargs)
+        return wrapper
+    return decorator
+# Metrics aggregator for multiple instances
+class MetricsAggregator:
+    """Aggregates metrics from multiple Mamba Swarm instances"""
+    def __init__(self):
+        self.instances: Dict[str, MambaSwarmMetrics] = {}
+        self.lock = threading.Lock()
+    def add_instance(self, instance_id: str, metrics: MambaSwarmMetrics):
+        """Add metrics instance"""
+        with self.lock:
+            self.instances[instance_id] = metrics
+    def remove_instance(self, instance_id: str):
+        """Remove metrics instance"""
+        with self.lock:
+            self.instances.pop(instance_id, None)
+    def get_aggregated_summary(self) -> Dict[str, Any]:
+        """Get aggregated metrics summary"""
+        aggregated = defaultdict(lambda: defaultdict(float))
+        with self.lock:
+            for instance_id, metrics in self.instances.items():
+                summary = metrics.get_metrics_summary()
+                for metric_name, metric_data in summary.items():
+                    if metric_data["type"] in ["counter", "gauge"]:
+                        for label_key, value in metric_data["values"].items():
+                            key = f"{metric_name}_{label_key}" if label_key else metric_name
+                            if metric_data["type"] == "counter":
+                                aggregated[key]["sum"] += value
+                            else:  # gauge
+                                aggregated[key]["avg"] = (aggregated[key].get("avg", 0) + value) / 2
+                                aggregated[key]["instances"] = aggregated[key].get("instances", 0) + 1
+        return dict(aggregated)
+# FastAPI integration
+from fastapi import FastAPI, Response
+from fastapi.responses import PlainTextResponse
+def add_metrics_endpoints(app: FastAPI, metrics: MambaSwarmMetrics):
+    """Add metrics endpoints to FastAPI app"""
+    @app.get("/metrics")
+    async def get_metrics():
+        """Get metrics in JSON format"""
+        return metrics.get_metrics_summary()
+    @app.get("/metrics/prometheus")
+    async def get_prometheus_metrics():
+        """Get metrics in Prometheus format"""
+        prometheus_data = metrics.export_prometheus_format()
+        return PlainTextResponse(prometheus_data, media_type="text/plain")
+    @app.middleware("http")
+    async def metrics_middleware(request, call_next):
+        """Middleware to collect request metrics"""
+        method = request.method
+        path = request.url.path
+        with request_timer(metrics, method, path) as timer_ctx:
+            response = await call_next(request)
+            timer_ctx.set_status(response.status_code)
+            return response
+# Example usage
+# Demo: record a few sample metrics and print them in both export formats.
+if __name__ == "__main__":
+    # Create metrics instance
+    metrics = MambaSwarmMetrics()
+    # Starts the background thread that samples system and GPU memory.
+    metrics.start_monitoring()
+    # Example metric recording
+    metrics.record_request("POST", "/generate", 200, 0.5)
+    metrics.record_inference("encoder_1", 0.3, 50)
+    metrics.update_encoder_utilization("encoder_1", 0.8)
+    # Get summary
+    summary = metrics.get_metrics_summary()
+    print(json.dumps(summary, indent=2))
+    # Export Prometheus format
+    prometheus_data = metrics.export_prometheus_format()
+    print("\nPrometheus format:")
+    print(prometheus_data)
+    # Signal the sampling thread to stop and wait briefly for it.
+    metrics.stop_monitoring()

monitoring/profiler.py ADDED Viewed

	@@ -0,0 +1,579 @@

+"""
+Performance Profiler for Mamba Swarm
+Advanced profiling tools for performance analysis and optimization
+"""
+import time
+import cProfile
+import pstats
+import io
+import threading
+import functools
+import traceback
+import psutil
+import torch
+import numpy as np
+from typing import Dict, List, Any, Optional, Callable, Union
+from dataclasses import dataclass, field
+from collections import defaultdict, deque
+from contextlib import contextmanager
+import logging
+import json
+from datetime import datetime
+import os
+import gc
+@dataclass
+class ProfileResult:
+    """Timing statistics for a single function, filled in from cProfile/pstats data."""
+    function_name: str  # name component of the pstats function key
+    total_time: float  # total time recorded for the function
+    cumulative_time: float  # cumulative time; CPUProfiler sorts its results on this, largest first
+    call_count: int  # number of recorded calls
+    per_call_time: float  # total_time / call_count, or 0.0 when call_count is 0
+    filename: str  # file component of the pstats function key
+    line_number: int  # line component of the pstats function key
+@dataclass
+class MemorySnapshot:
+    """One memory sample taken by MemoryProfiler."""
+    timestamp: float  # time.time() at which the sample was taken
+    total_memory: float  # system memory in use: psutil.virtual_memory().used / 1024**3
+    gpu_memory: float  # torch.cuda.memory_allocated() / 1024**3; 0.0 when CUDA is unavailable
+    python_objects: int  # number of objects returned by gc.get_objects()
+    tensor_count: int  # how many of those objects are torch.Tensor instances
+    cache_size: float  # placeholder; the profiler currently always stores 0.0
+@dataclass
+class PerformanceProfile:
+    """Result of one profiling session as assembled by MambaSwarmProfiler.stop_profiling()."""
+    timestamp: float  # time.time() when profiling was stopped
+    duration: float  # seconds between start_profiling() and stop_profiling()
+    cpu_usage: float  # psutil.cpu_percent() sampled when profiling stopped
+    memory_usage: float  # psutil.virtual_memory().percent sampled when profiling stopped
+    gpu_usage: float  # allocated CUDA memory as a percentage of the peak allocation; 0.0 without CUDA
+    function_calls: List[ProfileResult]  # per-function results from the CPU profiler
+    memory_snapshots: List[MemorySnapshot]  # copy of the memory profiler's retained samples
+    bottlenecks: List[str]  # human-readable bottleneck findings
+    recommendations: List[str]  # human-readable optimization suggestions
+class FunctionTimer:
+    """Timer for individual function calls"""
+    def __init__(self, name: str):
+        self.name = name
+        self.calls = []
+        self.total_time = 0.0
+        self.call_count = 0
+        self.min_time = float('inf')
+        self.max_time = 0.0
+        self.lock = threading.Lock()
+    def add_call(self, duration: float):
+        """Add a function call duration"""
+        with self.lock:
+            self.calls.append(duration)
+            self.total_time += duration
+            self.call_count += 1
+            self.min_time = min(self.min_time, duration)
+            self.max_time = max(self.max_time, duration)
+            # Keep only recent calls
+            if len(self.calls) > 1000:
+                old_call = self.calls.pop(0)
+                self.total_time -= old_call
+                self.call_count -= 1
+    @property
+    def avg_time(self) -> float:
+        return self.total_time / max(self.call_count, 1)
+    @property
+    def percentile_95(self) -> float:
+        if not self.calls:
+            return 0.0
+        sorted_calls = sorted(self.calls)
+        index = int(0.95 * len(sorted_calls))
+        return sorted_calls[min(index, len(sorted_calls) - 1)]
+    def get_stats(self) -> Dict[str, Any]:
+        return {
+            "name": self.name,
+            "total_time": self.total_time,
+            "call_count": self.call_count,
+            "avg_time": self.avg_time,
+            "min_time": self.min_time if self.min_time != float('inf') else 0.0,
+            "max_time": self.max_time,
+            "percentile_95": self.percentile_95
+        }
+class MemoryProfiler:
+    """Memory usage profiler"""
+    def __init__(self, sample_interval: float = 0.1):
+        self.sample_interval = sample_interval
+        self.snapshots = deque(maxlen=1000)
+        self.monitoring = False
+        self.monitor_thread = None
+        self.lock = threading.Lock()
+    def start_monitoring(self):
+        """Start memory monitoring"""
+        if self.monitoring:
+            return
+        self.monitoring = True
+        self.monitor_thread = threading.Thread(target=self._monitor_loop, daemon=True)
+        self.monitor_thread.start()
+    def stop_monitoring(self):
+        """Stop memory monitoring"""
+        self.monitoring = False
+        if self.monitor_thread:
+            self.monitor_thread.join(timeout=1.0)
+    def _monitor_loop(self):
+        """Memory monitoring loop"""
+        while self.monitoring:
+            try:
+                snapshot = self._take_snapshot()
+                with self.lock:
+                    self.snapshots.append(snapshot)
+                time.sleep(self.sample_interval)
+            except Exception as e:
+                logging.error(f"Memory monitoring error: {e}")
+    def _take_snapshot(self) -> MemorySnapshot:
+        """Take a memory snapshot"""
+        # System memory
+        memory = psutil.virtual_memory()
+        total_memory = memory.used / (1024**3)  # GB
+        # GPU memory
+        gpu_memory = 0.0
+        if torch.cuda.is_available():
+            gpu_memory = torch.cuda.memory_allocated() / (1024**3)
+        # Python objects
+        python_objects = len(gc.get_objects())
+        # Tensor count
+        tensor_count = 0
+        for obj in gc.get_objects():
+            if isinstance(obj, torch.Tensor):
+                tensor_count += 1
+        # Cache size estimation
+        cache_size = 0.0  # Could be calculated based on specific cache implementations
+        return MemorySnapshot(
+            timestamp=time.time(),
+            total_memory=total_memory,
+            gpu_memory=gpu_memory,
+            python_objects=python_objects,
+            tensor_count=tensor_count,
+            cache_size=cache_size
+        )
+    def get_peak_memory(self) -> float:
+        """Get peak memory usage"""
+        with self.lock:
+            if not self.snapshots:
+                return 0.0
+            return max(snapshot.total_memory + snapshot.gpu_memory for snapshot in self.snapshots)
+    def get_memory_trend(self) -> List[float]:
+        """Get memory usage trend"""
+        with self.lock:
+            return [snapshot.total_memory + snapshot.gpu_memory for snapshot in self.snapshots]
+class CPUProfiler:
+    """CPU profiling using cProfile"""
+    def __init__(self):
+        self.profiler = None
+        self.profiling = False
+        self.lock = threading.Lock()
+    def start_profiling(self):
+        """Start CPU profiling"""
+        with self.lock:
+            if self.profiling:
+                return
+            self.profiler = cProfile.Profile()
+            self.profiler.enable()
+            self.profiling = True
+    def stop_profiling(self) -> List[ProfileResult]:
+        """Stop CPU profiling and return results"""
+        with self.lock:
+            if not self.profiling or not self.profiler:
+                return []
+            self.profiler.disable()
+            self.profiling = False
+            # Analyze results
+            s = io.StringIO()
+            stats = pstats.Stats(self.profiler, stream=s)
+            stats.sort_stats('cumulative')
+            results = []
+            for func, (call_count, total_time, cumulative_time, callers) in stats.stats.items():
+                filename, line_number, function_name = func
+                result = ProfileResult(
+                    function_name=function_name,
+                    total_time=total_time,
+                    cumulative_time=cumulative_time,
+                    call_count=call_count,
+                    per_call_time=total_time / call_count if call_count > 0 else 0.0,
+                    filename=filename,
+                    line_number=line_number
+                )
+                results.append(result)
+            # Sort by cumulative time
+            results.sort(key=lambda x: x.cumulative_time, reverse=True)
+            return results
+class GPUProfiler:
+    """GPU profiling for CUDA operations"""
+    def __init__(self):
+        self.events = []
+        self.profiling = False
+        self.lock = threading.Lock()
+    def start_profiling(self):
+        """Start GPU profiling"""
+        if not torch.cuda.is_available():
+            return
+        with self.lock:
+            if self.profiling:
+                return
+            self.events = []
+            self.profiling = True
+            torch.cuda.synchronize()
+    def stop_profiling(self) -> Dict[str, Any]:
+        """Stop GPU profiling"""
+        if not torch.cuda.is_available():
+            return {}
+        with self.lock:
+            if not self.profiling:
+                return {}
+            torch.cuda.synchronize()
+            self.profiling = False
+            # Calculate GPU metrics
+            total_memory = torch.cuda.get_device_properties(0).total_memory / (1024**3)
+            allocated_memory = torch.cuda.memory_allocated() / (1024**3)
+            cached_memory = torch.cuda.memory_reserved() / (1024**3)
+            return {
+                "total_memory_gb": total_memory,
+                "allocated_memory_gb": allocated_memory,
+                "cached_memory_gb": cached_memory,
+                "memory_utilization": allocated_memory / total_memory * 100,
+                "events": len(self.events)
+            }
+    @contextmanager
+    def profile_operation(self, name: str):
+        """Context manager for profiling GPU operations"""
+        if not torch.cuda.is_available() or not self.profiling:
+            yield
+            return
+        start_event = torch.cuda.Event(enable_timing=True)
+        end_event = torch.cuda.Event(enable_timing=True)
+        start_event.record()
+        try:
+            yield
+        finally:
+            end_event.record()
+            torch.cuda.synchronize()
+            elapsed_time = start_event.elapsed_time(end_event)
+            with self.lock:
+                self.events.append({
+                    "name": name,
+                    "duration_ms": elapsed_time,
+                    "timestamp": time.time()
+                })
+class MambaSwarmProfiler:
+    """Comprehensive profiler for Mamba Swarm"""
+    def __init__(self, enable_memory_monitoring: bool = True):
+        self.logger = logging.getLogger(__name__)
+        # Initialize profilers
+        self.cpu_profiler = CPUProfiler()
+        self.memory_profiler = MemoryProfiler()
+        self.gpu_profiler = GPUProfiler()
+        # Function timers
+        self.function_timers: Dict[str, FunctionTimer] = {}
+        self.timer_lock = threading.Lock()
+        # Profiling state
+        self.profiling_active = False
+        self.profile_start_time = 0.0
+        # Performance tracking
+        self.performance_history = deque(maxlen=100)
+        # Start memory monitoring if enabled
+        if enable_memory_monitoring:
+            self.memory_profiler.start_monitoring()
+    def start_profiling(self, include_cpu: bool = True, include_gpu: bool = True):
+        """Start comprehensive profiling"""
+        if self.profiling_active:
+            self.logger.warning("Profiling already active")
+            return
+        self.profile_start_time = time.time()
+        self.profiling_active = True
+        if include_cpu:
+            self.cpu_profiler.start_profiling()
+        if include_gpu:
+            self.gpu_profiler.start_profiling()
+        self.logger.info("Started performance profiling")
+    def stop_profiling(self) -> PerformanceProfile:
+        """Stop profiling and return results"""
+        if not self.profiling_active:
+            self.logger.warning("Profiling not active")
+            return None
+        end_time = time.time()
+        duration = end_time - self.profile_start_time
+        self.profiling_active = False
+        # Get CPU profile
+        cpu_results = self.cpu_profiler.stop_profiling()
+        # Get GPU profile
+        gpu_results = self.gpu_profiler.stop_profiling()
+        # Get system metrics
+        cpu_percent = psutil.cpu_percent()
+        memory_info = psutil.virtual_memory()
+        memory_percent = memory_info.percent
+        gpu_usage = 0.0
+        if torch.cuda.is_available():
+            gpu_usage = torch.cuda.memory_allocated() / torch.cuda.max_memory_allocated() * 100
+        # Get memory snapshots
+        memory_snapshots = list(self.memory_profiler.snapshots)
+        # Analyze bottlenecks
+        bottlenecks = self._analyze_bottlenecks(cpu_results, gpu_results)
+        # Generate recommendations
+        recommendations = self._generate_recommendations(cpu_results, gpu_results, memory_snapshots)
+        profile = PerformanceProfile(
+            timestamp=end_time,
+            duration=duration,
+            cpu_usage=cpu_percent,
+            memory_usage=memory_percent,
+            gpu_usage=gpu_usage,
+            function_calls=cpu_results,
+            memory_snapshots=memory_snapshots,
+            bottlenecks=bottlenecks,
+            recommendations=recommendations
+        )
+        self.performance_history.append(profile)
+        self.logger.info(f"Completed performance profiling (duration: {duration:.2f}s)")
+        return profile
+    def _analyze_bottlenecks(self, cpu_results: List[ProfileResult], gpu_results: Dict[str, Any]) -> List[str]:
+        """Analyze performance bottlenecks"""
+        bottlenecks = []
+        # CPU bottlenecks
+        if cpu_results:
+            top_cpu_functions = cpu_results[:5]
+            for func in top_cpu_functions:
+                if func.cumulative_time > 1.0:  # More than 1 second
+                    bottlenecks.append(f"CPU: {func.function_name} ({func.cumulative_time:.2f}s)")
+        # Memory bottlenecks
+        peak_memory = self.memory_profiler.get_peak_memory()
+        if peak_memory > 8.0:  # More than 8GB
+            bottlenecks.append(f"Memory: High usage ({peak_memory:.2f}GB)")
+        # GPU bottlenecks
+        if gpu_results and gpu_results.get("memory_utilization", 0) > 90:
+            bottlenecks.append("GPU: High memory utilization")
+        return bottlenecks
+    def _generate_recommendations(self, cpu_results: List[ProfileResult],
+                                gpu_results: Dict[str, Any],
+                                memory_snapshots: List[MemorySnapshot]) -> List[str]:
+        """Generate optimization recommendations"""
+        recommendations = []
+        # CPU recommendations
+        if cpu_results:
+            slow_functions = [f for f in cpu_results if f.per_call_time > 0.1]
+            if slow_functions:
+                recommendations.append("Consider optimizing slow functions or using caching")
+        # Memory recommendations
+        if memory_snapshots:
+            tensor_counts = [s.tensor_count for s in memory_snapshots]
+            if tensor_counts and max(tensor_counts) > 10000:
+                recommendations.append("High tensor count detected - consider tensor cleanup")
+        # GPU recommendations
+        if gpu_results:
+            if gpu_results.get("memory_utilization", 0) > 85:
+                recommendations.append("Consider reducing batch size or using gradient checkpointing")
+        return recommendations
+    def profile_function(self, func_name: str):
+        """Decorator for profiling individual functions"""
+        def decorator(func):
+            @functools.wraps(func)
+            def wrapper(*args, **kwargs):
+                start_time = time.time()
+                try:
+                    result = func(*args, **kwargs)
+                    return result
+                finally:
+                    duration = time.time() - start_time
+                    with self.timer_lock:
+                        if func_name not in self.function_timers:
+                            self.function_timers[func_name] = FunctionTimer(func_name)
+                        self.function_timers[func_name].add_call(duration)
+            return wrapper
+        return decorator
+    @contextmanager
+    def profile_block(self, block_name: str):
+        """Context manager for profiling code blocks"""
+        start_time = time.time()
+        try:
+            yield
+        finally:
+            duration = time.time() - start_time
+            with self.timer_lock:
+                if block_name not in self.function_timers:
+                    self.function_timers[block_name] = FunctionTimer(block_name)
+                self.function_timers[block_name].add_call(duration)
+    def get_function_stats(self) -> Dict[str, Dict[str, Any]]:
+        """Get statistics for all profiled functions"""
+        with self.timer_lock:
+            return {name: timer.get_stats() for name, timer in self.function_timers.items()}
+    def export_profile_report(self, filename: Optional[str] = None) -> str:
+        """Export comprehensive profile report"""
+        if not filename:
+            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+            filename = f"mamba_swarm_profile_{timestamp}.json"
+        report = {
+            "timestamp": time.time(),
+            "profiler_stats": {
+                "function_timers": self.get_function_stats(),
+                "peak_memory_gb": self.memory_profiler.get_peak_memory(),
+                "memory_trend": self.memory_profiler.get_memory_trend()[-50:],  # Last 50 samples
+            },
+            "performance_history": [
+                {
+                    "timestamp": p.timestamp,
+                    "duration": p.duration,
+                    "cpu_usage": p.cpu_usage,
+                    "memory_usage": p.memory_usage,
+                    "gpu_usage": p.gpu_usage,
+                    "bottlenecks": p.bottlenecks,
+                    "recommendations": p.recommendations
+                }
+                for p in list(self.performance_history)[-10:]  # Last 10 profiles
+            ]
+        }
+        with open(filename, 'w') as f:
+            json.dump(report, f, indent=2)
+        self.logger.info(f"Profile report exported to {filename}")
+        return filename
+    def cleanup(self):
+        """Cleanup profiler resources"""
+        self.memory_profiler.stop_monitoring()
+        if self.profiling_active:
+            self.stop_profiling()
+# Utility functions and decorators
+def profile_inference(profiler: MambaSwarmProfiler):
+    """Decorator for profiling inference functions"""
+    return profiler.profile_function("inference")
+def profile_training_step(profiler: MambaSwarmProfiler):
+    """Decorator for profiling training steps"""
+    return profiler.profile_function("training_step")
+def profile_forward_pass(profiler: MambaSwarmProfiler):
+    """Decorator for profiling forward passes"""
+    return profiler.profile_function("forward_pass")
+# Example usage
+if __name__ == "__main__":
+    # Create profiler
+    profiler = MambaSwarmProfiler()
+    # Start profiling
+    profiler.start_profiling()
+    # Simulate some work
+    @profiler.profile_function("test_function")
+    def test_function():
+        time.sleep(0.1)
+        return "result"
+    # Run test
+    for i in range(10):
+        test_function()
+    # Use context manager
+    with profiler.profile_block("test_block"):
+        time.sleep(0.05)
+    # Stop profiling
+    profile_result = profiler.stop_profiling()
+    # Print results
+    if profile_result:
+        print(f"Profile duration: {profile_result.duration:.2f}s")
+        print(f"CPU usage: {profile_result.cpu_usage:.1f}%")
+        print(f"Memory usage: {profile_result.memory_usage:.1f}%")
+        print(f"Bottlenecks: {profile_result.bottlenecks}")
+        print(f"Recommendations: {profile_result.recommendations}")
+    # Export report
+    report_file = profiler.export_profile_report()
+    print(f"Report saved to: {report_file}")
+    # Cleanup
+    profiler.cleanup()