"""
Comprehensive metrics calculations for GEPA Optimizer
"""

from typing import Dict, Optional, Any
import re

def calculate_metrics(original_prompt: str, 
                     optimized_prompt: str,
                     performance_data: Optional[Dict[str, Any]] = None) -> Dict[str, float]:
    """
    Calculate comprehensive improvement metrics between original and optimized prompts
    
    Args:
        original_prompt: Original seed prompt
        optimized_prompt: GEPA-optimized prompt
        performance_data: Optional performance metrics from GEPA
        
    Returns:
        Dict[str, float]: Comprehensive metrics dictionary
    """
    metrics = {}
    
    # Basic length metrics
    orig_len = len(original_prompt)
    opt_len = len(optimized_prompt)
    
    if orig_len > 0:
        metrics['length_change_percent'] = ((opt_len - orig_len) / orig_len) * 100
    else:
        metrics['length_change_percent'] = 0.0
    
    metrics['original_length'] = orig_len
    metrics['optimized_length'] = opt_len
    
    # Word count metrics
    orig_words = len(original_prompt.split())
    opt_words = len(optimized_prompt.split())
    
    if orig_words > 0:
        metrics['word_change_percent'] = ((opt_words - orig_words) / orig_words) * 100
    else:
        metrics['word_change_percent'] = 0.0
    
    metrics['original_words'] = orig_words
    metrics['optimized_words'] = opt_words
    
    # Complexity metrics
    metrics['original_complexity'] = calculate_text_complexity(original_prompt)
    metrics['optimized_complexity'] = calculate_text_complexity(optimized_prompt)
    metrics['complexity_change'] = metrics['optimized_complexity'] - metrics['original_complexity']
    
    # Similarity metrics
    metrics['similarity_score'] = calculate_similarity(original_prompt, optimized_prompt)
    
    # Include GEPA performance data if available
    if performance_data:
        for key, value in performance_data.items():
            if isinstance(value, (int, float)):
                metrics[f'gepa_{key}'] = float(value)
    
    return metrics

def calculate_text_complexity(text: str) -> float:
    """
    Calculate a simple complexity score for text
    
    Args:
        text: Text to analyze
        
    Returns:
        float: Complexity score (higher = more complex)
    """
    if not text:
        return 0.0
    
    # Count various complexity indicators
    sentence_count = len(re.findall(r'[.!?]+', text))
    word_count = len(text.split())
    char_count = len(text)
    unique_words = len(set(text.lower().split()))
    
    # Avoid division by zero
    if word_count == 0:
        return 0.0
    
    # Simple complexity calculation
    avg_word_length = char_count / word_count
    lexical_diversity = unique_words / word_count
    avg_sentence_length = word_count / max(sentence_count, 1)
    
    # Weighted complexity score
    complexity = (
        avg_word_length * 0.3 +
        lexical_diversity * 0.4 +
        avg_sentence_length * 0.3
    )
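    # Worked example (illustrative, not from GEPA): for "Write a short summary."
    # the score is 5.75 * 0.3 + 1.0 * 0.4 + 4 * 0.3 = 3.325. Note that the three
    # terms live on different scales, so avg_sentence_length tends to dominate for
    # longer prompts; this is a rough heuristic rather than a normalized score.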
    
    return round(complexity, 3)

def calculate_similarity(text1: str, text2: str) -> float:
    """
    Calculate similarity between two texts using simple word overlap
    
    Args:
        text1: First text
        text2: Second text
        
    Returns:
        float: Similarity score between 0 and 1
    """
    if not text1 or not text2:
        return 0.0
    
    # Convert to lowercase and split into words
    words1 = set(text1.lower().split())
    words2 = set(text2.lower().split())
    
    # Calculate Jaccard similarity
    intersection = len(words1.intersection(words2))
    union = len(words1.union(words2))
    
    if union == 0:
        return 0.0
    
    similarity = intersection / union
    return round(similarity, 3)

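# Illustrative check of the Jaccard overlap above (placeholder strings, not GEPA output):
#   calculate_similarity("write a summary", "write a short summary")
#   -> |{write, a, summary} & {write, a, short, summary}| / |union| = 3 / 4 = 0.75
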
def track_optimization_progress(iteration: int, 
                               score: float, 
                               improvement: float,
                               time_elapsed: float) -> Dict[str, Any]:
    """
    Track progress during optimization iterations
    
    Args:
        iteration: Current iteration number
        score: Current performance score
        improvement: Improvement over baseline
        time_elapsed: Time elapsed in seconds
        
    Returns:
        Dict[str, Any]: Progress metrics
    """
    return {
        'iteration': iteration,
        'score': round(score, 4),
        'improvement': round(improvement, 4),
        'time_elapsed': round(time_elapsed, 2),
        'score_per_second': round(score / max(time_elapsed, 0.001), 4)
    }

def calculate_cost_efficiency(improvement_percent: float, 
                            estimated_cost: float) -> Dict[str, float]:
    """
    Calculate cost efficiency metrics
    
    Args:
        improvement_percent: Performance improvement percentage
        estimated_cost: Estimated cost in USD
        
    Returns:
        Dict[str, float]: Cost efficiency metrics
    """
    if estimated_cost <= 0:
        return {'improvement_per_dollar': 0.0, 'cost_efficiency': 0.0}
    
    improvement_per_dollar = improvement_percent / estimated_cost
    
    # Cost efficiency score (higher is better)
    cost_efficiency = min(improvement_per_dollar / 10.0, 1.0)  # Normalized to 0-1
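    # Example (illustrative numbers): a 25% improvement at an estimated $5 cost
    # gives 5.0 improvement-per-dollar, which maps to a 0.5 efficiency score;
    # anything at or above 10 %/$ saturates at 1.0.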
    
    return {
        'improvement_per_dollar': round(improvement_per_dollar, 3),
        'cost_efficiency': round(cost_efficiency, 3),
        'estimated_cost': estimated_cost
    }

def summarize_optimization_results(metrics: Dict[str, float]) -> str:
    """
    Create a human-readable summary of optimization results
    
    Args:
        metrics: Metrics dictionary from calculate_metrics
        
    Returns:
        str: Human-readable summary
    """
    summary_parts = []
    
    # Length changes
    length_change = metrics.get('length_change_percent', 0)
    if length_change > 5:
        summary_parts.append(f"Prompt expanded by {length_change:.1f}%")
    elif length_change < -5:
        summary_parts.append(f"Prompt condensed by {abs(length_change):.1f}%")
    else:
        summary_parts.append("Prompt length remained similar")
    
    # Complexity changes
    complexity_change = metrics.get('complexity_change', 0)
    if complexity_change > 0.1:
        summary_parts.append("increased complexity")
    elif complexity_change < -0.1:
        summary_parts.append("reduced complexity")
    else:
        summary_parts.append("maintained similar complexity")
    
    # Similarity
    similarity = metrics.get('similarity_score', 1.0)
    if similarity > 0.8:
        summary_parts.append(f"high similarity to original ({similarity:.2f})")
    elif similarity > 0.5:
        summary_parts.append(f"moderate changes from original ({similarity:.2f})")
    else:
        summary_parts.append(f"significant changes from original ({similarity:.2f})")
    
    return f"Optimization results: {', '.join(summary_parts)}"