File size: 2,537 Bytes
cacd4d0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
"""
Data models for LLEGO genetic operators.

Contains the core data structures used across all genetic operators.
"""

from typing import List, Optional, Literal
from dataclasses import dataclass, field
from datetime import datetime
from typing import TypedDict


class PromptMetadata(TypedDict, total=False):
    """
    Bookkeeping record describing where a prompt came from and how it scored.

    Declared with ``total=False``, so every key is optional and a partially
    filled dictionary is still a valid ``PromptMetadata``. The entries are
    meant for debugging, analysis, and visualization of the genetic
    algorithm's evolution process.
    """

    # --- Provenance -------------------------------------------------------
    # Iteration in which the prompt was created.
    generation: int
    # How the prompt was produced.
    operator: Literal['seed', 'crossover', 'mutation']
    # Indices of the parent prompts.
    parent_indices: Optional[List[int]]
    # Full text of the parent prompts.
    parent_prompts: Optional[List[str]]

    # --- Search signals ---------------------------------------------------
    # Fitness value targeted by crossover.
    target_fitness: Optional[float]
    # Diversity relative to the population.
    diversity_score: Optional[float]
    # Per-sample performance scores.
    sample_scores: Optional[List[float]]
    # Number of diverse parents (used by mutation).
    num_diverse_parents: Optional[int]

    # --- Descriptive statistics -------------------------------------------
    # Timestamp recorded when the prompt was created.
    created_at: str
    # Length of the prompt in characters.
    prompt_length: int
    # Number of words in the prompt.
    word_count: int
    # Candidate type label used for GEPA notation.
    candidate_type: Optional[str]


@dataclass
class PromptCandidate:
    """
    Represents a prompt candidate with fitness score and evolution metadata.

    Attributes:
        prompt: The actual prompt text.
        fitness: Fitness score from evaluation (expected to lie in 0-1; the
            range is not validated here).
        metadata: Tracking information about prompt creation and performance.
            May be omitted or passed as ``None``; either way the instance
            ends up holding its own dictionary.

    After construction ``metadata`` always contains ``prompt_length``,
    ``word_count`` and ``created_at``. Values supplied by the caller for
    those keys are kept; missing ones are derived from ``prompt`` and the
    current local time.
    """
    prompt: str
    fitness: float
    metadata: Optional[PromptMetadata] = field(default_factory=dict)

    def __post_init__(self):
        """Take a private copy of the metadata and fill in missing statistics."""
        # Shallow-copy whatever was supplied so that the auto-populated keys
        # below never leak into a dict the caller still owns. Without the
        # copy, reusing one metadata dict for several candidates would make
        # every later candidate inherit the first one's statistics.
        self.metadata = {} if self.metadata is None else dict(self.metadata)

        # Auto-populate prompt statistics, respecting caller-provided values.
        self.metadata.setdefault('prompt_length', len(self.prompt))
        self.metadata.setdefault('word_count', len(self.prompt.split()))
        # Explicit membership test so the clock is only read when needed.
        if 'created_at' not in self.metadata:
            self.metadata['created_at'] = datetime.now().isoformat()