Spaces:
Sleeping
Sleeping
| """ | |
| Candidate and Feedback Collector for Presentation | |
| This module collects all candidates generated during optimization along with | |
| their feedback, scores, and metadata for presentation purposes. | |
| """ | |
| import json | |
| from datetime import datetime | |
| from pathlib import Path | |
| from typing import Dict, List, Any, Optional | |
| from dataclasses import dataclass, asdict, field | |
@dataclass
class CandidateInfo:
    """Information about a single candidate prompt.

    Declared as a dataclass so it can be built with keyword arguments and
    serialized with ``dataclasses.asdict``. Only ``timestamp`` has a computed
    default: the local time at construction, as an ISO-8601 string.
    """

    iteration: int  # iteration number this candidate was recorded under
    candidate_id: str  # identifier used to look the candidate up later
    source: str  # "GEPA_Reflection", "LLEGO_Crossover", "LLEGO_Mutation", "Seed"
    prompt: str  # the candidate prompt text
    score: Optional[float] = None  # evaluation score, if one is available
    feedback: Optional[str] = None  # feedback text, if any
    feedback_details: Optional[Dict[str, Any]] = None  # extra structured feedback
    # default_factory so every instance gets its own creation time rather than
    # one value computed when the class is defined.
    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())
@dataclass
class IterationInfo:
    """Information about a single optimization iteration.

    Declared as a dataclass so it can be built with ``IterationInfo(iteration=n)``
    and serialized with ``dataclasses.asdict``. ``candidates`` starts as a
    fresh empty list per instance; ``best_candidate`` / ``best_score`` stay
    ``None`` until a scored candidate is recorded.
    """

    iteration: int  # iteration number
    # String forward references keep this class definable on its own; the
    # dataclass machinery does not need to resolve them.
    candidates: List["CandidateInfo"] = field(default_factory=list)
    best_candidate: Optional["CandidateInfo"] = None
    best_score: Optional[float] = None
    # Local time at construction, as an ISO-8601 string.
    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())
class CandidateCollector:
    """
    Collects all candidates and feedback during optimization for presentation.

    Candidates are kept both in a flat list (``all_candidates``) and grouped
    per iteration (``iterations``). The collected data can be exported as JSON
    or as a presentation-ready Markdown report inside ``output_dir``.
    """

    def __init__(self, output_dir: str = "presentation_data"):
        """
        Initialize the collector.

        Args:
            output_dir: Directory to save collected data. It is created if it
                does not exist, including any missing parent directories.
        """
        self.output_dir = Path(output_dir)
        # parents=True so a nested path such as "runs/2024/presentation" works
        # even when the intermediate directories are missing.
        self.output_dir.mkdir(parents=True, exist_ok=True)

        self.iterations: List[IterationInfo] = []
        self.current_iteration: Optional[IterationInfo] = None
        self.all_candidates: List[CandidateInfo] = []

        # Track seed prompt
        self.seed_prompt: Optional[str] = None

    def set_seed_prompt(self, seed_prompt: str) -> None:
        """Set the seed prompt for reference"""
        self.seed_prompt = seed_prompt

    def start_iteration(self, iteration: int) -> None:
        """Start tracking a new iteration and make it the current one"""
        self.current_iteration = IterationInfo(iteration=iteration)
        self.iterations.append(self.current_iteration)

    def add_candidate(
        self,
        iteration: int,
        candidate_id: str,
        source: str,
        prompt: str,
        score: Optional[float] = None,
        feedback: Optional[str] = None,
        feedback_details: Optional[Dict[str, Any]] = None
    ) -> None:
        """
        Add a candidate to the collection.

        The candidate is always appended to ``all_candidates``. It is attached
        to the current iteration only when that iteration's number equals
        ``iteration``.

        Args:
            iteration: Iteration number
            candidate_id: Unique identifier for the candidate
            source: Source of the candidate ("GEPA_Reflection", "LLEGO_Crossover", etc.)
            prompt: The candidate prompt text
            score: Evaluation score (if available)
            feedback: Feedback text (if available)
            feedback_details: Additional feedback details (if available)
        """
        candidate = CandidateInfo(
            iteration=iteration,
            candidate_id=candidate_id,
            source=source,
            prompt=prompt,
            score=score,
            feedback=feedback,
            feedback_details=feedback_details
        )

        # Add to current iteration
        current = self.current_iteration
        if current and current.iteration == iteration:
            current.candidates.append(candidate)

            # Update best candidate if this is strictly better
            if score is not None:
                if current.best_score is None or score > current.best_score:
                    current.best_candidate = candidate
                    current.best_score = score

        # Add to all candidates list
        self.all_candidates.append(candidate)

    def add_feedback(
        self,
        candidate_id: str,
        feedback: str,
        feedback_details: Optional[Dict[str, Any]] = None
    ) -> None:
        """
        Add feedback to an existing candidate.

        Updates the first match in ``all_candidates`` and the first match in
        each iteration. Unknown IDs are ignored.

        Args:
            candidate_id: ID of the candidate to update
            feedback: Feedback text
            feedback_details: Additional feedback details
        """
        for candidate in self.all_candidates:
            if candidate.candidate_id == candidate_id:
                candidate.feedback = feedback
                candidate.feedback_details = feedback_details
                break

        # Also update in iterations (first match per iteration)
        for iteration in self.iterations:
            for candidate in iteration.candidates:
                if candidate.candidate_id == candidate_id:
                    candidate.feedback = feedback
                    candidate.feedback_details = feedback_details
                    break

    def add_score(
        self,
        candidate_id: str,
        score: float
    ) -> None:
        """
        Add score to an existing candidate.

        Updates the first match in ``all_candidates`` and the first match in
        each iteration; an iteration's best candidate is replaced when the new
        score is strictly higher than its current best. Unknown IDs are ignored.

        Args:
            candidate_id: ID of the candidate to update
            score: Evaluation score
        """
        for candidate in self.all_candidates:
            if candidate.candidate_id == candidate_id:
                candidate.score = score
                break

        # Also update in iterations (first match per iteration)
        for iteration in self.iterations:
            for candidate in iteration.candidates:
                if candidate.candidate_id == candidate_id:
                    candidate.score = score

                    # Update best candidate if needed
                    if iteration.best_score is None or score > iteration.best_score:
                        iteration.best_candidate = candidate
                        iteration.best_score = score
                    break

    def save_to_json(self, filename: Optional[str] = None) -> Path:
        """
        Save collected data to JSON file.

        Args:
            filename: Optional filename (auto-generated from the current time
                if not provided)

        Returns:
            Path to saved file
        """
        if filename is None:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            filename = f"candidates_and_feedback_{timestamp}.json"

        filepath = self.output_dir / filename

        data = {
            "seed_prompt": self.seed_prompt,
            "total_iterations": len(self.iterations),
            "total_candidates": len(self.all_candidates),
            "iterations": [asdict(iter_info) for iter_info in self.iterations],
            "all_candidates": [asdict(candidate) for candidate in self.all_candidates],
            "timestamp": datetime.now().isoformat()
        }

        with open(filepath, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2, ensure_ascii=False)

        return filepath

    def save_to_markdown(self, filename: Optional[str] = None) -> Path:
        """
        Save collected data to Markdown file (presentation-ready format).

        The report contains a header, the seed prompt (if set), one section
        per iteration with its best candidate and all candidates, and a
        summary grouped by candidate source.

        Args:
            filename: Optional filename (auto-generated from the current time
                if not provided)

        Returns:
            Path to saved file
        """
        if filename is None:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            filename = f"candidates_and_feedback_{timestamp}.md"

        filepath = self.output_dir / filename

        # NOTE(review): the "π"-style glyphs in the headings below look like
        # mis-encoded emoji; they are kept as-is because the intended
        # characters cannot be confirmed from this file.
        with open(filepath, 'w', encoding='utf-8') as f:
            # Header
            f.write("# Optimization Candidates and Feedback\n\n")
            f.write(f"**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")
            f.write(f"**Total Iterations:** {len(self.iterations)}\n")
            f.write(f"**Total Candidates:** {len(self.all_candidates)}\n\n")

            # Seed Prompt
            if self.seed_prompt:
                f.write("---\n\n")
                f.write("## π± Seed Prompt\n\n")
                f.write("```\n")
                f.write(self.seed_prompt)
                f.write("\n```\n\n")

            # Iterations
            for iter_info in self.iterations:
                f.write("---\n\n")
                f.write(f"## π Iteration {iter_info.iteration}\n\n")

                # Best candidate for this iteration
                best = iter_info.best_candidate
                if best:
                    f.write(f"### π Best Candidate (Score: {iter_info.best_score:.4f})\n\n")
                    f.write(f"**Source:** {best.source}\n\n")
                    f.write("**Prompt:**\n```\n")
                    f.write(best.prompt)
                    f.write("\n```\n\n")

                    if best.feedback:
                        f.write("**Feedback:**\n\n")
                        f.write(f"{best.feedback}\n\n")

                # All candidates in this iteration
                f.write(f"### π All Candidates ({len(iter_info.candidates)})\n\n")

                for idx, candidate in enumerate(iter_info.candidates, 1):
                    f.write(f"#### Candidate {idx}: {candidate.source}\n\n")
                    f.write(f"**ID:** `{candidate.candidate_id}`\n\n")

                    if candidate.score is not None:
                        f.write(f"**Score:** `{candidate.score:.4f}`\n\n")

                    f.write("**Prompt:**\n```\n")
                    f.write(candidate.prompt)
                    f.write("\n```\n\n")

                    if candidate.feedback:
                        f.write("**Feedback:**\n\n")
                        f.write(f"{candidate.feedback}\n\n")

                    if candidate.feedback_details:
                        f.write("**Feedback Details:**\n\n")
                        f.write("```json\n")
                        # ensure_ascii=False keeps non-ASCII feedback readable
                        # in the UTF-8 report, matching save_to_json.
                        f.write(json.dumps(candidate.feedback_details, indent=2, ensure_ascii=False))
                        f.write("\n```\n\n")

                    f.write("---\n\n")

            # Summary by source
            f.write("---\n\n")
            f.write("## π Summary by Source\n\n")

            sources: Dict[str, List[Any]] = {}
            for candidate in self.all_candidates:
                sources.setdefault(candidate.source, []).append(candidate)

            for source, candidates in sources.items():
                f.write(f"### {source} ({len(candidates)} candidates)\n\n")
                for candidate in candidates:
                    # Compare against None: a score of 0.0 is a real score and
                    # must not be reported as "No score".
                    if candidate.score is not None:
                        score_str = f"Score: {candidate.score:.4f}"
                    else:
                        score_str = "No score"
                    f.write(f"- **{candidate.candidate_id}** (Iteration {candidate.iteration}, {score_str})\n")
                f.write("\n")

        return filepath

    def get_summary(self) -> Dict[str, Any]:
        """Get a summary of collected data (counts by source, score and feedback stats)"""
        sources: Dict[str, int] = {}
        for candidate in self.all_candidates:
            sources[candidate.source] = sources.get(candidate.source, 0) + 1

        scored_candidates = [c for c in self.all_candidates if c.score is not None]
        # Average only over scored candidates; None when nothing has a score.
        avg_score = (
            sum(c.score for c in scored_candidates) / len(scored_candidates)
            if scored_candidates
            else None
        )

        return {
            "total_iterations": len(self.iterations),
            "total_candidates": len(self.all_candidates),
            "candidates_by_source": sources,
            "candidates_with_scores": len(scored_candidates),
            "average_score": avg_score,
            "candidates_with_feedback": len([c for c in self.all_candidates if c.feedback])
        }