Spaces:
No application file
No application file
mpboyer
microsoft c'est les plus grosses merdes que j'ai vu de ma vie niveau code, et j'ai déjà codé des trucs
75629a4 | """ | |
| Evaluation Metrics for Text Adventure Agents | |
| Tracks scores across multiple trials and computes statistics. | |
| """ | |
| import statistics | |
| from dataclasses import dataclass, field | |
| from typing import Optional | |
| class TrialResult: | |
| """Result of a single evaluation trial.""" | |
| trial_number: int | |
| final_score: int | |
| max_score: int | |
| moves: int | |
| locations_visited: int | |
| game_completed: bool | |
| error: Optional[str] = None | |
| def score_percentage(self) -> float: | |
| """Score as percentage of max possible.""" | |
| if self.max_score == 0: | |
| return 0.0 | |
| return (self.final_score / self.max_score) * 100 | |
| def to_dict(self) -> dict: | |
| """Convert to dictionary for JSON serialization.""" | |
| return { | |
| "trial_number": self.trial_number, | |
| "final_score": self.final_score, | |
| "max_score": self.max_score, | |
| "score_percentage": round(self.score_percentage, 2), | |
| "moves": self.moves, | |
| "locations_visited": self.locations_visited, | |
| "game_completed": self.game_completed, | |
| "error": self.error, | |
| } | |
| class EvaluationResult: | |
| """Aggregated results across all trials.""" | |
| student_id: str | |
| game: str | |
| num_trials: int | |
| max_steps: int | |
| trials: list[TrialResult] = field(default_factory=list) | |
| def scores(self) -> list[int]: | |
| """List of final scores from all trials.""" | |
| return [t.final_score for t in self.trials if t.error is None] | |
| def mean_score(self) -> float: | |
| """Average score across trials.""" | |
| if not self.scores: | |
| return 0.0 | |
| return statistics.mean(self.scores) | |
| def std_score(self) -> float: | |
| """Standard deviation of scores.""" | |
| if len(self.scores) < 2: | |
| return 0.0 | |
| return statistics.stdev(self.scores) | |
| def min_score(self) -> int: | |
| """Minimum score achieved.""" | |
| if not self.scores: | |
| return 0 | |
| return min(self.scores) | |
| def max_score_achieved(self) -> int: | |
| """Maximum score achieved.""" | |
| if not self.scores: | |
| return 0 | |
| return max(self.scores) | |
| def successful_trials(self) -> int: | |
| """Number of trials that completed without error.""" | |
| return len([t for t in self.trials if t.error is None]) | |
| def mean_moves(self) -> float: | |
| """Average number of moves across trials.""" | |
| moves = [t.moves for t in self.trials if t.error is None] | |
| if not moves: | |
| return 0.0 | |
| return statistics.mean(moves) | |
| def mean_locations(self) -> float: | |
| """Average number of locations visited.""" | |
| locs = [t.locations_visited for t in self.trials if t.error is None] | |
| if not locs: | |
| return 0.0 | |
| return statistics.mean(locs) | |
| def add_trial(self, trial: TrialResult) -> None: | |
| """Add a trial result.""" | |
| self.trials.append(trial) | |
| def to_dict(self) -> dict: | |
| """Convert to dictionary for JSON serialization.""" | |
| return { | |
| "student_id": self.student_id, | |
| "game": self.game, | |
| "num_trials": self.num_trials, | |
| "max_steps": self.max_steps, | |
| "successful_trials": self.successful_trials, | |
| "summary": { | |
| "mean_score": round(self.mean_score, 2), | |
| "std_score": round(self.std_score, 2), | |
| "min_score": self.min_score, | |
| "max_score": self.max_score_achieved, | |
| "mean_moves": round(self.mean_moves, 2), | |
| "mean_locations": round(self.mean_locations, 2), | |
| }, | |
| "trials": [t.to_dict() for t in self.trials], | |
| } | |
| def summary_str(self) -> str: | |
| """Human-readable summary.""" | |
| lines = [ | |
| f"Evaluation Results: {self.student_id}", | |
| f"{'=' * 50}", | |
| f"Game: {self.game}", | |
| f"Trials: {self.successful_trials}/{self.num_trials} successful", | |
| f"Max steps per trial: {self.max_steps}", | |
| f"", | |
| f"Score Statistics:", | |
| f" Mean: {self.mean_score:.2f}", | |
| f" Std: {self.std_score:.2f}", | |
| f" Min: {self.min_score}", | |
| f" Max: {self.max_score_achieved}", | |
| f"", | |
| f"Exploration:", | |
| f" Mean moves: {self.mean_moves:.1f}", | |
| f" Mean locations: {self.mean_locations:.1f}", | |
| f"", | |
| f"Per-Trial Scores: {self.scores}", | |
| ] | |
| return "\n".join(lines) | |