import html

# Page title shown at the top of the leaderboard.
# NOTE(review): this is plain text; if the UI renders it through an HTML
# component, heading markup (e.g. <h1>) was probably intended -- confirm.
TITLE = """

AMA-Bench Leaderboard

"""

# Markdown shown above the results table. Headings must start on their own
# line, otherwise Markdown renders the "##" literally.
INTRODUCTION_TEXT = """
AMA-Bench evaluates the memory capabilities of LLMs and memory-augmented agents across four cognitive dimensions: **Recall** (retrieving stored information), **Causal Inference** (cause-and-effect reasoning), **State Updating** (tracking evolving states), and **State Abstraction** (forming higher-level representations).

## Leaderboard

Our leaderboard presents results for the multiple-choice subset, which provides objective and easier-to-score evaluation.

See below for submission details.
"""

# Markdown describing the submission file format. The code fence and the list
# items each need their own line to render correctly.
SUBMISSION_TEXT = """
## Submissions

Results can be submitted for evaluation. Each submission should contain answers for all questions in the benchmark.

We expect submissions to be JSON Lines files with the following format:

```
{"episode_id": "trajectory_id", "question_uuid_list": ["uuid-1", "uuid-2", "uuid-3"], "answer_list": ["The agent moved right.", "..."], "llm_as_judge_score_list": [true, false, true]}
```

**Required fields:**

- `episode_id`: The episode identifier
- `question_uuid_list`: List of question UUIDs corresponding to each answer (e.g., `["uuid-1", "uuid-2"]`)
- `answer_list`: Your model's answers, in the same order as `question_uuid_list`
- `llm_as_judge_score_list`: Boolean scores for each answer (e.g., `[true, false, true]`)

**Optional fields:**

- `reasoning_trace`: The reasoning process or explanation for the answers
"""

CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"

# Raw string so that any backslashes later added to the BibTeX entry are kept
# verbatim.
CITATION_BUTTON_TEXT = r"""@misc{ama-bench,
    title={AMA-Bench: Agent Memory Assessment Benchmark},
    author={AMA-Bench Team},
    year={2024}
}"""


def _styled_paragraph(msg, color):
    """Wrap ``msg`` in a centered, colored HTML paragraph."""
    # ``msg`` is inserted as-is so callers may keep passing inline markup.
    return f"<p style='color: {color}; font-size: 20px; text-align: center;'>{msg}</p>"


def format_error(msg):
    """Format error message with red styling.

    Args:
        msg: Message text (or inline HTML) to display.

    Returns:
        A single-line HTML ``<p>`` element containing ``msg`` in red.
    """
    return _styled_paragraph(msg, "red")


def format_warning(msg):
    """Format warning message with orange styling.

    Args:
        msg: Message text (or inline HTML) to display.

    Returns:
        A single-line HTML ``<p>`` element containing ``msg`` in orange.
    """
    return _styled_paragraph(msg, "orange")


def format_log(msg):
    """Format success message with green styling.

    Args:
        msg: Message text (or inline HTML) to display.

    Returns:
        A single-line HTML ``<p>`` element containing ``msg`` in green.
    """
    return _styled_paragraph(msg, "green")


def model_hyperlink(link, model_name):
    """Create a hyperlink to the model information.

    Args:
        link: URL of the model page. ``None``, an empty string, or a
            whitespace-only string means "no link available".
        model_name: Text to display for the model.

    Returns:
        ``model_name`` unchanged when no usable link is given; otherwise an
        HTML ``<a>`` element pointing at ``link`` that opens in a new tab.
    """
    if not link or not link.strip():
        return model_name

    # Both values come from submissions, so escape them before embedding them
    # in markup; quote=True also protects the double-quoted href attribute.
    safe_link = html.escape(link.strip(), quote=True)
    safe_name = html.escape(str(model_name), quote=False)
    return (
        f'<a target="_blank" rel="noopener noreferrer" href="{safe_link}" '
        f'style="color: var(--link-text-color); text-decoration: underline; '
        f'text-decoration-style: dotted;">{safe_name}</a>'
    )