| { | |
| "input": "https://arxiv.org/abs/2508.13142", | |
| "resolved_source_url": "https://arxiv.org/pdf/2508.13142.pdf", | |
| "analysis": { | |
| "benchmark_name": "EASI", | |
| "benchmark_aliases": [ | |
| "EASI-Leaderboard" | |
| ], | |
| "source_type": "paper", | |
| "table_hint": "Table 3", | |
| "likely_metrics": [ | |
| "MRA", | |
| "Acc", | |
| "CAA", | |
| "F1" | |
| ], | |
| "search_terms": [ | |
| "EASI leaderboard", | |
| "EASI benchmark", | |
| "Spatial Intelligence benchmark" | |
| ], | |
| "notes": "The paper introduces the EASI framework for evaluating Multimodal LLMs on Spatial Intelligence. It includes an accompanying leaderboard. Table 3 presents results for various models on eight key spatial benchmarks under the 'Official Protocol'." | |
| }, | |
| "seed_work_openalex_id": "https://openalex.org/W2117359358", | |
| "seed_work_title": "The eczema area and severity index (EASI): assessment of reliability in atopic dermatitis", | |
| "notes": [ | |
| "Fetched source URL content for extraction planning." | |
| ], | |
| "plan_steps": [ | |
| "Extract the seed benchmark leaderboard from the provided source.", | |
| "Find possible newer benchmark results from citing works.", | |
| "Screen citations for benchmark-relevant score tables.", | |
| "Merge, deduplicate, and store leaderboard rows with citations.", | |
| "Generate Gradio app files for Hugging Face deployment." | |
| ] | |
| } |