{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# LiveBench results: drop the stray `__index_level_0__` column\n",
    "\n",
    "The `2024-09` config of [`lmms-lab/LiveBenchResults`](https://huggingface.co/datasets/lmms-lab/LiveBenchResults)\n",
    "was uploaded with a leftover pandas index column (`__index_level_0__`).\n",
    "This notebook downloads the split, removes that column, and pushes the\n",
    "cleaned dataset back to the Hub.\n",
    "\n",
    "**Provenance:** the cleaned dataset was uploaded to the Hub as commit\n",
    "`e8e81e7a3ddd5611340c25235c9c73ce40b0bed1`. Re-running the final cell\n",
    "creates a new commit, so only run it deliberately."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import datasets\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load the split to fix"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = datasets.load_dataset(\"lmms-lab/LiveBenchResults\", \"2024-09\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Remove the leftover index column\n",
    "\n",
    "`Dataset.drop_index` does not work here — it removes a search index, not a\n",
    "column — so we go through pandas instead and drop the column there."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.DataFrame(data[\"test\"]).drop(columns=\"__index_level_0__\")\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Rebuild the dataset\n",
    "\n",
    "`preserve_index=False` is what keeps `__index_level_0__` from being\n",
    "reintroduced during the round-trip back to a `Dataset`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cleaned_data = datasets.Dataset.from_pandas(df, split=\"test\", preserve_index=False)\n",
    "cleaned_data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Upload the cleaned split back to the Hub"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cleaned_data.push_to_hub(\"lmms-lab/LiveBenchResults\", \"2024-09\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "live_bench",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}