{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Relabel LiveBench `2024-09` subtasks\n",
    "\n",
    "Loads the `test` split of the `2024-09` config of `lmms-lab/LiveBench`, rewrites `subtask` labels according to `SUBTASK_RENAMES`, and pushes the result back to the same repo, config and split.\n",
    "\n",
    "**Provenance:** an earlier run of this notebook uploaded 200 examples as commit [`e8be877`](https://huggingface.co/datasets/lmms-lab/LiveBench/commit/e8be87798d7db2e22ee3b5aeedf16e2a460ac7b3). The stored outputs of that run were cleared because the code below has been edited since."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import datasets\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Configuration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "REPO_ID = \"lmms-lab/LiveBench\"\n",
    "CONFIG_NAME = \"2024-09\"\n",
    "SPLIT = \"test\"\n",
    "\n",
    "# {label currently stored in the dataset: label it should become}\n",
    "# NOTE(review): the original cell compared against and assigned the very same\n",
    "# string, so this single entry is an identity mapping and changes nothing.\n",
    "# Replace the KEY with the label that actually needs correcting (the label\n",
    "# counts displayed in the next section show which labels exist).\n",
    "SUBTASK_RENAMES = {\n",
    "    \"Concrete Recognition\": \"Concrete Recognition\",\n",
    "}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = datasets.load_dataset(REPO_ID, CONFIG_NAME, split=SPLIT)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Label distribution before relabelling; use it to spot the label that needs correcting.\n",
    "subtask_counts = pd.Series(list(data[\"subtask\"])).value_counts()\n",
    "subtask_counts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Number of examples whose label would actually change under SUBTASK_RENAMES.\n",
    "n_to_rename = int(\n",
    "    sum(\n",
    "        count\n",
    "        for label, count in subtask_counts.items()\n",
    "        if SUBTASK_RENAMES.get(label, label) != label\n",
    "    )\n",
    ")\n",
    "n_to_rename"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Relabel"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get():\n",
    "    \"\"\"Yield every example of `data` with its \"subtask\" label rewritten per SUBTASK_RENAMES.\n",
    "\n",
    "    Labels that have no entry in SUBTASK_RENAMES are yielded unchanged.\n",
    "    \"\"\"\n",
    "    for item in data:\n",
    "        item[\"subtask\"] = SUBTASK_RENAMES.get(item[\"subtask\"], item[\"subtask\"])\n",
    "        yield item"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reuse the source features so the rebuilt dataset keeps the same schema.\n",
    "new_data = datasets.Dataset.from_generator(get, features=data.features)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "expected_subtasks = [SUBTASK_RENAMES.get(label, label) for label in data[\"subtask\"]]\n",
    "assert len(new_data) == len(data), \"relabelling must not add or drop examples\"\n",
    "assert list(new_data[\"subtask\"]) == expected_subtasks, \"unexpected subtask labels after relabelling\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Publish\n",
    "\n",
    "The upload replaces the published split, so it only runs when at least one label changes."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if n_to_rename:\n",
    "    commit_info = new_data.push_to_hub(REPO_ID, CONFIG_NAME, split=SPLIT)\n",
    "    display(commit_info)\n",
    "else:\n",
    "    # Nothing changed: do not overwrite the published split with an identical copy.\n",
    "    print(\"No subtask label would change; skipping upload. Update SUBTASK_RENAMES first.\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "live_bench",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}