"""Build the Charades-STA temporal-grounding dataset and push it to the Hub.

The script reads the annotation JSON at ``json_path``, gathers the ``video``,
``caption`` and ``timestamp`` fields of every record into columns, converts
the columns into a ``datasets.Dataset`` with an explicit schema, and uploads
the result to ``hub_dataset_path`` as the ``test`` split.

Executing this file performs the upload immediately, so valid Hugging Face
credentials with write access to the target repository are presumably
required (the script itself does no authentication).
"""

import json
import os

import pandas as pd
from datasets import Dataset, Features, Image, Sequence, Value
from tqdm import tqdm

# Define the features for the dataset
# NOTE(review): float16 keeps only about three significant decimal digits, so
# timestamp values are rounded when stored. Confirm this precision is intended
# before changing the dtype, since that would alter the published schema.
features = Features(
    {
        "video": Value(dtype="string"),
        "caption": Value(dtype="string"),
        "timestamp": Sequence(Value(dtype="float16")),  # Use Sequence for lists
    }
)

# Column-oriented accumulator: one list per dataset column, filled in the
# order the records appear in the JSON file.
df_items = {
    "video": [],
    "caption": [],
    "timestamp": [],
}

# Load json file
json_path = "/opt/tiger/lmms-eval/lmms_eval/tasks/charades_sta/temporal_grounding_charades.json"

# Decode explicitly as UTF-8 so the result does not depend on the platform's
# default locale encoding.
with open(json_path, "r", encoding="utf-8") as f:
    data = json.load(f)

# Iterate over the rows of the data. Every record must provide all three
# keys; a missing key raises KeyError rather than silently producing a
# ragged table.
for cur_meta in data:
    for column, values in df_items.items():
        values.append(cur_meta[column])

# Convert the accumulated columns to a DataFrame and then to a Dataset that
# is checked against the declared schema.
df_items = pd.DataFrame(df_items)
dataset = Dataset.from_pandas(df_items, features=features)

# Publish the dataset as the "test" split of the target repository.
hub_dataset_path = "lmms-lab/charades_sta"
dataset.push_to_hub(repo_id=hub_dataset_path, split="test")


# ---------------------------------------------------------------------------
# Reference only (intentionally disabled): upload every *.zip file found in a
# local directory to the same dataset repository.
# ---------------------------------------------------------------------------
# # upload the *zip to huggingface
# from huggingface_hub import HfApi
#
#
# def upload_zip_to_huggingface(repo_id, zip_path, commit_message="Upload ZIP file"):
#     """
#     Uploads a ZIP file to a Hugging Face dataset repository.
#
#     Args:
#         repo_id (str): The dataset repository ID (e.g., "your-username/your-dataset").
#         zip_path (str): Path to the ZIP file to upload.
#         commit_message (str): Commit message for the upload.
#     """
#     api = HfApi()
#
#     # Upload file to the dataset repo
#     api.upload_file(
#         path_or_fileobj=zip_path,
#         path_in_repo=zip_path.split("/")[-1],  # Store with the same filename
#         repo_id=repo_id,
#         repo_type="dataset",
#         commit_message=commit_message
#     )
#
#     print(f"Successfully uploaded {zip_path} to {repo_id}")
#
#
# # Example Usage for upload all zip in directory
# import os
#
# directory_path = "/home/tiger/split_zips"
#
# # Iterate over all files in the directory
# for filename in os.listdir(directory_path):
#     if filename.endswith(".zip"):
#         file_path = os.path.join(directory_path, filename)
#         upload_zip_to_huggingface("lmms-lab/charades_sta", file_path)