GeorgeSherif committed on
Commit
77215fd
·
1 Parent(s): 1349bfa
Files changed (1) hide show
  1. app.py +7 -5
app.py CHANGED
@@ -13,16 +13,18 @@ else:
13
  print("HUGGINGFACE_TOKEN environment variable not set.")
14
  dataset_name = "GeorgeIbrahim/EGYCOCO" # Replace with your dataset name
15
 
 
 
 
 
 
 
16
  # Load or create the dataset with train and val splits
17
  try:
18
  dataset = load_dataset(dataset_name)
19
  print("Loaded existing dataset:", dataset)
20
  except Exception as e:
21
  # Create empty datasets for train and val splits if they don't exist
22
- features = Features({
23
- 'image_id': Value(dtype='string'),
24
- 'caption': Value(dtype='string'),
25
- })
26
  train_dataset = Dataset.from_dict({'image_id': [], 'caption': []}, features=features)
27
  val_dataset = Dataset.from_dict({'image_id': [], 'caption': []}, features=features)
28
  dataset = {"train": train_dataset, "val": val_dataset}
@@ -74,7 +76,7 @@ def save_annotation(caption, session_data):
74
  caption = "skipped"
75
 
76
  # Add the new annotation as a new row to the appropriate split
77
- new_data = Dataset.from_dict({"image_id": [image_id], "caption": [caption]})
78
  global dataset
79
  dataset[split] = concatenate_datasets([dataset[split], new_data])
80
 
 
13
  print("HUGGINGFACE_TOKEN environment variable not set.")
14
  dataset_name = "GeorgeIbrahim/EGYCOCO" # Replace with your dataset name
15
 
16
+ # Define the features schema once at the beginning
17
+ features = Features({
18
+ 'image_id': Value(dtype='string'),
19
+ 'caption': Value(dtype='string'),
20
+ })
21
+
22
  # Load or create the dataset with train and val splits
23
  try:
24
  dataset = load_dataset(dataset_name)
25
  print("Loaded existing dataset:", dataset)
26
  except Exception as e:
27
  # Create empty datasets for train and val splits if they don't exist
 
 
 
 
28
  train_dataset = Dataset.from_dict({'image_id': [], 'caption': []}, features=features)
29
  val_dataset = Dataset.from_dict({'image_id': [], 'caption': []}, features=features)
30
  dataset = {"train": train_dataset, "val": val_dataset}
 
76
  caption = "skipped"
77
 
78
  # Add the new annotation as a new row to the appropriate split
79
+ new_data = Dataset.from_dict({"image_id": [image_id], "caption": [caption]}, features=features)
80
  global dataset
81
  dataset[split] = concatenate_datasets([dataset[split], new_data])
82