Spaces:

GeorgeIbrahim
/

Data_Collection

Sleeping

App Files Community

GeorgeIbrahim committed on Nov 8, 2024

Commit

7971cd8

1 Parent(s): c5ec5d8

updates

Browse files

Files changed (1) hide show

app.py +19 -4

app.py CHANGED Viewed

@@ -22,7 +22,22 @@ with open('nearest_neighbors_with_captions.json', 'r') as f:
 try:
     dataset = load_dataset(dataset_name, split="train")
     print("Loaded existing dataset:", dataset)
     # Create a dictionary to keep track of the highest annotation count for each image
     annotation_counts = {}
     for example in dataset:
@@ -40,9 +55,9 @@ except Exception as e:
         'image_id': Value(dtype='string'),
         'caption': Value(dtype='string'),
         'annotation_count': Value(dtype='int32'),
-        'spl': Value(dtype='string')
     })
-    dataset = Dataset.from_dict({'image_id': [], 'caption': [], 'annotation_count': [], 'spl': []}, features=features)
     annotation_counts = {}
     dataset.push_to_hub(dataset_name)  # Push the empty dataset to Hugging Face
@@ -122,12 +137,12 @@ def save_annotation(caption, session_data):
             "image_id": [image_id],
             "caption": [caption],
             "annotation_count": [annotation_count + 1],
-            "spl": [split]
         }, features=Features({
             'image_id': Value(dtype='string'),
             'caption': Value(dtype='string'),
             'annotation_count': Value(dtype='int32'),
-            'spl': Value(dtype='string')
         }))
         # Update the annotation count in the dictionary

 try:
     dataset = load_dataset(dataset_name, split="train")
     print("Loaded existing dataset:", dataset)
+    print("Dataset features:", dataset.features)  # Check if 'split' is part of features
+    # Check if the 'split' column exists; if not, add it
+    if 'split' not in dataset.column_names:
+        # Define the 'split' values based on `image_id`
+        split_values = [
+            "dev" if example["image_id"] in results else "train"
+            for example in dataset
+        ]
+        # Add 'split' column to the dataset
+        dataset = dataset.add_column("split", split_values)
+        print("Added 'split' column to dataset.")
+    else:
+        print("'split' column already exists.")
     # Create a dictionary to keep track of the highest annotation count for each image
     annotation_counts = {}
     for example in dataset:
         'image_id': Value(dtype='string'),
         'caption': Value(dtype='string'),
         'annotation_count': Value(dtype='int32'),
+        'split': Value(dtype='string')
     })
+    dataset = Dataset.from_dict({'image_id': [], 'caption': [], 'annotation_count': [], 'split': []}, features=features)
     annotation_counts = {}
     dataset.push_to_hub(dataset_name)  # Push the empty dataset to Hugging Face
             "image_id": [image_id],
             "caption": [caption],
             "annotation_count": [annotation_count + 1],
+            "split": [split]
         }, features=Features({
             'image_id': Value(dtype='string'),
             'caption': Value(dtype='string'),
             'annotation_count': Value(dtype='int32'),
+            'split': Value(dtype='string')
         }))
         # Update the annotation count in the dictionary