Spaces:

GeorgeIbrahim
/

Data_Collection

Sleeping

App Files Community

GeorgeIbrahim committed on Nov 8, 2024

Commit

33c417d

1 Parent(s): bd6fc64

updates

Browse files

Files changed (1) hide show

app.py +13 -22

app.py CHANGED Viewed

@@ -15,10 +15,18 @@ else:
     print("HUGGINGFACE_TOKEN environment variable not set.")
 dataset_name = "GeorgeIbrahim/EGYCOCO"  # Replace with your dataset name
-# Load or create the dataset with a new 'split' column
 try:
-    dataset = load_dataset(dataset_name, split="train")
-    print("Loaded existing dataset:", dataset)
     # Initialize annotation counts
     annotation_counts = {}
@@ -26,24 +34,12 @@ try:
         image_id = example["image_id"]
         count = example["annotation_count"]
         annotation_counts[image_id] = count
-    print("Annotation counts:", annotation_counts)
 except Exception as e:
     print(f"Error loading dataset: {e}")
     # Create an empty dataset if it doesn't exist
-    features = Features({
-        'image_id': Value(dtype='string'),
-        'caption': Value(dtype='string'),
-        'annotation_count': Value(dtype='int32'),  # Add annotation count feature
-        'split': Value(dtype='string')  # Add split column to mark as "dev" or "train"
-    })
     dataset = Dataset.from_dict({'image_id': [], 'caption': [], 'annotation_count': [], 'split': []}, features=features)
     annotation_counts = {}
-    dataset.push_to_hub(dataset_name)  # Push the empty dataset to Hugging Face
-image_folder = "images"
-image_files = [f for f in os.listdir(image_folder) if f.endswith(('.png', '.jpg', '.jpeg'))]
-lock = threading.Lock()
 # Load the nearest neighbors JSON file
 with open('nearest_neighbors_with_captions.json', 'r') as f:
@@ -122,12 +118,7 @@ def save_annotation(caption, session_data):
             "caption": [caption],
             "annotation_count": [annotation_count + 1],
             "split": [split_type]
-        }, features=Features({
-            'image_id': Value(dtype='string'),
-            'caption': Value(dtype='string'),
-            'annotation_count': Value(dtype='int32'),
-            'split': Value(dtype='string')
-        }))
         annotation_counts[image_id] = annotation_count + 1

     print("HUGGINGFACE_TOKEN environment variable not set.")
 dataset_name = "GeorgeIbrahim/EGYCOCO"  # Replace with your dataset name
+# Define the updated features including the new 'split' column
+features = Features({
+    'image_id': Value(dtype='string'),
+    'caption': Value(dtype='string'),
+    'annotation_count': Value(dtype='int32'),  # Annotation count
+    'split': Value(dtype='string')  # New 'split' column
+})
+# Load the dataset or create it if it doesn’t exist, with updated features
 try:
+    dataset = load_dataset(dataset_name, split="train").cast(features)
+    print("Loaded existing dataset with updated features:", dataset)
     # Initialize annotation counts
     annotation_counts = {}
         image_id = example["image_id"]
         count = example["annotation_count"]
         annotation_counts[image_id] = count
 except Exception as e:
     print(f"Error loading dataset: {e}")
     # Create an empty dataset if it doesn't exist
     dataset = Dataset.from_dict({'image_id': [], 'caption': [], 'annotation_count': [], 'split': []}, features=features)
     annotation_counts = {}
+    dataset.push_to_hub(dataset_name)  # Push the empty dataset to Hugging Face if it's new
 # Load the nearest neighbors JSON file
 with open('nearest_neighbors_with_captions.json', 'r') as f:
             "caption": [caption],
             "annotation_count": [annotation_count + 1],
             "split": [split_type]
+        }, features=features)
         annotation_counts[image_id] = annotation_count + 1