Spaces:

GeorgeIbrahim
/

Data_Collection

Build error

App Files Community

GeorgeSherif committed on Nov 7, 2024

Commit

4701f2e

1 Parent(s): 30f604d

updates

Browse files

Files changed (1) hide show

app.py +23 -23

app.py CHANGED Viewed

@@ -20,15 +20,15 @@ try:
     dataset = load_dataset(dataset_name, split="train")
     print("Loaded existing dataset:", dataset)
-    # Update annotation_count for existing images
-    def update_annotation_count(example):
-        if example["annotation_count"] == 0:
-            example["annotation_count"] = 1
-        return example
-    dataset = dataset.map(update_annotation_count)
-    print("Updated annotation counts for existing images.")
-    dataset.push_to_hub(dataset_name)  # Push the updated dataset to Hugging Face
 except Exception as e:
     print(f"Error loading dataset: {e}")
     # Create an empty dataset if it doesn't exist
@@ -38,6 +38,7 @@ except Exception as e:
         'annotation_count': Value(dtype='int32')  # Add annotation count feature
     })
     dataset = Dataset.from_dict({'image_id': [], 'caption': [], 'annotation_count': []}, features=features)
     dataset.push_to_hub(dataset_name)  # Push the empty dataset to Hugging Face
 image_folder = "images"
@@ -78,16 +79,16 @@ def get_caption_for_image_id(image_path):
 # Function to get a random image that hasn’t been fully annotated
 def get_next_image(session_data):
     with lock:
-        annotated_images = {item["image_id"]: item["annotation_count"] for item in dataset}
         # Available images filter
         available_images = [
             img for img in image_files
-            if img not in annotated_images or
-               ("val" in img and annotated_images[img] < 2) or
-               ("val" not in img and annotated_images[img] == 0)
         ]
         # Check if the user already has an image
         if session_data["current_image"] is None and available_images:
             # Assign a new random image to the user
@@ -99,7 +100,7 @@ def get_next_image(session_data):
 # Function to save the annotation to Hugging Face dataset and fetch the next image
 def save_annotation(caption, session_data):
-    global dataset  # Declare global dataset at the start of the function
     if session_data["current_image"] is None:
         return gr.update(visible=False), gr.update(value="All images have been annotated!"), gr.update(value="")
@@ -111,13 +112,8 @@ def save_annotation(caption, session_data):
         if caption.strip().lower() == "skip":
             caption = "skipped"
-        # Check if image is already in dataset to update count
-        existing_image = dataset.filter(lambda x: x["image_id"] == image_id)
-        if len(existing_image) > 0:
-            # Get current annotation count
-            annotation_count = existing_image[0]["annotation_count"]
-        else:
-            annotation_count = 0
         # Add the new annotation as a new row to the dataset
         new_data = Dataset.from_dict({
@@ -130,19 +126,23 @@ def save_annotation(caption, session_data):
             'annotation_count': Value(dtype='int32')  # Ensure int32 type
         }))
         # Concatenate with the existing dataset and push the updated dataset to Hugging Face
         dataset = concatenate_datasets([dataset, new_data])
         dataset.push_to_hub(dataset_name)
         print("Pushed updated dataset")
         # Clear user's current image if the validation image has been annotated twice
-        if ("val" not in image_id) or (annotation_count >= 2):
             session_data["current_image"] = None
     # Fetch the next image
     next_image = get_next_image(session_data)
     if next_image:
         next_caption = get_caption_for_image_id(os.path.basename(next_image))  # Retrieve the caption for the new image
         return gr.update(value=next_image), gr.update(value=""), gr.update(value=next_caption or "")
     else:
         return gr.update(visible=False), gr.update(value="All images have been annotated!"), gr.update(value="")

     dataset = load_dataset(dataset_name, split="train")
     print("Loaded existing dataset:", dataset)
+    # Create a dictionary to keep track of the highest annotation count for each image
+    annotation_counts = {}
+    for example in dataset:
+        image_id = example["image_id"]
+        count = example["annotation_count"]
+        if image_id not in annotation_counts or count > annotation_counts[image_id]:
+            annotation_counts[image_id] = count
+    print("Annotation counts:", annotation_counts)
 except Exception as e:
     print(f"Error loading dataset: {e}")
     # Create an empty dataset if it doesn't exist
         'annotation_count': Value(dtype='int32')  # Add annotation count feature
     })
     dataset = Dataset.from_dict({'image_id': [], 'caption': [], 'annotation_count': []}, features=features)
+    annotation_counts = {}
     dataset.push_to_hub(dataset_name)  # Push the empty dataset to Hugging Face
 image_folder = "images"
 # Function to get a random image that hasn’t been fully annotated
 def get_next_image(session_data):
     with lock:
         # Available images filter
         available_images = [
             img for img in image_files
+            if img not in annotation_counts or
+               ("val" in img and annotation_counts.get(img, 0) < 2) or
+               ("val" not in img and annotation_counts.get(img, 0) == 0)
         ]
+        print("Available images:", available_images)  # Debugging line
         # Check if the user already has an image
         if session_data["current_image"] is None and available_images:
             # Assign a new random image to the user
 # Function to save the annotation to Hugging Face dataset and fetch the next image
 def save_annotation(caption, session_data):
+    global dataset, annotation_counts  # Declare global dataset and annotation_counts at the start of the function
     if session_data["current_image"] is None:
         return gr.update(visible=False), gr.update(value="All images have been annotated!"), gr.update(value="")
         if caption.strip().lower() == "skip":
             caption = "skipped"
+        # Get current annotation count
+        annotation_count = annotation_counts.get(image_id, 0)
         # Add the new annotation as a new row to the dataset
         new_data = Dataset.from_dict({
             'annotation_count': Value(dtype='int32')  # Ensure int32 type
         }))
+        # Update the annotation count in the dictionary
+        annotation_counts[image_id] = annotation_count + 1
         # Concatenate with the existing dataset and push the updated dataset to Hugging Face
         dataset = concatenate_datasets([dataset, new_data])
         dataset.push_to_hub(dataset_name)
         print("Pushed updated dataset")
         # Clear user's current image if the validation image has been annotated twice
+        if ("val" not in image_id) or (annotation_count + 1 >= 2):
             session_data["current_image"] = None
     # Fetch the next image
     next_image = get_next_image(session_data)
     if next_image:
         next_caption = get_caption_for_image_id(os.path.basename(next_image))  # Retrieve the caption for the new image
+        print("Next image_id:", os.path.basename(next_image))  # Debugging line
         return gr.update(value=next_image), gr.update(value=""), gr.update(value=next_caption or "")
     else:
         return gr.update(visible=False), gr.update(value="All images have been annotated!"), gr.update(value="")