Spaces:

GeorgeIbrahim
/

Data_Collection

Sleeping

App Files Files Community

GeorgeIbrahim committed on Nov 8, 2024

Commit

35eb21a

1 Parent(s): 10a1f44

updates

Browse files

Files changed (1) hide show

app.py +15 -43

app.py CHANGED Viewed

@@ -95,59 +95,38 @@ def get_caption_for_image_id(image_path):
     # Return None if the image_id is not found
     print("Caption not found for image_id:", image_id)  # Debugging line
     return None
-# Initialize a dictionary to keep track of how many times each 'dev' image has been shown
-shown_counts = {}
 # Returns the image_folder path of the session's current image (picking one if the session has none), or None
 def get_next_image(session_data):
     with lock:
-        # Still available: no recorded count, 'dev' split with fewer than 2 annotations, or 'train' split with 0
         available_images = [
             img for img in image_files
             if img not in annotation_counts or
-               (dataset[img]['split'] == "dev" and annotation_counts.get(img, 0) < 2) or
-               (dataset[img]['split'] == "train" and annotation_counts.get(img, 0) == 0)
         ]
         print("Available images:", available_images)  # Debug output: logs the full candidate list on every call
-        # Only pick a new image when the session has none and candidates remain
         if session_data["current_image"] is None and available_images:
-            random.shuffle(available_images)  # Randomize the order in which candidates are tried
-            for img in available_images:
-                split = dataset[img]['split']  # Split value is looked up in the dataset by image name
-                # A 'dev' image is taken only while its shown_counts entry is below 2; otherwise try the next one
-                if split == "dev":
-                    if shown_counts.get(img, 0) < 2:
-                        shown_counts[img] = shown_counts.get(img, 0) + 1
-                        session_data["current_image"] = img
-                        print("Selected 'dev' image_id:", session_data["current_image"])
-                        break
-                # Any split other than 'dev' is taken immediately (logged as 'train')
-                else:
-                    session_data["current_image"] = img
-                    print("Selected 'train' image_id:", session_data["current_image"])
-                    break
     return os.path.join(image_folder, session_data["current_image"]) if session_data["current_image"] else None
-# Update save_annotation function to reset the shown count if 'dev' image is shown twice
 def save_annotation(caption, session_data):
-    global dataset, annotation_counts, shown_counts  # Include shown_counts
     if session_data["current_image"] is None:
         return gr.update(visible=False), gr.update(value="All images have been annotated!"), gr.update(value="")
     with lock:
         image_id = session_data["current_image"]
-        print("Image ID before: ", image_id)
-        # Determine the split directly from the dataset
-        split = dataset[image_id]['split']
-        print("Split for image:", split)
         # Save caption or "skipped" based on user input
         if caption.strip().lower() == "skip":
@@ -160,13 +139,11 @@ def save_annotation(caption, session_data):
         new_data = Dataset.from_dict({
             "image_id": [image_id],
             "caption": [caption],
-            "annotation_count": [annotation_count + 1],
-            "split": [split]
         }, features=Features({
             'image_id': Value(dtype='string'),
             'caption': Value(dtype='string'),
-            'annotation_count': Value(dtype='int32'),
-            'split': Value(dtype='string')
         }))
         # Update the annotation count in the dictionary
@@ -177,11 +154,8 @@ def save_annotation(caption, session_data):
         dataset.push_to_hub(dataset_name)
         print("Pushed updated dataset")
-        # Reset shown count if the 'dev' image has been shown twice
-        if split == "dev" and shown_counts.get(image_id, 0) >= 2:
-            shown_counts[image_id] = 0  # Reset count for 'dev' images shown twice
-            session_data["current_image"] = None
-        elif split == "train":
             session_data["current_image"] = None
     # Fetch the next image
@@ -193,8 +167,6 @@ def save_annotation(caption, session_data):
     else:
         return gr.update(visible=False), gr.update(value="All images have been annotated!"), gr.update(value="")
 def initialize_interface(session_data):
     next_image = get_next_image(session_data)
     if next_image:

     # Return None if the image_id is not found
     print("Caption not found for image_id:", image_id)  # Debugging line
     return None
 # Returns the image_folder path of the session's current image (picking one if the session has none), or None
 def get_next_image(session_data):
     with lock:
+        # Still available: no recorded count, names containing "val" with fewer than 2 annotations, other names with 0
         available_images = [
             img for img in image_files
             if img not in annotation_counts or
+               ("val" in img and annotation_counts.get(img, 0) < 2) or
+               ("val" not in img and annotation_counts.get(img, 0) == 0)
         ]
         print("Available images:", available_images)  # Debug output: logs the full candidate list on every call
+        # Only pick a new image when the session has none and candidates remain
         if session_data["current_image"] is None and available_images:
+            # random.choice picks one candidate from the available list
+            session_data["current_image"] = random.choice(available_images)
+            print("Current image_id:", session_data["current_image"])  # Debug output of the chosen image_id
     return os.path.join(image_folder, session_data["current_image"]) if session_data["current_image"] else None
+# Function to save the annotation to Hugging Face dataset and fetch the next image
 def save_annotation(caption, session_data):
+    global dataset, annotation_counts  # Declare global dataset and annotation_counts at the start of the function
     if session_data["current_image"] is None:
         return gr.update(visible=False), gr.update(value="All images have been annotated!"), gr.update(value="")
     with lock:
         image_id = session_data["current_image"]
         # Save caption or "skipped" based on user input
         if caption.strip().lower() == "skip":
         new_data = Dataset.from_dict({
             "image_id": [image_id],
             "caption": [caption],
+            "annotation_count": [annotation_count + 1]  # Increment the annotation count
         }, features=Features({
             'image_id': Value(dtype='string'),
             'caption': Value(dtype='string'),
+            'annotation_count': Value(dtype='int32')  # Ensure int32 type
         }))
         # Update the annotation count in the dictionary
         dataset.push_to_hub(dataset_name)
         print("Pushed updated dataset")
+        # Clear user's current image if the validation image has been annotated twice
+        if ("val" not in image_id) or (annotation_count + 1 >= 2):
             session_data["current_image"] = None
     # Fetch the next image
     else:
         return gr.update(visible=False), gr.update(value="All images have been annotated!"), gr.update(value="")
 def initialize_interface(session_data):
     next_image = get_next_image(session_data)
     if next_image: