Spaces:

GeorgeIbrahim
/

Data_Collection

Sleeping

App Files Files Community

GeorgeIbrahim committed on Nov 8, 2024

Commit

c08af9f

1 Parent(s): 1749fc6

updates

Browse files

Files changed (1) hide show

app.py +25 -22

app.py CHANGED Viewed

@@ -27,22 +27,22 @@ try:
     print("Dataset features:", dataset.features)  # Check if 'split' is part of features
-    # # Check if the 'split' column exists; if not, add it
-    # if 'split' not in dataset.column_names:
-    split_values = []
-    for example in dataset:
-        match = re.search(r'_(\d+)\.', example["image_id"])
-        image_id = match.group(1).lstrip('0')
-        if image_id in results:
-            split_values.append("dev")
-        else:
-            split_values.append("train")
-    dataset = dataset.replace_column("split", split_values)
-    # print("Added 'split' column to dataset.")
-    # else:
-        # print("'split' column already exists.")
     # Create a dictionary to keep track of the highest annotation count for each image
     annotation_counts = {}
@@ -102,25 +102,28 @@ def get_caption_for_image_id(image_path):
 # Function to get a random image that hasn’t been fully annotated.
 # This is the variant shown on the removed ("-") side of the diff: it decides
 # how many annotations an image may receive from its file name.
 def get_next_image(session_data):
     # session_data is indexed with the "current_image" key; a value of None
     # means no image is assigned to this session yet.
     # Returns os.path.join(image_folder, <current image>), or None when the
     # session still has no current image after the assignment attempt.
     with lock:
-        # Available images filter
         # An image qualifies when it has no entry in annotation_counts, or its
         # name contains "val" and its count is below 2, or its name does not
         # contain "val" and its count is 0.
         available_images = [
             img for img in image_files
             if img not in annotation_counts or
-               ("val" in img and annotation_counts.get(img, 0) < 2) or
-               ("val" not in img and annotation_counts.get(img, 0) == 0)
         ]
-        print("Available images:", available_images)  # Debugging line
-        # Check if the user already has an image
         # An already-assigned image is kept; a new one is chosen only when the
         # session has none and at least one image is available.
         if session_data["current_image"] is None and available_images:
-            # Assign a new random image to the user
-            session_data["current_image"] = random.choice(available_images)
             print("Current image_id:", session_data["current_image"])  # Print the current image_id
     return os.path.join(image_folder, session_data["current_image"]) if session_data["current_image"] else None
 # Function to save the annotation to Hugging Face dataset and fetch the next image
 def save_annotation(caption, session_data):
     global dataset, annotation_counts  # Declare global dataset and annotation_counts at the start of the function

     print("Dataset features:", dataset.features)  # Check if 'split' is part of features
+    # Check if the 'split' column exists; if not, add it
+    if 'split' not in dataset.column_names:
+        split_values = []
+        for example in dataset:
+            match = re.search(r'_(\d+)\.', example["image_id"])
+            image_id = match.group(1).lstrip('0')
+            if image_id in results:
+                split_values.append("dev")
+            else:
+                split_values.append("train")
+        dataset = dataset.replace_column("split", split_values)
+        print("Added 'split' column to dataset.")
+    else:
+        print("'split' column already exists.")
     # Create a dictionary to keep track of the highest annotation count for each image
     annotation_counts = {}
 # Function to get a random image that hasn’t been fully annotated
 def get_next_image(session_data):
     """Return the path of the image this session should annotate next.

     An image is available when it has no entry in ``annotation_counts``, when
     it belongs to the "dev" split and has fewer than 2 annotations, or when
     it belongs to any other split and has 0 annotations. If the session has
     no current image, one available image is picked at random and stored in
     ``session_data["current_image"]``; an already-assigned image is kept.

     Args:
         session_data: Mapping with a "current_image" key holding the file
             name assigned to this session, or None.

     Returns:
         ``os.path.join(image_folder, <current image>)``, or None when the
         session has no image and none is available.
     """
     with lock:
         # Look up each image's split by its image_id. The previous code used
         # dataset[annotation_counts[img]]["split"], which indexes the dataset
         # with the annotation *count* and therefore read the split of an
         # unrelated row (row 0, 1, ...) instead of the image's own row.
         # NOTE(review): assumes the dataset's "image_id" values are the same
         # strings as the entries of image_files / keys of annotation_counts.
         if "split" in dataset.column_names:
             split_by_image = dict(zip(dataset["image_id"], dataset["split"]))
         else:
             # Without a 'split' column every image is treated as non-dev.
             split_by_image = {}
         def is_available(img):
             if img not in annotation_counts:
                 return True
             count = annotation_counts[img]
             if split_by_image.get(img) == "dev":
                 return count < 2  # dev images accept up to 2 annotations
             return count == 0  # all other images accept a single annotation
         # Available images filter based on the 'split' column instead of checking filename
         available_images = [img for img in image_files if is_available(img)]
         print("Available images before shuffle:", available_images)  # Debugging line
         # Shuffle available images to randomize the order
         random.shuffle(available_images)
         print("Available images after shuffle:", available_images)  # Debugging line
         # Check if the user already has an image and assign a new one if they don't
         if session_data["current_image"] is None and available_images:
             # Assign a new random image to the user from shuffled available images
             session_data["current_image"] = available_images[0]  # Take the first from shuffled list
             print("Current image_id:", session_data["current_image"])  # Print the current image_id
     return os.path.join(image_folder, session_data["current_image"]) if session_data["current_image"] else None
 # Function to save the annotation to Hugging Face dataset and fetch the next image
 def save_annotation(caption, session_data):
     global dataset, annotation_counts  # Declare global dataset and annotation_counts at the start of the function