Spaces:

GeorgeIbrahim
/

Data_Collection

Sleeping

GeorgeIbrahim committed on Nov 8, 2024

Commit

9aec593

1 Parent(s): 35eb21a

updates

Files changed (1) hide show

app.py CHANGED Viewed

@@ -95,7 +95,7 @@ def get_caption_for_image_id(image_path):
     # Return None if the image_id is not found
     print("Caption not found for image_id:", image_id)  # Debugging line
     return None
 # Function to get a random image that hasn’t been fully annotated
 def get_next_image(session_data):
     with lock:
@@ -103,8 +103,8 @@ def get_next_image(session_data):
         available_images = [
             img for img in image_files
             if img not in annotation_counts or
-               ("val" in img and annotation_counts.get(img, 0) < 2) or
-               ("val" not in img and annotation_counts.get(img, 0) == 0)
         ]
         print("Available images:", available_images)  # Debugging line
@@ -127,6 +127,10 @@ def save_annotation(caption, session_data):
     with lock:
         image_id = session_data["current_image"]
         # Save caption or "skipped" based on user input
         if caption.strip().lower() == "skip":
@@ -139,11 +143,13 @@ def save_annotation(caption, session_data):
         new_data = Dataset.from_dict({
             "image_id": [image_id],
             "caption": [caption],
-            "annotation_count": [annotation_count + 1]  # Increment the annotation count
         }, features=Features({
             'image_id': Value(dtype='string'),
             'caption': Value(dtype='string'),
-            'annotation_count': Value(dtype='int32')  # Ensure int32 type
         }))
         # Update the annotation count in the dictionary

     # Return None if the image_id is not found
     print("Caption not found for image_id:", image_id)  # Debugging line
     return None
 # Function to get a random image that hasn’t been fully annotated
 def get_next_image(session_data):
     with lock:
         available_images = [
             img for img in image_files
             if img not in annotation_counts or
+               (split.get(img, 0) == "dev" and annotation_counts.get(img, 0) < 2) or
+               (split.get(img, 0) == "train" and annotation_counts.get(img, 0) == 0)
         ]
         print("Available images:", available_images)  # Debugging line
     with lock:
         image_id = session_data["current_image"]
+        match = re.search(r'_(\d+)\.', image_id)
+        image_id = match.group(1).lstrip('0')
+        split = "dev" if image_id in results else "train"
         # Save caption or "skipped" based on user input
         if caption.strip().lower() == "skip":
         new_data = Dataset.from_dict({
             "image_id": [image_id],
             "caption": [caption],
+            "annotation_count": [annotation_count + 1],
+            "split": [split]
         }, features=Features({
             'image_id': Value(dtype='string'),
             'caption': Value(dtype='string'),
+            'annotation_count': Value(dtype='int32'),
+            'split': Value(dtype='string')
         }))
         # Update the annotation count in the dictionary