GeorgeIbrahim committed on
Commit
c08af9f
·
1 Parent(s): 1749fc6
Files changed (1) hide show
  1. app.py +25 -22
app.py CHANGED
@@ -27,22 +27,22 @@ try:
27
  print("Dataset features:", dataset.features) # Check if 'split' is part of features
28
 
29
 
30
- # # Check if the 'split' column exists; if not, add it
31
- # if 'split' not in dataset.column_names:
32
- split_values = []
33
- for example in dataset:
34
- match = re.search(r'_(\d+)\.', example["image_id"])
35
- image_id = match.group(1).lstrip('0')
36
- if image_id in results:
37
- split_values.append("dev")
38
- else:
39
- split_values.append("train")
40
 
41
- dataset = dataset.replace_column("split", split_values)
42
- # print("Added 'split' column to dataset.")
43
 
44
- # else:
45
- # print("'split' column already exists.")
46
 
47
  # Create a dictionary to keep track of the highest annotation count for each image
48
  annotation_counts = {}
@@ -102,25 +102,28 @@ def get_caption_for_image_id(image_path):
102
  # Function to get a random image that hasn’t been fully annotated
103
  def get_next_image(session_data):
104
  with lock:
105
- # Available images filter
106
  available_images = [
107
  img for img in image_files
108
  if img not in annotation_counts or
109
- ("val" in img and annotation_counts.get(img, 0) < 2) or
110
- ("val" not in img and annotation_counts.get(img, 0) == 0)
111
  ]
112
 
113
- print("Available images:", available_images) # Debugging line
114
 
115
- # Check if the user already has an image
 
 
 
 
116
  if session_data["current_image"] is None and available_images:
117
- # Assign a new random image to the user
118
- session_data["current_image"] = random.choice(available_images)
119
  print("Current image_id:", session_data["current_image"]) # Print the current image_id
120
 
121
  return os.path.join(image_folder, session_data["current_image"]) if session_data["current_image"] else None
122
 
123
-
124
  # Function to save the annotation to Hugging Face dataset and fetch the next image
125
  def save_annotation(caption, session_data):
126
  global dataset, annotation_counts # Declare global dataset and annotation_counts at the start of the function
 
27
  print("Dataset features:", dataset.features) # Check if 'split' is part of features
28
 
29
 
30
# Make sure the dataset carries a 'split' column; when it is missing, derive
# one value per row from the numeric part of each row's "image_id".
if 'split' not in dataset.column_names:
    split_values = []
    for example in dataset:
        # The id is expected to contain "_<digits>." ; the digits, stripped of
        # leading zeros, are the key that is looked up in `results`.
        match = re.search(r'_(\d+)\.', example["image_id"])
        image_id = match.group(1).lstrip('0') if match else None
        # Rows whose id does not match the pattern fall back to "train"
        # instead of crashing on `None.group(1)`.
        if image_id is not None and image_id in results:
            split_values.append("dev")
        else:
            split_values.append("train")

    # The column does not exist in this branch, so it has to be added;
    # there is nothing to replace yet.
    dataset = dataset.add_column("split", split_values)
    print("Added 'split' column to dataset.")
else:
    print("'split' column already exists.")
46
 
47
  # Create a dictionary to keep track of the highest annotation count for each image
48
  annotation_counts = {}
 
102
  # Function to get a random image that hasn’t been fully annotated
103
  def get_next_image(session_data):
104
  with lock:
105
+ # Available images filter based on the 'split' column instead of checking filename
106
  available_images = [
107
  img for img in image_files
108
  if img not in annotation_counts or
109
+ (dataset[annotation_counts[img]]["split"] == "dev" and annotation_counts.get(img, 0) < 2) or
110
+ (dataset[annotation_counts[img]]["split"] != "dev" and annotation_counts.get(img, 0) == 0)
111
  ]
112
 
113
+ print("Available images before shuffle:", available_images) # Debugging line
114
 
115
+ # Shuffle available images to randomize the order
116
+ random.shuffle(available_images)
117
+ print("Available images after shuffle:", available_images) # Debugging line
118
+
119
+ # Check if the user already has an image and assign a new one if they don't
120
  if session_data["current_image"] is None and available_images:
121
+ # Assign a new random image to the user from shuffled available images
122
+ session_data["current_image"] = available_images[0] # Take the first from shuffled list
123
  print("Current image_id:", session_data["current_image"]) # Print the current image_id
124
 
125
  return os.path.join(image_folder, session_data["current_image"]) if session_data["current_image"] else None
126
 
 
127
  # Function to save the annotation to Hugging Face dataset and fetch the next image
128
  def save_annotation(caption, session_data):
129
  global dataset, annotation_counts # Declare global dataset and annotation_counts at the start of the function