GeorgeIbrahim committed on
Commit
df87278
·
1 Parent(s): 0a274b3
Files changed (1) hide show
  1. app.py +41 -29
app.py CHANGED
@@ -25,22 +25,20 @@ try:
25
  dataset = load_dataset(dataset_name, split="train")
26
  print("Loaded existing dataset:", dataset)
27
  print("Dataset features:", dataset.features) # Check if 'split' is part of features
28
-
29
-
30
  # Check if the 'split' column exists; if not, add it
31
  if 'split' not in dataset.column_names:
32
- split_values = []
33
- for example in dataset:
34
- match = re.search(r'_(\d+)\.', example["image_id"])
35
- image_id = match.group(1).lstrip('0')
36
- if image_id in results:
37
- split_values.append("dev")
38
- else:
39
- split_values.append("train")
40
-
41
  dataset = dataset.add_column("split", split_values)
42
  print("Added 'split' column to dataset.")
43
-
44
  else:
45
  print("'split' column already exists.")
46
 
@@ -98,35 +96,46 @@ def get_caption_for_image_id(image_path):
98
  print("Caption not found for image_id:", image_id) # Debugging line
99
  return None
100
 
 
101
 
102
  # Function to get a random image that hasn’t been fully annotated
103
  def get_next_image(session_data):
104
  with lock:
105
- # Available images filter based on the 'split' column instead of checking filename
106
  available_images = [
107
  img for img in image_files
108
  if img not in annotation_counts or
109
- (dataset[annotation_counts[img]]["split"] == "dev" and annotation_counts.get(img, 0) < 2) or
110
- (dataset[annotation_counts[img]]["split"] != "dev" and annotation_counts.get(img, 0) == 0)
111
  ]
112
 
113
- print("Available images before shuffle:", available_images) # Debugging line
114
-
115
- # Shuffle available images to randomize the order
116
- random.shuffle(available_images)
117
- print("Available images after shuffle:", available_images) # Debugging line
118
 
119
- # Check if the user already has an image and assign a new one if they don't
120
  if session_data["current_image"] is None and available_images:
121
- # Assign a new random image to the user from shuffled available images
122
- session_data["current_image"] = available_images[0] # Take the first from shuffled list
123
- print("Current image_id:", session_data["current_image"]) # Print the current image_id
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
 
125
  return os.path.join(image_folder, session_data["current_image"]) if session_data["current_image"] else None
126
 
127
- # Function to save the annotation to Hugging Face dataset and fetch the next image
128
  def save_annotation(caption, session_data):
129
- global dataset, annotation_counts # Declare global dataset and annotation_counts at the start of the function
130
 
131
  if session_data["current_image"] is None:
132
  return gr.update(visible=False), gr.update(value="All images have been annotated!"), gr.update(value="")
@@ -168,8 +177,11 @@ def save_annotation(caption, session_data):
168
  dataset.push_to_hub(dataset_name)
169
  print("Pushed updated dataset")
170
 
171
- # Clear user's current image if the validation image has been annotated twice
172
- if ("val" not in image_id) or (annotation_count + 1 >= 2):
 
 
 
173
  session_data["current_image"] = None
174
 
175
  # Fetch the next image
@@ -181,7 +193,7 @@ def save_annotation(caption, session_data):
181
  else:
182
  return gr.update(visible=False), gr.update(value="All images have been annotated!"), gr.update(value="")
183
 
184
-
185
  def initialize_interface(session_data):
186
  next_image = get_next_image(session_data)
187
  if next_image:
 
25
  dataset = load_dataset(dataset_name, split="train")
26
  print("Loaded existing dataset:", dataset)
27
  print("Dataset features:", dataset.features) # Check if 'split' is part of features
28
+
 
29
  # Check if the 'split' column exists; if not, add it
30
  if 'split' not in dataset.column_names:
31
+ # Define the 'split' values based on `image_id`
32
+ print(results)
33
+ print(example["image_id"] in results)
34
+ split_values = [
35
+ "dev" if example["image_id"] in results else "train"
36
+ for example in dataset
37
+ ]
38
+
39
+ # Add 'split' column to the dataset
40
  dataset = dataset.add_column("split", split_values)
41
  print("Added 'split' column to dataset.")
 
42
  else:
43
  print("'split' column already exists.")
44
 
 
96
  print("Caption not found for image_id:", image_id) # Debugging line
97
  return None
98
 
99
+ shown_counts = {}
100
 
101
  # Function to get a random image that hasn’t been fully annotated
102
  def get_next_image(session_data):
103
  with lock:
104
+ # Filter available images based on annotation counts and split
105
  available_images = [
106
  img for img in image_files
107
  if img not in annotation_counts or
108
+ ("val" in img and annotation_counts.get(img, 0) < 2) or
109
+ ("val" not in img and annotation_counts.get(img, 0) == 0)
110
  ]
111
 
112
+ print("Available images:", available_images) # Debugging line
 
 
 
 
113
 
114
+ # Select an image to show based on split type
115
  if session_data["current_image"] is None and available_images:
116
+ random.shuffle(available_images) # Shuffle for randomness
117
+ for img in available_images:
118
+ image_id = re.search(r'_(\d+)\.', img).group(1).lstrip('0') # Extract image ID
119
+ split = "dev" if image_id in results else "train"
120
+
121
+ # Show 'dev' images twice
122
+ if split == "dev":
123
+ if shown_counts.get(img, 0) < 2:
124
+ shown_counts[img] = shown_counts.get(img, 0) + 1
125
+ session_data["current_image"] = img
126
+ print("Selected 'dev' image_id:", session_data["current_image"])
127
+ break
128
+ # Show 'train' images once
129
+ else:
130
+ session_data["current_image"] = img
131
+ print("Selected 'train' image_id:", session_data["current_image"])
132
+ break
133
 
134
  return os.path.join(image_folder, session_data["current_image"]) if session_data["current_image"] else None
135
 
136
+ # Update save_annotation function to reset the shown count if 'dev' image is shown twice
137
  def save_annotation(caption, session_data):
138
+ global dataset, annotation_counts, shown_counts # Include shown_counts
139
 
140
  if session_data["current_image"] is None:
141
  return gr.update(visible=False), gr.update(value="All images have been annotated!"), gr.update(value="")
 
177
  dataset.push_to_hub(dataset_name)
178
  print("Pushed updated dataset")
179
 
180
+ # Reset shown count if the 'dev' image has been shown twice
181
+ if split == "dev" and shown_counts.get(image_id, 0) >= 2:
182
+ shown_counts[image_id] = 0 # Reset count for 'dev' images shown twice
183
+ session_data["current_image"] = None
184
+ elif split == "train":
185
  session_data["current_image"] = None
186
 
187
  # Fetch the next image
 
193
  else:
194
  return gr.update(visible=False), gr.update(value="All images have been annotated!"), gr.update(value="")
195
 
196
+
197
  def initialize_interface(session_data):
198
  next_image = get_next_image(session_data)
199
  if next_image: