GeorgeIbrahim committed on
Commit
35eb21a
·
1 Parent(s): 10a1f44
Files changed (1) hide show
  1. app.py +15 -43
app.py CHANGED
@@ -95,59 +95,38 @@ def get_caption_for_image_id(image_path):
95
  # Return None if the image_id is not found
96
  print("Caption not found for image_id:", image_id) # Debugging line
97
  return None
98
-
99
-
100
- # Initialize a dictionary to keep track of how many times each 'dev' image has been shown
101
- shown_counts = {}
102
-
103
  # Function to get a random image that hasn’t been fully annotated
104
  def get_next_image(session_data):
105
  with lock:
106
- # Filter available images based on annotation counts and the 'split' column
107
  available_images = [
108
  img for img in image_files
109
  if img not in annotation_counts or
110
- (dataset[img]['split'] == "dev" and annotation_counts.get(img, 0) < 2) or
111
- (dataset[img]['split'] == "train" and annotation_counts.get(img, 0) == 0)
112
  ]
113
 
114
  print("Available images:", available_images) # Debugging line
115
 
116
- # Select an image to show based on the 'split' column
117
  if session_data["current_image"] is None and available_images:
118
- random.shuffle(available_images) # Shuffle for randomness
119
- for img in available_images:
120
- split = dataset[img]['split'] # Get the split value from the dataset
121
-
122
- # Show 'dev' images twice
123
- if split == "dev":
124
- if shown_counts.get(img, 0) < 2:
125
- shown_counts[img] = shown_counts.get(img, 0) + 1
126
- session_data["current_image"] = img
127
- print("Selected 'dev' image_id:", session_data["current_image"])
128
- break
129
- # Show 'train' images once
130
- else:
131
- session_data["current_image"] = img
132
- print("Selected 'train' image_id:", session_data["current_image"])
133
- break
134
 
135
  return os.path.join(image_folder, session_data["current_image"]) if session_data["current_image"] else None
136
 
137
- # Update save_annotation function to reset the shown count if 'dev' image is shown twice
 
138
  def save_annotation(caption, session_data):
139
- global dataset, annotation_counts, shown_counts # Include shown_counts
140
 
141
  if session_data["current_image"] is None:
142
  return gr.update(visible=False), gr.update(value="All images have been annotated!"), gr.update(value="")
143
 
144
  with lock:
145
  image_id = session_data["current_image"]
146
- print("Image ID before: ", image_id)
147
-
148
- # Determine the split directly from the dataset
149
- split = dataset[image_id]['split']
150
- print("Split for image:", split)
151
 
152
  # Save caption or "skipped" based on user input
153
  if caption.strip().lower() == "skip":
@@ -160,13 +139,11 @@ def save_annotation(caption, session_data):
160
  new_data = Dataset.from_dict({
161
  "image_id": [image_id],
162
  "caption": [caption],
163
- "annotation_count": [annotation_count + 1],
164
- "split": [split]
165
  }, features=Features({
166
  'image_id': Value(dtype='string'),
167
  'caption': Value(dtype='string'),
168
- 'annotation_count': Value(dtype='int32'),
169
- 'split': Value(dtype='string')
170
  }))
171
 
172
  # Update the annotation count in the dictionary
@@ -177,11 +154,8 @@ def save_annotation(caption, session_data):
177
  dataset.push_to_hub(dataset_name)
178
  print("Pushed updated dataset")
179
 
180
- # Reset shown count if the 'dev' image has been shown twice
181
- if split == "dev" and shown_counts.get(image_id, 0) >= 2:
182
- shown_counts[image_id] = 0 # Reset count for 'dev' images shown twice
183
- session_data["current_image"] = None
184
- elif split == "train":
185
  session_data["current_image"] = None
186
 
187
  # Fetch the next image
@@ -193,8 +167,6 @@ def save_annotation(caption, session_data):
193
  else:
194
  return gr.update(visible=False), gr.update(value="All images have been annotated!"), gr.update(value="")
195
 
196
-
197
-
198
  def initialize_interface(session_data):
199
  next_image = get_next_image(session_data)
200
  if next_image:
 
95
  # Return None if the image_id is not found
96
  print("Caption not found for image_id:", image_id) # Debugging line
97
  return None
98
+
 
 
 
 
99
  # Function to get a random image that hasn’t been fully annotated
100
  def get_next_image(session_data):
101
  with lock:
102
+ # Available images filter
103
  available_images = [
104
  img for img in image_files
105
  if img not in annotation_counts or
106
+ ("val" in img and annotation_counts.get(img, 0) < 2) or
107
+ ("val" not in img and annotation_counts.get(img, 0) == 0)
108
  ]
109
 
110
  print("Available images:", available_images) # Debugging line
111
 
112
+ # Check if the user already has an image
113
  if session_data["current_image"] is None and available_images:
114
+ # Assign a new random image to the user
115
+ session_data["current_image"] = random.choice(available_images)
116
+ print("Current image_id:", session_data["current_image"]) # Print the current image_id
 
 
 
 
 
 
 
 
 
 
 
 
 
117
 
118
  return os.path.join(image_folder, session_data["current_image"]) if session_data["current_image"] else None
119
 
120
+
121
+ # Function to save the annotation to Hugging Face dataset and fetch the next image
122
  def save_annotation(caption, session_data):
123
+ global dataset, annotation_counts # Declare global dataset and annotation_counts at the start of the function
124
 
125
  if session_data["current_image"] is None:
126
  return gr.update(visible=False), gr.update(value="All images have been annotated!"), gr.update(value="")
127
 
128
  with lock:
129
  image_id = session_data["current_image"]
 
 
 
 
 
130
 
131
  # Save caption or "skipped" based on user input
132
  if caption.strip().lower() == "skip":
 
139
  new_data = Dataset.from_dict({
140
  "image_id": [image_id],
141
  "caption": [caption],
142
+ "annotation_count": [annotation_count + 1] # Increment the annotation count
 
143
  }, features=Features({
144
  'image_id': Value(dtype='string'),
145
  'caption': Value(dtype='string'),
146
+ 'annotation_count': Value(dtype='int32') # Ensure int32 type
 
147
  }))
148
 
149
  # Update the annotation count in the dictionary
 
154
  dataset.push_to_hub(dataset_name)
155
  print("Pushed updated dataset")
156
 
157
+ # Clear user's current image if the validation image has been annotated twice
158
+ if ("val" not in image_id) or (annotation_count + 1 >= 2):
 
 
 
159
  session_data["current_image"] = None
160
 
161
  # Fetch the next image
 
167
  else:
168
  return gr.update(visible=False), gr.update(value="All images have been annotated!"), gr.update(value="")
169
 
 
 
170
  def initialize_interface(session_data):
171
  next_image = get_next_image(session_data)
172
  if next_image: