GeorgeSherif committed on
Commit
4701f2e
·
1 Parent(s): 30f604d
Files changed (1) hide show
  1. app.py +23 -23
app.py CHANGED
@@ -20,15 +20,15 @@ try:
20
  dataset = load_dataset(dataset_name, split="train")
21
  print("Loaded existing dataset:", dataset)
22
 
23
- # Update annotation_count for existing images
24
- def update_annotation_count(example):
25
- if example["annotation_count"] == 0:
26
- example["annotation_count"] = 1
27
- return example
28
-
29
- dataset = dataset.map(update_annotation_count)
30
- print("Updated annotation counts for existing images.")
31
- dataset.push_to_hub(dataset_name) # Push the updated dataset to Hugging Face
32
  except Exception as e:
33
  print(f"Error loading dataset: {e}")
34
  # Create an empty dataset if it doesn't exist
@@ -38,6 +38,7 @@ except Exception as e:
38
  'annotation_count': Value(dtype='int32') # Add annotation count feature
39
  })
40
  dataset = Dataset.from_dict({'image_id': [], 'caption': [], 'annotation_count': []}, features=features)
 
41
  dataset.push_to_hub(dataset_name) # Push the empty dataset to Hugging Face
42
 
43
  image_folder = "images"
@@ -78,16 +79,16 @@ def get_caption_for_image_id(image_path):
78
  # Function to get a random image that hasn’t been fully annotated
79
  def get_next_image(session_data):
80
  with lock:
81
- annotated_images = {item["image_id"]: item["annotation_count"] for item in dataset}
82
-
83
  # Available images filter
84
  available_images = [
85
  img for img in image_files
86
- if img not in annotated_images or
87
- ("val" in img and annotated_images[img] < 2) or
88
- ("val" not in img and annotated_images[img] == 0)
89
  ]
90
 
 
 
91
  # Check if the user already has an image
92
  if session_data["current_image"] is None and available_images:
93
  # Assign a new random image to the user
@@ -99,7 +100,7 @@ def get_next_image(session_data):
99
 
100
  # Function to save the annotation to Hugging Face dataset and fetch the next image
101
  def save_annotation(caption, session_data):
102
- global dataset # Declare global dataset at the start of the function
103
 
104
  if session_data["current_image"] is None:
105
  return gr.update(visible=False), gr.update(value="All images have been annotated!"), gr.update(value="")
@@ -111,13 +112,8 @@ def save_annotation(caption, session_data):
111
  if caption.strip().lower() == "skip":
112
  caption = "skipped"
113
 
114
- # Check if image is already in dataset to update count
115
- existing_image = dataset.filter(lambda x: x["image_id"] == image_id)
116
- if len(existing_image) > 0:
117
- # Get current annotation count
118
- annotation_count = existing_image[0]["annotation_count"]
119
- else:
120
- annotation_count = 0
121
 
122
  # Add the new annotation as a new row to the dataset
123
  new_data = Dataset.from_dict({
@@ -130,19 +126,23 @@ def save_annotation(caption, session_data):
130
  'annotation_count': Value(dtype='int32') # Ensure int32 type
131
  }))
132
 
 
 
 
133
  # Concatenate with the existing dataset and push the updated dataset to Hugging Face
134
  dataset = concatenate_datasets([dataset, new_data])
135
  dataset.push_to_hub(dataset_name)
136
  print("Pushed updated dataset")
137
 
138
  # Clear user's current image if the validation image has been annotated twice
139
- if ("val" not in image_id) or (annotation_count >= 2):
140
  session_data["current_image"] = None
141
 
142
  # Fetch the next image
143
  next_image = get_next_image(session_data)
144
  if next_image:
145
  next_caption = get_caption_for_image_id(os.path.basename(next_image)) # Retrieve the caption for the new image
 
146
  return gr.update(value=next_image), gr.update(value=""), gr.update(value=next_caption or "")
147
  else:
148
  return gr.update(visible=False), gr.update(value="All images have been annotated!"), gr.update(value="")
 
20
  dataset = load_dataset(dataset_name, split="train")
21
  print("Loaded existing dataset:", dataset)
22
 
23
+ # Create a dictionary to keep track of the highest annotation count for each image
24
+ annotation_counts = {}
25
+ for example in dataset:
26
+ image_id = example["image_id"]
27
+ count = example["annotation_count"]
28
+ if image_id not in annotation_counts or count > annotation_counts[image_id]:
29
+ annotation_counts[image_id] = count
30
+
31
+ print("Annotation counts:", annotation_counts)
32
  except Exception as e:
33
  print(f"Error loading dataset: {e}")
34
  # Create an empty dataset if it doesn't exist
 
38
  'annotation_count': Value(dtype='int32') # Add annotation count feature
39
  })
40
  dataset = Dataset.from_dict({'image_id': [], 'caption': [], 'annotation_count': []}, features=features)
41
+ annotation_counts = {}
42
  dataset.push_to_hub(dataset_name) # Push the empty dataset to Hugging Face
43
 
44
  image_folder = "images"
 
79
  # Function to get a random image that hasn’t been fully annotated
80
  def get_next_image(session_data):
81
  with lock:
 
 
82
  # Available images filter
83
  available_images = [
84
  img for img in image_files
85
+ if img not in annotation_counts or
86
+ ("val" in img and annotation_counts.get(img, 0) < 2) or
87
+ ("val" not in img and annotation_counts.get(img, 0) == 0)
88
  ]
89
 
90
+ print("Available images:", available_images) # Debugging line
91
+
92
  # Check if the user already has an image
93
  if session_data["current_image"] is None and available_images:
94
  # Assign a new random image to the user
 
100
 
101
  # Function to save the annotation to Hugging Face dataset and fetch the next image
102
  def save_annotation(caption, session_data):
103
+ global dataset, annotation_counts # Declare global dataset and annotation_counts at the start of the function
104
 
105
  if session_data["current_image"] is None:
106
  return gr.update(visible=False), gr.update(value="All images have been annotated!"), gr.update(value="")
 
112
  if caption.strip().lower() == "skip":
113
  caption = "skipped"
114
 
115
+ # Get current annotation count
116
+ annotation_count = annotation_counts.get(image_id, 0)
 
 
 
 
 
117
 
118
  # Add the new annotation as a new row to the dataset
119
  new_data = Dataset.from_dict({
 
126
  'annotation_count': Value(dtype='int32') # Ensure int32 type
127
  }))
128
 
129
+ # Update the annotation count in the dictionary
130
+ annotation_counts[image_id] = annotation_count + 1
131
+
132
  # Concatenate with the existing dataset and push the updated dataset to Hugging Face
133
  dataset = concatenate_datasets([dataset, new_data])
134
  dataset.push_to_hub(dataset_name)
135
  print("Pushed updated dataset")
136
 
137
  # Clear the user's current image once it is fully annotated: non-validation images after one annotation, validation images after two
138
+ if ("val" not in image_id) or (annotation_count + 1 >= 2):
139
  session_data["current_image"] = None
140
 
141
  # Fetch the next image
142
  next_image = get_next_image(session_data)
143
  if next_image:
144
  next_caption = get_caption_for_image_id(os.path.basename(next_image)) # Retrieve the caption for the new image
145
+ print("Next image_id:", os.path.basename(next_image)) # Debugging line
146
  return gr.update(value=next_image), gr.update(value=""), gr.update(value=next_caption or "")
147
  else:
148
  return gr.update(visible=False), gr.update(value="All images have been annotated!"), gr.update(value="")