Spaces:
Sleeping
Sleeping
Commit
·
35eb21a
1
Parent(s):
10a1f44
updates
Browse files
app.py
CHANGED
|
@@ -95,59 +95,38 @@ def get_caption_for_image_id(image_path):
|
|
| 95 |
# Return None if the image_id is not found
|
| 96 |
print("Caption not found for image_id:", image_id) # Debugging line
|
| 97 |
return None
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
# Initialize a dictionary to keep track of how many times each 'dev' image has been shown
|
| 101 |
-
shown_counts = {}
|
| 102 |
-
|
| 103 |
# Function to get a random image that hasn’t been fully annotated
|
| 104 |
def get_next_image(session_data):
|
| 105 |
with lock:
|
| 106 |
-
#
|
| 107 |
available_images = [
|
| 108 |
img for img in image_files
|
| 109 |
if img not in annotation_counts or
|
| 110 |
-
(
|
| 111 |
-
(
|
| 112 |
]
|
| 113 |
|
| 114 |
print("Available images:", available_images) # Debugging line
|
| 115 |
|
| 116 |
-
#
|
| 117 |
if session_data["current_image"] is None and available_images:
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
# Show 'dev' images twice
|
| 123 |
-
if split == "dev":
|
| 124 |
-
if shown_counts.get(img, 0) < 2:
|
| 125 |
-
shown_counts[img] = shown_counts.get(img, 0) + 1
|
| 126 |
-
session_data["current_image"] = img
|
| 127 |
-
print("Selected 'dev' image_id:", session_data["current_image"])
|
| 128 |
-
break
|
| 129 |
-
# Show 'train' images once
|
| 130 |
-
else:
|
| 131 |
-
session_data["current_image"] = img
|
| 132 |
-
print("Selected 'train' image_id:", session_data["current_image"])
|
| 133 |
-
break
|
| 134 |
|
| 135 |
return os.path.join(image_folder, session_data["current_image"]) if session_data["current_image"] else None
|
| 136 |
|
| 137 |
-
|
|
|
|
| 138 |
def save_annotation(caption, session_data):
|
| 139 |
-
global dataset, annotation_counts
|
| 140 |
|
| 141 |
if session_data["current_image"] is None:
|
| 142 |
return gr.update(visible=False), gr.update(value="All images have been annotated!"), gr.update(value="")
|
| 143 |
|
| 144 |
with lock:
|
| 145 |
image_id = session_data["current_image"]
|
| 146 |
-
print("Image ID before: ", image_id)
|
| 147 |
-
|
| 148 |
-
# Determine the split directly from the dataset
|
| 149 |
-
split = dataset[image_id]['split']
|
| 150 |
-
print("Split for image:", split)
|
| 151 |
|
| 152 |
# Save caption or "skipped" based on user input
|
| 153 |
if caption.strip().lower() == "skip":
|
|
@@ -160,13 +139,11 @@ def save_annotation(caption, session_data):
|
|
| 160 |
new_data = Dataset.from_dict({
|
| 161 |
"image_id": [image_id],
|
| 162 |
"caption": [caption],
|
| 163 |
-
"annotation_count": [annotation_count + 1]
|
| 164 |
-
"split": [split]
|
| 165 |
}, features=Features({
|
| 166 |
'image_id': Value(dtype='string'),
|
| 167 |
'caption': Value(dtype='string'),
|
| 168 |
-
'annotation_count': Value(dtype='int32')
|
| 169 |
-
'split': Value(dtype='string')
|
| 170 |
}))
|
| 171 |
|
| 172 |
# Update the annotation count in the dictionary
|
|
@@ -177,11 +154,8 @@ def save_annotation(caption, session_data):
|
|
| 177 |
dataset.push_to_hub(dataset_name)
|
| 178 |
print("Pushed updated dataset")
|
| 179 |
|
| 180 |
-
#
|
| 181 |
-
if
|
| 182 |
-
shown_counts[image_id] = 0 # Reset count for 'dev' images shown twice
|
| 183 |
-
session_data["current_image"] = None
|
| 184 |
-
elif split == "train":
|
| 185 |
session_data["current_image"] = None
|
| 186 |
|
| 187 |
# Fetch the next image
|
|
@@ -193,8 +167,6 @@ def save_annotation(caption, session_data):
|
|
| 193 |
else:
|
| 194 |
return gr.update(visible=False), gr.update(value="All images have been annotated!"), gr.update(value="")
|
| 195 |
|
| 196 |
-
|
| 197 |
-
|
| 198 |
def initialize_interface(session_data):
|
| 199 |
next_image = get_next_image(session_data)
|
| 200 |
if next_image:
|
|
|
|
| 95 |
# Return None if the image_id is not found
|
| 96 |
print("Caption not found for image_id:", image_id) # Debugging line
|
| 97 |
return None
|
| 98 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
| 99 |
# Function to get a random image that hasn’t been fully annotated
|
| 100 |
def get_next_image(session_data):
    """Return the path of the image this session should annotate, or None.

    A session that already holds an image keeps it. Otherwise a random
    image is drawn from those that still need annotations and is stored
    in ``session_data["current_image"]``.

    Args:
        session_data: Per-session mapping; its ``"current_image"`` entry
            is read and, when it is ``None``, possibly assigned.

    Returns:
        ``os.path.join(image_folder, <current image>)``, or ``None`` when
        the session holds no image and none remain to hand out.
    """

    def _needs_annotation(name):
        # Images with no recorded count are always eligible.
        if name not in annotation_counts:
            return True
        done = annotation_counts.get(name, 0)
        # Names containing "val" are collected twice, all others once.
        if "val" in name:
            return done < 2
        return done == 0

    with lock:
        # Available images filter
        available_images = list(filter(_needs_annotation, image_files))
        print("Available images:", available_images)  # Debugging line

        # Only hand out a new image when the session does not hold one yet.
        current = session_data["current_image"]
        if current is None and available_images:
            current = random.choice(available_images)
            session_data["current_image"] = current
            print("Current image_id:", current)  # Print the current image_id

        if not current:
            return None
        return os.path.join(image_folder, current)
|
| 119 |
|
| 120 |
+
|
| 121 |
+
# Function to save the annotation to Hugging Face dataset and fetch the next image
|
| 122 |
def save_annotation(caption, session_data):
|
| 123 |
+
global dataset, annotation_counts # Declare global dataset and annotation_counts at the start of the function
|
| 124 |
|
| 125 |
if session_data["current_image"] is None:
|
| 126 |
return gr.update(visible=False), gr.update(value="All images have been annotated!"), gr.update(value="")
|
| 127 |
|
| 128 |
with lock:
|
| 129 |
image_id = session_data["current_image"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
|
| 131 |
# Save caption or "skipped" based on user input
|
| 132 |
if caption.strip().lower() == "skip":
|
|
|
|
| 139 |
new_data = Dataset.from_dict({
|
| 140 |
"image_id": [image_id],
|
| 141 |
"caption": [caption],
|
| 142 |
+
"annotation_count": [annotation_count + 1] # Increment the annotation count
|
|
|
|
| 143 |
}, features=Features({
|
| 144 |
'image_id': Value(dtype='string'),
|
| 145 |
'caption': Value(dtype='string'),
|
| 146 |
+
'annotation_count': Value(dtype='int32') # Ensure int32 type
|
|
|
|
| 147 |
}))
|
| 148 |
|
| 149 |
# Update the annotation count in the dictionary
|
|
|
|
| 154 |
dataset.push_to_hub(dataset_name)
|
| 155 |
print("Pushed updated dataset")
|
| 156 |
|
| 157 |
+
# Clear user's current image if the validation image has been annotated twice
|
| 158 |
+
if ("val" not in image_id) or (annotation_count + 1 >= 2):
|
|
|
|
|
|
|
|
|
|
| 159 |
session_data["current_image"] = None
|
| 160 |
|
| 161 |
# Fetch the next image
|
|
|
|
| 167 |
else:
|
| 168 |
return gr.update(visible=False), gr.update(value="All images have been annotated!"), gr.update(value="")
|
| 169 |
|
|
|
|
|
|
|
| 170 |
def initialize_interface(session_data):
|
| 171 |
next_image = get_next_image(session_data)
|
| 172 |
if next_image:
|