Spaces:
Sleeping
Sleeping
GeorgeSherif
committed on
Commit
·
4701f2e
1
Parent(s):
30f604d
updates
Browse files
app.py
CHANGED
|
@@ -20,15 +20,15 @@ try:
|
|
| 20 |
dataset = load_dataset(dataset_name, split="train")
|
| 21 |
print("Loaded existing dataset:", dataset)
|
| 22 |
|
| 23 |
-
#
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
except Exception as e:
|
| 33 |
print(f"Error loading dataset: {e}")
|
| 34 |
# Create an empty dataset if it doesn't exist
|
|
@@ -38,6 +38,7 @@ except Exception as e:
|
|
| 38 |
'annotation_count': Value(dtype='int32') # Add annotation count feature
|
| 39 |
})
|
| 40 |
dataset = Dataset.from_dict({'image_id': [], 'caption': [], 'annotation_count': []}, features=features)
|
|
|
|
| 41 |
dataset.push_to_hub(dataset_name) # Push the empty dataset to Hugging Face
|
| 42 |
|
| 43 |
image_folder = "images"
|
|
@@ -78,16 +79,16 @@ def get_caption_for_image_id(image_path):
|
|
| 78 |
# Function to get a random image that hasn’t been fully annotated
|
| 79 |
def get_next_image(session_data):
|
| 80 |
with lock:
|
| 81 |
-
annotated_images = {item["image_id"]: item["annotation_count"] for item in dataset}
|
| 82 |
-
|
| 83 |
# Available images filter
|
| 84 |
available_images = [
|
| 85 |
img for img in image_files
|
| 86 |
-
if img not in
|
| 87 |
-
("val" in img and
|
| 88 |
-
("val" not in img and
|
| 89 |
]
|
| 90 |
|
|
|
|
|
|
|
| 91 |
# Check if the user already has an image
|
| 92 |
if session_data["current_image"] is None and available_images:
|
| 93 |
# Assign a new random image to the user
|
|
@@ -99,7 +100,7 @@ def get_next_image(session_data):
|
|
| 99 |
|
| 100 |
# Function to save the annotation to Hugging Face dataset and fetch the next image
|
| 101 |
def save_annotation(caption, session_data):
|
| 102 |
-
global dataset # Declare global dataset at the start of the function
|
| 103 |
|
| 104 |
if session_data["current_image"] is None:
|
| 105 |
return gr.update(visible=False), gr.update(value="All images have been annotated!"), gr.update(value="")
|
|
@@ -111,13 +112,8 @@ def save_annotation(caption, session_data):
|
|
| 111 |
if caption.strip().lower() == "skip":
|
| 112 |
caption = "skipped"
|
| 113 |
|
| 114 |
-
#
|
| 115 |
-
|
| 116 |
-
if len(existing_image) > 0:
|
| 117 |
-
# Get current annotation count
|
| 118 |
-
annotation_count = existing_image[0]["annotation_count"]
|
| 119 |
-
else:
|
| 120 |
-
annotation_count = 0
|
| 121 |
|
| 122 |
# Add the new annotation as a new row to the dataset
|
| 123 |
new_data = Dataset.from_dict({
|
|
@@ -130,19 +126,23 @@ def save_annotation(caption, session_data):
|
|
| 130 |
'annotation_count': Value(dtype='int32') # Ensure int32 type
|
| 131 |
}))
|
| 132 |
|
|
|
|
|
|
|
|
|
|
| 133 |
# Concatenate with the existing dataset and push the updated dataset to Hugging Face
|
| 134 |
dataset = concatenate_datasets([dataset, new_data])
|
| 135 |
dataset.push_to_hub(dataset_name)
|
| 136 |
print("Pushed updated dataset")
|
| 137 |
|
| 138 |
# Clear the user's current image if it is not a validation image, or if it is a validation image whose annotation count has reached two
|
| 139 |
-
if ("val" not in image_id) or (annotation_count >= 2):
|
| 140 |
session_data["current_image"] = None
|
| 141 |
|
| 142 |
# Fetch the next image
|
| 143 |
next_image = get_next_image(session_data)
|
| 144 |
if next_image:
|
| 145 |
next_caption = get_caption_for_image_id(os.path.basename(next_image)) # Retrieve the caption for the new image
|
|
|
|
| 146 |
return gr.update(value=next_image), gr.update(value=""), gr.update(value=next_caption or "")
|
| 147 |
else:
|
| 148 |
return gr.update(visible=False), gr.update(value="All images have been annotated!"), gr.update(value="")
|
|
|
|
| 20 |
dataset = load_dataset(dataset_name, split="train")
|
| 21 |
print("Loaded existing dataset:", dataset)
|
| 22 |
|
| 23 |
+
# Create a dictionary to keep track of the highest annotation count for each image
|
| 24 |
+
annotation_counts = {}
|
| 25 |
+
for example in dataset:
|
| 26 |
+
image_id = example["image_id"]
|
| 27 |
+
count = example["annotation_count"]
|
| 28 |
+
if image_id not in annotation_counts or count > annotation_counts[image_id]:
|
| 29 |
+
annotation_counts[image_id] = count
|
| 30 |
+
|
| 31 |
+
print("Annotation counts:", annotation_counts)
|
| 32 |
except Exception as e:
|
| 33 |
print(f"Error loading dataset: {e}")
|
| 34 |
# Create an empty dataset if it doesn't exist
|
|
|
|
| 38 |
'annotation_count': Value(dtype='int32') # Add annotation count feature
|
| 39 |
})
|
| 40 |
dataset = Dataset.from_dict({'image_id': [], 'caption': [], 'annotation_count': []}, features=features)
|
| 41 |
+
annotation_counts = {}
|
| 42 |
dataset.push_to_hub(dataset_name) # Push the empty dataset to Hugging Face
|
| 43 |
|
| 44 |
image_folder = "images"
|
|
|
|
| 79 |
# Function to get a random image that hasn’t been fully annotated
|
| 80 |
def get_next_image(session_data):
|
| 81 |
with lock:
|
|
|
|
|
|
|
| 82 |
# Available images filter
|
| 83 |
available_images = [
|
| 84 |
img for img in image_files
|
| 85 |
+
if img not in annotation_counts or
|
| 86 |
+
("val" in img and annotation_counts.get(img, 0) < 2) or
|
| 87 |
+
("val" not in img and annotation_counts.get(img, 0) == 0)
|
| 88 |
]
|
| 89 |
|
| 90 |
+
print("Available images:", available_images) # Debugging line
|
| 91 |
+
|
| 92 |
# Check if the user already has an image
|
| 93 |
if session_data["current_image"] is None and available_images:
|
| 94 |
# Assign a new random image to the user
|
|
|
|
| 100 |
|
| 101 |
# Function to save the annotation to Hugging Face dataset and fetch the next image
|
| 102 |
def save_annotation(caption, session_data):
|
| 103 |
+
global dataset, annotation_counts # Declare global dataset and annotation_counts at the start of the function
|
| 104 |
|
| 105 |
if session_data["current_image"] is None:
|
| 106 |
return gr.update(visible=False), gr.update(value="All images have been annotated!"), gr.update(value="")
|
|
|
|
| 112 |
if caption.strip().lower() == "skip":
|
| 113 |
caption = "skipped"
|
| 114 |
|
| 115 |
+
# Get current annotation count
|
| 116 |
+
annotation_count = annotation_counts.get(image_id, 0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
|
| 118 |
# Add the new annotation as a new row to the dataset
|
| 119 |
new_data = Dataset.from_dict({
|
|
|
|
| 126 |
'annotation_count': Value(dtype='int32') # Ensure int32 type
|
| 127 |
}))
|
| 128 |
|
| 129 |
+
# Update the annotation count in the dictionary
|
| 130 |
+
annotation_counts[image_id] = annotation_count + 1
|
| 131 |
+
|
| 132 |
# Concatenate with the existing dataset and push the updated dataset to Hugging Face
|
| 133 |
dataset = concatenate_datasets([dataset, new_data])
|
| 134 |
dataset.push_to_hub(dataset_name)
|
| 135 |
print("Pushed updated dataset")
|
| 136 |
|
| 137 |
# Clear the user's current image if it is not a validation image, or if it is a validation image whose annotation count has reached two
|
| 138 |
+
if ("val" not in image_id) or (annotation_count + 1 >= 2):
|
| 139 |
session_data["current_image"] = None
|
| 140 |
|
| 141 |
# Fetch the next image
|
| 142 |
next_image = get_next_image(session_data)
|
| 143 |
if next_image:
|
| 144 |
next_caption = get_caption_for_image_id(os.path.basename(next_image)) # Retrieve the caption for the new image
|
| 145 |
+
print("Next image_id:", os.path.basename(next_image)) # Debugging line
|
| 146 |
return gr.update(value=next_image), gr.update(value=""), gr.update(value=next_caption or "")
|
| 147 |
else:
|
| 148 |
return gr.update(visible=False), gr.update(value="All images have been annotated!"), gr.update(value="")
|