GeorgeIbrahim committed on
Commit
7de933f
·
1 Parent(s): 8be8093
Files changed (1) hide show
  1. app.py +12 -8
app.py CHANGED
@@ -15,6 +15,9 @@ else:
15
  print("HUGGINGFACE_TOKEN environment variable not set.")
16
  dataset_name = "GeorgeIbrahim/EGYCOCO" # Replace with your dataset name
17
 
 
 
 
18
  # Load or create the dataset
19
  try:
20
  dataset = load_dataset(dataset_name, split="train")
@@ -25,6 +28,7 @@ try:
25
  for example in dataset:
26
  image_id = example["image_id"]
27
  count = example["annotation_count"]
 
28
  if image_id not in annotation_counts or count > annotation_counts[image_id]:
29
  annotation_counts[image_id] = count
30
 
@@ -35,9 +39,10 @@ except Exception as e:
35
  features = Features({
36
  'image_id': Value(dtype='string'),
37
  'caption': Value(dtype='string'),
38
- 'annotation_count': Value(dtype='int32') # Add annotation count feature
 
39
  })
40
- dataset = Dataset.from_dict({'image_id': [], 'caption': [], 'annotation_count': []}, features=features)
41
  annotation_counts = {}
42
  dataset.push_to_hub(dataset_name) # Push the empty dataset to Hugging Face
43
 
@@ -45,9 +50,6 @@ image_folder = "images"
45
  image_files = [f for f in os.listdir(image_folder) if f.endswith(('.png', '.jpg', '.jpeg'))]
46
  lock = threading.Lock()
47
 
48
- with open('nearest_neighbors_with_captions.json', 'r') as f:
49
- results = json.load(f)
50
-
51
 
52
  def get_caption_for_image_id(image_path):
53
  """
@@ -107,7 +109,7 @@ def save_annotation(caption, session_data):
107
 
108
  with lock:
109
  image_id = session_data["current_image"]
110
-
111
  # Save caption or "skipped" based on user input
112
  if caption.strip().lower() == "skip":
113
  caption = "skipped"
@@ -119,11 +121,13 @@ def save_annotation(caption, session_data):
119
  new_data = Dataset.from_dict({
120
  "image_id": [image_id],
121
  "caption": [caption],
122
- "annotation_count": [annotation_count + 1] # Increment the annotation count
 
123
  }, features=Features({
124
  'image_id': Value(dtype='string'),
125
  'caption': Value(dtype='string'),
126
- 'annotation_count': Value(dtype='int32') # Ensure int32 type
 
127
  }))
128
 
129
  # Update the annotation count in the dictionary
 
15
  print("HUGGINGFACE_TOKEN environment variable not set.")
16
  dataset_name = "GeorgeIbrahim/EGYCOCO" # Replace with your dataset name
17
 
18
+ with open('nearest_neighbors_with_captions.json', 'r') as f:
19
+ results = json.load(f)
20
+
21
  # Load or create the dataset
22
  try:
23
  dataset = load_dataset(dataset_name, split="train")
 
28
  for example in dataset:
29
  image_id = example["image_id"]
30
  count = example["annotation_count"]
31
+
32
  if image_id not in annotation_counts or count > annotation_counts[image_id]:
33
  annotation_counts[image_id] = count
34
 
 
39
  features = Features({
40
  'image_id': Value(dtype='string'),
41
  'caption': Value(dtype='string'),
42
+ 'annotation_count': Value(dtype='int32'),
43
+ 'split': Value(dtype='string')
44
  })
45
+ dataset = Dataset.from_dict({'image_id': [], 'caption': [], 'annotation_count': [], 'split': []}, features=features)
46
  annotation_counts = {}
47
  dataset.push_to_hub(dataset_name) # Push the empty dataset to Hugging Face
48
 
 
50
  image_files = [f for f in os.listdir(image_folder) if f.endswith(('.png', '.jpg', '.jpeg'))]
51
  lock = threading.Lock()
52
 
 
 
 
53
 
54
  def get_caption_for_image_id(image_path):
55
  """
 
109
 
110
  with lock:
111
  image_id = session_data["current_image"]
112
+ split = "dev" if image_id in results else "train"
113
  # Save caption or "skipped" based on user input
114
  if caption.strip().lower() == "skip":
115
  caption = "skipped"
 
121
  new_data = Dataset.from_dict({
122
  "image_id": [image_id],
123
  "caption": [caption],
124
+ "annotation_count": [annotation_count + 1],
125
+ "split": [split]
126
  }, features=Features({
127
  'image_id': Value(dtype='string'),
128
  'caption': Value(dtype='string'),
129
+ 'annotation_count': Value(dtype='int32'),
130
+ 'split': Value(dtype='string')
131
  }))
132
 
133
  # Update the annotation count in the dictionary