GeorgeIbrahim committed on
Commit
33c417d
·
1 Parent(s): bd6fc64
Files changed (1) hide show
  1. app.py +13 -22
app.py CHANGED
@@ -15,10 +15,18 @@ else:
15
  print("HUGGINGFACE_TOKEN environment variable not set.")
16
  dataset_name = "GeorgeIbrahim/EGYCOCO" # Replace with your dataset name
17
 
18
- # Load or create the dataset with a new 'split' column
 
 
 
 
 
 
 
 
19
  try:
20
- dataset = load_dataset(dataset_name, split="train")
21
- print("Loaded existing dataset:", dataset)
22
 
23
  # Initialize annotation counts
24
  annotation_counts = {}
@@ -26,24 +34,12 @@ try:
26
  image_id = example["image_id"]
27
  count = example["annotation_count"]
28
  annotation_counts[image_id] = count
29
-
30
- print("Annotation counts:", annotation_counts)
31
  except Exception as e:
32
  print(f"Error loading dataset: {e}")
33
  # Create an empty dataset if it doesn't exist
34
- features = Features({
35
- 'image_id': Value(dtype='string'),
36
- 'caption': Value(dtype='string'),
37
- 'annotation_count': Value(dtype='int32'), # Add annotation count feature
38
- 'split': Value(dtype='string') # Add split column to mark as "dev" or "train"
39
- })
40
  dataset = Dataset.from_dict({'image_id': [], 'caption': [], 'annotation_count': [], 'split': []}, features=features)
41
  annotation_counts = {}
42
- dataset.push_to_hub(dataset_name) # Push the empty dataset to Hugging Face
43
-
44
- image_folder = "images"
45
- image_files = [f for f in os.listdir(image_folder) if f.endswith(('.png', '.jpg', '.jpeg'))]
46
- lock = threading.Lock()
47
 
48
  # Load the nearest neighbors JSON file
49
  with open('nearest_neighbors_with_captions.json', 'r') as f:
@@ -122,12 +118,7 @@ def save_annotation(caption, session_data):
122
  "caption": [caption],
123
  "annotation_count": [annotation_count + 1],
124
  "split": [split_type]
125
- }, features=Features({
126
- 'image_id': Value(dtype='string'),
127
- 'caption': Value(dtype='string'),
128
- 'annotation_count': Value(dtype='int32'),
129
- 'split': Value(dtype='string')
130
- }))
131
 
132
  annotation_counts[image_id] = annotation_count + 1
133
 
 
15
  print("HUGGINGFACE_TOKEN environment variable not set.")
16
  dataset_name = "GeorgeIbrahim/EGYCOCO" # Replace with your dataset name
17
 
18
+ # Define the updated features including the new 'split' column
19
+ features = Features({
20
+ 'image_id': Value(dtype='string'),
21
+ 'caption': Value(dtype='string'),
22
+ 'annotation_count': Value(dtype='int32'), # Annotation count
23
+ 'split': Value(dtype='string') # New 'split' column
24
+ })
25
+
26
+ # Load the dataset or create it if it doesn’t exist, with updated features
27
  try:
28
+ dataset = load_dataset(dataset_name, split="train").cast(features)
29
+ print("Loaded existing dataset with updated features:", dataset)
30
 
31
  # Initialize annotation counts
32
  annotation_counts = {}
 
34
  image_id = example["image_id"]
35
  count = example["annotation_count"]
36
  annotation_counts[image_id] = count
 
 
37
  except Exception as e:
38
  print(f"Error loading dataset: {e}")
39
  # Create an empty dataset if it doesn't exist
 
 
 
 
 
 
40
  dataset = Dataset.from_dict({'image_id': [], 'caption': [], 'annotation_count': [], 'split': []}, features=features)
41
  annotation_counts = {}
42
+ dataset.push_to_hub(dataset_name) # Push the empty dataset to Hugging Face if it's new
 
 
 
 
43
 
44
  # Load the nearest neighbors JSON file
45
  with open('nearest_neighbors_with_captions.json', 'r') as f:
 
118
  "caption": [caption],
119
  "annotation_count": [annotation_count + 1],
120
  "split": [split_type]
121
+ }, features=features)
 
 
 
 
 
122
 
123
  annotation_counts[image_id] = annotation_count + 1
124