GeorgeSherif committed on
Commit
1fbc4e1
·
0 Parent(s):

Initial commit

Browse files
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.idea/Data_Collection.iml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <module type="PYTHON_MODULE" version="4">
3
+ <component name="NewModuleRootManager">
4
+ <content url="file://$MODULE_DIR$" />
5
+ <orderEntry type="inheritedJdk" />
6
+ <orderEntry type="sourceFolder" forTests="false" />
7
+ </component>
8
+ </module>
.idea/inspectionProfiles/profiles_settings.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <settings>
3
+ <option name="USE_PROJECT_PROFILE" value="false" />
4
+ <version value="1.0" />
5
+ </settings>
6
+ </component>
.idea/misc.xml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="Black">
4
+ <option name="sdkName" value="Python 3.10" />
5
+ </component>
6
+ <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10" project-jdk-type="Python SDK" />
7
+ </project>
.idea/modules.xml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectModuleManager">
4
+ <modules>
5
+ <module fileurl="file://$PROJECT_DIR$/.idea/Data_Collection.iml" filepath="$PROJECT_DIR$/.idea/Data_Collection.iml" />
6
+ </modules>
7
+ </component>
8
+ </project>
.idea/vcs.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="VcsDirectoryMappings">
4
+ <mapping directory="" vcs="Git" />
5
+ </component>
6
+ </project>
.idea/workspace.xml ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ChangeListManager">
4
+ <list default="true" id="e2d8b51e-654e-4e4e-82ee-2e39a776bb7a" name="Changes" comment="" />
5
+ <option name="SHOW_DIALOG" value="false" />
6
+ <option name="HIGHLIGHT_CONFLICTS" value="true" />
7
+ <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
8
+ <option name="LAST_RESOLUTION" value="IGNORE" />
9
+ </component>
10
+ <component name="FileTemplateManagerImpl">
11
+ <option name="RECENT_TEMPLATES">
12
+ <list>
13
+ <option value="Python Script" />
14
+ </list>
15
+ </option>
16
+ </component>
17
+ <component name="Git.Settings">
18
+ <option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
19
+ </component>
20
+ <component name="ProjectColorInfo"><![CDATA[{
21
+ "associatedIndex": 4
22
+ }]]></component>
23
+ <component name="ProjectId" id="2oTSyCnM0cuZAAEVnfBnGJXqf8S" />
24
+ <component name="ProjectViewState">
25
+ <option name="hideEmptyMiddlePackages" value="true" />
26
+ <option name="showLibraryContents" value="true" />
27
+ </component>
28
+ <component name="PropertiesComponent"><![CDATA[{
29
+ "keyToString": {
30
+ "RunOnceActivity.ShowReadmeOnStart": "true",
31
+ "git-widget-placeholder": "main",
32
+ "last_opened_file_path": "C:/Users/Owner/PycharmProjects"
33
+ }
34
+ }]]></component>
35
+ <component name="SharedIndexes">
36
+ <attachedChunks>
37
+ <set>
38
+ <option value="bundled-python-sdk-d7ad00fb9fc3-c546a90a8094-com.jetbrains.pycharm.community.sharedIndexes.bundled-PC-242.23726.102" />
39
+ </set>
40
+ </attachedChunks>
41
+ </component>
42
+ <component name="SpellCheckerSettings" RuntimeDictionaries="0" Folders="0" CustomDictionaries="0" DefaultDictionary="application-level" UseSingleDictionary="true" transferred="true" />
43
+ <component name="TaskManager">
44
+ <task active="true" id="Default" summary="Default task">
45
+ <changelist id="e2d8b51e-654e-4e4e-82ee-2e39a776bb7a" name="Changes" comment="" />
46
+ <created>1730892325122</created>
47
+ <option name="number" value="Default" />
48
+ <option name="presentableId" value="Default" />
49
+ <updated>1730892325122</updated>
50
+ </task>
51
+ <servers />
52
+ </component>
53
+ </project>
README.md ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Data Collection
3
+ emoji: 🌖
4
+ colorFrom: yellow
5
+ colorTo: yellow
6
+ sdk: gradio
7
+ sdk_version: 5.5.0
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
11
+
12
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ import threading
4
+ import random
5
+ from datasets import load_dataset, Dataset, Features, Value
6
+ from huggingface_hub import login
7
# Authenticate against the Hugging Face Hub when a token is provided through
# the environment; otherwise only report that it is missing.
token = os.getenv("HUGGINGFACE_TOKEN")
if token:
    login(token=token)
else:
    print("HUGGINGFACE_TOKEN environment variable not set.")

# Set up your Hugging Face dataset name
dataset_name = "GeorgeIbrahim/EGYCOCO"  # Replace with your dataset name

# Load or create the dataset.
# split="train" makes load_dataset return a single `Dataset`; without it a
# `DatasetDict` is returned, which has no `add_item` method, so the annotation
# callbacks could never append a row.
try:
    dataset = load_dataset(dataset_name, split="train")
except Exception as e:
    # NOTE(review): every load failure ends up here (not only "dataset does
    # not exist"), and the push below uploads an empty dataset -- confirm this
    # is acceptable when the Hub is merely unreachable.
    print(f"Could not load dataset {dataset_name!r} ({e}); creating an empty one.")
    features = Features({
        'image_id': Value(dtype='string'),
        'caption': Value(dtype='string'),
    })
    dataset = Dataset.from_dict({'image_id': [], 'caption': []}, features=features)
    dataset.push_to_hub(dataset_name)  # Push the empty dataset to Hugging Face

# Images to annotate: every PNG/JPEG file directly inside `image_folder`.
# The extension check is case-insensitive so files such as "IMG.JPG" are kept.
image_folder = "test"
image_files = [
    f for f in os.listdir(image_folder)
    if f.lower().endswith(('.png', '.jpg', '.jpeg'))
]

# Guards the shared module state used by the Gradio callbacks.
lock = threading.Lock()
31
+
32
# Pick a random image from the image folder.
# NOTE: images that were already annotated or skipped are NOT filtered out
# here; every file in `image_files` stays eligible on every call.
def get_next_image():
    """Return the path of a randomly chosen image, or None if there are none.

    The path is `image_folder` joined with one entry of `image_files`,
    selected with `random.choice` while holding `lock`.
    """
    with lock:
        if not image_files:
            return None
        chosen = random.choice(image_files)
        return os.path.join(image_folder, chosen)
39
+
40
+ # Function to save the annotation to Hugging Face dataset and fetch the next image
41
def save_annotation(caption):
    """Store the caption for the current image, push it, and show the next image.

    Args:
        caption: Text typed by the annotator. The word "skip" (compared
            case-insensitively, surrounding whitespace ignored) is stored as
            "skipped".

    Returns:
        A pair of Gradio updates, one for the image component and one for the
        caption textbox.
    """
    # `dataset` is rebound below; it must be declared global, otherwise the
    # assignment makes it function-local and the read on the right-hand side
    # raises UnboundLocalError.
    global current_image, dataset
    if current_image is None:
        return gr.update(visible=False), gr.update(value="All images have been annotated!")

    with lock:
        image_id = os.path.basename(current_image)

        # Save caption or "skipped" based on user input
        if caption.strip().lower() == "skip":
            caption = "skipped"

        # Add the new annotation to the dataset
        # NOTE(review): add_item requires `dataset` to be a `datasets.Dataset`
        # (not a DatasetDict) -- confirm against how it is loaded.
        new_data = {"image_id": image_id, "caption": caption}
        dataset = dataset.add_item(new_data)

        # Save updated dataset to Hugging Face
        dataset.push_to_hub(dataset_name)

    # Fetch the next image. This runs after the lock is released because
    # get_next_image() acquires the same non-reentrant lock itself.
    current_image = get_next_image()
    if current_image:
        return gr.update(value=current_image), gr.update(value="")
    else:
        return gr.update(visible=False), gr.update(value="All images have been annotated!")
66
+
67
+ # Function to skip the current image
68
def skip_image():
    """Record the current image as "skipped", push it, and show the next image.

    Returns:
        A pair of Gradio updates, one for the image component and one for the
        caption textbox.
    """
    # `dataset` is rebound below; it must be declared global, otherwise the
    # assignment makes it function-local and the read on the right-hand side
    # raises UnboundLocalError.
    global current_image, dataset
    if current_image is None:
        return gr.update(visible=False), gr.update(value="All images have been annotated!")

    with lock:
        image_id = os.path.basename(current_image)
        # Record "skipped" caption directly here
        new_data = {"image_id": image_id, "caption": "skipped"}
        dataset = dataset.add_item(new_data)

        # Save updated dataset to Hugging Face
        dataset.push_to_hub(dataset_name)

    # Fetch the next image. This runs after the lock is released because
    # get_next_image() acquires the same non-reentrant lock itself.
    current_image = get_next_image()
    if current_image:
        return gr.update(value=current_image), gr.update(value="")
    else:
        return gr.update(visible=False), gr.update(value="All images have been annotated!")
88
+
89
+ # Function to initialize the interface
90
def initialize_interface():
    """Select the first image and return the initial component updates.

    Sets the module-level `current_image` from get_next_image(). Returns a
    pair of Gradio updates (image, caption); when no image is available the
    image component is hidden and the caption shows the completion message.
    """
    global current_image
    current_image = get_next_image()
    if not current_image:
        return gr.update(visible=False), gr.update(value="All images have been annotated!")
    return gr.update(value=current_image), gr.update(value="")
97
+
98
+ # Build the Gradio interface
99
# Build the Gradio interface: a heading, instructions, the image to caption,
# a caption textbox, and Submit / Skip buttons.
with gr.Blocks() as demo:
    gr.Markdown("# Image Captioning Tool")
    gr.Markdown("Please provide a caption for each image displayed. Click 'Submit' after writing your caption, or type 'skip' if you don’t want to annotate this image.")

    # NOTE(review): which components sit inside this Row is reconstructed;
    # the original indentation was not available -- confirm the layout.
    with gr.Row():
        image = gr.Image()
        caption = gr.Textbox(placeholder="Enter caption here...")
        submit = gr.Button("Submit")
        skip = gr.Button("Skip")  # Skip button

    # Define actions for buttons: both callbacks return updates for the image
    # and the caption textbox, in that order.
    submit.click(fn=save_annotation, inputs=caption, outputs=[image, caption])
    skip.click(fn=skip_image, inputs=None, outputs=[image, caption])

    # Load initial image when the page is opened
    demo.load(fn=initialize_interface, inputs=None, outputs=[image, caption])

# Start the app; share=True additionally requests a public share link.
demo.launch(share=True)
test/00a0b916fd5941a3.jpg ADDED
test/00a72fa141918070.jpg ADDED
test/00c9616a917be867.jpg ADDED