Spaces:
Sleeping
Sleeping
Added the name of the VLM used to the data samples and fixed the data structure
Browse files
- logic/data_utils.py +4 -3
- logic/handlers.py +18 -11
- ui/layout.py +15 -16
logic/data_utils.py
CHANGED
|
@@ -70,7 +70,7 @@ class CustomHFDatasetSaver:
|
|
| 70 |
def _migrate_existing(self):
|
| 71 |
"""
|
| 72 |
Ensure all existing JSON sample files have the same schema
|
| 73 |
-
by adding missing keys for 'vlm_caption' and '
|
| 74 |
"""
|
| 75 |
for root, _, files in os.walk(self.local_ds_folder):
|
| 76 |
for fname in files:
|
|
@@ -79,7 +79,7 @@ class CustomHFDatasetSaver:
|
|
| 79 |
with open(fpath, 'r+', encoding='utf-8') as f:
|
| 80 |
data = json.load(f)
|
| 81 |
updated = False
|
| 82 |
-
for key in ['vlm_caption', 'vlm_feedback']:
|
| 83 |
if key not in data:
|
| 84 |
data[key] = ""
|
| 85 |
updated = True
|
|
@@ -248,7 +248,8 @@ class CustomHFDatasetSaver:
|
|
| 248 |
"image_url": values_dic['image_url'] or "",
|
| 249 |
"caption": values_dic['caption'] or "",
|
| 250 |
"vlm_caption": values_dic['vlm_caption'] or "",
|
| 251 |
-
"vlm_feedback": values_dic['vlm_feedback'] or "",
|
|
|
|
| 252 |
"country": values_dic['country'] or "",
|
| 253 |
"language": values_dic['language'] or "",
|
| 254 |
"category": values_dic['category'] or "",
|
|
|
|
| 70 |
def _migrate_existing(self):
|
| 71 |
"""
|
| 72 |
Ensure all existing JSON sample files have the same schema
|
| 73 |
+
by adding missing keys for 'vlm_caption', 'vlm_feedback', and 'vlm_model'.
|
| 74 |
"""
|
| 75 |
for root, _, files in os.walk(self.local_ds_folder):
|
| 76 |
for fname in files:
|
|
|
|
| 79 |
with open(fpath, 'r+', encoding='utf-8') as f:
|
| 80 |
data = json.load(f)
|
| 81 |
updated = False
|
| 82 |
+
for key in ['vlm_caption', 'vlm_feedback', 'vlm_model']:
|
| 83 |
if key not in data:
|
| 84 |
data[key] = ""
|
| 85 |
updated = True
|
|
|
|
| 248 |
"image_url": values_dic['image_url'] or "",
|
| 249 |
"caption": values_dic['caption'] or "",
|
| 250 |
"vlm_caption": values_dic['vlm_caption'] or "",
|
| 251 |
+
"vlm_feedback": values_dic['vlm_feedback'] or "" if values_dic['vlm_caption'] else "",
|
| 252 |
+
"vlm_model": values_dic['vlm_model'] or "" if values_dic['vlm_caption'] else "",
|
| 253 |
"country": values_dic['country'] or "",
|
| 254 |
"language": values_dic['language'] or "",
|
| 255 |
"category": values_dic['category'] or "",
|
logic/handlers.py
CHANGED
|
@@ -143,7 +143,7 @@ def generate_vlm_caption(image, model_name="SmolVLM-500M", timeout_seconds=120):
|
|
| 143 |
"""
|
| 144 |
if image is None:
|
| 145 |
gr.Warning("⚠️ Please upload an image first.", duration=5)
|
| 146 |
-
return None, gr.update(visible=False), gr.update(visible=False), gr.update(interactive=True), gr.update(interactive=True)
|
| 147 |
|
| 148 |
def _generate_caption_with_model():
|
| 149 |
"""Helper function to run caption generation in a separate thread."""
|
|
@@ -174,19 +174,19 @@ def generate_vlm_caption(image, model_name="SmolVLM-500M", timeout_seconds=120):
|
|
| 174 |
elapsed_time = time.time() - start_time
|
| 175 |
print(f"Caption generation timed out after {elapsed_time:.1f} seconds")
|
| 176 |
gr.Warning(f"⚠️ Caption generation timed out after {timeout_seconds} seconds. Please try again with a different model or smaller image.", duration=8)
|
| 177 |
-
return None, gr.update(visible=False), gr.update(visible=False), gr.update(interactive=True), gr.update(interactive=True)
|
| 178 |
|
| 179 |
except Exception as e:
|
| 180 |
print(f"Error generating caption: {e}. Try again later.")
|
| 181 |
gr.Warning(f"⚠️ Error generating caption: {e}. Please try again.", duration=5)
|
| 182 |
-
return None, gr.update(visible=False), gr.update(visible=False), gr.update(interactive=True), gr.update(interactive=True)
|
| 183 |
finally:
|
| 184 |
# For now, let's cleanup memory after each generation
|
| 185 |
vlm_manager.cleanup_memory()
|
| 186 |
|
| 187 |
# print(caption)
|
| 188 |
|
| 189 |
-
return caption, gr.update(visible=True), gr.update(visible=True), gr.update(interactive=False), gr.update(interactive=False)
|
| 190 |
|
| 191 |
def count_words(caption, language):
|
| 192 |
match language:
|
|
@@ -268,6 +268,7 @@ def update_user_data(client , country, language_choice, HF_DATASET_NAME, local_d
|
|
| 268 |
user_ids = set()
|
| 269 |
samples = []
|
| 270 |
vlm_captions = dict()
|
|
|
|
| 271 |
for d in user_dataset:
|
| 272 |
if d['id'] in user_ids:
|
| 273 |
continue
|
|
@@ -297,7 +298,7 @@ def update_user_data(client , country, language_choice, HF_DATASET_NAME, local_d
|
|
| 297 |
|
| 298 |
if 'vlm_caption' in d:
|
| 299 |
vlm_captions[d['id']] = d.get('vlm_caption', "")
|
| 300 |
-
|
| 301 |
# return gr.Dataset(samples=samples), None
|
| 302 |
# ───────────────────────────────────────────────────
|
| 303 |
# Clean up the “Additional Concepts” column (index 7)
|
|
@@ -327,11 +328,12 @@ def update_user_data(client , country, language_choice, HF_DATASET_NAME, local_d
|
|
| 327 |
# check if vlm_captions is an empty dictionary
|
| 328 |
if not vlm_captions:
|
| 329 |
vlm_captions = None
|
| 330 |
-
|
| 331 |
-
|
|
|
|
| 332 |
else:
|
| 333 |
# TODO: should we show the entire dataset instead? What about "other data" tab?
|
| 334 |
-
return gr.Dataset(samples=[]), None, None
|
| 335 |
|
| 336 |
|
| 337 |
def update_language(local_storage, metadata_dict, concepts_dict):
|
|
@@ -430,7 +432,7 @@ def update_intro_language(selected_country, selected_language, intro_markdown, m
|
|
| 430 |
return gr.Markdown(INTRO_TEXT)
|
| 431 |
|
| 432 |
|
| 433 |
-
def handle_click_example(user_examples, vlm_captions, concepts_dict):
|
| 434 |
# print("handle_click_example")
|
| 435 |
# print(user_examples)
|
| 436 |
# ex = [item for item in user_examples]
|
|
@@ -518,8 +520,13 @@ def handle_click_example(user_examples, vlm_captions, concepts_dict):
|
|
| 518 |
if vlm_captions:
|
| 519 |
if exampleid_btn in vlm_captions:
|
| 520 |
vlm_caption = vlm_captions[exampleid_btn]
|
| 521 |
-
|
| 522 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 523 |
# return [
|
| 524 |
# image_inp,
|
| 525 |
# image_url_inp,
|
|
|
|
| 143 |
"""
|
| 144 |
if image is None:
|
| 145 |
gr.Warning("⚠️ Please upload an image first.", duration=5)
|
| 146 |
+
return None, gr.update(visible=False), gr.update(visible=False), gr.update(interactive=True), gr.update(interactive=True), gr.update(interactive=True)
|
| 147 |
|
| 148 |
def _generate_caption_with_model():
|
| 149 |
"""Helper function to run caption generation in a separate thread."""
|
|
|
|
| 174 |
elapsed_time = time.time() - start_time
|
| 175 |
print(f"Caption generation timed out after {elapsed_time:.1f} seconds")
|
| 176 |
gr.Warning(f"⚠️ Caption generation timed out after {timeout_seconds} seconds. Please try again with a different model or smaller image.", duration=8)
|
| 177 |
+
return None, gr.update(visible=False), gr.update(visible=False), gr.update(interactive=True), gr.update(interactive=True), gr.update(interactive=True)
|
| 178 |
|
| 179 |
except Exception as e:
|
| 180 |
print(f"Error generating caption: {e}. Try again later.")
|
| 181 |
gr.Warning(f"⚠️ Error generating caption: {e}. Please try again.", duration=5)
|
| 182 |
+
return None, gr.update(visible=False), gr.update(visible=False), gr.update(interactive=True), gr.update(interactive=True), gr.update(interactive=True)
|
| 183 |
finally:
|
| 184 |
# For now, let's cleanup memory after each generation
|
| 185 |
vlm_manager.cleanup_memory()
|
| 186 |
|
| 187 |
# print(caption)
|
| 188 |
|
| 189 |
+
return caption, gr.update(visible=True), gr.update(visible=True), gr.update(interactive=False), gr.update(interactive=False), gr.update(interactive=False)
|
| 190 |
|
| 191 |
def count_words(caption, language):
|
| 192 |
match language:
|
|
|
|
| 268 |
user_ids = set()
|
| 269 |
samples = []
|
| 270 |
vlm_captions = dict()
|
| 271 |
+
vlm_models = dict()
|
| 272 |
for d in user_dataset:
|
| 273 |
if d['id'] in user_ids:
|
| 274 |
continue
|
|
|
|
| 298 |
|
| 299 |
if 'vlm_caption' in d:
|
| 300 |
vlm_captions[d['id']] = d.get('vlm_caption', "")
|
| 301 |
+
vlm_models[d['id']] = d.get('vlm_model', "")
|
| 302 |
# return gr.Dataset(samples=samples), None
|
| 303 |
# ───────────────────────────────────────────────────
|
| 304 |
# Clean up the “Additional Concepts” column (index 7)
|
|
|
|
| 328 |
# check if vlm_captions is an empty dictionary
|
| 329 |
if not vlm_captions:
|
| 330 |
vlm_captions = None
|
| 331 |
+
if not vlm_models:
|
| 332 |
+
vlm_models = None
|
| 333 |
+
return gr.Dataset(samples=cleaned), None, vlm_captions, vlm_models
|
| 334 |
else:
|
| 335 |
# TODO: should we show the entire dataset instead? What about "other data" tab?
|
| 336 |
+
return gr.Dataset(samples=[]), None, None, None
|
| 337 |
|
| 338 |
|
| 339 |
def update_language(local_storage, metadata_dict, concepts_dict):
|
|
|
|
| 432 |
return gr.Markdown(INTRO_TEXT)
|
| 433 |
|
| 434 |
|
| 435 |
+
def handle_click_example(user_examples, vlm_captions, vlm_models, concepts_dict):
|
| 436 |
# print("handle_click_example")
|
| 437 |
# print(user_examples)
|
| 438 |
# ex = [item for item in user_examples]
|
|
|
|
| 520 |
if vlm_captions:
|
| 521 |
if exampleid_btn in vlm_captions:
|
| 522 |
vlm_caption = vlm_captions[exampleid_btn]
|
| 523 |
+
vlm_model = None
|
| 524 |
+
if vlm_models:
|
| 525 |
+
if exampleid_btn in vlm_models:
|
| 526 |
+
vlm_model = vlm_models[exampleid_btn]
|
| 527 |
+
if not vlm_model or vlm_model == "":
|
| 528 |
+
vlm_model = "SmolVLM-500M" # or get the default from the dropdown definition in main_page.py
|
| 529 |
+
return [image_inp, image_url_inp, long_caption_inp, exampleid_btn, category_btn, concept_btn] + additional_concepts_by_category + [True] + [vlm_caption] + [vlm_model]
|
| 530 |
# return [
|
| 531 |
# image_inp,
|
| 532 |
# image_url_inp,
|
ui/layout.py
CHANGED
|
@@ -325,8 +325,9 @@ def build_ui(concepts_dict, metadata_dict, HF_API_TOKEN, HF_DATASET_NAME):
|
|
| 325 |
vlm_cancel_btn = cmp_main_ui["vlm_cancel_btn"]
|
| 326 |
vlm_model_dropdown = cmp_main_ui["vlm_model_dropdown"]
|
| 327 |
|
| 328 |
-
# dictionary to store
|
| 329 |
vlm_captions = gr.State(None)
|
|
|
|
| 330 |
### Category button
|
| 331 |
category_btn.change(
|
| 332 |
fn=partial(load_concepts, concepts=concepts_dict),
|
|
@@ -461,12 +462,12 @@ def build_ui(concepts_dict, metadata_dict, HF_API_TOKEN, HF_DATASET_NAME):
|
|
| 461 |
# Handle clicking on an example
|
| 462 |
user_examples.click(
|
| 463 |
fn=partial(handle_click_example, concepts_dict=concepts_dict),
|
| 464 |
-
inputs=[user_examples, vlm_captions],
|
| 465 |
outputs=[
|
| 466 |
image_inp, image_url_inp, long_caption_inp, exampleid_btn,
|
| 467 |
category_btn, concept_btn,
|
| 468 |
category_concept_dropdowns[0], category_concept_dropdowns[1], category_concept_dropdowns[2],
|
| 469 |
-
category_concept_dropdowns[3], category_concept_dropdowns[4], loading_example, vlm_output
|
| 470 |
],
|
| 471 |
)
|
| 472 |
|
|
@@ -530,7 +531,7 @@ def build_ui(concepts_dict, metadata_dict, HF_API_TOKEN, HF_DATASET_NAME):
|
|
| 530 |
).then(
|
| 531 |
fn=partial(update_user_data, HF_DATASET_NAME=HF_DATASET_NAME, local_ds_directory_path = LOCAL_DS_DIRECTORY_PATH),
|
| 532 |
inputs=[supabase_user_client, country_choice, language_choice],
|
| 533 |
-
outputs=[user_examples, loading_msg, vlm_captions],
|
| 534 |
)
|
| 535 |
|
| 536 |
|
|
@@ -587,7 +588,8 @@ def build_ui(concepts_dict, metadata_dict, HF_API_TOKEN, HF_DATASET_NAME):
|
|
| 587 |
"client": supabase_user_client,
|
| 588 |
# "is_blurred": is_blurred
|
| 589 |
"vlm_caption": vlm_output,
|
| 590 |
-
"vlm_feedback": vlm_feedback
|
|
|
|
| 591 |
}
|
| 592 |
# data_outputs = [image_inp, image_url_inp, long_caption_inp,
|
| 593 |
# country_inp, language_inp, category_btn, concept_btn,
|
|
@@ -636,7 +638,7 @@ def build_ui(concepts_dict, metadata_dict, HF_API_TOKEN, HF_DATASET_NAME):
|
|
| 636 |
).success(
|
| 637 |
fn=partial(update_user_data, HF_DATASET_NAME=HF_DATASET_NAME, local_ds_directory_path = LOCAL_DS_DIRECTORY_PATH),
|
| 638 |
inputs=[supabase_user_client, country_choice, language_choice],
|
| 639 |
-
outputs=[user_examples, loading_msg, vlm_captions]
|
| 640 |
)
|
| 641 |
return e
|
| 642 |
|
|
@@ -676,19 +678,16 @@ def build_ui(concepts_dict, metadata_dict, HF_API_TOKEN, HF_DATASET_NAME):
|
|
| 676 |
)
|
| 677 |
|
| 678 |
# Handle confirm exclusion
|
|
|
|
|
|
|
|
|
|
|
|
|
| 679 |
confirm_exclude_btn.click(
|
| 680 |
fn=lambda: gr.update(visible=False),
|
| 681 |
outputs=[modal_exclude_confirm]
|
| 682 |
).success(
|
| 683 |
fn=hf_writer.save,
|
| 684 |
-
inputs=
|
| 685 |
-
image_inp, image_url_inp, long_caption_inp, country_inp, language_inp,
|
| 686 |
-
category_btn, concept_btn,
|
| 687 |
-
category_concept_dropdowns[0], category_concept_dropdowns[1], category_concept_dropdowns[2],
|
| 688 |
-
category_concept_dropdowns[3], category_concept_dropdowns[4],
|
| 689 |
-
timestamp_btn, username_inp, password_inp, exampleid_btn, gr.State(value=True),
|
| 690 |
-
gr.State(value=concepts_dict), gr.State(value=lang2eng_mapping), vlm_output, vlm_feedback
|
| 691 |
-
],
|
| 692 |
outputs=None
|
| 693 |
).success(
|
| 694 |
fn=partial(clear_data, "remove"),
|
|
@@ -707,7 +706,7 @@ def build_ui(concepts_dict, metadata_dict, HF_API_TOKEN, HF_DATASET_NAME):
|
|
| 707 |
).success(
|
| 708 |
fn=partial(update_user_data, HF_DATASET_NAME=HF_DATASET_NAME, local_ds_directory_path=LOCAL_DS_DIRECTORY_PATH),
|
| 709 |
inputs=[supabase_user_client, country_choice, language_choice],
|
| 710 |
-
outputs=[user_examples, loading_msg, vlm_captions]
|
| 711 |
)
|
| 712 |
# ============================================= #
|
| 713 |
# VLM Gen button
|
|
@@ -715,7 +714,7 @@ def build_ui(concepts_dict, metadata_dict, HF_API_TOKEN, HF_DATASET_NAME):
|
|
| 715 |
gen_button.click(
|
| 716 |
fn=generate_vlm_caption, # processor=processor, model=model
|
| 717 |
inputs=[image_inp, vlm_model_dropdown],
|
| 718 |
-
outputs=[vlm_output, vlm_feedback, vlm_done_btn, vlm_no_btn, gen_button]
|
| 719 |
)
|
| 720 |
# vlm_output.change(
|
| 721 |
# fn=lambda : gr.update(interactive=False) if vlm_output.value else gr.update(interactive=True),
|
|
|
|
| 325 |
vlm_cancel_btn = cmp_main_ui["vlm_cancel_btn"]
|
| 326 |
vlm_model_dropdown = cmp_main_ui["vlm_model_dropdown"]
|
| 327 |
|
| 328 |
+
# dictionary to store ALL vlm_outputs and vlm_models by exampleid
|
| 329 |
vlm_captions = gr.State(None)
|
| 330 |
+
vlm_models = gr.State(None)
|
| 331 |
### Category button
|
| 332 |
category_btn.change(
|
| 333 |
fn=partial(load_concepts, concepts=concepts_dict),
|
|
|
|
| 462 |
# Handle clicking on an example
|
| 463 |
user_examples.click(
|
| 464 |
fn=partial(handle_click_example, concepts_dict=concepts_dict),
|
| 465 |
+
inputs=[user_examples, vlm_captions, vlm_models],
|
| 466 |
outputs=[
|
| 467 |
image_inp, image_url_inp, long_caption_inp, exampleid_btn,
|
| 468 |
category_btn, concept_btn,
|
| 469 |
category_concept_dropdowns[0], category_concept_dropdowns[1], category_concept_dropdowns[2],
|
| 470 |
+
category_concept_dropdowns[3], category_concept_dropdowns[4], loading_example, vlm_output, vlm_model_dropdown
|
| 471 |
],
|
| 472 |
)
|
| 473 |
|
|
|
|
| 531 |
).then(
|
| 532 |
fn=partial(update_user_data, HF_DATASET_NAME=HF_DATASET_NAME, local_ds_directory_path = LOCAL_DS_DIRECTORY_PATH),
|
| 533 |
inputs=[supabase_user_client, country_choice, language_choice],
|
| 534 |
+
outputs=[user_examples, loading_msg, vlm_captions, vlm_models],
|
| 535 |
)
|
| 536 |
|
| 537 |
|
|
|
|
| 588 |
"client": supabase_user_client,
|
| 589 |
# "is_blurred": is_blurred
|
| 590 |
"vlm_caption": vlm_output,
|
| 591 |
+
"vlm_feedback": vlm_feedback,
|
| 592 |
+
"vlm_model": vlm_model_dropdown # selected vlm model
|
| 593 |
}
|
| 594 |
# data_outputs = [image_inp, image_url_inp, long_caption_inp,
|
| 595 |
# country_inp, language_inp, category_btn, concept_btn,
|
|
|
|
| 638 |
).success(
|
| 639 |
fn=partial(update_user_data, HF_DATASET_NAME=HF_DATASET_NAME, local_ds_directory_path = LOCAL_DS_DIRECTORY_PATH),
|
| 640 |
inputs=[supabase_user_client, country_choice, language_choice],
|
| 641 |
+
outputs=[user_examples, loading_msg, vlm_captions, vlm_models]
|
| 642 |
)
|
| 643 |
return e
|
| 644 |
|
|
|
|
| 678 |
)
|
| 679 |
|
| 680 |
# Handle confirm exclusion
|
| 681 |
+
# Create a modified data_outputs for exclusion with excluded=True
|
| 682 |
+
exclude_data_outputs = data_outputs.copy()
|
| 683 |
+
exclude_data_outputs["excluded"] = gr.State(value=True)
|
| 684 |
+
|
| 685 |
confirm_exclude_btn.click(
|
| 686 |
fn=lambda: gr.update(visible=False),
|
| 687 |
outputs=[modal_exclude_confirm]
|
| 688 |
).success(
|
| 689 |
fn=hf_writer.save,
|
| 690 |
+
inputs=list(exclude_data_outputs.values()),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 691 |
outputs=None
|
| 692 |
).success(
|
| 693 |
fn=partial(clear_data, "remove"),
|
|
|
|
| 706 |
).success(
|
| 707 |
fn=partial(update_user_data, HF_DATASET_NAME=HF_DATASET_NAME, local_ds_directory_path=LOCAL_DS_DIRECTORY_PATH),
|
| 708 |
inputs=[supabase_user_client, country_choice, language_choice],
|
| 709 |
+
outputs=[user_examples, loading_msg, vlm_captions, vlm_models]
|
| 710 |
)
|
| 711 |
# ============================================= #
|
| 712 |
# VLM Gen button
|
|
|
|
| 714 |
gen_button.click(
|
| 715 |
fn=generate_vlm_caption, # processor=processor, model=model
|
| 716 |
inputs=[image_inp, vlm_model_dropdown],
|
| 717 |
+
outputs=[vlm_output, vlm_feedback, vlm_done_btn, vlm_no_btn, gen_button, vlm_model_dropdown]
|
| 718 |
)
|
| 719 |
# vlm_output.change(
|
| 720 |
# fn=lambda : gr.update(interactive=False) if vlm_output.value else gr.update(interactive=True),
|