Spaces:

team-wonders
/

internal-v0

Sleeping

App Files Community

carlosh93 committed on Oct 17

Commit

d76c46c

1 Parent(s): 01c257b

Added the name of the VLM used to the data samples and fixed the data structure

Browse files

Files changed (3) hide show

logic/data_utils.py +4 -3
logic/handlers.py +18 -11
ui/layout.py +15 -16

logic/data_utils.py CHANGED Viewed

@@ -70,7 +70,7 @@ class CustomHFDatasetSaver:
     def _migrate_existing(self):
         """
         Ensure all existing JSON sample files have the same schema
-        by adding missing keys for 'vlm_caption' and 'vlm_feedback'.
         """
         for root, _, files in os.walk(self.local_ds_folder):
             for fname in files:
@@ -79,7 +79,7 @@ class CustomHFDatasetSaver:
                     with open(fpath, 'r+', encoding='utf-8') as f:
                         data = json.load(f)
                         updated = False
-                        for key in ['vlm_caption', 'vlm_feedback']:
                             if key not in data:
                                 data[key] = ""
                                 updated = True
@@ -248,7 +248,8 @@ class CustomHFDatasetSaver:
             "image_url": values_dic['image_url'] or "",
             "caption": values_dic['caption'] or "",
             "vlm_caption": values_dic['vlm_caption'] or "",
-            "vlm_feedback": values_dic['vlm_feedback'] or "",
             "country": values_dic['country'] or "",
             "language": values_dic['language'] or "",
             "category": values_dic['category'] or "",

     def _migrate_existing(self):
         """
         Ensure all existing JSON sample files have the same schema
+        by adding missing keys for 'vlm_caption', 'vlm_feedback', and 'vlm_model'.
         """
         for root, _, files in os.walk(self.local_ds_folder):
             for fname in files:
                     with open(fpath, 'r+', encoding='utf-8') as f:
                         data = json.load(f)
                         updated = False
+                        for key in ['vlm_caption', 'vlm_feedback', 'vlm_model']:
                             if key not in data:
                                 data[key] = ""
                                 updated = True
             "image_url": values_dic['image_url'] or "",
             "caption": values_dic['caption'] or "",
             "vlm_caption": values_dic['vlm_caption'] or "",
+            "vlm_feedback": values_dic['vlm_feedback'] or "" if values_dic['vlm_caption'] else "",
+            "vlm_model": values_dic['vlm_model'] or "" if values_dic['vlm_caption'] else "",
             "country": values_dic['country'] or "",
             "language": values_dic['language'] or "",
             "category": values_dic['category'] or "",

logic/handlers.py CHANGED Viewed

@@ -143,7 +143,7 @@ def generate_vlm_caption(image, model_name="SmolVLM-500M", timeout_seconds=120):
     """
     if image is None:
         gr.Warning("⚠️ Please upload an image first.", duration=5)
-        return None, gr.update(visible=False), gr.update(visible=False), gr.update(interactive=True), gr.update(interactive=True)
     def _generate_caption_with_model():
         """Helper function to run caption generation in a separate thread."""
@@ -174,19 +174,19 @@ def generate_vlm_caption(image, model_name="SmolVLM-500M", timeout_seconds=120):
                 elapsed_time = time.time() - start_time
                 print(f"Caption generation timed out after {elapsed_time:.1f} seconds")
                 gr.Warning(f"⚠️ Caption generation timed out after {timeout_seconds} seconds. Please try again with a different model or smaller image.", duration=8)
-                return None, gr.update(visible=False), gr.update(visible=False), gr.update(interactive=True), gr.update(interactive=True)
     except Exception as e:
         print(f"Error generating caption: {e}. Try again later.")
         gr.Warning(f"⚠️ Error generating caption: {e}. Please try again.", duration=5)
-        return None, gr.update(visible=False), gr.update(visible=False), gr.update(interactive=True), gr.update(interactive=True)
     finally:
         # For now, let's cleanup memory after each generation
         vlm_manager.cleanup_memory()
     # print(caption)
-    return caption, gr.update(visible=True), gr.update(visible=True), gr.update(interactive=False), gr.update(interactive=False)
 def count_words(caption, language):
     match language:
@@ -268,6 +268,7 @@ def update_user_data(client , country, language_choice, HF_DATASET_NAME, local_d
         user_ids = set()
         samples = []
         vlm_captions = dict()
         for d in user_dataset:
             if d['id'] in user_ids:
                 continue
@@ -297,7 +298,7 @@ def update_user_data(client , country, language_choice, HF_DATASET_NAME, local_d
             if 'vlm_caption' in d:
                 vlm_captions[d['id']] = d.get('vlm_caption', "")
         # return gr.Dataset(samples=samples), None
         # ───────────────────────────────────────────────────
         # Clean up the “Additional Concepts” column (index 7)
@@ -327,11 +328,12 @@ def update_user_data(client , country, language_choice, HF_DATASET_NAME, local_d
         # check if vlm_captions is an empty dictionary
         if not vlm_captions:
             vlm_captions = None
-        return gr.Dataset(samples=cleaned), None, vlm_captions
     else:
         # TODO: should we show the entire dataset instead? What about "other data" tab?
-        return gr.Dataset(samples=[]), None, None
 def update_language(local_storage, metadata_dict, concepts_dict):
@@ -430,7 +432,7 @@ def update_intro_language(selected_country, selected_language, intro_markdown, m
     return gr.Markdown(INTRO_TEXT)
-def handle_click_example(user_examples, vlm_captions, concepts_dict):
     # print("handle_click_example")
     # print(user_examples)
     # ex = [item for item in user_examples]
@@ -518,8 +520,13 @@ def handle_click_example(user_examples, vlm_captions, concepts_dict):
     if vlm_captions:
         if exampleid_btn in vlm_captions:
             vlm_caption = vlm_captions[exampleid_btn]
-    return [image_inp, image_url_inp, long_caption_inp, exampleid_btn, category_btn, concept_btn] + additional_concepts_by_category + [True] + [vlm_caption]  # loading_example flag + vlm_caption
     # return [
     #     image_inp,
     #     image_url_inp,

     """
     if image is None:
         gr.Warning("⚠️ Please upload an image first.", duration=5)
+        return None, gr.update(visible=False), gr.update(visible=False), gr.update(interactive=True), gr.update(interactive=True), gr.update(interactive=True)
     def _generate_caption_with_model():
         """Helper function to run caption generation in a separate thread."""
                 elapsed_time = time.time() - start_time
                 print(f"Caption generation timed out after {elapsed_time:.1f} seconds")
                 gr.Warning(f"⚠️ Caption generation timed out after {timeout_seconds} seconds. Please try again with a different model or smaller image.", duration=8)
+                return None, gr.update(visible=False), gr.update(visible=False), gr.update(interactive=True), gr.update(interactive=True), gr.update(interactive=True)
     except Exception as e:
         print(f"Error generating caption: {e}. Try again later.")
         gr.Warning(f"⚠️ Error generating caption: {e}. Please try again.", duration=5)
+        return None, gr.update(visible=False), gr.update(visible=False), gr.update(interactive=True), gr.update(interactive=True), gr.update(interactive=True)
     finally:
         # For now, let's cleanup memory after each generation
         vlm_manager.cleanup_memory()
     # print(caption)
+    return caption, gr.update(visible=True), gr.update(visible=True), gr.update(interactive=False), gr.update(interactive=False), gr.update(interactive=False)
 def count_words(caption, language):
     match language:
         user_ids = set()
         samples = []
         vlm_captions = dict()
+        vlm_models = dict()
         for d in user_dataset:
             if d['id'] in user_ids:
                 continue
             if 'vlm_caption' in d:
                 vlm_captions[d['id']] = d.get('vlm_caption', "")
+                vlm_models[d['id']] = d.get('vlm_model', "")
         # return gr.Dataset(samples=samples), None
         # ───────────────────────────────────────────────────
         # Clean up the “Additional Concepts” column (index 7)
         # check if vlm_captions is an empty dictionary
         if not vlm_captions:
             vlm_captions = None
+        if not vlm_models:
+            vlm_models = None
+        return gr.Dataset(samples=cleaned), None, vlm_captions, vlm_models
     else:
         # TODO: should we show the entire dataset instead? What about "other data" tab?
+        return gr.Dataset(samples=[]), None, None, None
 def update_language(local_storage, metadata_dict, concepts_dict):
     return gr.Markdown(INTRO_TEXT)
+def handle_click_example(user_examples, vlm_captions, vlm_models, concepts_dict):
     # print("handle_click_example")
     # print(user_examples)
     # ex = [item for item in user_examples]
     if vlm_captions:
         if exampleid_btn in vlm_captions:
             vlm_caption = vlm_captions[exampleid_btn]
+    vlm_model = None
+    if vlm_models:
+        if exampleid_btn in vlm_models:
+            vlm_model = vlm_models[exampleid_btn]
+    if not vlm_model or vlm_model == "":
+        vlm_model = "SmolVLM-500M"  # or get the default from the dropdown definition in main_page.py
+    return [image_inp, image_url_inp, long_caption_inp, exampleid_btn, category_btn, concept_btn] + additional_concepts_by_category + [True] + [vlm_caption] + [vlm_model]
     # return [
     #     image_inp,
     #     image_url_inp,

ui/layout.py CHANGED Viewed

@@ -325,8 +325,9 @@ def build_ui(concepts_dict, metadata_dict, HF_API_TOKEN, HF_DATASET_NAME):
         vlm_cancel_btn = cmp_main_ui["vlm_cancel_btn"]
         vlm_model_dropdown = cmp_main_ui["vlm_model_dropdown"]
-        # dictionary to store all vlm_output by exampleid
         vlm_captions = gr.State(None)
         ### Category button
         category_btn.change(
             fn=partial(load_concepts, concepts=concepts_dict),
@@ -461,12 +462,12 @@ def build_ui(concepts_dict, metadata_dict, HF_API_TOKEN, HF_DATASET_NAME):
                         # Handle clicking on an example
                         user_examples.click(
                             fn=partial(handle_click_example, concepts_dict=concepts_dict),
-                            inputs=[user_examples, vlm_captions],
                             outputs=[
                                 image_inp, image_url_inp, long_caption_inp, exampleid_btn,
                                 category_btn, concept_btn,
                                 category_concept_dropdowns[0], category_concept_dropdowns[1], category_concept_dropdowns[2],
-                                category_concept_dropdowns[3], category_concept_dropdowns[4], loading_example, vlm_output
                             ],
                         )
@@ -530,7 +531,7 @@ def build_ui(concepts_dict, metadata_dict, HF_API_TOKEN, HF_DATASET_NAME):
         ).then(
             fn=partial(update_user_data, HF_DATASET_NAME=HF_DATASET_NAME, local_ds_directory_path = LOCAL_DS_DIRECTORY_PATH),
             inputs=[supabase_user_client, country_choice, language_choice],
-            outputs=[user_examples, loading_msg, vlm_captions],
         )
@@ -587,7 +588,8 @@ def build_ui(concepts_dict, metadata_dict, HF_API_TOKEN, HF_DATASET_NAME):
             "client": supabase_user_client,
             # "is_blurred": is_blurred
             "vlm_caption": vlm_output,
-            "vlm_feedback": vlm_feedback
         }
         # data_outputs = [image_inp, image_url_inp, long_caption_inp,
         #         country_inp, language_inp, category_btn, concept_btn,
@@ -636,7 +638,7 @@ def build_ui(concepts_dict, metadata_dict, HF_API_TOKEN, HF_DATASET_NAME):
             ).success(
                 fn=partial(update_user_data, HF_DATASET_NAME=HF_DATASET_NAME, local_ds_directory_path = LOCAL_DS_DIRECTORY_PATH),
                 inputs=[supabase_user_client, country_choice, language_choice],
-                outputs=[user_examples, loading_msg, vlm_captions]
             )
             return e
@@ -676,19 +678,16 @@ def build_ui(concepts_dict, metadata_dict, HF_API_TOKEN, HF_DATASET_NAME):
         )
         # Handle confirm exclusion
         confirm_exclude_btn.click(
             fn=lambda: gr.update(visible=False),
             outputs=[modal_exclude_confirm]
         ).success(
             fn=hf_writer.save,
-            inputs=[
-                image_inp, image_url_inp, long_caption_inp, country_inp, language_inp,
-                category_btn, concept_btn,
-                category_concept_dropdowns[0], category_concept_dropdowns[1], category_concept_dropdowns[2],
-                category_concept_dropdowns[3], category_concept_dropdowns[4],
-                timestamp_btn, username_inp, password_inp, exampleid_btn, gr.State(value=True),
-                gr.State(value=concepts_dict), gr.State(value=lang2eng_mapping), vlm_output, vlm_feedback
-            ],
             outputs=None
         ).success(
             fn=partial(clear_data, "remove"),
@@ -707,7 +706,7 @@ def build_ui(concepts_dict, metadata_dict, HF_API_TOKEN, HF_DATASET_NAME):
         ).success(
             fn=partial(update_user_data, HF_DATASET_NAME=HF_DATASET_NAME, local_ds_directory_path=LOCAL_DS_DIRECTORY_PATH),
             inputs=[supabase_user_client, country_choice, language_choice],
-            outputs=[user_examples, loading_msg, vlm_captions]
         )
         # ============================================= #
         # VLM Gen button
@@ -715,7 +714,7 @@ def build_ui(concepts_dict, metadata_dict, HF_API_TOKEN, HF_DATASET_NAME):
         gen_button.click(
             fn=generate_vlm_caption, # processor=processor, model=model
             inputs=[image_inp, vlm_model_dropdown],
-            outputs=[vlm_output, vlm_feedback, vlm_done_btn, vlm_no_btn, gen_button]
         )
         # vlm_output.change(
         #     fn=lambda : gr.update(interactive=False) if vlm_output.value else gr.update(interactive=True),

         vlm_cancel_btn = cmp_main_ui["vlm_cancel_btn"]
         vlm_model_dropdown = cmp_main_ui["vlm_model_dropdown"]
+        # dictionary to store ALL vlm_outputs and vlm_models by exampleid
         vlm_captions = gr.State(None)
+        vlm_models = gr.State(None)
         ### Category button
         category_btn.change(
             fn=partial(load_concepts, concepts=concepts_dict),
                         # Handle clicking on an example
                         user_examples.click(
                             fn=partial(handle_click_example, concepts_dict=concepts_dict),
+                            inputs=[user_examples, vlm_captions, vlm_models],
                             outputs=[
                                 image_inp, image_url_inp, long_caption_inp, exampleid_btn,
                                 category_btn, concept_btn,
                                 category_concept_dropdowns[0], category_concept_dropdowns[1], category_concept_dropdowns[2],
+                                category_concept_dropdowns[3], category_concept_dropdowns[4], loading_example, vlm_output, vlm_model_dropdown
                             ],
                         )
         ).then(
             fn=partial(update_user_data, HF_DATASET_NAME=HF_DATASET_NAME, local_ds_directory_path = LOCAL_DS_DIRECTORY_PATH),
             inputs=[supabase_user_client, country_choice, language_choice],
+            outputs=[user_examples, loading_msg, vlm_captions, vlm_models],
         )
             "client": supabase_user_client,
             # "is_blurred": is_blurred
             "vlm_caption": vlm_output,
+            "vlm_feedback": vlm_feedback,
+            "vlm_model": vlm_model_dropdown # selected vlm model
         }
         # data_outputs = [image_inp, image_url_inp, long_caption_inp,
         #         country_inp, language_inp, category_btn, concept_btn,
             ).success(
                 fn=partial(update_user_data, HF_DATASET_NAME=HF_DATASET_NAME, local_ds_directory_path = LOCAL_DS_DIRECTORY_PATH),
                 inputs=[supabase_user_client, country_choice, language_choice],
+                outputs=[user_examples, loading_msg, vlm_captions, vlm_models]
             )
             return e
         )
         # Handle confirm exclusion
+        # Create a modified data_outputs for exclusion with excluded=True
+        exclude_data_outputs = data_outputs.copy()
+        exclude_data_outputs["excluded"] = gr.State(value=True)
         confirm_exclude_btn.click(
             fn=lambda: gr.update(visible=False),
             outputs=[modal_exclude_confirm]
         ).success(
             fn=hf_writer.save,
+            inputs=list(exclude_data_outputs.values()),
             outputs=None
         ).success(
             fn=partial(clear_data, "remove"),
         ).success(
             fn=partial(update_user_data, HF_DATASET_NAME=HF_DATASET_NAME, local_ds_directory_path=LOCAL_DS_DIRECTORY_PATH),
             inputs=[supabase_user_client, country_choice, language_choice],
+            outputs=[user_examples, loading_msg, vlm_captions, vlm_models]
         )
         # ============================================= #
         # VLM Gen button
         gen_button.click(
             fn=generate_vlm_caption, # processor=processor, model=model
             inputs=[image_inp, vlm_model_dropdown],
+            outputs=[vlm_output, vlm_feedback, vlm_done_btn, vlm_no_btn, gen_button, vlm_model_dropdown]
         )
         # vlm_output.change(
         #     fn=lambda : gr.update(interactive=False) if vlm_output.value else gr.update(interactive=True),