Spaces:
Running
on
T4
Running
on
T4
| import gradio as gr | |
| from functools import partial | |
| from gradio_modal import Modal | |
| from data.lang2eng_map import lang2eng_mapping | |
| from data.words_map import words_mapping | |
| import datetime | |
| from pyuca import Collator | |
# Shared pyuca collator, created on first use. Building a Collator is the
# expensive part of sorting here, and this module sorts several lists while
# the page is being built, so one instance is reused for all of them.
_COLLATOR = None


def _get_collator():
    """Return the module-wide ``Collator``, constructing it on first call."""
    global _COLLATOR
    if _COLLATOR is None:
        _COLLATOR = Collator()
    return _COLLATOR


def sort_with_pyuca(strings):
    """Return a new list with *strings* ordered by pyuca's collation keys.

    Args:
        strings: Any iterable of strings (a list, ``dict.keys()``, ...).
            It is not modified.

    Returns:
        A new ``list`` sorted with ``Collator.sort_key`` as the sort key.
    """
    return sorted(strings, key=_get_collator().sort_key)
def build_main_page(concepts_dict, metadata_dict, local_storage):
    """Build the (initially hidden) main data-collection page.

    Args:
        concepts_dict: Nested mapping indexed as
            ``concepts_dict[country][language][category]``; the innermost
            value is the collection of concepts offered for that category.
            The ``["USA"]["English"]`` entry supplies the category names.
        metadata_dict: Nested mapping; ``metadata_dict["USA"]["English"]``
            must contain ``"Task"`` and ``"Instructions"`` entries that are
            paths to UTF-8 text files. These English files are always the
            ones loaded here, whatever country/language is stored.
        local_storage: Object whose ``.value`` unpacks into four items, the
            first two being the country and the language.

    Returns:
        dict mapping component names to the Gradio components created here,
        so that the caller can attach event handlers to them.

    Raises:
        KeyError: If an expected country/language/category entry is missing.
        IndexError: If fewer than five categories are defined.

    NOTE(review): the nesting of the layout containers was reconstructed
    from a copy of this file that had lost its indentation -- verify the
    layout against the running UI. Several emoji/quote characters in the
    labels also look mis-encoded in that copy and are kept unchanged here;
    compare them with the repository version.
    """
    try:
        country, lang, _, _ = local_storage.value
    except (TypeError, ValueError):
        country, lang = None, None
    # Fall back to the defaults when *either* value is missing: a half-filled
    # state would otherwise fail in the concepts_dict lookup further down.
    if not (country and lang):
        country, lang = "USA", "English"

    with open(metadata_dict["USA"]["English"]["Task"], "r", encoding="utf-8") as f:
        task_text = f.read()
    with open(metadata_dict["USA"]["English"]["Instructions"], "r", encoding="utf-8") as f:
        inst_text = f.read()

    # Category names always come from the USA/English entry; they are sorted
    # once and reused for the main dropdown and the per-category dropdowns.
    categories_list = sort_with_pyuca(concepts_dict["USA"]["English"].keys())
    num_category_dropdowns = 5

    def create_category_dropdown(category, index):
        """Create the multi-select concept dropdown for one category."""
        # Show a translated label when a translation table exists for `lang`.
        if lang in words_mapping:
            display_category = words_mapping[lang].get(category, category)
        else:
            display_category = category
        category_choices = concepts_dict[country][lang2eng_mapping.get(lang, lang)][category]
        return gr.Dropdown(
            choices=sort_with_pyuca(category_choices),
            interactive=True,
            label=display_category,
            allow_custom_value=True,
            elem_id=f"category_{index+1}_concepts_btn",
            multiselect=True,
            value=None,
        )

    category_concept_dropdowns = []

    with gr.Column(visible=False, elem_id="main_page") as main_ui_placeholder:
        gr.Markdown("# Wonders: Multicultural Images and Captions for Inclusive AI")
        gr.Markdown("Help us collect culturally rich images and captions to improve AI for everyone!")
        gr.Markdown("Choose a concept, upload an image showing it in a typical local situation, and write a detailed description about the image.<br> For more information, see the overviews below or click on 'Show instructions' for detailed guidelines.")
        gr.Markdown(""" \
### Notes
- By participating in this task, you agree to donate images and captions under [CC BY license](https://creativecommons.org/licenses/by/4.0/), which will allow us to share the data with others to improve AI.
- You will be able to **update or remove** any provided data at any time if you wish so.
- Participants that provide more data, across more concepts and of high quality (see "Instructions" button) will be **rewarded** (see TODO).
"""
        )
        # Task overview text; created hidden.
        intro_text_inp = gr.Markdown(task_text, visible=False)

        # Session information (read-only) and the button to leave the page.
        with gr.Row(equal_height=True):
            country_inp = gr.Textbox(label="Country", elem_id="country_inp", interactive=False)
            language_inp = gr.Textbox(label="Language", elem_id="language_inp", interactive=False)
            username_inp = gr.Textbox(label="email (optional)", type="email", elem_id="username_inp", interactive=False)
            password_inp = gr.Textbox(label="password (optional)", type="password", elem_id="password_inp", interactive=False)
            exit_btn = gr.Button("β¬ οΈ Change Language", elem_id="exit_btn", elem_classes=["compact-btn"])

        with gr.Row(equal_height=True):
            with gr.Column():
                # Main category and concept: single selection each.
                with gr.Group():
                    gr.Markdown("### Main Concept")
                    gr.Markdown("""\
Select the main concept and category for which you want to share data.<br>
If you want to add a new concept, you can just type it in the same box (after selecting its category).
"""
                    )
                    with gr.Row():
                        category_btn = gr.Dropdown(choices=categories_list, interactive=True, label="Main Category",
                                                   allow_custom_value=False, elem_id="category_btn", multiselect=False, value=None)
                        concept_btn = gr.Dropdown(choices=[], interactive=True, label="Main Concept",
                                                  allow_custom_value=True, elem_id="concept_btn", multiselect=False)
                with gr.Group():
                    gr.Markdown("### Image")
                    with gr.Accordion("π An image of the main concept in typical contexts (click to read more)", open=False):
                        gr.Markdown("""\
An image where the *main concept is clearly visible*
- in typical contexts in your country, and
- with other objects/people when possible
Pictures taken by yourself are preferred.
If you want to use an image from the Web, directly add its URL below.
- Please use publicly-licensed images (e.g., from pexels.com or freepik.com)
For more details, click on βShow instructionsβ below.
"""
                        )
                    image_url_inp = gr.Textbox(label="Image URL (Optional, if not uploading an image)", type="text", elem_id="image_url_inp")
                    image_inp = gr.Image(label="Image", elem_id="image_inp", format="png", height=512, width=768)
                    with gr.Row():
                        hide_all_faces_btn = gr.Button("π€ Hide All Faces", elem_id="hide_all_faces_btn")
                        hide_faces_btn = gr.Button("π€ Hide Specific Faces", elem_id="hide_faces_btn")
                        unhide_faces_btn = gr.Button("π Unhide Faces", elem_id="unhide_faces_btn")

            with gr.Column():
                with gr.Group():
                    gr.Markdown("### Description")
                    with gr.Accordion("π A detailed description (around 40 words covering who?, what? and where?) for the image (click to read more)", open=False):
                        gr.Markdown("""\
A long image description (around 40 words) with extensive and detailed visual information.<br>
Descriptions must be *objective*: focus on how you would describe the image to someone who canβt see it, without your own opinions/speculations.
The text needs to *include the main concept* and describes the content of the image in detail by including:
- **Who?** The visual *appearance* and observable *emotions* (e.g., βis smilingβ) of persons and animals.
- **What?** The *actions* performed in the image.
- **Where?** The *setting* of the image, including the size, color and relationships between objects.
For more details, click on βShow instructionsβ below.
"""
                        )
                    long_caption_inp = gr.Textbox(lines=6, label="Description", elem_id="long_caption_inp")
                    num_words_inp = gr.Textbox(lines=1, label="Number of words", elem_id="num_words", interactive=False, value=0)

                # Optional VLM-generated description, offered in a modal.
                with Modal(visible=False, allow_user_close=False) as modal_vlm:
                    gr.Markdown("Would you like to see if a VLM can generate a culturally aware description for your uploaded concept?")
                    with gr.Row():
                        gen_button = gr.Button("Yes", variant="primary", elem_id="generate_answer_btn")
                        vlm_no_btn = gr.Button("No")
                        vlm_cancel_btn = gr.Button("Cancel")
                    vlm_model_dropdown = gr.Dropdown(
                        ["SmolVLM-500M", "Qwen2.5-VL-7B", "InternVL3_5-8B", "Gemma3-4B"], value="Gemma3-4B", multiselect=False, label="VLM Model", info="Select the VLM model to use for generating the description."
                    )
                    vlm_output = gr.Textbox(lines=6, label="Generated description", elem_id="vlm_output", interactive=False)
                    vlm_feedback = gr.Radio(["Yes π", "No π"], label="Do you think the generated description is accurate within the cultural context of your country?", visible=False, elem_id="vlm_feedback", interactive=True)
                    vlm_done_btn = gr.Button("Complete Submission", visible=False)

                # Confirmation shown when a caption was already generated.
                with Modal(visible=False, allow_user_close=False) as modal_submit:
                    gr.Markdown("β οΈ You've already generated a caption for this image. An optional description with the VLM can only be generated once. Would you like to proceed and submit your modified data?")
                    with gr.Row():
                        submit_yes = gr.Button("Yes", variant="primary", elem_id="submit_confirm_yes")
                        submit_no = gr.Button("No", variant="stop", elem_id="submit_confirm_no")

                with gr.Group():
                    gr.Markdown("### Additional concepts (optional)")
                    gr.Markdown("Tag any other concept that is visible in the image.")
                    # One multi-select dropdown per category. Indexing (rather
                    # than slicing) keeps the IndexError when fewer than five
                    # categories are defined.
                    with gr.Row():
                        for index in range(num_category_dropdowns):
                            category_concept_dropdowns.append(
                                create_category_dropdown(categories_list[index], index)
                            )
                    with gr.Row(equal_height=True):
                        instruct_btn = gr.Button("π Show Instructions")
                        with Modal(visible=False) as modal:
                            intro_text_inst_inp = gr.Markdown(inst_text)

        with gr.Column():
            with gr.Row(equal_height=True):
                clear_btn = gr.Button("Clear", variant="huggingface", elem_id="clear_btn")
                with Modal(visible=False, allow_user_close=False) as modal_saving:
                    modal_saving_text = gr.Markdown("β³ Please wait while your submission is being saved.")
                with Modal(visible=False) as modal_data_saved:
                    modal_data_saved_text = gr.Markdown("Your data has been saved successfully. The data in the table below will be updated shortly. You can now close this window.")
                submit_btn = gr.Button("Submit", variant="primary", interactive=False, elem_id="submit_btn")
                with Modal(visible=False) as modal_exclude_confirm:
                    gr.Markdown("## Are you sure you want to exclude this example?")
                    gr.Markdown("This action will permanently delete the example.")
                    with gr.Row():
                        cancel_exclude_btn = gr.Button("Cancel")
                        confirm_exclude_btn = gr.Button("Yes, delete", variant="stop")
                exclude_btn = gr.Button("Exclude Selected Example", variant="stop", visible=True)

        # Hidden bookkeeping fields (the original carries a FIXME on their
        # `visible=False` setting).
        with gr.Column():
            timestamp_btn = gr.Textbox(datetime.datetime.now(), label="Timestamp", visible=False, elem_id="timestamp_btn", interactive=False)
            exampleid_btn = gr.Textbox(label="ID", visible=False, elem_id="example_id", interactive=False)

    output_dict = {
        "main_ui_placeholder": main_ui_placeholder,
        "country_inp": country_inp,
        "language_inp": language_inp,
        "username_inp": username_inp,
        "password_inp": password_inp,
        "image_inp": image_inp,
        "image_url_inp": image_url_inp,
        "long_caption_inp": long_caption_inp,
        "num_words_inp": num_words_inp,
        "category_btn": category_btn,
        "concept_btn": concept_btn,
        "category_concept_dropdowns": category_concept_dropdowns,
        "category_1_concepts": category_concept_dropdowns[0],
        "category_2_concepts": category_concept_dropdowns[1],
        "category_3_concepts": category_concept_dropdowns[2],
        "category_4_concepts": category_concept_dropdowns[3],
        "category_5_concepts": category_concept_dropdowns[4],
        "instruct_btn": instruct_btn,
        "clear_btn": clear_btn,
        "submit_btn": submit_btn,
        "modal": modal,
        "modal_saving": modal_saving,
        "modal_data_saved": modal_data_saved,
        "timestamp_btn": timestamp_btn,
        "exampleid_btn": exampleid_btn,
        "exit_btn": exit_btn,
        "intro_text_inp": intro_text_inp,
        "intro_text_inst_inp": intro_text_inst_inp,
        "modal_saving_text": modal_saving_text,
        "modal_data_saved_text": modal_data_saved_text,
        "hide_faces_btn": hide_faces_btn,
        "hide_all_faces_btn": hide_all_faces_btn,
        "unhide_faces_btn": unhide_faces_btn,
        "exclude_btn": exclude_btn,
        "modal_exclude_confirm": modal_exclude_confirm,
        "cancel_exclude_btn": cancel_exclude_btn,
        "confirm_exclude_btn": confirm_exclude_btn,
        "vlm_output": vlm_output,
        "gen_button": gen_button,
        "vlm_feedback": vlm_feedback,
        "modal_vlm": modal_vlm,
        "vlm_no_btn": vlm_no_btn,
        "vlm_done_btn": vlm_done_btn,
        "submit_yes": submit_yes,
        "submit_no": submit_no,
        "modal_submit": modal_submit,
        "vlm_cancel_btn": vlm_cancel_btn,
        "vlm_model_dropdown": vlm_model_dropdown,
    }
    return output_dict