Spaces:

GEM
/

DatasetCardForm

Runtime error

App Files Files Community

Sebastian Gehrmann committed on Nov 16, 2021

Commit

13fd677

1 Parent(s): 396d1e7

considerations

Browse files

Files changed (3) hide show

datacards/considerations.py +88 -4
datacards/curation.py +9 -9
datacards/overview.py +3 -3

datacards/considerations.py CHANGED Viewed

@@ -1,13 +1,97 @@
 import streamlit as st
-from .streamlit_utils import make_text_input
-N_FIELDS = 1
 def considerations_page():
-    return None
 def considerations_summary():
-    return None

 import streamlit as st
+from .streamlit_utils import (
+    make_multiselect,
+    make_selectbox,
+    make_text_area,
+    make_text_input,
+    make_radio,
+)
+N_FIELDS_PII = 3
+N_FIELDS_LICENSES = 3
+N_FIELDS_LIMITATIONS = 4
+N_FIELDS = N_FIELDS_PII + N_FIELDS_LICENSES + N_FIELDS_LIMITATIONS
 def considerations_page():
+    st.session_state.card_dict["considerations"] = st.session_state.card_dict.get(
+        "considerations", {}
+    )
+    with st.expander("PII Risks and Liability", expanded=False):
+        key_pref = ["considerations", "pii"]
+        st.session_state.card_dict["considerations"]["pii"] = st.session_state.card_dict[
+            "considerations"
+        ].get("pii", {})
+        # TODO: cross-link this section with curation.
+    with st.expander("Licenses", expanded=False):
+        key_pref = ["considerations", "licenses"]
+        st.session_state.card_dict["considerations"]["licenses"] = st.session_state.card_dict[
+            "considerations"
+        ].get("licenses", {})
+        # TODO: cross-link the first question with overview.py.
+        make_text_input(
+            label="Can the dataset be used for research and/or commercial purposes?",
+            key_list=key_pref + ["data-restrictions"],
+            help="Describe any restrictions put on how the data can be used.",
+        )
+        make_radio(
+            label="Are thre restrictions on the underlying data?",
+            options=["Open", "Non-Commercial", "Copyrighted", "Other"],
+            key_list=key_pref + ["data-copyright"],
+            help="Are there restructions on the underlying data?",
+        )
+    with st.expander("Known limitations", expanded=False):
+        key_pref = ["considerations", "limitations"]
+        st.session_state.card_dict["considerations"]["limitations"] = st.session_state.card_dict[
+            "considerations"
+        ].get("limitations", {})
+        # TODO: Form proper language
+        make_text_area(
+            label="Technical limitations, annotation noise, etc.",
+            key_list=key_pref + ["data-technical-limitations"],
+            help="",
+        )
+        make_text_area(
+            label="Particularly unsuited for applications",
+            key_list=key_pref + ["data-unsuited-applications"],
+            help="",
+        )
+        make_text_area(
+            label="What are discouraged use cases of the dataset?",
+            key_list=key_pref + ["data-discouraged-use"],
+            help="",
+        )
+        make_text_area(
+            label="Citation of work identifying these limitations",
+            key_list=key_pref + ["data-citations-limitations"],
+            help="",
+        )
 def considerations_summary():
+    total_filled = sum(
+        [len(dct) for dct in st.session_state.card_dict.get("considerations", {}).values()]
+    )
+    with st.expander(
+        f"Dataset Overview Completion - {total_filled} of {N_FIELDS}", expanded=False
+    ):
+        completion_markdown = ""
+        completion_markdown += (
+            f"- **Overall competion:**\n  - {total_filled} of {N_FIELDS} fields\n"
+        )
+        completion_markdown += f"- **Sub-section - PII Risks and Liability:**\n  - {len(st.session_state.card_dict.get('considerations', {}).get('pii', {}))} of {N_FIELDS_PII} fields\n"
+        completion_markdown += f"- **Sub-section - Licenses:**\n  - {len(st.session_state.card_dict.get('considerations', {}).get('licenses', {}))} of {N_FIELDS_LICENSES} fields\n"
+        completion_markdown += f"- **Sub-section - Known limitations:**\n  - {len(st.session_state.card_dict.get('considerations', {}).get('limitations', {}))} of {N_FIELDS_LIMITATIONS} fields\n"
+        st.markdown(completion_markdown)

datacards/curation.py CHANGED Viewed

@@ -72,17 +72,17 @@ def curation_page():
         make_multiselect(
             label="How was the language data obtained?",
             options=[
-                "found",
-                "created for the dataset",
-                "crowdsourced",
-                "machine-generated",
-                "other",
             ],
             key_list=key_pref + ["obtained"],
         )
         make_multiselect(
             label="If found, where from?",
-            options=["website", "offline media collection", "other", "N/A"],
             key_list=key_pref + ["found"],
             help="select N/A if none of the language data was found",
         )
@@ -90,9 +90,9 @@ def curation_page():
             label="If crowdsourced, where from?",
             options=[
                 "Amazon Mechanical Turk",
-                "other crowdworker platform",
-                "participatory experiment",
-                "other",
                 "N/A",
             ],
             key_list=key_pref + ["crowdsourced"],

         make_multiselect(
             label="How was the language data obtained?",
             options=[
+                "Found",
+                "Created for the dataset",
+                "Crowdsourced",
+                "Machine-generated",
+                "Other",
             ],
             key_list=key_pref + ["obtained"],
         )
         make_multiselect(
             label="If found, where from?",
+            options=["Multiple websites", "Single website", "Offline media collection", "Other", "N/A"],
             key_list=key_pref + ["found"],
             help="select N/A if none of the language data was found",
         )
             label="If crowdsourced, where from?",
             options=[
                 "Amazon Mechanical Turk",
+                "Other crowdworker platform",
+                "Participatory experiment",
+                "Other",
                 "N/A",
             ],
             key_list=key_pref + ["crowdsourced"],

datacards/overview.py CHANGED Viewed

@@ -167,9 +167,9 @@ def overview_page():
         )
     with st.expander("Structure", expanded=False):
         key_pref = ["overview", "structure"]
-        st.session_state.card_dict["overview"][
-            "structure"
-        ] = st.session_state.card_dict.get("structure", {})
         data_fields_help = """
         [free text; paragraphs]
         - Mention their data type, and whether and how they are used as part of the generation pipeline.

         )
     with st.expander("Structure", expanded=False):
         key_pref = ["overview", "structure"]
+        st.session_state.card_dict["overview"]["structure"] = st.session_state.card_dict[
+            "overview"
+        ].get("structure", {})
         data_fields_help = """
         [free text; paragraphs]
         - Mention their data type, and whether and how they are used as part of the generation pipeline.