Spaces:
Runtime error
Runtime error
Sebastian Gehrmann
committed on
Commit
·
13fd677
1
Parent(s):
396d1e7
considerations
Browse files- datacards/considerations.py +88 -4
- datacards/curation.py +9 -9
- datacards/overview.py +3 -3
datacards/considerations.py
CHANGED
|
@@ -1,13 +1,97 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
|
| 3 |
-
from .streamlit_utils import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
|
| 5 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
|
| 8 |
def considerations_page():
|
| 9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
|
| 12 |
def considerations_summary():
|
| 13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
|
| 3 |
+
from .streamlit_utils import (
|
| 4 |
+
make_multiselect,
|
| 5 |
+
make_selectbox,
|
| 6 |
+
make_text_area,
|
| 7 |
+
make_text_input,
|
| 8 |
+
make_radio,
|
| 9 |
+
)
|
| 10 |
|
| 11 |
+
N_FIELDS_PII = 3
|
| 12 |
+
N_FIELDS_LICENSES = 3
|
| 13 |
+
N_FIELDS_LIMITATIONS = 4
|
| 14 |
+
|
| 15 |
+
N_FIELDS = N_FIELDS_PII + N_FIELDS_LICENSES + N_FIELDS_LIMITATIONS
|
| 16 |
|
| 17 |
|
| 18 |
def considerations_page():
|
| 19 |
+
st.session_state.card_dict["considerations"] = st.session_state.card_dict.get(
|
| 20 |
+
"considerations", {}
|
| 21 |
+
)
|
| 22 |
+
with st.expander("PII Risks and Liability", expanded=False):
|
| 23 |
+
key_pref = ["considerations", "pii"]
|
| 24 |
+
st.session_state.card_dict["considerations"]["pii"] = st.session_state.card_dict[
|
| 25 |
+
"considerations"
|
| 26 |
+
].get("pii", {})
|
| 27 |
+
|
| 28 |
+
# TODO: cross-link this section with curation.
|
| 29 |
+
|
| 30 |
+
with st.expander("Licenses", expanded=False):
|
| 31 |
+
key_pref = ["considerations", "licenses"]
|
| 32 |
+
st.session_state.card_dict["considerations"]["licenses"] = st.session_state.card_dict[
|
| 33 |
+
"considerations"
|
| 34 |
+
].get("licenses", {})
|
| 35 |
+
|
| 36 |
+
# TODO: cross-link the first question with overview.py.
|
| 37 |
+
|
| 38 |
+
make_text_input(
|
| 39 |
+
label="Can the dataset be used for research and/or commercial purposes?",
|
| 40 |
+
key_list=key_pref + ["data-restrictions"],
|
| 41 |
+
help="Describe any restrictions put on how the data can be used.",
|
| 42 |
+
)
|
| 43 |
+
make_radio(
|
| 44 |
+
label="Are there restrictions on the underlying data?",
|
| 45 |
+
options=["Open", "Non-Commercial", "Copyrighted", "Other"],
|
| 46 |
+
key_list=key_pref + ["data-copyright"],
|
| 47 |
+
help="Are there restrictions on the underlying data?",
|
| 48 |
+
)
|
| 49 |
+
|
| 50 |
+
with st.expander("Known limitations", expanded=False):
|
| 51 |
+
key_pref = ["considerations", "limitations"]
|
| 52 |
+
st.session_state.card_dict["considerations"]["limitations"] = st.session_state.card_dict[
|
| 53 |
+
"considerations"
|
| 54 |
+
].get("limitations", {})
|
| 55 |
+
|
| 56 |
+
# TODO: Form proper language
|
| 57 |
+
|
| 58 |
+
make_text_area(
|
| 59 |
+
label="Technical limitations, annotation noise, etc.",
|
| 60 |
+
key_list=key_pref + ["data-technical-limitations"],
|
| 61 |
+
help="",
|
| 62 |
+
)
|
| 63 |
+
|
| 64 |
+
make_text_area(
|
| 65 |
+
label="Particularly unsuited for applications",
|
| 66 |
+
key_list=key_pref + ["data-unsuited-applications"],
|
| 67 |
+
help="",
|
| 68 |
+
)
|
| 69 |
+
|
| 70 |
+
make_text_area(
|
| 71 |
+
label="What are discouraged use cases of the dataset?",
|
| 72 |
+
key_list=key_pref + ["data-discouraged-use"],
|
| 73 |
+
help="",
|
| 74 |
+
)
|
| 75 |
+
|
| 76 |
+
make_text_area(
|
| 77 |
+
label="Citation of work identifying these limitations",
|
| 78 |
+
key_list=key_pref + ["data-citations-limitations"],
|
| 79 |
+
help="",
|
| 80 |
+
)
|
| 81 |
|
| 82 |
|
| 83 |
def considerations_summary():
|
| 84 |
+
total_filled = sum(
|
| 85 |
+
[len(dct) for dct in st.session_state.card_dict.get("considerations", {}).values()]
|
| 86 |
+
)
|
| 87 |
+
with st.expander(
|
| 88 |
+
f"Dataset Overview Completion - {total_filled} of {N_FIELDS}", expanded=False
|
| 89 |
+
):
|
| 90 |
+
completion_markdown = ""
|
| 91 |
+
completion_markdown += (
|
| 92 |
+
f"- **Overall completion:**\n - {total_filled} of {N_FIELDS} fields\n"
|
| 93 |
+
)
|
| 94 |
+
completion_markdown += f"- **Sub-section - PII Risks and Liability:**\n - {len(st.session_state.card_dict.get('considerations', {}).get('pii', {}))} of {N_FIELDS_PII} fields\n"
|
| 95 |
+
completion_markdown += f"- **Sub-section - Licenses:**\n - {len(st.session_state.card_dict.get('considerations', {}).get('licenses', {}))} of {N_FIELDS_LICENSES} fields\n"
|
| 96 |
+
completion_markdown += f"- **Sub-section - Known limitations:**\n - {len(st.session_state.card_dict.get('considerations', {}).get('limitations', {}))} of {N_FIELDS_LIMITATIONS} fields\n"
|
| 97 |
+
st.markdown(completion_markdown)
|
datacards/curation.py
CHANGED
|
@@ -72,17 +72,17 @@ def curation_page():
|
|
| 72 |
make_multiselect(
|
| 73 |
label="How was the language data obtained?",
|
| 74 |
options=[
|
| 75 |
-
"
|
| 76 |
-
"
|
| 77 |
-
"
|
| 78 |
-
"
|
| 79 |
-
"
|
| 80 |
],
|
| 81 |
key_list=key_pref + ["obtained"],
|
| 82 |
)
|
| 83 |
make_multiselect(
|
| 84 |
label="If found, where from?",
|
| 85 |
-
options=["website", "
|
| 86 |
key_list=key_pref + ["found"],
|
| 87 |
help="select N/A if none of the language data was found",
|
| 88 |
)
|
|
@@ -90,9 +90,9 @@ def curation_page():
|
|
| 90 |
label="If crowdsourced, where from?",
|
| 91 |
options=[
|
| 92 |
"Amazon Mechanical Turk",
|
| 93 |
-
"
|
| 94 |
-
"
|
| 95 |
-
"
|
| 96 |
"N/A",
|
| 97 |
],
|
| 98 |
key_list=key_pref + ["crowdsourced"],
|
|
|
|
| 72 |
make_multiselect(
|
| 73 |
label="How was the language data obtained?",
|
| 74 |
options=[
|
| 75 |
+
"Found",
|
| 76 |
+
"Created for the dataset",
|
| 77 |
+
"Crowdsourced",
|
| 78 |
+
"Machine-generated",
|
| 79 |
+
"Other",
|
| 80 |
],
|
| 81 |
key_list=key_pref + ["obtained"],
|
| 82 |
)
|
| 83 |
make_multiselect(
|
| 84 |
label="If found, where from?",
|
| 85 |
+
options=["Multiple websites", "Single website", "Offline media collection", "Other", "N/A"],
|
| 86 |
key_list=key_pref + ["found"],
|
| 87 |
help="select N/A if none of the language data was found",
|
| 88 |
)
|
|
|
|
| 90 |
label="If crowdsourced, where from?",
|
| 91 |
options=[
|
| 92 |
"Amazon Mechanical Turk",
|
| 93 |
+
"Other crowdworker platform",
|
| 94 |
+
"Participatory experiment",
|
| 95 |
+
"Other",
|
| 96 |
"N/A",
|
| 97 |
],
|
| 98 |
key_list=key_pref + ["crowdsourced"],
|
datacards/overview.py
CHANGED
|
@@ -167,9 +167,9 @@ def overview_page():
|
|
| 167 |
)
|
| 168 |
with st.expander("Structure", expanded=False):
|
| 169 |
key_pref = ["overview", "structure"]
|
| 170 |
-
st.session_state.card_dict["overview"][
|
| 171 |
-
"
|
| 172 |
-
]
|
| 173 |
data_fields_help = """
|
| 174 |
[free text; paragraphs]
|
| 175 |
- Mention their data type, and whether and how they are used as part of the generation pipeline.
|
|
|
|
| 167 |
)
|
| 168 |
with st.expander("Structure", expanded=False):
|
| 169 |
key_pref = ["overview", "structure"]
|
| 170 |
+
st.session_state.card_dict["overview"]["structure"] = st.session_state.card_dict[
|
| 171 |
+
"overview"
|
| 172 |
+
].get("structure", {})
|
| 173 |
data_fields_help = """
|
| 174 |
[free text; paragraphs]
|
| 175 |
- Mention their data type, and whether and how they are used as part of the generation pipeline.
|