Spaces:
Runtime error
Runtime error
| import streamlit as st | |
| from .streamlit_utils import ( | |
| make_multiselect, | |
| make_selectbox, | |
| make_text_area, | |
| make_text_input, | |
| make_radio, | |
| ) | |
# Expected number of entries under each sub-section of
# st.session_state.card_dict["considerations"]. considerations_summary()
# compares these totals with the key count of the matching sub-dictionary.
N_FIELDS_PII = 1  # "risks-description"
# "dataset-restrictions" and "data-copyright", plus their two "-other"
# companions: considerations_page() sets each companion to "N/A" whenever
# "other" is not selected, so all four keys are counted by the summary.
N_FIELDS_LICENSES = 4
# "data-technical-limitations", "data-unsuited-applications",
# "data-discouraged-use"
N_FIELDS_LIMITATIONS = 3

N_FIELDS = N_FIELDS_PII + N_FIELDS_LICENSES + N_FIELDS_LIMITATIONS
def considerations_page():
    """Render the "considerations" page of the data card form.

    Makes sure ``st.session_state.card_dict["considerations"]`` and its
    ``pii``, ``licenses`` and ``limitations`` sub-dictionaries exist, then
    draws one collapsed expander per sub-section containing its widgets.
    Each widget is addressed through a ``key_list`` path of the form
    ``["considerations", <sub-section>, <field>]``.
    """
    st.session_state.card_dict["considerations"] = st.session_state.card_dict.get(
        "considerations", {}
    )

    # --- PII ---------------------------------------------------------------
    with st.expander("PII Risks and Liability", expanded=False):
        considerations = st.session_state.card_dict["considerations"]
        considerations["pii"] = considerations.get("pii", {})
        make_text_area(
            label="Considering your answers to the PII part of the Data Curation Section, describe any potential privacy to the data subjects and creators risks when using the dataset.",
            key_list=["considerations", "pii", "risks-description"],
            help="In terms for example of having models memorize private information of data subjects or other breaches of privacy.",
        )

    # --- Licenses ----------------------------------------------------------
    license_statuses = [
        "public domain",
        "multiple licenses",
        "copyright - all rights reserved",
        "open license - commercial use allowed",
        "research use only",
        "non-commercial use only",
        "do not distribute",
        "other",
    ]
    # (field name, multiselect label, multiselect help, follow-up label)
    license_questions = (
        (
            "dataset-restrictions",
            "Based on your answers in the Intended Use part of the Data Overview Section, which of the following best describe the copyright and licensing status of the dataset?",
            "Does the license restrict how the dataset can be used?",
            "You selected `other` for the dataset licensing status, please elaborate here:",
        ),
        (
            "data-copyright",
            "Based on your answers in the Language part of the Data Curation Section, which of the following best describe the copyright and licensing status of the underlying language data?",
            "For example if the dataset uses data from Wikipedia, we are asking about the status of Wikipedia text in general.",
            "You selected `other` for the source data licensing status, please elaborate here:",
        ),
    )
    with st.expander("Licenses", expanded=False):
        considerations = st.session_state.card_dict["considerations"]
        considerations["licenses"] = considerations.get("licenses", {})
        license_path = ["considerations", "licenses"]
        for field, question, hint, other_prompt in license_questions:
            make_multiselect(
                label=question,
                options=list(license_statuses),
                key_list=license_path + [field],
                help=hint,
            )
            # Look the dictionary up again after the widget call rather than
            # holding on to a reference obtained before it.
            licenses = st.session_state.card_dict["considerations"]["licenses"]
            other_field = field + "-other"
            if "other" in licenses.get(field, []):
                make_text_area(
                    label=other_prompt,
                    key_list=license_path + [other_field],
                )
            else:
                # No free-text follow-up is shown; store a placeholder instead.
                licenses[other_field] = "N/A"

    # --- Known technical limitations ----------------------------------------
    # (field name, text area label, text area help)
    limitation_prompts = (
        (
            "data-technical-limitations",
            "Describe any known technical limitations, such as spurrious correlations, train/test overlap, annotation biases, or mis-annotations, "
            "and cite the works that first identified these limitations when possible.",
            "Outline any properties of the dataset that might lead a trained model with good performance on the metric to not behave as expected.",
        ),
        (
            "data-unsuited-applications",
            "When using a model trained on this dataset in a setting where users or the public may interact with its predictions, what are some pitfalls to look out for? "
            "In particular, describe some applications of the general task featured in this dataset that its curation or properties make it less suitable for.",
            "For example, outline language varieties or domains that the model might underperform for.",
        ),
        (
            "data-discouraged-use",
            "What are some discouraged use cases of a model trained to maximize the proposed metrics on this dataset? "
            "In particular, think about settings where decisions made by a model that performs reasonably well on the metric my still have strong negative consequences for user or members of the public.",
            "For example, think about application settings where certain types of mistakes (such as missing a negation) might have a particularly strong negative impact but are not particularly singled out by the aggregated evaluation.",
        ),
    )
    with st.expander("Known Technical Limitations", expanded=False):
        considerations = st.session_state.card_dict["considerations"]
        considerations["limitations"] = considerations.get("limitations", {})
        for field, prompt, hint in limitation_prompts:
            make_text_area(
                label=prompt,
                key_list=["considerations", "limitations", field],
                help=hint,
            )
def considerations_summary():
    """Show a collapsed expander summarising completion of the considerations page.

    The figure reported for each sub-section is the number of keys currently
    stored in its dictionary under
    ``st.session_state.card_dict["considerations"]``; the overall figure is
    the sum over all sub-sections. Each is displayed next to the matching
    module-level ``N_FIELDS*`` total.
    """
    section = st.session_state.card_dict.get("considerations", {})
    total_filled = sum(len(entries) for entries in section.values())

    title = f"Considerations for Using Data Completion - {total_filled} of {N_FIELDS}"
    with st.expander(title, expanded=False):
        pii_count = len(section.get("pii", {}))
        licenses_count = len(section.get("licenses", {}))
        limitations_count = len(section.get("limitations", {}))
        bullet_points = [
            f"- **Overall completion:**\n - {total_filled} of {N_FIELDS} fields\n",
            f"- **Sub-section - PII Risks and Liability:**\n - {pii_count} of {N_FIELDS_PII} fields\n",
            f"- **Sub-section - Licenses:**\n - {licenses_count} of {N_FIELDS_LICENSES} fields\n",
            f"- **Sub-section - Known Technical Limitations:**\n - {limitations_count} of {N_FIELDS_LIMITATIONS} fields\n",
        ]
        st.markdown("".join(bullet_points))