Spaces:
Sleeping
Sleeping
Fredrik Sitje
committed on
Commit
·
3fb85e6
1
Parent(s):
6df93c7
Update the display for language and country codes.
Browse files- requirements.txt +2 -0
- src/streamlit_app.py +90 -4
requirements.txt
CHANGED
|
@@ -3,3 +3,5 @@ pandas
|
|
| 3 |
pyarrow
|
| 4 |
huggingface-hub
|
| 5 |
datasets
|
|
|
|
|
|
|
|
|
| 3 |
pyarrow
|
| 4 |
huggingface-hub
|
| 5 |
datasets
|
| 6 |
+
pycountry
|
| 7 |
+
babel
|
src/streamlit_app.py
CHANGED
|
@@ -5,6 +5,8 @@ import hashlib
|
|
| 5 |
import json
|
| 6 |
import tempfile
|
| 7 |
from huggingface_hub import HfApi, login, hf_hub_download
|
|
|
|
|
|
|
| 8 |
|
| 9 |
# Hugging Face Dataset configuration
|
| 10 |
# In HF Spaces, variables and secrets are available as environment variables
|
|
@@ -84,6 +86,72 @@ def discover_available_jurisdictions():
|
|
| 84 |
# Return empty list as fallback
|
| 85 |
return []
|
| 86 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
@st.cache_data
|
| 88 |
def load_grading_template(jurisdiction):
|
| 89 |
"""Load grading template from Hugging Face Dataset for the specified jurisdiction"""
|
|
@@ -695,12 +763,29 @@ if not st.session_state.logged_in:
|
|
| 695 |
st.error("❌ **Error**: No jurisdictions found in the repository. Please ensure the repository structure is correct.")
|
| 696 |
st.stop()
|
| 697 |
|
| 698 |
-
#
|
|
|
|
|
|
|
|
|
|
| 699 |
default_index = 0
|
| 700 |
-
if
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 701 |
default_index = available_jurisdictions.index(st.session_state.jurisdiction)
|
| 702 |
|
| 703 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 704 |
st.session_state.jurisdiction = jurisdiction
|
| 705 |
|
| 706 |
username = st.text_input("Username")
|
|
@@ -788,7 +873,8 @@ elif st.session_state.logged_in:
|
|
| 788 |
st.write(f"HF Token configured: {HF_TOKEN is not None}")
|
| 789 |
st.write(f"HF API initialized: {hf_api is not None}")
|
| 790 |
if username:
|
| 791 |
-
|
|
|
|
| 792 |
st.write(f"User parquet file: `{jurisdiction}/users/{username}_answers.parquet`")
|
| 793 |
st.write(f"Users file: `{jurisdiction}/users/users.json`")
|
| 794 |
|
|
|
|
| 5 |
import json
|
| 6 |
import tempfile
|
| 7 |
from huggingface_hub import HfApi, login, hf_hub_download
|
| 8 |
+
import pycountry
|
| 9 |
+
from babel import Locale
|
| 10 |
|
| 11 |
# Hugging Face Dataset configuration
|
| 12 |
# In HF Spaces, variables and secrets are available as environment variables
|
|
|
|
| 86 |
# Return empty list as fallback
|
| 87 |
return []
|
| 88 |
|
| 89 |
+
def _language_display_name(language_code, country_code):
    """Return an English name for the language, or the upper-cased code if every lookup fails."""
    # Display-only, best-effort lookup: any failure in babel simply moves on
    # to the next source instead of surfacing an error to the UI.
    try:
        # Locale string in babel's underscore form (e.g., "hr_HR" or "en_US")
        locale = Locale.parse(f"{language_code}_{country_code}")
        name = locale.get_language_name('en')
        if name:
            return name.title()
    except Exception:
        pass

    # Fallback: try pycountry for the language
    try:
        lang = pycountry.languages.get(alpha_2=language_code)
        if lang is not None:
            return lang.name
    except Exception:
        pass

    # Last resort: show the raw language code in upper case
    return language_code.upper()


def _country_display_name(country_code):
    """Return the pycountry name for the country, or the code itself if the lookup fails."""
    try:
        country = pycountry.countries.get(alpha_2=country_code)
        if country is not None:
            return country.name
    except Exception:
        pass
    return country_code


def get_jurisdiction_display_name(jurisdiction_code):
    """
    Convert jurisdiction code (e.g., 'hr-hr') to display name (e.g., 'Croatian-Croatia').

    The language name is looked up with babel first and pycountry second; the
    country name is looked up with pycountry. Each lookup is best-effort: when
    it fails, the (upper-cased) code component is shown instead.

    Args:
        jurisdiction_code: String in format 'language-country' (e.g., 'hr-hr', 'en-us')

    Returns:
        Display name string (e.g., 'Croatian-Croatia'), or the original value
        unchanged if it is not a string or is not made of exactly two
        non-empty, dash-separated components.
    """
    # Anything that is not a string cannot be parsed; hand it back untouched.
    if not isinstance(jurisdiction_code, str):
        return jurisdiction_code

    # Parse jurisdiction code (e.g., "hr-hr" -> language="hr", country="HR")
    parts = jurisdiction_code.lower().split('-')

    # Require exactly two non-empty components. Without the emptiness check,
    # inputs such as "hr-" or "-hr" would yield a dangling-dash display name
    # instead of falling back to the original code.
    if len(parts) != 2 or not all(parts):
        return jurisdiction_code

    language_code, country_code = parts[0], parts[1].upper()

    language_name = _language_display_name(language_code, country_code)
    country_name = _country_display_name(country_code)
    return f"{language_name}-{country_name}"
|
| 141 |
+
|
| 142 |
+
@st.cache_data
def get_jurisdiction_display_mapping(jurisdiction_codes):
    """
    Build a lookup table of display names keyed by jurisdiction code.

    Args:
        jurisdiction_codes: List of jurisdiction code strings

    Returns:
        Dictionary whose keys are the given codes and whose values are the
        results of get_jurisdiction_display_name for each code
    """
    display_names = {}
    for jurisdiction_code in jurisdiction_codes:
        display_names[jurisdiction_code] = get_jurisdiction_display_name(jurisdiction_code)
    return display_names
|
| 154 |
+
|
| 155 |
@st.cache_data
|
| 156 |
def load_grading_template(jurisdiction):
|
| 157 |
"""Load grading template from Hugging Face Dataset for the specified jurisdiction"""
|
|
|
|
| 763 |
st.error("❌ **Error**: No jurisdictions found in the repository. Please ensure the repository structure is correct.")
|
| 764 |
st.stop()
|
| 765 |
|
| 766 |
+
# Get display name mapping
|
| 767 |
+
display_mapping = get_jurisdiction_display_mapping(available_jurisdictions)
|
| 768 |
+
|
| 769 |
+
# Determine default index for selectbox - prioritize hr-hr
|
| 770 |
default_index = 0
|
| 771 |
+
if "hr-hr" in available_jurisdictions:
|
| 772 |
+
default_index = available_jurisdictions.index("hr-hr")
|
| 773 |
+
# Set hr-hr as default in session state if not already set
|
| 774 |
+
if not st.session_state.jurisdiction:
|
| 775 |
+
st.session_state.jurisdiction = "hr-hr"
|
| 776 |
+
elif st.session_state.jurisdiction and st.session_state.jurisdiction in available_jurisdictions:
|
| 777 |
default_index = available_jurisdictions.index(st.session_state.jurisdiction)
|
| 778 |
|
| 779 |
+
# Create format function to show display names
|
| 780 |
+
def format_jurisdiction(code):
|
| 781 |
+
return display_mapping.get(code, code)
|
| 782 |
+
|
| 783 |
+
jurisdiction = st.selectbox(
|
| 784 |
+
"Jurisdiction",
|
| 785 |
+
options=available_jurisdictions,
|
| 786 |
+
index=default_index,
|
| 787 |
+
format_func=format_jurisdiction
|
| 788 |
+
)
|
| 789 |
st.session_state.jurisdiction = jurisdiction
|
| 790 |
|
| 791 |
username = st.text_input("Username")
|
|
|
|
| 873 |
st.write(f"HF Token configured: {HF_TOKEN is not None}")
|
| 874 |
st.write(f"HF API initialized: {hf_api is not None}")
|
| 875 |
if username:
|
| 876 |
+
jurisdiction_display = get_jurisdiction_display_name(jurisdiction)
|
| 877 |
+
st.write(f"Jurisdiction: {jurisdiction_display} (`{jurisdiction}`)")
|
| 878 |
st.write(f"User parquet file: `{jurisdiction}/users/{username}_answers.parquet`")
|
| 879 |
st.write(f"Users file: `{jurisdiction}/users/users.json`")
|
| 880 |
|