Fredrik Sitje committed on
Commit
3fb85e6
·
1 Parent(s): 6df93c7

Update the display for language and country codes.

Browse files
Files changed (2) hide show
  1. requirements.txt +2 -0
  2. src/streamlit_app.py +90 -4
requirements.txt CHANGED
@@ -3,3 +3,5 @@ pandas
3
  pyarrow
4
  huggingface-hub
5
  datasets
 
 
 
3
  pyarrow
4
  huggingface-hub
5
  datasets
6
+ pycountry
7
+ babel
src/streamlit_app.py CHANGED
@@ -5,6 +5,8 @@ import hashlib
5
  import json
6
  import tempfile
7
  from huggingface_hub import HfApi, login, hf_hub_download
 
 
8
 
9
  # Hugging Face Dataset configuration
10
  # In HF Spaces, variables and secrets are available as environment variables
@@ -84,6 +86,72 @@ def discover_available_jurisdictions():
84
  # Return empty list as fallback
85
  return []
86
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  @st.cache_data
88
  def load_grading_template(jurisdiction):
89
  """Load grading template from Hugging Face Dataset for the specified jurisdiction"""
@@ -695,12 +763,29 @@ if not st.session_state.logged_in:
695
  st.error("❌ **Error**: No jurisdictions found in the repository. Please ensure the repository structure is correct.")
696
  st.stop()
697
 
698
- # Determine default index for selectbox
 
 
 
699
  default_index = 0
700
- if st.session_state.jurisdiction and st.session_state.jurisdiction in available_jurisdictions:
 
 
 
 
 
701
  default_index = available_jurisdictions.index(st.session_state.jurisdiction)
702
 
703
- jurisdiction = st.selectbox("Jurisdiction", options=available_jurisdictions, index=default_index)
 
 
 
 
 
 
 
 
 
704
  st.session_state.jurisdiction = jurisdiction
705
 
706
  username = st.text_input("Username")
@@ -788,7 +873,8 @@ elif st.session_state.logged_in:
788
  st.write(f"HF Token configured: {HF_TOKEN is not None}")
789
  st.write(f"HF API initialized: {hf_api is not None}")
790
  if username:
791
- st.write(f"Jurisdiction: `{jurisdiction}`")
 
792
  st.write(f"User parquet file: `{jurisdiction}/users/{username}_answers.parquet`")
793
  st.write(f"Users file: `{jurisdiction}/users/users.json`")
794
 
 
5
  import json
6
  import tempfile
7
  from huggingface_hub import HfApi, login, hf_hub_download
8
+ import pycountry
9
+ from babel import Locale
10
 
11
  # Hugging Face Dataset configuration
12
  # In HF Spaces, variables and secrets are available as environment variables
 
86
  # Return empty list as fallback
87
  return []
88
 
89
+ def get_jurisdiction_display_name(jurisdiction_code):
90
+ """
91
+ Convert jurisdiction code (e.g., 'hr-hr') to display name (e.g., 'Croatian-Croatia').
92
+
93
+ Uses ISO 639-1 (language) and ISO 3166-1 (country) codes to generate human-readable names.
94
+
95
+ Args:
96
+ jurisdiction_code: String in format 'language-country' (e.g., 'hr-hr', 'en-us')
97
+
98
+ Returns:
99
+ Display name string (e.g., 'Croatian-Croatia') or original code if conversion fails
100
+ """
101
+ try:
102
+ # Parse jurisdiction code (e.g., "hr-hr" -> language="hr", country="HR")
103
+ parts = jurisdiction_code.lower().split('-')
104
+ if len(parts) != 2:
105
+ return jurisdiction_code # Fallback to original if format is wrong
106
+
107
+ language_code, country_code = parts[0], parts[1].upper()
108
+
109
+ # Get language name using babel
110
+ language_name = None
111
+ try:
112
+ # Try to parse locale (e.g., "hr_HR" or "en_US")
113
+ locale_str = f"{language_code}_{country_code}"
114
+ locale = Locale.parse(locale_str)
115
+ language_name = locale.get_language_name('en')
116
+ if language_name:
117
+ language_name = language_name.title()
118
+ except Exception:
119
+ pass
120
+
121
+ # Fallback: try pycountry for language
122
+ if not language_name:
123
+ try:
124
+ lang = pycountry.languages.get(alpha_2=language_code)
125
+ language_name = lang.name
126
+ except Exception:
127
+ language_name = language_code.upper()
128
+
129
+ # Get country name using pycountry
130
+ country_name = None
131
+ try:
132
+ country = pycountry.countries.get(alpha_2=country_code)
133
+ country_name = country.name
134
+ except Exception:
135
+ country_name = country_code
136
+
137
+ return f"{language_name}-{country_name}"
138
+ except Exception:
139
+ # Fallback to original code if anything goes wrong
140
+ return jurisdiction_code
141
+
142
+ @st.cache_data
143
+ def get_jurisdiction_display_mapping(jurisdiction_codes):
144
+ """
145
+ Create a mapping from jurisdiction codes to display names.
146
+
147
+ Args:
148
+ jurisdiction_codes: List of jurisdiction code strings
149
+
150
+ Returns:
151
+ Dictionary mapping codes to display names
152
+ """
153
+ return {code: get_jurisdiction_display_name(code) for code in jurisdiction_codes}
154
+
155
  @st.cache_data
156
  def load_grading_template(jurisdiction):
157
  """Load grading template from Hugging Face Dataset for the specified jurisdiction"""
 
763
  st.error("❌ **Error**: No jurisdictions found in the repository. Please ensure the repository structure is correct.")
764
  st.stop()
765
 
766
+ # Get display name mapping
767
+ display_mapping = get_jurisdiction_display_mapping(available_jurisdictions)
768
+
769
+ # Determine default index for selectbox - prioritize hr-hr
770
  default_index = 0
771
+ if "hr-hr" in available_jurisdictions:
772
+ default_index = available_jurisdictions.index("hr-hr")
773
+ # Set hr-hr as default in session state if not already set
774
+ if not st.session_state.jurisdiction:
775
+ st.session_state.jurisdiction = "hr-hr"
776
+ elif st.session_state.jurisdiction and st.session_state.jurisdiction in available_jurisdictions:
777
  default_index = available_jurisdictions.index(st.session_state.jurisdiction)
778
 
779
+ # Create format function to show display names
780
+ def format_jurisdiction(code):
781
+ return display_mapping.get(code, code)
782
+
783
+ jurisdiction = st.selectbox(
784
+ "Jurisdiction",
785
+ options=available_jurisdictions,
786
+ index=default_index,
787
+ format_func=format_jurisdiction
788
+ )
789
  st.session_state.jurisdiction = jurisdiction
790
 
791
  username = st.text_input("Username")
 
873
  st.write(f"HF Token configured: {HF_TOKEN is not None}")
874
  st.write(f"HF API initialized: {hf_api is not None}")
875
  if username:
876
+ jurisdiction_display = get_jurisdiction_display_name(jurisdiction)
877
+ st.write(f"Jurisdiction: {jurisdiction_display} (`{jurisdiction}`)")
878
  st.write(f"User parquet file: `{jurisdiction}/users/{username}_answers.parquet`")
879
  st.write(f"Users file: `{jurisdiction}/users/users.json`")
880