Spaces:
Running
Running
Fredrik Sitje
committed on
Commit
·
f1694ed
1
Parent(s):
110375c
Added sorting order based on the order the TransLegal Client uses.
Browse files- config/category_order.json +69 -0
- src/streamlit_app.py +93 -7
config/category_order.json
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"categories": [
|
| 3 |
+
{
|
| 4 |
+
"name": "purpose",
|
| 5 |
+
"subcategories": ["purpose_goal", "historical_background", "historical_influence", "origin_system"]
|
| 6 |
+
},
|
| 7 |
+
{
|
| 8 |
+
"name": "legal_effect",
|
| 9 |
+
"subcategories": ["creates_right", "modifies_right", "extinguishes_right", "creates_obligation", "modifies_obligation", "extinguishes_obligation", "creates_status", "modifies_status", "extinguishes_status", "normative_structure", "judicial_interpretation"]
|
| 10 |
+
},
|
| 11 |
+
{
|
| 12 |
+
"name": "subjects",
|
| 13 |
+
"subcategories": ["natural_persons", "legal_entities", "state", "third_parties", "private_property", "public_property"]
|
| 14 |
+
},
|
| 15 |
+
{
|
| 16 |
+
"name": "legal_source",
|
| 17 |
+
"subcategories": ["based_on_statute", "based_on_case_law", "based_on_custom", "based_on_regulation"]
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"name": "enforceability",
|
| 21 |
+
"subcategories": ["enforceable_by_whom", "enforceable_how"]
|
| 22 |
+
},
|
| 23 |
+
{
|
| 24 |
+
"name": "third_parties",
|
| 25 |
+
"subcategories": ["third_party_rights", "third_party_obligations"]
|
| 26 |
+
},
|
| 27 |
+
{
|
| 28 |
+
"name": "formal_requirements",
|
| 29 |
+
"subcategories": ["requires_written_document", "requires_registration", "requires_consent", "requires_notarization"]
|
| 30 |
+
},
|
| 31 |
+
{
|
| 32 |
+
"name": "limitations_or_conditions",
|
| 33 |
+
"subcategories": ["substantive_limitations", "procedural_limitations", "temporal_limitations", "geographical_limitations"]
|
| 34 |
+
},
|
| 35 |
+
{
|
| 36 |
+
"name": "public_policy_limits",
|
| 37 |
+
"subcategories": ["limited_by_public_policy", "voided_by_public_policy"]
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"name": "remedies_consequences",
|
| 41 |
+
"subcategories": ["private_law_remedies", "public_law_remedies", "administrative_law_remedies", "criminal_law_remedies", "international_law_remedies"]
|
| 42 |
+
},
|
| 43 |
+
{
|
| 44 |
+
"name": "procedural_vs_substantive_nature",
|
| 45 |
+
"subcategories": ["is_procedural_or_substantive"]
|
| 46 |
+
},
|
| 47 |
+
{
|
| 48 |
+
"name": "direct_or_derivative_rights",
|
| 49 |
+
"subcategories": ["confers_rights_directly", "confers_rights_derivatively", "confers_obligations_directly", "confers_obligations_derivatively"]
|
| 50 |
+
},
|
| 51 |
+
{
|
| 52 |
+
"name": "private_vs_public_law",
|
| 53 |
+
"subcategories": ["is_private_or_public_law"]
|
| 54 |
+
},
|
| 55 |
+
{
|
| 56 |
+
"name": "legal_fictions_presumptions",
|
| 57 |
+
"subcategories": ["relies_on_fictions"]
|
| 58 |
+
},
|
| 59 |
+
{
|
| 60 |
+
"name": "systemic_classification",
|
| 61 |
+
"subcategories": ["legal_field_classification", "internal_classification", "authoritative_definition"]
|
| 62 |
+
},
|
| 63 |
+
{
|
| 64 |
+
"name": "cross_border_effects",
|
| 65 |
+
"subcategories": ["international_implications"]
|
| 66 |
+
}
|
| 67 |
+
]
|
| 68 |
+
}
|
| 69 |
+
|
src/streamlit_app.py
CHANGED
|
@@ -218,6 +218,55 @@ def format_snake_case(text):
|
|
| 218 |
"""Convert snake_case to Title Case"""
|
| 219 |
return ' '.join(word.capitalize() for word in text.split('_'))
|
| 220 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 221 |
def inject_tooltip_css():
|
| 222 |
"""Inject CSS to style radio button captions"""
|
| 223 |
caption_css = """
|
|
@@ -662,8 +711,18 @@ class Category:
|
|
| 662 |
(df['answer'] != "Unknown")]
|
| 663 |
|
| 664 |
# Get all subcategories for this term-category pair (excluding Unknown answers)
|
| 665 |
-
subcategory_names = filtered_df['subcategory'].unique()
|
| 666 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 667 |
|
| 668 |
# Create Subcategory instances (only for non-Unknown answers)
|
| 669 |
self.subcategories = [
|
|
@@ -680,8 +739,14 @@ class Term:
|
|
| 680 |
self.formatted_name = format_snake_case(term_name)
|
| 681 |
|
| 682 |
# Get all categories for this term
|
| 683 |
-
category_names = df[df['term'] == term_name]['category'].unique()
|
| 684 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 685 |
|
| 686 |
# Create Category instances
|
| 687 |
self.categories = [
|
|
@@ -700,10 +765,31 @@ class Term:
|
|
| 700 |
@st.cache_data
|
| 701 |
def get_term_category_pairs(df):
|
| 702 |
"""Get filtered term-category pairs, cached to avoid recomputation on every rerun"""
|
|
|
|
|
|
|
|
|
|
| 703 |
# Filter out categories that have no subcategories after filtering Unknown answers
|
| 704 |
-
|
| 705 |
-
|
| 706 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 707 |
|
| 708 |
# Cache for Term instances (keyed by jurisdiction and term_name)
|
| 709 |
term_cache = {}
|
|
|
|
| 218 |
"""Convert snake_case to Title Case"""
|
| 219 |
return ' '.join(word.capitalize() for word in text.split('_'))
|
| 220 |
|
| 221 |
+
@st.cache_data
def load_category_order():
    """Load the category/subcategory ordering from ``config/category_order.json``.

    The file is located relative to this module: one directory above the
    directory that contains this file, inside ``config/``.

    Returns:
        The parsed JSON content. If the file cannot be read or parsed, a
        warning is shown with ``st.warning`` and ``{"categories": []}`` is
        returned, i.e. an empty ordering.
    """
    # NOTE(review): because of st.cache_data the fallback value is presumably
    # cached as well - confirm whether fixing the config later requires
    # clearing the cache.
    try:
        config_path = os.path.join(
            os.path.dirname(os.path.dirname(__file__)), 'config', 'category_order.json'
        )
        # JSON is UTF-8 by specification; do not rely on the platform's
        # locale-dependent default encoding.
        with open(config_path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except (OSError, ValueError) as e:
        # OSError: missing or unreadable file.
        # ValueError: malformed JSON (json.JSONDecodeError) or bytes that are
        # not valid UTF-8 (UnicodeDecodeError).
        st.warning(f"⚠️ Could not load category order config: {str(e)}. Using default alphabetical order.")
        return {"categories": []}
|
| 231 |
+
|
| 232 |
+
def sort_by_config_order(items, config_order, default_to_end=True):
    """
    Arrange items so they follow a preferred ordering.

    Args:
        items: Iterable of hashable values to arrange
        config_order: Sequence giving the preferred order; when it is empty
            or None, the items are returned in plain ascending order
        default_to_end: When True, items missing from config_order are placed
            after the configured ones in ascending order; when False they
            are dropped

    Returns:
        New list holding the arranged items
    """
    # No preference supplied: plain ascending sort of everything
    if not config_order:
        return sorted(items)

    # Rank of every configured value (a repeated value keeps its last position)
    rank = {}
    for position, name in enumerate(config_order):
        rank[name] = position

    # Single pass over the input: split configured from unconfigured values
    known, unknown = [], []
    for entry in items:
        bucket = known if entry in rank else unknown
        bucket.append(entry)

    # Stable sort, so repeated items keep their incoming relative order
    ordered = sorted(known, key=rank.__getitem__)

    if not default_to_end:
        return ordered

    # Unconfigured values trail the configured ones, in ascending order
    unknown.sort()
    return ordered + unknown
|
| 269 |
+
|
| 270 |
def inject_tooltip_css():
|
| 271 |
"""Inject CSS to style radio button captions"""
|
| 272 |
caption_css = """
|
|
|
|
| 711 |
(df['answer'] != "Unknown")]
|
| 712 |
|
| 713 |
# Get all subcategories for this term-category pair (excluding Unknown answers)
|
| 714 |
+
subcategory_names = filtered_df['subcategory'].unique().tolist()
|
| 715 |
+
|
| 716 |
+
# Load category order config and sort subcategories accordingly
|
| 717 |
+
config = load_category_order()
|
| 718 |
+
subcategory_order = []
|
| 719 |
+
for cat in config.get('categories', []):
|
| 720 |
+
if cat['name'] == category_name:
|
| 721 |
+
subcategory_order = cat['subcategories']
|
| 722 |
+
break
|
| 723 |
+
|
| 724 |
+
# Sort subcategories using config order (items not in config are appended at the end)
|
| 725 |
+
subcategory_names = sort_by_config_order(subcategory_names, subcategory_order, default_to_end=True)
|
| 726 |
|
| 727 |
# Create Subcategory instances (only for non-Unknown answers)
|
| 728 |
self.subcategories = [
|
|
|
|
| 739 |
self.formatted_name = format_snake_case(term_name)
|
| 740 |
|
| 741 |
# Get all categories for this term
|
| 742 |
+
category_names = df[df['term'] == term_name]['category'].unique().tolist()
|
| 743 |
+
|
| 744 |
+
# Load category order config and sort categories accordingly
|
| 745 |
+
config = load_category_order()
|
| 746 |
+
category_order = [cat['name'] for cat in config.get('categories', [])]
|
| 747 |
+
|
| 748 |
+
# Sort categories using config order (items not in config are appended at the end)
|
| 749 |
+
category_names = sort_by_config_order(category_names, category_order, default_to_end=True)
|
| 750 |
|
| 751 |
# Create Category instances
|
| 752 |
self.categories = [
|
|
|
|
| 765 |
@st.cache_data
def get_term_category_pairs(df):
    """Get filtered term-category pairs, cached to avoid recomputation on every rerun.

    Args:
        df: DataFrame with at least 'term' and 'category' columns; it is also
            passed on to ``category_has_subcategories``.

    Returns:
        List of ``(term, category)`` tuples for which
        ``category_has_subcategories`` is true, ordered by:
          1. the category's position in the category order config
             (categories missing from the config come last),
          2. the category name, which keeps categories that are missing from
             the config grouped together instead of interleaved,
          3. the term name.
    """
    # Get all unique term-category pairs (without sorting yet)
    all_pairs = df[['term', 'category']].drop_duplicates().values.tolist()

    # Filter out categories that have no subcategories after filtering Unknown answers
    filtered_pairs = [(term, category) for term, category in all_pairs
                      if category_has_subcategories(term, category, df)]

    # Load category order config and map each configured category to its position
    config = load_category_order()
    category_order = [cat['name'] for cat in config.get('categories', [])]
    order_map = {cat: idx for idx, cat in enumerate(category_order)}

    # Every category absent from the config shares this trailing position
    unlisted_position = len(category_order)

    def sort_key(pair):
        term, category = pair
        # The category name is a tie-breaker: it has no effect for configured
        # categories (one name per position) but groups unlisted categories
        # alphabetically, consistent with sort_by_config_order.
        return (order_map.get(category, unlisted_position), category, term)

    filtered_pairs.sort(key=sort_key)

    return filtered_pairs
|
| 793 |
|
| 794 |
# Cache for Term instances (keyed by jurisdiction and term_name)
|
| 795 |
term_cache = {}
|