Spaces:

TransLegal
/

grading-answers

Running

App Files Files Community

Fredrik Sitje committed on Dec 18, 2025

Commit

f1694ed

1 Parent(s): 110375c

Added sorting order based on the order the TransLegal Client uses.

Browse files

Files changed (2) hide show

config/category_order.json +69 -0
src/streamlit_app.py +93 -7

config/category_order.json ADDED Viewed

	@@ -0,0 +1,69 @@

+{
+  "categories": [
+    {
+      "name": "purpose",
+      "subcategories": ["purpose_goal", "historical_background", "historical_influence", "origin_system"]
+    },
+    {
+      "name": "legal_effect",
+      "subcategories": ["creates_right", "modifies_right", "extinguishes_right", "creates_obligation", "modifies_obligation", "extinguishes_obligation", "creates_status", "modifies_status", "extinguishes_status", "normative_structure", "judicial_interpretation"]
+    },
+    {
+      "name": "subjects",
+      "subcategories": ["natural_persons", "legal_entities", "state", "third_parties", "private_property", "public_property"]
+    },
+    {
+      "name": "legal_source",
+      "subcategories": ["based_on_statute", "based_on_case_law", "based_on_custom", "based_on_regulation"]
+    },
+    {
+      "name": "enforceability",
+      "subcategories": ["enforceable_by_whom", "enforceable_how"]
+    },
+    {
+      "name": "third_parties",
+      "subcategories": ["third_party_rights", "third_party_obligations"]
+    },
+    {
+      "name": "formal_requirements",
+      "subcategories": ["requires_written_document", "requires_registration", "requires_consent", "requires_notarization"]
+    },
+    {
+      "name": "limitations_or_conditions",
+      "subcategories": ["substantive_limitations", "procedural_limitations", "temporal_limitations", "geographical_limitations"]
+    },
+    {
+      "name": "public_policy_limits",
+      "subcategories": ["limited_by_public_policy", "voided_by_public_policy"]
+    },
+    {
+      "name": "remedies_consequences",
+      "subcategories": ["private_law_remedies", "public_law_remedies", "administrative_law_remedies", "criminal_law_remedies", "international_law_remedies"]
+    },
+    {
+      "name": "procedural_vs_substantive_nature",
+      "subcategories": ["is_procedural_or_substantive"]
+    },
+    {
+      "name": "direct_or_derivative_rights",
+      "subcategories": ["confers_rights_directly", "confers_rights_derivatively", "confers_obligations_directly", "confers_obligations_derivatively"]
+    },
+    {
+      "name": "private_vs_public_law",
+      "subcategories": ["is_private_or_public_law"]
+    },
+    {
+      "name": "legal_fictions_presumptions",
+      "subcategories": ["relies_on_fictions"]
+    },
+    {
+      "name": "systemic_classification",
+      "subcategories": ["legal_field_classification", "internal_classification", "authoritative_definition"]
+    },
+    {
+      "name": "cross_border_effects",
+      "subcategories": ["international_implications"]
+    }
+  ]
+}

src/streamlit_app.py CHANGED Viewed

@@ -218,6 +218,55 @@ def format_snake_case(text):
     """Convert snake_case to Title Case"""
     return ' '.join(word.capitalize() for word in text.split('_'))
 def inject_tooltip_css():
     """Inject CSS to style radio button captions"""
     caption_css = """
@@ -662,8 +711,18 @@ class Category:
                         (df['answer'] != "Unknown")]
         # Get all subcategories for this term-category pair (excluding Unknown answers)
-        subcategory_names = filtered_df['subcategory'].unique()
-        subcategory_names = sorted(subcategory_names.tolist())
         # Create Subcategory instances (only for non-Unknown answers)
         self.subcategories = [
@@ -680,8 +739,14 @@ class Term:
         self.formatted_name = format_snake_case(term_name)
         # Get all categories for this term
-        category_names = df[df['term'] == term_name]['category'].unique()
-        category_names = sorted(category_names.tolist())
         # Create Category instances
         self.categories = [
@@ -700,10 +765,31 @@ class Term:
 @st.cache_data
 def get_term_category_pairs(df):
     """Get filtered term-category pairs, cached to avoid recomputation on every rerun"""
     # Filter out categories that have no subcategories after filtering Unknown answers
-    all_pairs = df[['term', 'category']].drop_duplicates().sort_values(['term', 'category']).values.tolist()
-    return [(term, category) for term, category in all_pairs
-            if category_has_subcategories(term, category, df)]
 # Cache for Term instances (keyed by jurisdiction and term_name)
 term_cache = {}

     """Convert snake_case to Title Case"""
     return ' '.join(word.capitalize() for word in text.split('_'))
@st.cache_data
def load_category_order():
    """Load category order configuration from JSON file.

    The file is read from ``config/category_order.json`` inside the parent
    directory of the directory that contains this module.

    Returns:
        The parsed JSON document. If the file cannot be read or parsed, a
        warning is shown with ``st.warning`` and ``{"categories": []}`` is
        returned instead of raising.
    """
    config_path = os.path.join(
        os.path.dirname(os.path.dirname(__file__)), 'config', 'category_order.json'
    )
    try:
        # JSON text is UTF-8; do not depend on the platform's default encoding.
        with open(config_path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except (OSError, ValueError) as e:
        # OSError: missing or unreadable file.
        # ValueError: malformed JSON (json.JSONDecodeError) or bytes that are
        # not valid UTF-8 (UnicodeDecodeError).
        st.warning(f"⚠️ Could not load category order config: {str(e)}. Using default alphabetical order.")
        return {"categories": []}
def sort_by_config_order(items, config_order, default_to_end=True):
    """
    Sort items according to a configured order.
    Args:
        items: Iterable of hashable items to sort
        config_order: List defining the desired order
        default_to_end: If True, append items not in config at the end; if False, exclude them
    Returns:
        New sorted list of items. Items found in config_order come first, in
        config order; the remaining items follow in their natural sort order
        (or are dropped when default_to_end is False). With an empty
        config_order all items are returned in their natural sort order.
    """
    def _natural_sort(values):
        # Use the values' own ordering whenever it exists. Only when they are
        # not mutually comparable (e.g. a float NaN mixed in with strings)
        # fall back to ordering by string form instead of raising TypeError.
        try:
            return sorted(values)
        except TypeError:
            return sorted(values, key=str)

    # Materialise once so that any iterable (including a generator) is accepted
    # and can safely be sorted a second time by the fallback above.
    items = list(items)
    if not config_order:
        # Fallback to alphabetical if no config
        return _natural_sort(items)
    # Create a mapping of item -> position in config
    order_map = {item: idx for idx, item in enumerate(config_order)}
    # Separate items into those in config and those not
    in_config = []
    not_in_config = []
    for item in items:
        if item in order_map:
            in_config.append(item)
        else:
            not_in_config.append(item)
    # Sort items that are in config by their configured position
    # (stable: duplicates keep their relative input order)
    in_config.sort(key=order_map.__getitem__)
    if not default_to_end:
        return in_config
    # Config items first, then the others in natural order
    return in_config + _natural_sort(not_in_config)
 def inject_tooltip_css():
     """Inject CSS to style radio button captions"""
     caption_css = """
                         (df['answer'] != "Unknown")]
         # Get all subcategories for this term-category pair (excluding Unknown answers)
+        subcategory_names = filtered_df['subcategory'].unique().tolist()
+        # Load category order config and sort subcategories accordingly
+        config = load_category_order()
+        subcategory_order = []
+        for cat in config.get('categories', []):
+            if cat['name'] == category_name:
+                subcategory_order = cat['subcategories']
+                break
+        # Sort subcategories using config order (items not in config are appended at the end)
+        subcategory_names = sort_by_config_order(subcategory_names, subcategory_order, default_to_end=True)
         # Create Subcategory instances (only for non-Unknown answers)
         self.subcategories = [
         self.formatted_name = format_snake_case(term_name)
         # Get all categories for this term
+        category_names = df[df['term'] == term_name]['category'].unique().tolist()
+        # Load category order config and sort categories accordingly
+        config = load_category_order()
+        category_order = [cat['name'] for cat in config.get('categories', [])]
+        # Sort categories using config order (items not in config are appended at the end)
+        category_names = sort_by_config_order(category_names, category_order, default_to_end=True)
         # Create Category instances
         self.categories = [
 @st.cache_data
 def get_term_category_pairs(df):
     """Get filtered term-category pairs, cached to avoid recomputation on every rerun"""
+    # Get all unique term-category pairs (without sorting yet)
+    all_pairs = df[['term', 'category']].drop_duplicates().values.tolist()
     # Filter out categories that have no subcategories after filtering Unknown answers
+    filtered_pairs = [(term, category) for term, category in all_pairs
+                      if category_has_subcategories(term, category, df)]
+    # Load category order config
+    config = load_category_order()
+    category_order = [cat['name'] for cat in config.get('categories', [])]
+    # Sort pairs by category order (preserving term order within each category)
+    # Create order mapping
+    order_map = {cat: idx for idx, cat in enumerate(category_order)}
+    # Sort: first by category order (using config), then by term name alphabetically
+    def sort_key(pair):
+        term, category = pair
+        # Get category position from config (or large number if not in config)
+        cat_position = order_map.get(category, len(category_order))
+        return (cat_position, term)
+    filtered_pairs.sort(key=sort_key)
+    return filtered_pairs
 # Cache for Term instances (keyed by jurisdiction and term_name)
 term_cache = {}