Spaces:

TransLegal
/

grading-answers

Running

App Files Files Community

Fredrik Sitje committed on Dec 18, 2025

Commit

c3069c3

1 Parent(s): f1694ed

Enhance README.md with new indexing columns for categories and subcategories. Remove the deprecated category order configuration file and update the Streamlit app to sort categories and subcategories by their respective indices. This improves the display order and keeps data presentation consistent.

Browse files

Files changed (4) hide show

README.md +3 -0
config/category_order.json +0 -69
src/grading_template.parquet +2 -2
src/streamlit_app.py +15 -91

README.md CHANGED Viewed

@@ -58,10 +58,13 @@ To add a new jurisdiction to the repository:
    - **Required Structure:** The parquet file must contain the following columns:
      - `term` (string) - The legal term being assessed
      - `category` (string) - Category within the term
      - `subcategory` (string) - Subcategory within the category
      - `question` (string) - The question being asked
      - `answer` (string) - The AI-generated answer to be graded
    - **Special Values:** Answers can be `"Unknown."` or `"Unknown"` to indicate unknown/unavailable information (these are automatically scored as "Irrelevant / NA")
 3. **Create users directory:**
    - Create `{jurisdiction}/users/` directory with an empty `.gitkeep` file (so the directory is tracked in Git)

    - **Required Structure:** The parquet file must contain the following columns:
      - `term` (string) - The legal term being assessed
      - `category` (string) - Category within the term
+     - `category_index` (integer) - Display order for categories (lower numbers appear first)
      - `subcategory` (string) - Subcategory within the category
+     - `subcategory_index` (integer) - Display order for subcategories within each category (lower numbers appear first)
      - `question` (string) - The question being asked
      - `answer` (string) - The AI-generated answer to be graded
    - **Special Values:** Answers can be `"Unknown."` or `"Unknown"` to indicate unknown/unavailable information (these are automatically scored as "Irrelevant / NA")
+   - **Display Order:** The `category_index` and `subcategory_index` columns control the order in which categories and subcategories are displayed in the app. Items with lower index values appear first.
 3. **Create users directory:**
    - Create `{jurisdiction}/users/` directory with an empty `.gitkeep` file (so the directory is tracked in Git)

config/category_order.json DELETED Viewed

@@ -1,69 +0,0 @@
-{
-  "categories": [
-    {
-      "name": "purpose",
-      "subcategories": ["purpose_goal", "historical_background", "historical_influence", "origin_system"]
-    },
-    {
-      "name": "legal_effect",
-      "subcategories": ["creates_right", "modifies_right", "extinguishes_right", "creates_obligation", "modifies_obligation", "extinguishes_obligation", "creates_status", "modifies_status", "extinguishes_status", "normative_structure", "judicial_interpretation"]
-    },
-    {
-      "name": "subjects",
-      "subcategories": ["natural_persons", "legal_entities", "state", "third_parties", "private_property", "public_property"]
-    },
-    {
-      "name": "legal_source",
-      "subcategories": ["based_on_statute", "based_on_case_law", "based_on_custom", "based_on_regulation"]
-    },
-    {
-      "name": "enforceability",
-      "subcategories": ["enforceable_by_whom", "enforceable_how"]
-    },
-    {
-      "name": "third_parties",
-      "subcategories": ["third_party_rights", "third_party_obligations"]
-    },
-    {
-      "name": "formal_requirements",
-      "subcategories": ["requires_written_document", "requires_registration", "requires_consent", "requires_notarization"]
-    },
-    {
-      "name": "limitations_or_conditions",
-      "subcategories": ["substantive_limitations", "procedural_limitations", "temporal_limitations", "geographical_limitations"]
-    },
-    {
-      "name": "public_policy_limits",
-      "subcategories": ["limited_by_public_policy", "voided_by_public_policy"]
-    },
-    {
-      "name": "remedies_consequences",
-      "subcategories": ["private_law_remedies", "public_law_remedies", "administrative_law_remedies", "criminal_law_remedies", "international_law_remedies"]
-    },
-    {
-      "name": "procedural_vs_substantive_nature",
-      "subcategories": ["is_procedural_or_substantive"]
-    },
-    {
-      "name": "direct_or_derivative_rights",
-      "subcategories": ["confers_rights_directly", "confers_rights_derivatively", "confers_obligations_directly", "confers_obligations_derivatively"]
-    },
-    {
-      "name": "private_vs_public_law",
-      "subcategories": ["is_private_or_public_law"]
-    },
-    {
-      "name": "legal_fictions_presumptions",
-      "subcategories": ["relies_on_fictions"]
-    },
-    {
-      "name": "systemic_classification",
-      "subcategories": ["legal_field_classification", "internal_classification", "authoritative_definition"]
-    },
-    {
-      "name": "cross_border_effects",
-      "subcategories": ["international_implications"]
-    }
-  ]
-}

src/grading_template.parquet CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:21b094fa1c104c990de260611e55e7ae8914ea35cc6048049198056e0b7057a8
-size 186660

 version https://git-lfs.github.com/spec/v1
+oid sha256:27b4eb36dd848643a26b4d9ec102382c3ae19d36d859132149d82f927458fcec
+size 188408

src/streamlit_app.py CHANGED Viewed

@@ -218,55 +218,6 @@ def format_snake_case(text):
     """Convert snake_case to Title Case"""
     return ' '.join(word.capitalize() for word in text.split('_'))
-@st.cache_data
-def load_category_order():
-    """Load category order configuration from JSON file"""
-    try:
-        config_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'config', 'category_order.json')
-        with open(config_path, 'r') as f:
-            return json.load(f)
-    except Exception as e:
-        st.warning(f"⚠️ Could not load category order config: {str(e)}. Using default alphabetical order.")
-        return {"categories": []}
-def sort_by_config_order(items, config_order, default_to_end=True):
-    """
-    Sort items according to a configured order.
-    Args:
-        items: List of items to sort
-        config_order: List defining the desired order
-        default_to_end: If True, append items not in config at the end; if False, exclude them
-    Returns:
-        Sorted list of items
-    """
-    if not config_order:
-        # Fallback to alphabetical if no config
-        return sorted(items)
-    # Create a mapping of item -> position in config
-    order_map = {item: idx for idx, item in enumerate(config_order)}
-    # Separate items into those in config and those not
-    in_config = []
-    not_in_config = []
-    for item in items:
-        if item in order_map:
-            in_config.append(item)
-        else:
-            not_in_config.append(item)
-    # Sort items that are in config by their configured position
-    in_config.sort(key=lambda x: order_map[x])
-    # Combine: config items first, then others (sorted alphabetically) if default_to_end
-    if default_to_end:
-        return in_config + sorted(not_in_config)
-    else:
-        return in_config
 def inject_tooltip_css():
     """Inject CSS to style radio button captions"""
     caption_css = """
@@ -711,18 +662,10 @@ class Category:
                         (df['answer'] != "Unknown")]
         # Get all subcategories for this term-category pair (excluding Unknown answers)
-        subcategory_names = filtered_df['subcategory'].unique().tolist()
-        # Load category order config and sort subcategories accordingly
-        config = load_category_order()
-        subcategory_order = []
-        for cat in config.get('categories', []):
-            if cat['name'] == category_name:
-                subcategory_order = cat['subcategories']
-                break
-        # Sort subcategories using config order (items not in config are appended at the end)
-        subcategory_names = sort_by_config_order(subcategory_names, subcategory_order, default_to_end=True)
         # Create Subcategory instances (only for non-Unknown answers)
         self.subcategories = [
@@ -738,15 +681,10 @@ class Term:
         self.term_name = term_name
         self.formatted_name = format_snake_case(term_name)
-        # Get all categories for this term
-        category_names = df[df['term'] == term_name]['category'].unique().tolist()
-        # Load category order config and sort categories accordingly
-        config = load_category_order()
-        category_order = [cat['name'] for cat in config.get('categories', [])]
-        # Sort categories using config order (items not in config are appended at the end)
-        category_names = sort_by_config_order(category_names, category_order, default_to_end=True)
         # Create Category instances
         self.categories = [
@@ -765,29 +703,15 @@ class Term:
 @st.cache_data
 def get_term_category_pairs(df):
     """Get filtered term-category pairs, cached to avoid recomputation on every rerun"""
-    # Get all unique term-category pairs (without sorting yet)
-    all_pairs = df[['term', 'category']].drop_duplicates().values.tolist()
-    # Filter out categories that have no subcategories after filtering Unknown answers
-    filtered_pairs = [(term, category) for term, category in all_pairs
-                      if category_has_subcategories(term, category, df)]
-    # Load category order config
-    config = load_category_order()
-    category_order = [cat['name'] for cat in config.get('categories', [])]
-    # Sort pairs by category order (preserving term order within each category)
-    # Create order mapping
-    order_map = {cat: idx for idx, cat in enumerate(category_order)}
-    # Sort: first by category order (using config), then by term name alphabetically
-    def sort_key(pair):
-        term, category = pair
-        # Get category position from config (or large number if not in config)
-        cat_position = order_map.get(category, len(category_order))
-        return (cat_position, term)
-    filtered_pairs.sort(key=sort_key)
     return filtered_pairs

     """Convert snake_case to Title Case"""
     return ' '.join(word.capitalize() for word in text.split('_'))
 def inject_tooltip_css():
     """Inject CSS to style radio button captions"""
     caption_css = """
                         (df['answer'] != "Unknown")]
         # Get all subcategories for this term-category pair (excluding Unknown answers)
+        # Sort by subcategory_index to maintain the configured order
+        subcat_data = filtered_df[['subcategory', 'subcategory_index']].drop_duplicates()
+        subcat_data = subcat_data.sort_values('subcategory_index')
+        subcategory_names = subcat_data['subcategory'].tolist()
         # Create Subcategory instances (only for non-Unknown answers)
         self.subcategories = [
         self.term_name = term_name
         self.formatted_name = format_snake_case(term_name)
+        # Get all categories for this term, sorted by category_index
+        cat_data = df[df['term'] == term_name][['category', 'category_index']].drop_duplicates()
+        cat_data = cat_data.sort_values('category_index')
+        category_names = cat_data['category'].tolist()
         # Create Category instances
         self.categories = [
 @st.cache_data
 def get_term_category_pairs(df):
     """Get filtered term-category pairs, cached to avoid recomputation on every rerun"""
+    # Get all unique term-category pairs with their category indexes
+    all_pairs_df = df[['term', 'category', 'category_index']].drop_duplicates()
+    # Sort by term name and category_index
+    all_pairs_df = all_pairs_df.sort_values(['term', 'category_index'])
+    # Filter out categories that have no subcategories after filtering Unknown answers
+    filtered_pairs = [(row['term'], row['category']) for _, row in all_pairs_df.iterrows()
+                      if category_has_subcategories(row['term'], row['category'], df)]
     return filtered_pairs