Spaces:
Running
Running
Fredrik Sitje committed on
Commit ·
c3069c3
1
Parent(s): f1694ed
Enhance README.md with new indexing columns for categories and subcategories. Remove the deprecated category order configuration file and update the Streamlit app to sort categories and subcategories based on their respective indices. This improves the display order and maintains consistency in data presentation.
Browse files
- README.md +3 -0
- config/category_order.json +0 -69
- src/grading_template.parquet +2 -2
- src/streamlit_app.py +15 -91
README.md
CHANGED
|
@@ -58,10 +58,13 @@ To add a new jurisdiction to the repository:
|
|
| 58 |
- **Required Structure:** The parquet file must contain the following columns:
|
| 59 |
- `term` (string) - The legal term being assessed
|
| 60 |
- `category` (string) - Category within the term
|
|
|
|
| 61 |
- `subcategory` (string) - Subcategory within the category
|
|
|
|
| 62 |
- `question` (string) - The question being asked
|
| 63 |
- `answer` (string) - The AI-generated answer to be graded
|
| 64 |
- **Special Values:** Answers can be `"Unknown."` or `"Unknown"` to indicate unknown/unavailable information (these are automatically scored as "Irrelevant / NA")
|
|
|
|
| 65 |
|
| 66 |
3. **Create users directory:**
|
| 67 |
- Create `{jurisdiction}/users/` directory with an empty `.gitkeep` file (so the directory is tracked in Git)
|
|
|
|
| 58 |
- **Required Structure:** The parquet file must contain the following columns:
|
| 59 |
- `term` (string) - The legal term being assessed
|
| 60 |
- `category` (string) - Category within the term
|
| 61 |
+
- `category_index` (integer) - Display order for categories (lower numbers appear first)
|
| 62 |
- `subcategory` (string) - Subcategory within the category
|
| 63 |
+
- `subcategory_index` (integer) - Display order for subcategories within each category (lower numbers appear first)
|
| 64 |
- `question` (string) - The question being asked
|
| 65 |
- `answer` (string) - The AI-generated answer to be graded
|
| 66 |
- **Special Values:** Answers can be `"Unknown."` or `"Unknown"` to indicate unknown/unavailable information (these are automatically scored as "Irrelevant / NA")
|
| 67 |
+
- **Display Order:** The `category_index` and `subcategory_index` columns control the order in which categories and subcategories are displayed in the app. Items with lower index values appear first.
|
| 68 |
|
| 69 |
3. **Create users directory:**
|
| 70 |
- Create `{jurisdiction}/users/` directory with an empty `.gitkeep` file (so the directory is tracked in Git)
|
config/category_order.json
DELETED
|
@@ -1,69 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"categories": [
|
| 3 |
-
{
|
| 4 |
-
"name": "purpose",
|
| 5 |
-
"subcategories": ["purpose_goal", "historical_background", "historical_influence", "origin_system"]
|
| 6 |
-
},
|
| 7 |
-
{
|
| 8 |
-
"name": "legal_effect",
|
| 9 |
-
"subcategories": ["creates_right", "modifies_right", "extinguishes_right", "creates_obligation", "modifies_obligation", "extinguishes_obligation", "creates_status", "modifies_status", "extinguishes_status", "normative_structure", "judicial_interpretation"]
|
| 10 |
-
},
|
| 11 |
-
{
|
| 12 |
-
"name": "subjects",
|
| 13 |
-
"subcategories": ["natural_persons", "legal_entities", "state", "third_parties", "private_property", "public_property"]
|
| 14 |
-
},
|
| 15 |
-
{
|
| 16 |
-
"name": "legal_source",
|
| 17 |
-
"subcategories": ["based_on_statute", "based_on_case_law", "based_on_custom", "based_on_regulation"]
|
| 18 |
-
},
|
| 19 |
-
{
|
| 20 |
-
"name": "enforceability",
|
| 21 |
-
"subcategories": ["enforceable_by_whom", "enforceable_how"]
|
| 22 |
-
},
|
| 23 |
-
{
|
| 24 |
-
"name": "third_parties",
|
| 25 |
-
"subcategories": ["third_party_rights", "third_party_obligations"]
|
| 26 |
-
},
|
| 27 |
-
{
|
| 28 |
-
"name": "formal_requirements",
|
| 29 |
-
"subcategories": ["requires_written_document", "requires_registration", "requires_consent", "requires_notarization"]
|
| 30 |
-
},
|
| 31 |
-
{
|
| 32 |
-
"name": "limitations_or_conditions",
|
| 33 |
-
"subcategories": ["substantive_limitations", "procedural_limitations", "temporal_limitations", "geographical_limitations"]
|
| 34 |
-
},
|
| 35 |
-
{
|
| 36 |
-
"name": "public_policy_limits",
|
| 37 |
-
"subcategories": ["limited_by_public_policy", "voided_by_public_policy"]
|
| 38 |
-
},
|
| 39 |
-
{
|
| 40 |
-
"name": "remedies_consequences",
|
| 41 |
-
"subcategories": ["private_law_remedies", "public_law_remedies", "administrative_law_remedies", "criminal_law_remedies", "international_law_remedies"]
|
| 42 |
-
},
|
| 43 |
-
{
|
| 44 |
-
"name": "procedural_vs_substantive_nature",
|
| 45 |
-
"subcategories": ["is_procedural_or_substantive"]
|
| 46 |
-
},
|
| 47 |
-
{
|
| 48 |
-
"name": "direct_or_derivative_rights",
|
| 49 |
-
"subcategories": ["confers_rights_directly", "confers_rights_derivatively", "confers_obligations_directly", "confers_obligations_derivatively"]
|
| 50 |
-
},
|
| 51 |
-
{
|
| 52 |
-
"name": "private_vs_public_law",
|
| 53 |
-
"subcategories": ["is_private_or_public_law"]
|
| 54 |
-
},
|
| 55 |
-
{
|
| 56 |
-
"name": "legal_fictions_presumptions",
|
| 57 |
-
"subcategories": ["relies_on_fictions"]
|
| 58 |
-
},
|
| 59 |
-
{
|
| 60 |
-
"name": "systemic_classification",
|
| 61 |
-
"subcategories": ["legal_field_classification", "internal_classification", "authoritative_definition"]
|
| 62 |
-
},
|
| 63 |
-
{
|
| 64 |
-
"name": "cross_border_effects",
|
| 65 |
-
"subcategories": ["international_implications"]
|
| 66 |
-
}
|
| 67 |
-
]
|
| 68 |
-
}
|
| 69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/grading_template.parquet
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:27b4eb36dd848643a26b4d9ec102382c3ae19d36d859132149d82f927458fcec
|
| 3 |
+
size 188408
|
src/streamlit_app.py
CHANGED
|
@@ -218,55 +218,6 @@ def format_snake_case(text):
|
|
| 218 |
"""Convert snake_case to Title Case"""
|
| 219 |
return ' '.join(word.capitalize() for word in text.split('_'))
|
| 220 |
|
| 221 |
-
@st.cache_data
|
| 222 |
-
def load_category_order():
|
| 223 |
-
"""Load category order configuration from JSON file"""
|
| 224 |
-
try:
|
| 225 |
-
config_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'config', 'category_order.json')
|
| 226 |
-
with open(config_path, 'r') as f:
|
| 227 |
-
return json.load(f)
|
| 228 |
-
except Exception as e:
|
| 229 |
-
st.warning(f"⚠️ Could not load category order config: {str(e)}. Using default alphabetical order.")
|
| 230 |
-
return {"categories": []}
|
| 231 |
-
|
| 232 |
-
def sort_by_config_order(items, config_order, default_to_end=True):
|
| 233 |
-
"""
|
| 234 |
-
Sort items according to a configured order.
|
| 235 |
-
|
| 236 |
-
Args:
|
| 237 |
-
items: List of items to sort
|
| 238 |
-
config_order: List defining the desired order
|
| 239 |
-
default_to_end: If True, append items not in config at the end; if False, exclude them
|
| 240 |
-
|
| 241 |
-
Returns:
|
| 242 |
-
Sorted list of items
|
| 243 |
-
"""
|
| 244 |
-
if not config_order:
|
| 245 |
-
# Fallback to alphabetical if no config
|
| 246 |
-
return sorted(items)
|
| 247 |
-
|
| 248 |
-
# Create a mapping of item -> position in config
|
| 249 |
-
order_map = {item: idx for idx, item in enumerate(config_order)}
|
| 250 |
-
|
| 251 |
-
# Separate items into those in config and those not
|
| 252 |
-
in_config = []
|
| 253 |
-
not_in_config = []
|
| 254 |
-
|
| 255 |
-
for item in items:
|
| 256 |
-
if item in order_map:
|
| 257 |
-
in_config.append(item)
|
| 258 |
-
else:
|
| 259 |
-
not_in_config.append(item)
|
| 260 |
-
|
| 261 |
-
# Sort items that are in config by their configured position
|
| 262 |
-
in_config.sort(key=lambda x: order_map[x])
|
| 263 |
-
|
| 264 |
-
# Combine: config items first, then others (sorted alphabetically) if default_to_end
|
| 265 |
-
if default_to_end:
|
| 266 |
-
return in_config + sorted(not_in_config)
|
| 267 |
-
else:
|
| 268 |
-
return in_config
|
| 269 |
-
|
| 270 |
def inject_tooltip_css():
|
| 271 |
"""Inject CSS to style radio button captions"""
|
| 272 |
caption_css = """
|
|
@@ -711,18 +662,10 @@ class Category:
|
|
| 711 |
(df['answer'] != "Unknown")]
|
| 712 |
|
| 713 |
# Get all subcategories for this term-category pair (excluding Unknown answers)
|
| 714 |
-
|
| 715 |
-
|
| 716 |
-
|
| 717 |
-
|
| 718 |
-
subcategory_order = []
|
| 719 |
-
for cat in config.get('categories', []):
|
| 720 |
-
if cat['name'] == category_name:
|
| 721 |
-
subcategory_order = cat['subcategories']
|
| 722 |
-
break
|
| 723 |
-
|
| 724 |
-
# Sort subcategories using config order (items not in config are appended at the end)
|
| 725 |
-
subcategory_names = sort_by_config_order(subcategory_names, subcategory_order, default_to_end=True)
|
| 726 |
|
| 727 |
# Create Subcategory instances (only for non-Unknown answers)
|
| 728 |
self.subcategories = [
|
|
@@ -738,15 +681,10 @@ class Term:
|
|
| 738 |
self.term_name = term_name
|
| 739 |
self.formatted_name = format_snake_case(term_name)
|
| 740 |
|
| 741 |
-
# Get all categories for this term
|
| 742 |
-
|
| 743 |
-
|
| 744 |
-
|
| 745 |
-
config = load_category_order()
|
| 746 |
-
category_order = [cat['name'] for cat in config.get('categories', [])]
|
| 747 |
-
|
| 748 |
-
# Sort categories using config order (items not in config are appended at the end)
|
| 749 |
-
category_names = sort_by_config_order(category_names, category_order, default_to_end=True)
|
| 750 |
|
| 751 |
# Create Category instances
|
| 752 |
self.categories = [
|
|
@@ -765,29 +703,15 @@ class Term:
|
|
| 765 |
@st.cache_data
|
| 766 |
def get_term_category_pairs(df):
|
| 767 |
"""Get filtered term-category pairs, cached to avoid recomputation on every rerun"""
|
| 768 |
-
# Get all unique term-category pairs
|
| 769 |
-
|
| 770 |
-
|
| 771 |
-
# Filter out categories that have no subcategories after filtering Unknown answers
|
| 772 |
-
filtered_pairs = [(term, category) for term, category in all_pairs
|
| 773 |
-
if category_has_subcategories(term, category, df)]
|
| 774 |
|
| 775 |
-
#
|
| 776 |
-
|
| 777 |
-
category_order = [cat['name'] for cat in config.get('categories', [])]
|
| 778 |
|
| 779 |
-
#
|
| 780 |
-
|
| 781 |
-
|
| 782 |
-
|
| 783 |
-
# Sort: first by category order (using config), then by term name alphabetically
|
| 784 |
-
def sort_key(pair):
|
| 785 |
-
term, category = pair
|
| 786 |
-
# Get category position from config (or large number if not in config)
|
| 787 |
-
cat_position = order_map.get(category, len(category_order))
|
| 788 |
-
return (cat_position, term)
|
| 789 |
-
|
| 790 |
-
filtered_pairs.sort(key=sort_key)
|
| 791 |
|
| 792 |
return filtered_pairs
|
| 793 |
|
|
|
|
| 218 |
"""Convert snake_case to Title Case"""
|
| 219 |
return ' '.join(word.capitalize() for word in text.split('_'))
|
| 220 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 221 |
def inject_tooltip_css():
|
| 222 |
"""Inject CSS to style radio button captions"""
|
| 223 |
caption_css = """
|
|
|
|
| 662 |
(df['answer'] != "Unknown")]
|
| 663 |
|
| 664 |
# Get all subcategories for this term-category pair (excluding Unknown answers)
|
| 665 |
+
# Sort by subcategory_index to maintain the configured order
|
| 666 |
+
subcat_data = filtered_df[['subcategory', 'subcategory_index']].drop_duplicates()
|
| 667 |
+
subcat_data = subcat_data.sort_values('subcategory_index')
|
| 668 |
+
subcategory_names = subcat_data['subcategory'].tolist()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 669 |
|
| 670 |
# Create Subcategory instances (only for non-Unknown answers)
|
| 671 |
self.subcategories = [
|
|
|
|
| 681 |
self.term_name = term_name
|
| 682 |
self.formatted_name = format_snake_case(term_name)
|
| 683 |
|
| 684 |
+
# Get all categories for this term, sorted by category_index
|
| 685 |
+
cat_data = df[df['term'] == term_name][['category', 'category_index']].drop_duplicates()
|
| 686 |
+
cat_data = cat_data.sort_values('category_index')
|
| 687 |
+
category_names = cat_data['category'].tolist()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 688 |
|
| 689 |
# Create Category instances
|
| 690 |
self.categories = [
|
|
|
|
| 703 |
@st.cache_data
|
| 704 |
def get_term_category_pairs(df):
|
| 705 |
"""Get filtered term-category pairs, cached to avoid recomputation on every rerun"""
|
| 706 |
+
# Get all unique term-category pairs with their category indexes
|
| 707 |
+
all_pairs_df = df[['term', 'category', 'category_index']].drop_duplicates()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 708 |
|
| 709 |
+
# Sort by term name and category_index
|
| 710 |
+
all_pairs_df = all_pairs_df.sort_values(['term', 'category_index'])
|
|
|
|
| 711 |
|
| 712 |
+
# Filter out categories that have no subcategories after filtering Unknown answers
|
| 713 |
+
filtered_pairs = [(row['term'], row['category']) for _, row in all_pairs_df.iterrows()
|
| 714 |
+
if category_has_subcategories(row['term'], row['category'], df)]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 715 |
|
| 716 |
return filtered_pairs
|
| 717 |
|