Fredrik Sitje committed on
Commit
c3069c3
·
1 Parent(s): f1694ed

Enhance README.md with new indexing columns for categories and subcategories. Remove the deprecated category order configuration file and update the Streamlit app to sort categories and subcategories by their respective indices. This improves the display order and keeps data presentation consistent.

Browse files
README.md CHANGED
@@ -58,10 +58,13 @@ To add a new jurisdiction to the repository:
58
  - **Required Structure:** The parquet file must contain the following columns:
59
  - `term` (string) - The legal term being assessed
60
  - `category` (string) - Category within the term
 
61
  - `subcategory` (string) - Subcategory within the category
 
62
  - `question` (string) - The question being asked
63
  - `answer` (string) - The AI-generated answer to be graded
64
  - **Special Values:** Answers can be `"Unknown."` or `"Unknown"` to indicate unknown/unavailable information (these are automatically scored as "Irrelevant / NA")
 
65
 
66
  3. **Create users directory:**
67
  - Create `{jurisdiction}/users/` directory with an empty `.gitkeep` file (so the directory is tracked in Git)
 
58
  - **Required Structure:** The parquet file must contain the following columns:
59
  - `term` (string) - The legal term being assessed
60
  - `category` (string) - Category within the term
61
+ - `category_index` (integer) - Display order for categories (lower numbers appear first)
62
  - `subcategory` (string) - Subcategory within the category
63
+ - `subcategory_index` (integer) - Display order for subcategories within each category (lower numbers appear first)
64
  - `question` (string) - The question being asked
65
  - `answer` (string) - The AI-generated answer to be graded
66
  - **Special Values:** Answers can be `"Unknown."` or `"Unknown"` to indicate unknown/unavailable information (these are automatically scored as "Irrelevant / NA")
67
+ - **Display Order:** The `category_index` and `subcategory_index` columns control the order in which categories and subcategories are displayed in the app. Items with lower index values appear first.
68
 
69
  3. **Create users directory:**
70
  - Create `{jurisdiction}/users/` directory with an empty `.gitkeep` file (so the directory is tracked in Git)
config/category_order.json DELETED
@@ -1,69 +0,0 @@
1
- {
2
- "categories": [
3
- {
4
- "name": "purpose",
5
- "subcategories": ["purpose_goal", "historical_background", "historical_influence", "origin_system"]
6
- },
7
- {
8
- "name": "legal_effect",
9
- "subcategories": ["creates_right", "modifies_right", "extinguishes_right", "creates_obligation", "modifies_obligation", "extinguishes_obligation", "creates_status", "modifies_status", "extinguishes_status", "normative_structure", "judicial_interpretation"]
10
- },
11
- {
12
- "name": "subjects",
13
- "subcategories": ["natural_persons", "legal_entities", "state", "third_parties", "private_property", "public_property"]
14
- },
15
- {
16
- "name": "legal_source",
17
- "subcategories": ["based_on_statute", "based_on_case_law", "based_on_custom", "based_on_regulation"]
18
- },
19
- {
20
- "name": "enforceability",
21
- "subcategories": ["enforceable_by_whom", "enforceable_how"]
22
- },
23
- {
24
- "name": "third_parties",
25
- "subcategories": ["third_party_rights", "third_party_obligations"]
26
- },
27
- {
28
- "name": "formal_requirements",
29
- "subcategories": ["requires_written_document", "requires_registration", "requires_consent", "requires_notarization"]
30
- },
31
- {
32
- "name": "limitations_or_conditions",
33
- "subcategories": ["substantive_limitations", "procedural_limitations", "temporal_limitations", "geographical_limitations"]
34
- },
35
- {
36
- "name": "public_policy_limits",
37
- "subcategories": ["limited_by_public_policy", "voided_by_public_policy"]
38
- },
39
- {
40
- "name": "remedies_consequences",
41
- "subcategories": ["private_law_remedies", "public_law_remedies", "administrative_law_remedies", "criminal_law_remedies", "international_law_remedies"]
42
- },
43
- {
44
- "name": "procedural_vs_substantive_nature",
45
- "subcategories": ["is_procedural_or_substantive"]
46
- },
47
- {
48
- "name": "direct_or_derivative_rights",
49
- "subcategories": ["confers_rights_directly", "confers_rights_derivatively", "confers_obligations_directly", "confers_obligations_derivatively"]
50
- },
51
- {
52
- "name": "private_vs_public_law",
53
- "subcategories": ["is_private_or_public_law"]
54
- },
55
- {
56
- "name": "legal_fictions_presumptions",
57
- "subcategories": ["relies_on_fictions"]
58
- },
59
- {
60
- "name": "systemic_classification",
61
- "subcategories": ["legal_field_classification", "internal_classification", "authoritative_definition"]
62
- },
63
- {
64
- "name": "cross_border_effects",
65
- "subcategories": ["international_implications"]
66
- }
67
- ]
68
- }
69
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/grading_template.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:21b094fa1c104c990de260611e55e7ae8914ea35cc6048049198056e0b7057a8
3
- size 186660
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27b4eb36dd848643a26b4d9ec102382c3ae19d36d859132149d82f927458fcec
3
+ size 188408
src/streamlit_app.py CHANGED
@@ -218,55 +218,6 @@ def format_snake_case(text):
218
  """Convert snake_case to Title Case"""
219
  return ' '.join(word.capitalize() for word in text.split('_'))
220
 
221
- @st.cache_data
222
- def load_category_order():
223
- """Load category order configuration from JSON file"""
224
- try:
225
- config_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'config', 'category_order.json')
226
- with open(config_path, 'r') as f:
227
- return json.load(f)
228
- except Exception as e:
229
- st.warning(f"⚠️ Could not load category order config: {str(e)}. Using default alphabetical order.")
230
- return {"categories": []}
231
-
232
- def sort_by_config_order(items, config_order, default_to_end=True):
233
- """
234
- Sort items according to a configured order.
235
-
236
- Args:
237
- items: List of items to sort
238
- config_order: List defining the desired order
239
- default_to_end: If True, append items not in config at the end; if False, exclude them
240
-
241
- Returns:
242
- Sorted list of items
243
- """
244
- if not config_order:
245
- # Fallback to alphabetical if no config
246
- return sorted(items)
247
-
248
- # Create a mapping of item -> position in config
249
- order_map = {item: idx for idx, item in enumerate(config_order)}
250
-
251
- # Separate items into those in config and those not
252
- in_config = []
253
- not_in_config = []
254
-
255
- for item in items:
256
- if item in order_map:
257
- in_config.append(item)
258
- else:
259
- not_in_config.append(item)
260
-
261
- # Sort items that are in config by their configured position
262
- in_config.sort(key=lambda x: order_map[x])
263
-
264
- # Combine: config items first, then others (sorted alphabetically) if default_to_end
265
- if default_to_end:
266
- return in_config + sorted(not_in_config)
267
- else:
268
- return in_config
269
-
270
  def inject_tooltip_css():
271
  """Inject CSS to style radio button captions"""
272
  caption_css = """
@@ -711,18 +662,10 @@ class Category:
711
  (df['answer'] != "Unknown")]
712
 
713
  # Get all subcategories for this term-category pair (excluding Unknown answers)
714
- subcategory_names = filtered_df['subcategory'].unique().tolist()
715
-
716
- # Load category order config and sort subcategories accordingly
717
- config = load_category_order()
718
- subcategory_order = []
719
- for cat in config.get('categories', []):
720
- if cat['name'] == category_name:
721
- subcategory_order = cat['subcategories']
722
- break
723
-
724
- # Sort subcategories using config order (items not in config are appended at the end)
725
- subcategory_names = sort_by_config_order(subcategory_names, subcategory_order, default_to_end=True)
726
 
727
  # Create Subcategory instances (only for non-Unknown answers)
728
  self.subcategories = [
@@ -738,15 +681,10 @@ class Term:
738
  self.term_name = term_name
739
  self.formatted_name = format_snake_case(term_name)
740
 
741
- # Get all categories for this term
742
- category_names = df[df['term'] == term_name]['category'].unique().tolist()
743
-
744
- # Load category order config and sort categories accordingly
745
- config = load_category_order()
746
- category_order = [cat['name'] for cat in config.get('categories', [])]
747
-
748
- # Sort categories using config order (items not in config are appended at the end)
749
- category_names = sort_by_config_order(category_names, category_order, default_to_end=True)
750
 
751
  # Create Category instances
752
  self.categories = [
@@ -765,29 +703,15 @@ class Term:
765
  @st.cache_data
766
  def get_term_category_pairs(df):
767
  """Get filtered term-category pairs, cached to avoid recomputation on every rerun"""
768
- # Get all unique term-category pairs (without sorting yet)
769
- all_pairs = df[['term', 'category']].drop_duplicates().values.tolist()
770
-
771
- # Filter out categories that have no subcategories after filtering Unknown answers
772
- filtered_pairs = [(term, category) for term, category in all_pairs
773
- if category_has_subcategories(term, category, df)]
774
 
775
- # Load category order config
776
- config = load_category_order()
777
- category_order = [cat['name'] for cat in config.get('categories', [])]
778
 
779
- # Sort pairs by category order (preserving term order within each category)
780
- # Create order mapping
781
- order_map = {cat: idx for idx, cat in enumerate(category_order)}
782
-
783
- # Sort: first by category order (using config), then by term name alphabetically
784
- def sort_key(pair):
785
- term, category = pair
786
- # Get category position from config (or large number if not in config)
787
- cat_position = order_map.get(category, len(category_order))
788
- return (cat_position, term)
789
-
790
- filtered_pairs.sort(key=sort_key)
791
 
792
  return filtered_pairs
793
 
 
218
  """Convert snake_case to Title Case"""
219
  return ' '.join(word.capitalize() for word in text.split('_'))
220
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
221
  def inject_tooltip_css():
222
  """Inject CSS to style radio button captions"""
223
  caption_css = """
 
662
  (df['answer'] != "Unknown")]
663
 
664
  # Get all subcategories for this term-category pair (excluding Unknown answers)
665
+ # Sort by subcategory_index to maintain the configured order
666
+ subcat_data = filtered_df[['subcategory', 'subcategory_index']].drop_duplicates()
667
+ subcat_data = subcat_data.sort_values('subcategory_index')
668
+ subcategory_names = subcat_data['subcategory'].tolist()
 
 
 
 
 
 
 
 
669
 
670
  # Create Subcategory instances (only for non-Unknown answers)
671
  self.subcategories = [
 
681
  self.term_name = term_name
682
  self.formatted_name = format_snake_case(term_name)
683
 
684
+ # Get all categories for this term, sorted by category_index
685
+ cat_data = df[df['term'] == term_name][['category', 'category_index']].drop_duplicates()
686
+ cat_data = cat_data.sort_values('category_index')
687
+ category_names = cat_data['category'].tolist()
 
 
 
 
 
688
 
689
  # Create Category instances
690
  self.categories = [
 
703
  @st.cache_data
704
  def get_term_category_pairs(df):
705
  """Get filtered term-category pairs, cached to avoid recomputation on every rerun"""
706
+ # Get all unique term-category pairs with their category indexes
707
+ all_pairs_df = df[['term', 'category', 'category_index']].drop_duplicates()
 
 
 
 
708
 
709
+ # Sort by term name and category_index
710
+ all_pairs_df = all_pairs_df.sort_values(['term', 'category_index'])
 
711
 
712
+ # Filter out categories that have no subcategories after filtering Unknown answers
713
+ filtered_pairs = [(row['term'], row['category']) for _, row in all_pairs_df.iterrows()
714
+ if category_has_subcategories(row['term'], row['category'], df)]
 
 
 
 
 
 
 
 
 
715
 
716
  return filtered_pairs
717