Fredrik Sitje committed on
Commit
f1694ed
·
1 Parent(s): 110375c

Added sorting order based on the order the TransLegal Client uses.

Browse files
Files changed (2) hide show
  1. config/category_order.json +69 -0
  2. src/streamlit_app.py +93 -7
config/category_order.json ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "categories": [
3
+ {
4
+ "name": "purpose",
5
+ "subcategories": ["purpose_goal", "historical_background", "historical_influence", "origin_system"]
6
+ },
7
+ {
8
+ "name": "legal_effect",
9
+ "subcategories": ["creates_right", "modifies_right", "extinguishes_right", "creates_obligation", "modifies_obligation", "extinguishes_obligation", "creates_status", "modifies_status", "extinguishes_status", "normative_structure", "judicial_interpretation"]
10
+ },
11
+ {
12
+ "name": "subjects",
13
+ "subcategories": ["natural_persons", "legal_entities", "state", "third_parties", "private_property", "public_property"]
14
+ },
15
+ {
16
+ "name": "legal_source",
17
+ "subcategories": ["based_on_statute", "based_on_case_law", "based_on_custom", "based_on_regulation"]
18
+ },
19
+ {
20
+ "name": "enforceability",
21
+ "subcategories": ["enforceable_by_whom", "enforceable_how"]
22
+ },
23
+ {
24
+ "name": "third_parties",
25
+ "subcategories": ["third_party_rights", "third_party_obligations"]
26
+ },
27
+ {
28
+ "name": "formal_requirements",
29
+ "subcategories": ["requires_written_document", "requires_registration", "requires_consent", "requires_notarization"]
30
+ },
31
+ {
32
+ "name": "limitations_or_conditions",
33
+ "subcategories": ["substantive_limitations", "procedural_limitations", "temporal_limitations", "geographical_limitations"]
34
+ },
35
+ {
36
+ "name": "public_policy_limits",
37
+ "subcategories": ["limited_by_public_policy", "voided_by_public_policy"]
38
+ },
39
+ {
40
+ "name": "remedies_consequences",
41
+ "subcategories": ["private_law_remedies", "public_law_remedies", "administrative_law_remedies", "criminal_law_remedies", "international_law_remedies"]
42
+ },
43
+ {
44
+ "name": "procedural_vs_substantive_nature",
45
+ "subcategories": ["is_procedural_or_substantive"]
46
+ },
47
+ {
48
+ "name": "direct_or_derivative_rights",
49
+ "subcategories": ["confers_rights_directly", "confers_rights_derivatively", "confers_obligations_directly", "confers_obligations_derivatively"]
50
+ },
51
+ {
52
+ "name": "private_vs_public_law",
53
+ "subcategories": ["is_private_or_public_law"]
54
+ },
55
+ {
56
+ "name": "legal_fictions_presumptions",
57
+ "subcategories": ["relies_on_fictions"]
58
+ },
59
+ {
60
+ "name": "systemic_classification",
61
+ "subcategories": ["legal_field_classification", "internal_classification", "authoritative_definition"]
62
+ },
63
+ {
64
+ "name": "cross_border_effects",
65
+ "subcategories": ["international_implications"]
66
+ }
67
+ ]
68
+ }
69
+
src/streamlit_app.py CHANGED
@@ -218,6 +218,55 @@ def format_snake_case(text):
218
  """Convert snake_case to Title Case"""
219
  return ' '.join(word.capitalize() for word in text.split('_'))
220
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
221
  def inject_tooltip_css():
222
  """Inject CSS to style radio button captions"""
223
  caption_css = """
@@ -662,8 +711,18 @@ class Category:
662
  (df['answer'] != "Unknown")]
663
 
664
  # Get all subcategories for this term-category pair (excluding Unknown answers)
665
- subcategory_names = filtered_df['subcategory'].unique()
666
- subcategory_names = sorted(subcategory_names.tolist())
 
 
 
 
 
 
 
 
 
 
667
 
668
  # Create Subcategory instances (only for non-Unknown answers)
669
  self.subcategories = [
@@ -680,8 +739,14 @@ class Term:
680
  self.formatted_name = format_snake_case(term_name)
681
 
682
  # Get all categories for this term
683
- category_names = df[df['term'] == term_name]['category'].unique()
684
- category_names = sorted(category_names.tolist())
 
 
 
 
 
 
685
 
686
  # Create Category instances
687
  self.categories = [
@@ -700,10 +765,31 @@ class Term:
700
  @st.cache_data
701
  def get_term_category_pairs(df):
702
  """Get filtered term-category pairs, cached to avoid recomputation on every rerun"""
 
 
 
703
  # Filter out categories that have no subcategories after filtering Unknown answers
704
- all_pairs = df[['term', 'category']].drop_duplicates().sort_values(['term', 'category']).values.tolist()
705
- return [(term, category) for term, category in all_pairs
706
- if category_has_subcategories(term, category, df)]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
707
 
708
  # Cache for Term instances (keyed by jurisdiction and term_name)
709
  term_cache = {}
 
218
  """Convert snake_case to Title Case"""
219
  return ' '.join(word.capitalize() for word in text.split('_'))
220
 
@st.cache_data
def load_category_order():
    """Load the category/subcategory ordering from ``config/category_order.json``.

    The file is looked up in the ``config`` directory located one level above
    the directory that contains this module.

    Returns:
        dict: The parsed configuration object. When the file cannot be read,
        cannot be parsed, or its top-level JSON value is not an object, a
        warning is shown and ``{"categories": []}`` is returned instead.
    """
    try:
        config_path = os.path.join(
            os.path.dirname(os.path.dirname(__file__)), 'config', 'category_order.json'
        )
        # Decode explicitly as UTF-8 so the result does not depend on the
        # platform's default locale encoding.
        with open(config_path, 'r', encoding='utf-8') as f:
            config = json.load(f)
        # Callers use ``config.get(...)``, so anything but a JSON object is
        # treated as an invalid configuration.
        if not isinstance(config, dict):
            raise ValueError("top-level JSON value must be an object")
        return config
    except Exception as e:
        # Deliberately broad: a broken config must degrade to the default
        # ordering (with a visible warning) rather than crash the app.
        st.warning(f"⚠️ Could not load category order config: {str(e)}. Using default alphabetical order.")
        return {"categories": []}
231
+
def _sorted_with_fallback(values):
    """Sort ``values`` naturally, or by string form if they cannot be compared."""
    values = list(values)
    try:
        return sorted(values)
    except TypeError:
        # Mixed, non-comparable types (e.g. a float among strings): order by
        # their string representation instead of failing.
        return sorted(values, key=str)


def sort_by_config_order(items, config_order, default_to_end=True):
    """
    Sort items according to a configured order.

    Args:
        items: Iterable of hashable items to sort
        config_order: List defining the desired order; if empty or None,
            the items are simply sorted in their natural order
        default_to_end: If True, append items not in config at the end
            (sorted among themselves); if False, exclude them

    Returns:
        New sorted list of items. Items that are not mutually comparable are
        ordered by their string representation instead of raising TypeError.
    """
    # Materialise once so that any iterable (not only a list) is supported.
    items = list(items)

    if not config_order:
        # Fallback to natural (alphabetical) order if no config
        return _sorted_with_fallback(items)

    # Map each configured item to its position in the config
    order_map = {item: idx for idx, item in enumerate(config_order)}

    # Configured items come first, ordered by their configured position.
    # sorted() is stable, so duplicates keep their relative input order.
    in_config = sorted(
        (item for item in items if item in order_map),
        key=order_map.__getitem__,
    )

    if not default_to_end:
        return in_config

    # Everything the config does not mention is appended, sorted on its own
    not_in_config = [item for item in items if item not in order_map]
    return in_config + _sorted_with_fallback(not_in_config)
269
+
270
  def inject_tooltip_css():
271
  """Inject CSS to style radio button captions"""
272
  caption_css = """
 
711
  (df['answer'] != "Unknown")]
712
 
713
  # Get all subcategories for this term-category pair (excluding Unknown answers)
714
+ subcategory_names = filtered_df['subcategory'].unique().tolist()
715
+
716
+ # Load category order config and sort subcategories accordingly
717
+ config = load_category_order()
718
+ subcategory_order = []
719
+ for cat in config.get('categories', []):
720
+ if cat['name'] == category_name:
721
+ subcategory_order = cat['subcategories']
722
+ break
723
+
724
+ # Sort subcategories using config order (items not in config are appended at the end)
725
+ subcategory_names = sort_by_config_order(subcategory_names, subcategory_order, default_to_end=True)
726
 
727
  # Create Subcategory instances (only for non-Unknown answers)
728
  self.subcategories = [
 
739
  self.formatted_name = format_snake_case(term_name)
740
 
741
  # Get all categories for this term
742
+ category_names = df[df['term'] == term_name]['category'].unique().tolist()
743
+
744
+ # Load category order config and sort categories accordingly
745
+ config = load_category_order()
746
+ category_order = [cat['name'] for cat in config.get('categories', [])]
747
+
748
+ # Sort categories using config order (items not in config are appended at the end)
749
+ category_names = sort_by_config_order(category_names, category_order, default_to_end=True)
750
 
751
  # Create Category instances
752
  self.categories = [
 
@st.cache_data
def get_term_category_pairs(df):
    """Get filtered term-category pairs, cached to avoid recomputation on every rerun.

    Pairs are ordered by the category's position in the category order
    config, then by term, then by category name. Categories that are not in
    the config share one position after all configured categories.

    Args:
        df: DataFrame with at least the 'term' and 'category' columns.

    Returns:
        List of (term, category) tuples in display order.
    """
    # Get all unique term-category pairs (sorting happens below)
    all_pairs = df[['term', 'category']].drop_duplicates().values.tolist()

    # Filter out categories that have no subcategories after filtering Unknown answers
    filtered_pairs = [(term, category) for term, category in all_pairs
                      if category_has_subcategories(term, category, df)]

    # Load category order config and map each category name to its position
    config = load_category_order()
    category_order = [cat['name'] for cat in config.get('categories', [])]
    order_map = {cat: idx for idx, cat in enumerate(category_order)}
    unlisted_position = len(category_order)

    def sort_key(pair):
        term, category = pair
        # Configured categories sort by their config position; all others
        # share a position after the last configured one.
        cat_position = order_map.get(category, unlisted_position)
        # The trailing category name breaks ties deterministically. Without
        # it, pairs with the same position and term would keep DataFrame row
        # order; with an empty config that means categories within a term
        # would not be alphabetical.
        return (cat_position, term, category)

    filtered_pairs.sort(key=sort_key)

    return filtered_pairs
793
 
794
  # Cache for Term instances (keyed by jurisdiction and term_name)
795
  term_cache = {}