rafmacalaba committed on
Commit
ee38d1e
·
1 Parent(s): 148fbc3

add search capability

Browse files
Files changed (1) hide show
  1. app.py +74 -12
app.py CHANGED
@@ -110,6 +110,14 @@ class ComparisonManager:
110
  if term not in self.term_groups:
111
  return 0
112
  return len(self.term_groups[term]['validated' if is_validated else 'not_validated'])
 
 
 
 
 
 
 
 
113
 
114
 
115
  # ── Highlight utils ──────────────────────────────────────────────────────────
@@ -144,7 +152,7 @@ def prepare_for_highlight(rec: Dict) -> List[Tuple[str, Optional[str]]]:
144
 
145
 
146
  # ── Filtering helpers ─────────────────────────────────────────────────────────
147
- def record_matches_filters(rec: Dict, dataset_filter: str, type_filter: str):
148
  is_validated = rec.get("validated", False)
149
  tags = rec.get("tags", [])
150
 
@@ -156,7 +164,22 @@ def record_matches_filters(rec: Dict, dataset_filter: str, type_filter: str):
156
  return "borderline" in tags
157
 
158
  if type_filter != "All types":
159
- return rec.get("type") == type_filter
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
 
161
  return True
162
 
@@ -345,10 +368,10 @@ def create_demo() -> gr.Blocks:
345
  return segs, idx, make_info(rec)
346
 
347
  # When filters change → jump to first matching record
348
- def jump_on_filters(dataset_filter, type_filter):
349
  n = dynamic_dataset.len
350
  for i in range(n):
351
- if record_matches_filters(data[i], dataset_filter, type_filter):
352
  dynamic_dataset.current = i
353
  rec = data[i]
354
  segs = prepare_for_highlight(rec)
@@ -358,11 +381,11 @@ def create_demo() -> gr.Blocks:
358
  return [], 0, "⚠️ No matching records found with the selected filters."
359
 
360
  # Navigation respecting filters
361
- def nav_next(dataset_filter, type_filter):
362
  i = dynamic_dataset.current + 1
363
  n = dynamic_dataset.len
364
  while i < n:
365
- if record_matches_filters(data[i], dataset_filter, type_filter):
366
  break
367
  i += 1
368
  if i >= n:
@@ -371,10 +394,10 @@ def create_demo() -> gr.Blocks:
371
  rec = data[i]
372
  return prepare_for_highlight(rec), i, make_info(rec)
373
 
374
- def nav_prev(dataset_filter, type_filter):
375
  i = dynamic_dataset.current - 1
376
  while i >= 0:
377
- if record_matches_filters(data[i], dataset_filter, type_filter):
378
  break
379
  i -= 1
380
  if i < 0:
@@ -466,6 +489,12 @@ def create_demo() -> gr.Blocks:
466
  value="All types",
467
  label="📂 Filter by Data Type",
468
  )
 
 
 
 
 
 
469
 
470
  inp_box = gr.HighlightedText(
471
  label="📄 Document Text (with highlighted dataset mentions)",
@@ -497,24 +526,29 @@ def create_demo() -> gr.Blocks:
497
  # Filters
498
  dataset_filter.change(
499
  fn=jump_on_filters,
500
- inputs=[dataset_filter, type_filter],
501
  outputs=[inp_box, prog, info_md],
502
  )
503
  type_filter.change(
504
  fn=jump_on_filters,
505
- inputs=[dataset_filter, type_filter],
 
 
 
 
 
506
  outputs=[inp_box, prog, info_md],
507
  )
508
 
509
  # Prev / Next navigation respecting filters
510
  prev_btn.click(
511
  fn=nav_prev,
512
- inputs=[dataset_filter, type_filter],
513
  outputs=[inp_box, prog, info_md],
514
  )
515
  next_btn.click(
516
  fn=nav_next,
517
- inputs=[dataset_filter, type_filter],
518
  outputs=[inp_box, prog, info_md],
519
  )
520
 
@@ -556,6 +590,13 @@ def create_demo() -> gr.Blocks:
556
  # Term comparison
557
  with gr.Group(visible=False) as term_comparison_group:
558
  gr.Markdown("**Compare by Term**: See how the exact same term appears in different validation contexts")
 
 
 
 
 
 
 
559
  comp_term_selector = gr.Dropdown(
560
  choices=comparison_manager.confusing_terms,
561
  value=comparison_manager.confusing_terms[0] if comparison_manager.confusing_terms else None,
@@ -619,6 +660,27 @@ def create_demo() -> gr.Blocks:
619
  )
620
 
621
  # Term comparison events
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
622
  comp_term_selector.change(
623
  fn=lambda: (0, 0),
624
  outputs=[term_pos_idx_state, term_neg_idx_state]
 
110
  if term not in self.term_groups:
111
  return 0
112
  return len(self.term_groups[term]['validated' if is_validated else 'not_validated'])
113
+
114
+ def filter_terms(self, search_query: str) -> List[str]:
115
+ """Filter confusing terms by search query (case-insensitive substring match)."""
116
+ if not search_query or not search_query.strip():
117
+ return self.confusing_terms
118
+
119
+ query = search_query.strip().lower()
120
+ return [term for term in self.confusing_terms if query in term.lower()]
121
 
122
 
123
  # ── Highlight utils ──────────────────────────────────────────────────────────
 
152
 
153
 
154
  # ── Filtering helpers ─────────────────────────────────────────────────────────
155
+ def record_matches_filters(rec: Dict, dataset_filter: str, type_filter: str, term_search: str = ""):
156
  is_validated = rec.get("validated", False)
157
  tags = rec.get("tags", [])
158
 
 
164
  return "borderline" in tags
165
 
166
  if type_filter != "All types":
167
+ if rec.get("type") != type_filter:
168
+ return False
169
+
170
+ # Term search filter
171
+ if term_search and term_search.strip():
172
+ query = term_search.strip().lower()
173
+ # Extract term from ner_text
174
+ if rec.get('ner_text') and len(rec['ner_text']) > 0 and rec.get('text'):
175
+ start, end, label = rec['ner_text'][0]
176
+ term = rec['text'][start:end]
177
+ if term and query in term.lower():
178
+ return True
179
+ else:
180
+ return False
181
+ else:
182
+ return False
183
 
184
  return True
185
 
 
368
  return segs, idx, make_info(rec)
369
 
370
  # When filters change → jump to first matching record
371
+ def jump_on_filters(dataset_filter, type_filter, term_search):
372
  n = dynamic_dataset.len
373
  for i in range(n):
374
+ if record_matches_filters(data[i], dataset_filter, type_filter, term_search):
375
  dynamic_dataset.current = i
376
  rec = data[i]
377
  segs = prepare_for_highlight(rec)
 
381
  return [], 0, "⚠️ No matching records found with the selected filters."
382
 
383
  # Navigation respecting filters
384
+ def nav_next(dataset_filter, type_filter, term_search):
385
  i = dynamic_dataset.current + 1
386
  n = dynamic_dataset.len
387
  while i < n:
388
+ if record_matches_filters(data[i], dataset_filter, type_filter, term_search):
389
  break
390
  i += 1
391
  if i >= n:
 
394
  rec = data[i]
395
  return prepare_for_highlight(rec), i, make_info(rec)
396
 
397
+ def nav_prev(dataset_filter, type_filter, term_search):
398
  i = dynamic_dataset.current - 1
399
  while i >= 0:
400
+ if record_matches_filters(data[i], dataset_filter, type_filter, term_search):
401
  break
402
  i -= 1
403
  if i < 0:
 
489
  value="All types",
490
  label="📂 Filter by Data Type",
491
  )
492
+
493
+ term_search = gr.Textbox(
494
+ label="🔍 Search by Term",
495
+ placeholder="Type to filter by dataset term (e.g., 'MIS' to find EMIS, MIS, MIS database...)",
496
+ value="",
497
+ )
498
 
499
  inp_box = gr.HighlightedText(
500
  label="📄 Document Text (with highlighted dataset mentions)",
 
526
  # Filters
527
  dataset_filter.change(
528
  fn=jump_on_filters,
529
+ inputs=[dataset_filter, type_filter, term_search],
530
  outputs=[inp_box, prog, info_md],
531
  )
532
  type_filter.change(
533
  fn=jump_on_filters,
534
+ inputs=[dataset_filter, type_filter, term_search],
535
+ outputs=[inp_box, prog, info_md],
536
+ )
537
+ term_search.change(
538
+ fn=jump_on_filters,
539
+ inputs=[dataset_filter, type_filter, term_search],
540
  outputs=[inp_box, prog, info_md],
541
  )
542
 
543
  # Prev / Next navigation respecting filters
544
  prev_btn.click(
545
  fn=nav_prev,
546
+ inputs=[dataset_filter, type_filter, term_search],
547
  outputs=[inp_box, prog, info_md],
548
  )
549
  next_btn.click(
550
  fn=nav_next,
551
+ inputs=[dataset_filter, type_filter, term_search],
552
  outputs=[inp_box, prog, info_md],
553
  )
554
 
 
590
  # Term comparison
591
  with gr.Group(visible=False) as term_comparison_group:
592
  gr.Markdown("**Compare by Term**: See how the exact same term appears in different validation contexts")
593
+
594
+ term_search_box = gr.Textbox(
595
+ label="🔍 Search Terms",
596
+ placeholder="Type to filter terms (e.g., 'MIS' to find EMIS, MIS, MIS database...)",
597
+ value="",
598
+ )
599
+
600
  comp_term_selector = gr.Dropdown(
601
  choices=comparison_manager.confusing_terms,
602
  value=comparison_manager.confusing_terms[0] if comparison_manager.confusing_terms else None,
 
660
  )
661
 
662
  # Term comparison events
663
+ def update_term_dropdown(search_query):
664
+ """Update dropdown choices based on search query."""
665
+ filtered_terms = comparison_manager.filter_terms(search_query)
666
+ if filtered_terms:
667
+ return gr.update(choices=filtered_terms, value=filtered_terms[0])
668
+ else:
669
+ return gr.update(choices=[], value=None)
670
+
671
+ term_search_box.change(
672
+ fn=update_term_dropdown,
673
+ inputs=[term_search_box],
674
+ outputs=[comp_term_selector]
675
+ ).then(
676
+ fn=lambda: (0, 0),
677
+ outputs=[term_pos_idx_state, term_neg_idx_state]
678
+ ).then(
679
+ fn=load_term_comparison,
680
+ inputs=[comp_term_selector, term_pos_idx_state, term_neg_idx_state],
681
+ outputs=[term_pos_hl_box, term_pos_info_box, term_neg_hl_box, term_neg_info_box, term_pos_header, term_neg_header]
682
+ )
683
+
684
  comp_term_selector.change(
685
  fn=lambda: (0, 0),
686
  outputs=[term_pos_idx_state, term_neg_idx_state]