SorrelC committed on
Commit
bbea684
·
verified ·
1 Parent(s): 0d307e5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +89 -86
app.py CHANGED
@@ -37,6 +37,10 @@ KEYWORD_COLORS = [
37
  '#10AC84', '#EE5A24', '#0FBC89', '#5F27CD', '#FF3838'
38
  ]
39
 
 
 
 
 
40
  class KeywordExtractionManager:
41
  def __init__(self):
42
  self.rake_extractor = None
@@ -453,7 +457,7 @@ def get_relevance_level(score, max_score):
453
  else:
454
  return 'low'
455
 
456
- def create_highlighted_html(text, keywords):
457
  """Create HTML with highlighted keywords in the text"""
458
  if not keywords:
459
  return f"<div style='padding: 15px; border: 1px solid #ddd; border-radius: 5px; background-color: #fafafa;'><p>{text}</p></div>"
@@ -472,12 +476,15 @@ def create_highlighted_html(text, keywords):
472
  color = get_score_color(score, max_score)
473
  relevance = get_relevance_level(score, max_score)
474
 
 
 
 
475
  # Create regex pattern for whole word matching (case-insensitive)
476
  pattern = r'\b' + re.escape(keyword) + r'\b'
477
 
478
  # Replace with highlighted version
479
- replacement = f'<span class="keyword-{relevance}" style="background-color: {color}; padding: 2px 4px; ' \
480
- f'border-radius: 3px; margin: 0 1px; ' \
481
  f'border: 1px solid {color}; color: white; font-weight: bold;" ' \
482
  f'title="Score: {score:.3f}">{keyword}</span>'
483
 
@@ -486,11 +493,11 @@ def create_highlighted_html(text, keywords):
486
  return f"""
487
  <div style='padding: 15px; border: 2px solid #ddd; border-radius: 8px; background-color: #fafafa; margin: 10px 0;'>
488
  <h4 style='margin: 0 0 15px 0; color: #333;'>📝 Text with Highlighted Keywords</h4>
489
- <div id="highlighted-text" style='line-height: 1.8; font-size: 16px; background-color: white; padding: 15px; border-radius: 5px;'>{highlighted_text}</div>
490
  </div>
491
  """
492
 
493
- def create_keyword_table_html(keywords):
494
  """Create HTML table for keywords with filtering capability"""
495
  if not keywords:
496
  return "<p style='text-align: center; padding: 20px;'>No keywords found.</p>"
@@ -499,10 +506,22 @@ def create_keyword_table_html(keywords):
499
  sorted_keywords = sorted(keywords, key=lambda x: x['score'], reverse=True)
500
  max_score = sorted_keywords[0]['score'] if sorted_keywords else 1
501
 
 
 
 
 
 
 
 
 
 
 
 
 
502
  table_html = """
503
  <div style='max-height: 600px; overflow-y: auto; border: 2px solid #ddd; border-radius: 8px; padding: 20px; background-color: #fafafa;'>
504
  <h3 style="margin: 0 0 20px 0;">🎯 Extracted Keywords</h3>
505
- <table id="keywords-table" style="width: 100%; border-collapse: collapse; border: 1px solid #ddd; background-color: white;">
506
  <thead>
507
  <tr style="background-color: #4ECDC4; color: white;">
508
  <th style="padding: 12px; text-align: left; border: 1px solid #ddd;">Rank</th>
@@ -518,7 +537,6 @@ def create_keyword_table_html(keywords):
518
  for i, kw_data in enumerate(sorted_keywords):
519
  score = kw_data['score']
520
  color = get_score_color(score, max_score)
521
- relevance = get_relevance_level(score, max_score)
522
 
523
  # Create relevance bar
524
  bar_width = int((score / max_score) * 100) if max_score > 0 else 0
@@ -529,7 +547,7 @@ def create_keyword_table_html(keywords):
529
  """
530
 
531
  table_html += f"""
532
- <tr class="keyword-row relevance-{relevance}" style="background-color: #fff;">
533
  <td style="padding: 10px; border: 1px solid #ddd; text-align: center; font-weight: bold;">#{i+1}</td>
534
  <td style="padding: 10px; border: 1px solid #ddd; font-weight: bold;">{kw_data['keyword']}</td>
535
  <td style="padding: 10px; border: 1px solid #ddd;">
@@ -553,87 +571,23 @@ def create_keyword_table_html(keywords):
553
  return table_html
554
 
555
  def create_legend_html():
556
- """Create an interactive legend showing score colors"""
557
  html = """
558
  <div style='margin: 15px 0; padding: 15px; background-color: #f8f9fa; border-radius: 8px;'>
559
- <h4 style='margin: 0 0 15px 0;'>🎨 Relevance Score Legend (Click to Filter)</h4>
 
560
  <div style='display: flex; flex-wrap: wrap; gap: 15px;'>
561
- <button onclick="filterByRelevance('all')"
562
- style='background-color: #6c757d; padding: 8px 16px; border-radius: 15px;
563
- color: white; font-weight: bold; border: none; cursor: pointer;
564
- transition: all 0.3s ease;'>
565
- Show All
566
- </button>
567
- <button onclick="filterByRelevance('high')"
568
- style='background-color: #00B894; padding: 8px 16px; border-radius: 15px;
569
- color: white; font-weight: bold; border: none; cursor: pointer;
570
- transition: all 0.3s ease;'>
571
  High Relevance (70%+)
572
- </button>
573
- <button onclick="filterByRelevance('medium')"
574
- style='background-color: #F9CA24; padding: 8px 16px; border-radius: 15px;
575
- color: white; font-weight: bold; border: none; cursor: pointer;
576
- transition: all 0.3s ease;'>
577
  Medium Relevance (40-70%)
578
- </button>
579
- <button onclick="filterByRelevance('low')"
580
- style='background-color: #FF6B6B; padding: 8px 16px; border-radius: 15px;
581
- color: white; font-weight: bold; border: none; cursor: pointer;
582
- transition: all 0.3s ease;'>
583
  Low Relevance (<40%)
584
- </button>
585
  </div>
586
  </div>
587
-
588
- <script>
589
- function filterByRelevance(level) {
590
- const table = document.getElementById('keywords-table');
591
- const rows = table.getElementsByClassName('keyword-row');
592
- const textContainer = document.getElementById('highlighted-text');
593
- const keywords = textContainer.getElementsByTagName('span');
594
-
595
- // Filter table rows
596
- for (let row of rows) {
597
- if (level === 'all') {
598
- row.style.display = '';
599
- } else {
600
- if (row.classList.contains('relevance-' + level)) {
601
- row.style.display = '';
602
- } else {
603
- row.style.display = 'none';
604
- }
605
- }
606
- }
607
-
608
- // Highlight keywords in text
609
- for (let keyword of keywords) {
610
- if (level === 'all') {
611
- keyword.style.opacity = '1';
612
- keyword.style.filter = 'none';
613
- } else {
614
- if (keyword.classList.contains('keyword-' + level)) {
615
- keyword.style.opacity = '1';
616
- keyword.style.filter = 'none';
617
- } else {
618
- keyword.style.opacity = '0.3';
619
- keyword.style.filter = 'grayscale(100%)';
620
- }
621
- }
622
- }
623
-
624
- // Update button styles
625
- const buttons = document.querySelectorAll('button');
626
- buttons.forEach(button => {
627
- if (button.onclick && button.onclick.toString().includes(level)) {
628
- button.style.transform = 'scale(1.1)';
629
- button.style.boxShadow = '0 4px 8px rgba(0,0,0,0.2)';
630
- } else {
631
- button.style.transform = 'scale(1)';
632
- button.style.boxShadow = 'none';
633
- }
634
- });
635
- }
636
- </script>
637
  """
638
  return html
639
 
@@ -643,11 +597,16 @@ keyword_manager = KeywordExtractionManager()
643
 
644
  def process_text(text, selected_model, num_keywords, ngram_min, ngram_max, progress=gr.Progress()):
645
  """Main processing function for Gradio interface with progress tracking"""
 
 
646
  if not text.strip():
647
- return "❌ Please enter some text to analyse", "", ""
648
 
649
  progress(0.1, desc="Initialising...")
650
 
 
 
 
651
  # Extract keywords
652
  progress(0.2, desc="Extracting keywords...")
653
  keywords = keyword_manager.extract_keywords(
@@ -659,7 +618,10 @@ def process_text(text, selected_model, num_keywords, ngram_min, ngram_max, progr
659
  )
660
 
661
  if not keywords:
662
- return "❌ No keywords found. Try adjusting the parameters.", "", ""
 
 
 
663
 
664
  progress(0.8, desc="Processing results...")
665
 
@@ -683,7 +645,26 @@ def process_text(text, selected_model, num_keywords, ngram_min, ngram_max, progr
683
 
684
  progress(1.0, desc="Complete!")
685
 
686
- return summary, legend_html + highlighted_html, results_html
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
687
 
688
  # Create Gradio interface
689
  def create_interface():
@@ -698,7 +679,7 @@ def create_interface():
698
  2. **🎯 Select a model** from the dropdown for keyword extraction
699
  3. **⚙️ Adjust parameters** (number of keywords, n-gram range)
700
  4. **🔍 Click "Extract Keywords"** to see results with organized output
701
- 5. **🎨 Click on the legend buttons** to filter keywords by relevance level
702
  """)
703
 
704
  # Add tip box
@@ -788,6 +769,21 @@ def create_interface():
788
 
789
  extract_btn = gr.Button("🔍 Extract Keywords", variant="primary", size="lg")
790
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
791
  # Output sections
792
  with gr.Row():
793
  summary_output = gr.Markdown(label="Summary")
@@ -811,7 +807,14 @@ def create_interface():
811
  ngram_min,
812
  ngram_max
813
  ],
814
- outputs=[summary_output, highlighted_output, results_output]
 
 
 
 
 
 
 
815
  )
816
 
817
  gr.Examples(
 
37
  '#10AC84', '#EE5A24', '#0FBC89', '#5F27CD', '#FF3838'
38
  ]
39
 
40
+ # Global variable to store current keywords for filtering
41
+ current_keywords = []
42
+ current_text = ""
43
+
44
  class KeywordExtractionManager:
45
  def __init__(self):
46
  self.rake_extractor = None
 
457
  else:
458
  return 'low'
459
 
460
+ def create_highlighted_html(text, keywords, filter_level='all'):
461
  """Create HTML with highlighted keywords in the text"""
462
  if not keywords:
463
  return f"<div style='padding: 15px; border: 1px solid #ddd; border-radius: 5px; background-color: #fafafa;'><p>{text}</p></div>"
 
476
  color = get_score_color(score, max_score)
477
  relevance = get_relevance_level(score, max_score)
478
 
479
+ # Apply filtering based on filter_level
480
+ opacity = '1' if filter_level == 'all' or relevance == filter_level else '0.3'
481
+
482
  # Create regex pattern for whole word matching (case-insensitive)
483
  pattern = r'\b' + re.escape(keyword) + r'\b'
484
 
485
  # Replace with highlighted version
486
+ replacement = f'<span style="background-color: {color}; padding: 2px 4px; ' \
487
+ f'border-radius: 3px; margin: 0 1px; opacity: {opacity}; ' \
488
  f'border: 1px solid {color}; color: white; font-weight: bold;" ' \
489
  f'title="Score: {score:.3f}">{keyword}</span>'
490
 
 
493
  return f"""
494
  <div style='padding: 15px; border: 2px solid #ddd; border-radius: 8px; background-color: #fafafa; margin: 10px 0;'>
495
  <h4 style='margin: 0 0 15px 0; color: #333;'>📝 Text with Highlighted Keywords</h4>
496
+ <div style='line-height: 1.8; font-size: 16px; background-color: white; padding: 15px; border-radius: 5px;'>{highlighted_text}</div>
497
  </div>
498
  """
499
 
500
+ def create_keyword_table_html(keywords, filter_level='all'):
501
  """Create HTML table for keywords with filtering capability"""
502
  if not keywords:
503
  return "<p style='text-align: center; padding: 20px;'>No keywords found.</p>"
 
506
  sorted_keywords = sorted(keywords, key=lambda x: x['score'], reverse=True)
507
  max_score = sorted_keywords[0]['score'] if sorted_keywords else 1
508
 
509
+ # Filter keywords based on filter_level
510
+ if filter_level != 'all':
511
+ filtered_keywords = []
512
+ for kw in sorted_keywords:
513
+ relevance = get_relevance_level(kw['score'], max_score)
514
+ if relevance == filter_level:
515
+ filtered_keywords.append(kw)
516
+ sorted_keywords = filtered_keywords
517
+
518
+ if not sorted_keywords:
519
+ return f"<p style='text-align: center; padding: 20px;'>No {filter_level} relevance keywords found.</p>"
520
+
521
  table_html = """
522
  <div style='max-height: 600px; overflow-y: auto; border: 2px solid #ddd; border-radius: 8px; padding: 20px; background-color: #fafafa;'>
523
  <h3 style="margin: 0 0 20px 0;">🎯 Extracted Keywords</h3>
524
+ <table style="width: 100%; border-collapse: collapse; border: 1px solid #ddd; background-color: white;">
525
  <thead>
526
  <tr style="background-color: #4ECDC4; color: white;">
527
  <th style="padding: 12px; text-align: left; border: 1px solid #ddd;">Rank</th>
 
537
  for i, kw_data in enumerate(sorted_keywords):
538
  score = kw_data['score']
539
  color = get_score_color(score, max_score)
 
540
 
541
  # Create relevance bar
542
  bar_width = int((score / max_score) * 100) if max_score > 0 else 0
 
547
  """
548
 
549
  table_html += f"""
550
+ <tr style="background-color: #fff;">
551
  <td style="padding: 10px; border: 1px solid #ddd; text-align: center; font-weight: bold;">#{i+1}</td>
552
  <td style="padding: 10px; border: 1px solid #ddd; font-weight: bold;">{kw_data['keyword']}</td>
553
  <td style="padding: 10px; border: 1px solid #ddd;">
 
571
  return table_html
572
 
573
  def create_legend_html():
574
+ """Create a legend showing score colors"""
575
  html = """
576
  <div style='margin: 15px 0; padding: 15px; background-color: #f8f9fa; border-radius: 8px;'>
577
+ <h4 style='margin: 0 0 15px 0;'>🎨 Relevance Score Legend</h4>
578
+ <p style='font-size: 14px; color: #666; margin-bottom: 15px;'>Use the radio buttons below to filter keywords by relevance level</p>
579
  <div style='display: flex; flex-wrap: wrap; gap: 15px;'>
580
+ <span style='background-color: #00B894; padding: 4px 12px; border-radius: 15px; color: white; font-weight: bold;'>
 
 
 
 
 
 
 
 
 
581
  High Relevance (70%+)
582
+ </span>
583
+ <span style='background-color: #F9CA24; padding: 4px 12px; border-radius: 15px; color: white; font-weight: bold;'>
 
 
 
584
  Medium Relevance (40-70%)
585
+ </span>
586
+ <span style='background-color: #FF6B6B; padding: 4px 12px; border-radius: 15px; color: white; font-weight: bold;'>
 
 
 
587
  Low Relevance (<40%)
588
+ </span>
589
  </div>
590
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
591
  """
592
  return html
593
 
 
597
 
598
  def process_text(text, selected_model, num_keywords, ngram_min, ngram_max, progress=gr.Progress()):
599
  """Main processing function for Gradio interface with progress tracking"""
600
+ global current_keywords, current_text
601
+
602
  if not text.strip():
603
+ return "❌ Please enter some text to analyse", "", "", gr.update(visible=False)
604
 
605
  progress(0.1, desc="Initialising...")
606
 
607
+ # Store the text globally for filtering
608
+ current_text = text
609
+
610
  # Extract keywords
611
  progress(0.2, desc="Extracting keywords...")
612
  keywords = keyword_manager.extract_keywords(
 
618
  )
619
 
620
  if not keywords:
621
+ return "❌ No keywords found. Try adjusting the parameters.", "", "", gr.update(visible=False)
622
+
623
+ # Store keywords globally for filtering
624
+ current_keywords = keywords
625
 
626
  progress(0.8, desc="Processing results...")
627
 
 
645
 
646
  progress(1.0, desc="Complete!")
647
 
648
+ # Make filter controls visible
649
+ return summary, legend_html + highlighted_html, results_html, gr.update(visible=True)
650
+
651
+ def filter_results(filter_level):
652
+ """Filter the results based on relevance level"""
653
+ global current_keywords, current_text
654
+
655
+ if not current_keywords:
656
+ return "", ""
657
+
658
+ # Update highlighted text
659
+ highlighted_html = create_highlighted_html(current_text, current_keywords, filter_level)
660
+
661
+ # Update table
662
+ results_html = create_keyword_table_html(current_keywords, filter_level)
663
+
664
+ # Add legend to highlighted output
665
+ legend_html = create_legend_html()
666
+
667
+ return legend_html + highlighted_html, results_html
668
 
669
  # Create Gradio interface
670
  def create_interface():
 
679
  2. **🎯 Select a model** from the dropdown for keyword extraction
680
  3. **⚙️ Adjust parameters** (number of keywords, n-gram range)
681
  4. **🔍 Click "Extract Keywords"** to see results with organized output
682
+ 5. **🎨 Use the filter buttons** to show keywords by relevance level
683
  """)
684
 
685
  # Add tip box
 
769
 
770
  extract_btn = gr.Button("🔍 Extract Keywords", variant="primary", size="lg")
771
 
772
+ # Filter controls (initially hidden)
773
+ with gr.Row(visible=False) as filter_row:
774
+ gr.Markdown("### 🎯 Filter by Relevance Level:")
775
+ filter_radio = gr.Radio(
776
+ choices=[
777
+ ("Show All", "all"),
778
+ ("High Relevance (70%+)", "high"),
779
+ ("Medium Relevance (40-70%)", "medium"),
780
+ ("Low Relevance (<40%)", "low")
781
+ ],
782
+ value="all",
783
+ label="",
784
+ interactive=True
785
+ )
786
+
787
  # Output sections
788
  with gr.Row():
789
  summary_output = gr.Markdown(label="Summary")
 
807
  ngram_min,
808
  ngram_max
809
  ],
810
+ outputs=[summary_output, highlighted_output, results_output, filter_row]
811
+ )
812
+
813
+ # Connect filter radio to filter function
814
+ filter_radio.change(
815
+ fn=filter_results,
816
+ inputs=[filter_radio],
817
+ outputs=[highlighted_output, results_output]
818
  )
819
 
820
  gr.Examples(