Spaces:

SorrelC
/

KeywordExtraction-Explorer-Tool

Runtime error

App Files Files Community

SorrelC committed on Jul 17, 2025

Commit

bbea684

verified ·

1 Parent(s): 0d307e5

Update app.py

Browse files

Files changed (1) hide show

app.py +89 -86

app.py CHANGED Viewed

@@ -37,6 +37,10 @@ KEYWORD_COLORS = [
     '#10AC84', '#EE5A24', '#0FBC89', '#5F27CD', '#FF3838'
 ]
 class KeywordExtractionManager:
     def __init__(self):
         self.rake_extractor = None
@@ -453,7 +457,7 @@ def get_relevance_level(score, max_score):
     else:
         return 'low'
-def create_highlighted_html(text, keywords):
     """Create HTML with highlighted keywords in the text"""
     if not keywords:
         return f"<div style='padding: 15px; border: 1px solid #ddd; border-radius: 5px; background-color: #fafafa;'><p>{text}</p></div>"
@@ -472,12 +476,15 @@ def create_highlighted_html(text, keywords):
         color = get_score_color(score, max_score)
         relevance = get_relevance_level(score, max_score)
         # Create regex pattern for whole word matching (case-insensitive)
         pattern = r'\b' + re.escape(keyword) + r'\b'
         # Replace with highlighted version
-        replacement = f'<span class="keyword-{relevance}" style="background-color: {color}; padding: 2px 4px; ' \
-                     f'border-radius: 3px; margin: 0 1px; ' \
                      f'border: 1px solid {color}; color: white; font-weight: bold;" ' \
                      f'title="Score: {score:.3f}">{keyword}</span>'
@@ -486,11 +493,11 @@ def create_highlighted_html(text, keywords):
     return f"""
     <div style='padding: 15px; border: 2px solid #ddd; border-radius: 8px; background-color: #fafafa; margin: 10px 0;'>
         <h4 style='margin: 0 0 15px 0; color: #333;'>📝 Text with Highlighted Keywords</h4>
-        <div id="highlighted-text" style='line-height: 1.8; font-size: 16px; background-color: white; padding: 15px; border-radius: 5px;'>{highlighted_text}</div>
     </div>
     """
-def create_keyword_table_html(keywords):
     """Create HTML table for keywords with filtering capability"""
     if not keywords:
         return "<p style='text-align: center; padding: 20px;'>No keywords found.</p>"
@@ -499,10 +506,22 @@ def create_keyword_table_html(keywords):
     sorted_keywords = sorted(keywords, key=lambda x: x['score'], reverse=True)
     max_score = sorted_keywords[0]['score'] if sorted_keywords else 1
     table_html = """
     <div style='max-height: 600px; overflow-y: auto; border: 2px solid #ddd; border-radius: 8px; padding: 20px; background-color: #fafafa;'>
         <h3 style="margin: 0 0 20px 0;">🎯 Extracted Keywords</h3>
-        <table id="keywords-table" style="width: 100%; border-collapse: collapse; border: 1px solid #ddd; background-color: white;">
             <thead>
                 <tr style="background-color: #4ECDC4; color: white;">
                     <th style="padding: 12px; text-align: left; border: 1px solid #ddd;">Rank</th>
@@ -518,7 +537,6 @@ def create_keyword_table_html(keywords):
     for i, kw_data in enumerate(sorted_keywords):
         score = kw_data['score']
         color = get_score_color(score, max_score)
-        relevance = get_relevance_level(score, max_score)
         # Create relevance bar
         bar_width = int((score / max_score) * 100) if max_score > 0 else 0
@@ -529,7 +547,7 @@ def create_keyword_table_html(keywords):
         """
         table_html += f"""
-            <tr class="keyword-row relevance-{relevance}" style="background-color: #fff;">
                 <td style="padding: 10px; border: 1px solid #ddd; text-align: center; font-weight: bold;">#{i+1}</td>
                 <td style="padding: 10px; border: 1px solid #ddd; font-weight: bold;">{kw_data['keyword']}</td>
                 <td style="padding: 10px; border: 1px solid #ddd;">
@@ -553,87 +571,23 @@ def create_keyword_table_html(keywords):
     return table_html
 def create_legend_html():
-    """Create an interactive legend showing score colors"""
     html = """
     <div style='margin: 15px 0; padding: 15px; background-color: #f8f9fa; border-radius: 8px;'>
-        <h4 style='margin: 0 0 15px 0;'>🎨 Relevance Score Legend (Click to Filter)</h4>
         <div style='display: flex; flex-wrap: wrap; gap: 15px;'>
-            <button onclick="filterByRelevance('all')"
-                    style='background-color: #6c757d; padding: 8px 16px; border-radius: 15px;
-                           color: white; font-weight: bold; border: none; cursor: pointer;
-                           transition: all 0.3s ease;'>
-                Show All
-            </button>
-            <button onclick="filterByRelevance('high')"
-                    style='background-color: #00B894; padding: 8px 16px; border-radius: 15px;
-                           color: white; font-weight: bold; border: none; cursor: pointer;
-                           transition: all 0.3s ease;'>
                 High Relevance (70%+)
-            </button>
-            <button onclick="filterByRelevance('medium')"
-                    style='background-color: #F9CA24; padding: 8px 16px; border-radius: 15px;
-                           color: white; font-weight: bold; border: none; cursor: pointer;
-                           transition: all 0.3s ease;'>
                 Medium Relevance (40-70%)
-            </button>
-            <button onclick="filterByRelevance('low')"
-                    style='background-color: #FF6B6B; padding: 8px 16px; border-radius: 15px;
-                           color: white; font-weight: bold; border: none; cursor: pointer;
-                           transition: all 0.3s ease;'>
                 Low Relevance (<40%)
-            </button>
         </div>
     </div>
-    <script>
-    function filterByRelevance(level) {
-        const table = document.getElementById('keywords-table');
-        const rows = table.getElementsByClassName('keyword-row');
-        const textContainer = document.getElementById('highlighted-text');
-        const keywords = textContainer.getElementsByTagName('span');
-        // Filter table rows
-        for (let row of rows) {
-            if (level === 'all') {
-                row.style.display = '';
-            } else {
-                if (row.classList.contains('relevance-' + level)) {
-                    row.style.display = '';
-                } else {
-                    row.style.display = 'none';
-                }
-            }
-        }
-        // Highlight keywords in text
-        for (let keyword of keywords) {
-            if (level === 'all') {
-                keyword.style.opacity = '1';
-                keyword.style.filter = 'none';
-            } else {
-                if (keyword.classList.contains('keyword-' + level)) {
-                    keyword.style.opacity = '1';
-                    keyword.style.filter = 'none';
-                } else {
-                    keyword.style.opacity = '0.3';
-                    keyword.style.filter = 'grayscale(100%)';
-                }
-            }
-        }
-        // Update button styles
-        const buttons = document.querySelectorAll('button');
-        buttons.forEach(button => {
-            if (button.onclick && button.onclick.toString().includes(level)) {
-                button.style.transform = 'scale(1.1)';
-                button.style.boxShadow = '0 4px 8px rgba(0,0,0,0.2)';
-            } else {
-                button.style.transform = 'scale(1)';
-                button.style.boxShadow = 'none';
-            }
-        });
-    }
-    </script>
     """
     return html
@@ -643,11 +597,16 @@ keyword_manager = KeywordExtractionManager()
 def process_text(text, selected_model, num_keywords, ngram_min, ngram_max, progress=gr.Progress()):
     """Main processing function for Gradio interface with progress tracking"""
     if not text.strip():
-        return "❌ Please enter some text to analyse", "", ""
     progress(0.1, desc="Initialising...")
     # Extract keywords
     progress(0.2, desc="Extracting keywords...")
     keywords = keyword_manager.extract_keywords(
@@ -659,7 +618,10 @@ def process_text(text, selected_model, num_keywords, ngram_min, ngram_max, progr
     )
     if not keywords:
-        return "❌ No keywords found. Try adjusting the parameters.", "", ""
     progress(0.8, desc="Processing results...")
@@ -683,7 +645,26 @@ def process_text(text, selected_model, num_keywords, ngram_min, ngram_max, progr
     progress(1.0, desc="Complete!")
-    return summary, legend_html + highlighted_html, results_html
 # Create Gradio interface
 def create_interface():
@@ -698,7 +679,7 @@ def create_interface():
         2. **🎯 Select a model** from the dropdown for keyword extraction
         3. **⚙️ Adjust parameters** (number of keywords, n-gram range)
         4. **🔍 Click "Extract Keywords"** to see results with organized output
-        5. **🎨 Click on the legend buttons** to filter keywords by relevance level
         """)
         # Add tip box
@@ -788,6 +769,21 @@ def create_interface():
         extract_btn = gr.Button("🔍 Extract Keywords", variant="primary", size="lg")
         # Output sections
         with gr.Row():
             summary_output = gr.Markdown(label="Summary")
@@ -811,7 +807,14 @@ def create_interface():
                 ngram_min,
                 ngram_max
             ],
-            outputs=[summary_output, highlighted_output, results_output]
         )
         gr.Examples(

     '#10AC84', '#EE5A24', '#0FBC89', '#5F27CD', '#FF3838'
 ]
+# Module-level cache of the most recent extraction: process_text writes these
+# and filter_results reads them to re-render output for a chosen relevance level.
+# NOTE(review): being module globals, there is a single shared copy for the
+# whole process - confirm this is acceptable if several users can run the app
+# at the same time.
+current_keywords = []
+current_text = ""
 class KeywordExtractionManager:
     def __init__(self):
         self.rake_extractor = None
     else:
         return 'low'
+def create_highlighted_html(text, keywords, filter_level='all'):
     """Create HTML with highlighted keywords in the text"""
     if not keywords:
         return f"<div style='padding: 15px; border: 1px solid #ddd; border-radius: 5px; background-color: #fafafa;'><p>{text}</p></div>"
         color = get_score_color(score, max_score)
         relevance = get_relevance_level(score, max_score)
+        # Apply filtering based on filter_level
+        opacity = '1' if filter_level == 'all' or relevance == filter_level else '0.3'
         # Create regex pattern for whole word matching (case-insensitive)
         pattern = r'\b' + re.escape(keyword) + r'\b'
         # Replace with highlighted version
+        replacement = f'<span style="background-color: {color}; padding: 2px 4px; ' \
+                     f'border-radius: 3px; margin: 0 1px; opacity: {opacity}; ' \
                      f'border: 1px solid {color}; color: white; font-weight: bold;" ' \
                      f'title="Score: {score:.3f}">{keyword}</span>'
     return f"""
     <div style='padding: 15px; border: 2px solid #ddd; border-radius: 8px; background-color: #fafafa; margin: 10px 0;'>
         <h4 style='margin: 0 0 15px 0; color: #333;'>📝 Text with Highlighted Keywords</h4>
+        <div style='line-height: 1.8; font-size: 16px; background-color: white; padding: 15px; border-radius: 5px;'>{highlighted_text}</div>
     </div>
     """
+def create_keyword_table_html(keywords, filter_level='all'):
     """Create HTML table for keywords with filtering capability"""
     if not keywords:
         return "<p style='text-align: center; padding: 20px;'>No keywords found.</p>"
     sorted_keywords = sorted(keywords, key=lambda x: x['score'], reverse=True)
     max_score = sorted_keywords[0]['score'] if sorted_keywords else 1
+    # Filter keywords based on filter_level
+    if filter_level != 'all':
+        filtered_keywords = []
+        for kw in sorted_keywords:
+            relevance = get_relevance_level(kw['score'], max_score)
+            if relevance == filter_level:
+                filtered_keywords.append(kw)
+        sorted_keywords = filtered_keywords
+    if not sorted_keywords:
+        return f"<p style='text-align: center; padding: 20px;'>No {filter_level} relevance keywords found.</p>"
     table_html = """
     <div style='max-height: 600px; overflow-y: auto; border: 2px solid #ddd; border-radius: 8px; padding: 20px; background-color: #fafafa;'>
         <h3 style="margin: 0 0 20px 0;">🎯 Extracted Keywords</h3>
+        <table style="width: 100%; border-collapse: collapse; border: 1px solid #ddd; background-color: white;">
             <thead>
                 <tr style="background-color: #4ECDC4; color: white;">
                     <th style="padding: 12px; text-align: left; border: 1px solid #ddd;">Rank</th>
     for i, kw_data in enumerate(sorted_keywords):
         score = kw_data['score']
         color = get_score_color(score, max_score)
         # Create relevance bar
         bar_width = int((score / max_score) * 100) if max_score > 0 else 0
         """
         table_html += f"""
+            <tr style="background-color: #fff;">
                 <td style="padding: 10px; border: 1px solid #ddd; text-align: center; font-weight: bold;">#{i+1}</td>
                 <td style="padding: 10px; border: 1px solid #ddd; font-weight: bold;">{kw_data['keyword']}</td>
                 <td style="padding: 10px; border: 1px solid #ddd;">
     return table_html
 def create_legend_html():
+    """Return the static HTML legend for the three relevance bands.
+
+    The markup is display-only: a heading, a hint pointing at the radio
+    buttons, and one coloured badge each for high (70%+), medium (40-70%)
+    and low (<40%) relevance. Takes no arguments.
+    """
     html = """
     <div style='margin: 15px 0; padding: 15px; background-color: #f8f9fa; border-radius: 8px;'>
+        <h4 style='margin: 0 0 15px 0;'>🎨 Relevance Score Legend</h4>
+        <p style='font-size: 14px; color: #666; margin-bottom: 15px;'>Use the radio buttons below to filter keywords by relevance level</p>
         <div style='display: flex; flex-wrap: wrap; gap: 15px;'>
+            <span style='background-color: #00B894; padding: 4px 12px; border-radius: 15px; color: white; font-weight: bold;'>
                 High Relevance (70%+)
+            </span>
+            <span style='background-color: #F9CA24; padding: 4px 12px; border-radius: 15px; color: white; font-weight: bold;'>
                 Medium Relevance (40-70%)
+            </span>
+            <span style='background-color: #FF6B6B; padding: 4px 12px; border-radius: 15px; color: white; font-weight: bold;'>
                 Low Relevance (<40%)
+            </span>
         </div>
     </div>
     """
     return html
 def process_text(text, selected_model, num_keywords, ngram_min, ngram_max, progress=gr.Progress()):
     """Main processing function for Gradio interface with progress tracking"""
+    global current_keywords, current_text
     if not text.strip():
+        return "❌ Please enter some text to analyse", "", "", gr.update(visible=False)
     progress(0.1, desc="Initialising...")
+    # Store the text globally for filtering
+    current_text = text
     # Extract keywords
     progress(0.2, desc="Extracting keywords...")
     keywords = keyword_manager.extract_keywords(
     )
     if not keywords:
+        return "❌ No keywords found. Try adjusting the parameters.", "", "", gr.update(visible=False)
+    # Store keywords globally for filtering
+    current_keywords = keywords
     progress(0.8, desc="Processing results...")
     progress(1.0, desc="Complete!")
+    # Make filter controls visible
+    return summary, legend_html + highlighted_html, results_html, gr.update(visible=True)
+def filter_results(filter_level):
+    """Re-render the highlighted text and keyword table for one relevance level.
+
+    Works on the text and keywords that process_text stored in the
+    module-level ``current_text`` / ``current_keywords``.
+
+    Args:
+        filter_level: Relevance level forwarded unchanged to
+            create_highlighted_html and create_keyword_table_html. The radio
+            control wired to this function offers 'all', 'high', 'medium'
+            and 'low'.
+
+    Returns:
+        A 2-tuple ``(legend_html + highlighted_html, results_html)``, or
+        ``("", "")`` when ``current_keywords`` is empty.
+    """
+    # Both names are only read here; nothing in this function rebinds them.
+    global current_keywords, current_text
+    # No stored keywords: hand back empty strings for both outputs.
+    if not current_keywords:
+        return "", ""
+    # Update highlighted text
+    highlighted_html = create_highlighted_html(current_text, current_keywords, filter_level)
+    # Update table
+    results_html = create_keyword_table_html(current_keywords, filter_level)
+    # Prepend the legend, mirroring the second value process_text returns.
+    legend_html = create_legend_html()
+    return legend_html + highlighted_html, results_html
 # Create Gradio interface
 def create_interface():
         2. **🎯 Select a model** from the dropdown for keyword extraction
         3. **⚙️ Adjust parameters** (number of keywords, n-gram range)
         4. **🔍 Click "Extract Keywords"** to see results with organized output
+        5. **🎨 Use the filter buttons** to show keywords by relevance level
         """)
         # Add tip box
         extract_btn = gr.Button("🔍 Extract Keywords", variant="primary", size="lg")
+        # Filter controls (initially hidden)
+        with gr.Row(visible=False) as filter_row:
+            gr.Markdown("### 🎯 Filter by Relevance Level:")
+            filter_radio = gr.Radio(
+                choices=[
+                    ("Show All", "all"),
+                    ("High Relevance (70%+)", "high"),
+                    ("Medium Relevance (40-70%)", "medium"),
+                    ("Low Relevance (<40%)", "low")
+                ],
+                value="all",
+                label="",
+                interactive=True
+            )
         # Output sections
         with gr.Row():
             summary_output = gr.Markdown(label="Summary")
                 ngram_min,
                 ngram_max
             ],
+            outputs=[summary_output, highlighted_output, results_output, filter_row]
+        )
+        # Connect filter radio to filter function
+        filter_radio.change(
+            fn=filter_results,
+            inputs=[filter_radio],
+            outputs=[highlighted_output, results_output]
         )
         gr.Examples(