Spaces:

SorrelC
/

KeywordExtraction-Explorer-Tool

Sleeping

App Files Files Community

SorrelC committed on Jul 15, 2025

Commit

7ba3b38

verified ·

1 Parent(s): 3945fba

Update app.py

Browse files

Files changed (1) hide show

app.py +58 -68

app.py CHANGED Viewed

@@ -5,7 +5,6 @@ import re
 nltk.download('stopwords')
-# Models list
 AVAILABLE_MODELS = [
     "kw_pke_multipartiterank",
     "kw_pke_singlerank",
@@ -15,7 +14,6 @@ AVAILABLE_MODELS = [
     "kw_pke_positionrank"
 ]
-# Keyword extraction using PKE
 def extract_keywords_pke(text, model_choice, num_keywords):
     if model_choice == "kw_pke_multipartiterank":
         extractor = pke.unsupervised.MultipartiteRank()
@@ -33,57 +31,46 @@ def extract_keywords_pke(text, model_choice, num_keywords):
         return ["Error: Unknown model"]
     extractor.load_document(input=text, language='en', normalization=None)
-    if model_choice == "kw_pke_tfidf":
-        extractor.candidate_selection(n=3)
-    else:
-        extractor.candidate_selection()
     extractor.candidate_weighting()
-    keywords = [kw for kw, score in extractor.get_n_best(n=num_keywords)]
-    return keywords
-# Highlight keywords in text with styled spans
 def highlight_keywords(text, keywords):
-    if not keywords:
-        return text
     highlighted = text
     for kw in sorted(keywords, key=lambda k: -len(k)):
         pattern = re.compile(re.escape(kw), re.IGNORECASE)
         highlighted = pattern.sub(
-            f'<span style="background-color: #4ECDC4; color: white; padding: 2px 5px; border-radius: 5px; font-weight: bold;">{kw}</span>',
             highlighted
         )
     return highlighted
-# Create keyword list as HTML table
 def create_keywords_table(keywords):
     if not keywords:
         return "<p>No keywords found.</p>"
     table_html = """
-    <table style="width: 100%; border-collapse: collapse; border: 1px solid #ddd; font-size: 14px;">
         <thead>
             <tr style="background-color: #1976d2; color: white;">
-                <th style="padding: 8px; border: 1px solid #ddd; text-align: left;">Rank</th>
-                <th style="padding: 8px; border: 1px solid #ddd; text-align: left;">Keyword</th>
             </tr>
         </thead>
         <tbody>
     """
     for idx, kw in enumerate(keywords, 1):
         table_html += f"""
-        <tr>
-            <td style="padding: 8px; border: 1px solid #ddd;">{idx}</td>
-            <td style="padding: 8px; border: 1px solid #ddd; font-weight: bold;">{kw}</td>
-        </tr>
         """
     table_html += "</tbody></table>"
     return table_html
-# Main processing function
 def process_text(text, model_choice, num_keywords):
     if not text.strip():
         return "❌ Please enter text to analyse.", "", ""
@@ -93,59 +80,62 @@ def process_text(text, model_choice, num_keywords):
     keywords_table_html = create_keywords_table(keywords)
     summary_html = f"""
-    <div style="background-color: #f0f8ff; padding: 15px; border-radius: 8px; border: 1px solid #ddd; margin-bottom: 20px;">
-        <h3 style="margin-top: 0; color: #1976d2;">📊 Keyword Extraction Summary</h3>
         <p><strong>Model Used:</strong> {model_choice}</p>
-        <p><strong>Total Keywords Found:</strong> {len(keywords)}</p>
     </div>
     """
     highlighted_section = f"""
     <div style='padding: 15px; border: 2px solid #ddd; border-radius: 8px; background-color: #fafafa; margin: 10px 0;'>
-        <h4 style='margin: 0 0 15px 0; color: #333;'>📝 Text with Highlighted Keywords</h4>
         <div style='line-height: 1.8; font-size: 16px; background-color: white; padding: 15px; border-radius: 5px;'>{highlighted_html}</div>
     </div>
     """
-    return summary_html, highlighted_section, keywords_table_html
-# Build the Gradio interface
 def create_interface():
     with gr.Blocks(title="Keyword Explorer Tool") as demo:
         gr.Markdown("""
-        # 🔑 Keyword Explorer Tool
-        Discover the key concepts inside your text using established keyword extraction models.
-        **How to use:**
-        1. Paste your text in the input box.
-        2. Choose a keyword extraction model.
-        3. Set how many keywords you want to extract.
-        4. Click "Extract Keywords" to explore the results.
-        """)
         text_input = gr.Textbox(label="📝 Text to Analyse", placeholder="Paste your text here...", lines=10)
         with gr.Row():
-            model_dropdown = gr.Dropdown(
-                choices=AVAILABLE_MODELS,
-                value=AVAILABLE_MODELS[0],
-                label="Select Keyword Extraction Model"
-            )
-            num_keywords_slider = gr.Slider(
-                minimum=5,
-                maximum=50,
-                value=10,
-                step=1,
-                label="Number of Keywords"
-            )
-        analyse_btn = gr.Button("🚀 Extract Keywords")
-        summary_output = gr.HTML(label="Summary")
-        highlighted_output = gr.HTML(label="Highlighted Text")
-        keywords_table_output = gr.HTML(label="Keywords List")
         analyse_btn.click(
             fn=process_text,
@@ -154,18 +144,18 @@ def create_interface():
         )
         gr.HTML("""
-        <hr style="margin-top: 40px; margin-bottom: 20px;">
-        <div style="background-color: #f8f9fa; padding: 20px; border-radius: 8px; margin-top: 20px; text-align: center;">
-            <p style="font-size: 14px; line-height: 1.8; margin: 0;">
-                This <strong>Keyword Explorer Tool</strong> was created as part of the
-                <a href="https://digitalscholarship.web.ox.ac.uk/" target="_blank" style="color: #1976d2;">
-                    Digital Scholarship at Oxford (DiSc)
-                </a>
-                funded research project:
-                <em>Extracting Keywords from Crowdsourced Collections</em>.
-            </p>
-        </div>
-        """)
     return demo

 nltk.download('stopwords')
 AVAILABLE_MODELS = [
     "kw_pke_multipartiterank",
     "kw_pke_singlerank",
     "kw_pke_positionrank"
 ]
 def extract_keywords_pke(text, model_choice, num_keywords):
     if model_choice == "kw_pke_multipartiterank":
         extractor = pke.unsupervised.MultipartiteRank()
         return ["Error: Unknown model"]
     extractor.load_document(input=text, language='en', normalization=None)
+    extractor.candidate_selection(n=3) if model_choice == "kw_pke_tfidf" else extractor.candidate_selection()
     extractor.candidate_weighting()
+    return [kw for kw, score in extractor.get_n_best(n=num_keywords)]
 def highlight_keywords(text, keywords):
     highlighted = text
     for kw in sorted(keywords, key=lambda k: -len(k)):
         pattern = re.compile(re.escape(kw), re.IGNORECASE)
         highlighted = pattern.sub(
+            f'<span style="background-color: #1976d2; color: white; padding: 2px 5px; '
+            f'border-radius: 4px; font-weight: bold;">{kw}</span>',
             highlighted
         )
     return highlighted
 def create_keywords_table(keywords):
     if not keywords:
         return "<p>No keywords found.</p>"
     table_html = """
+    <table style="width: 100%; border-collapse: collapse; border: 1px solid #ddd;">
         <thead>
             <tr style="background-color: #1976d2; color: white;">
+                <th style="padding: 10px; text-align: left;">Rank</th>
+                <th style="padding: 10px; text-align: left;">Keyword</th>
             </tr>
         </thead>
         <tbody>
     """
     for idx, kw in enumerate(keywords, 1):
         table_html += f"""
+            <tr>
+                <td style="padding: 10px; border: 1px solid #ddd;">{idx}</td>
+                <td style="padding: 10px; border: 1px solid #ddd; font-weight: bold;">{kw}</td>
+            </tr>
         """
     table_html += "</tbody></table>"
     return table_html
 def process_text(text, model_choice, num_keywords):
     if not text.strip():
         return "❌ Please enter text to analyse.", "", ""
     keywords_table_html = create_keywords_table(keywords)
     summary_html = f"""
+    <div style="background-color: #f8f9fa; padding: 15px; border-radius: 8px; border: 1px solid #ddd; margin-bottom: 20px;">
+        <h3 style="margin-top: 0; color: #1976d2;">📊 Analysis Summary</h3>
         <p><strong>Model Used:</strong> {model_choice}</p>
+        <p><strong>Keywords Found:</strong> {len(keywords)}</p>
     </div>
     """
     highlighted_section = f"""
     <div style='padding: 15px; border: 2px solid #ddd; border-radius: 8px; background-color: #fafafa; margin: 10px 0;'>
+        <h4 style='margin: 0 0 15px 0; color: #1976d2;'>📝 Text with Highlighted Keywords</h4>
         <div style='line-height: 1.8; font-size: 16px; background-color: white; padding: 15px; border-radius: 5px;'>{highlighted_html}</div>
     </div>
     """
+    table_section = f"""
+    <div style="margin-top: 20px;">
+        <h4 style="color: #1976d2; margin-bottom: 10px;">📋 Extracted Keywords</h4>
+        {keywords_table_html}
+    </div>
+    """
+    return summary_html, highlighted_section, table_section
 def create_interface():
     with gr.Blocks(title="Keyword Explorer Tool") as demo:
         gr.Markdown("""
+# 🔑 Keyword Extraction (KE) Explorer Tool
+See what different Keyword Extraction (KE) models can do, and if this is useful to you.
+---
+### 🛠️ How to use:
+1. **📝 Paste your text** below.
+2. **🎛️ Choose a model** and **set keyword count**.
+3. **🔍 Click "Extract Keywords"** to see:
+   - 📊 A summary of results.
+   - ✨ Highlighted keywords inside your text.
+   - 📋 A full keyword list.
+---
+""")
         text_input = gr.Textbox(label="📝 Text to Analyse", placeholder="Paste your text here...", lines=10)
         with gr.Row():
+            model_dropdown = gr.Dropdown(choices=AVAILABLE_MODELS, value=AVAILABLE_MODELS[0], label="🎛️ Select Model")
+            num_keywords_slider = gr.Slider(minimum=5, maximum=50, value=10, step=1, label="🔢 Number of Keywords")
+        analyse_btn = gr.Button("🔍 Extract Keywords", variant="primary")
+        summary_output = gr.HTML()
+        highlighted_output = gr.HTML()
+        keywords_table_output = gr.HTML()
         analyse_btn.click(
             fn=process_text,
         )
         gr.HTML("""
+<hr style="margin-top: 40px; margin-bottom: 20px;">
+<div style="background-color: #f8f9fa; padding: 20px; border-radius: 8px; margin-top: 20px; text-align: center;">
+    <p style="font-size: 14px; line-height: 1.8; margin: 0;">
+        This <strong>Keyword Extraction Explorer Tool</strong> was created as part of the
+        <a href="https://digitalscholarship.web.ox.ac.uk/" target="_blank" style="color: #1976d2;">
+            Digital Scholarship at Oxford (DiSc)
+        </a>
+        funded research project:
+        <em>Extracting Keywords from Crowdsourced Collections</em>.
+    </p>
+</div>
+""")
     return demo