SorrelC committed on
Commit
7ba3b38
·
verified ·
1 Parent(s): 3945fba

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -68
app.py CHANGED
@@ -5,7 +5,6 @@ import re
5
 
6
  nltk.download('stopwords')
7
 
8
- # Models list
9
  AVAILABLE_MODELS = [
10
  "kw_pke_multipartiterank",
11
  "kw_pke_singlerank",
@@ -15,7 +14,6 @@ AVAILABLE_MODELS = [
15
  "kw_pke_positionrank"
16
  ]
17
 
18
- # Keyword extraction using PKE
19
  def extract_keywords_pke(text, model_choice, num_keywords):
20
  if model_choice == "kw_pke_multipartiterank":
21
  extractor = pke.unsupervised.MultipartiteRank()
@@ -33,57 +31,46 @@ def extract_keywords_pke(text, model_choice, num_keywords):
33
  return ["Error: Unknown model"]
34
 
35
  extractor.load_document(input=text, language='en', normalization=None)
36
-
37
- if model_choice == "kw_pke_tfidf":
38
- extractor.candidate_selection(n=3)
39
- else:
40
- extractor.candidate_selection()
41
-
42
  extractor.candidate_weighting()
43
 
44
- keywords = [kw for kw, score in extractor.get_n_best(n=num_keywords)]
45
- return keywords
46
 
47
- # Highlight keywords in text with styled spans
48
  def highlight_keywords(text, keywords):
49
- if not keywords:
50
- return text
51
-
52
  highlighted = text
53
  for kw in sorted(keywords, key=lambda k: -len(k)):
54
  pattern = re.compile(re.escape(kw), re.IGNORECASE)
55
  highlighted = pattern.sub(
56
- f'<span style="background-color: #4ECDC4; color: white; padding: 2px 5px; border-radius: 5px; font-weight: bold;">{kw}</span>',
 
57
  highlighted
58
  )
59
  return highlighted
60
 
61
- # Create keyword list as HTML table
62
  def create_keywords_table(keywords):
63
  if not keywords:
64
  return "<p>No keywords found.</p>"
65
 
66
  table_html = """
67
- <table style="width: 100%; border-collapse: collapse; border: 1px solid #ddd; font-size: 14px;">
68
  <thead>
69
  <tr style="background-color: #1976d2; color: white;">
70
- <th style="padding: 8px; border: 1px solid #ddd; text-align: left;">Rank</th>
71
- <th style="padding: 8px; border: 1px solid #ddd; text-align: left;">Keyword</th>
72
  </tr>
73
  </thead>
74
  <tbody>
75
  """
76
  for idx, kw in enumerate(keywords, 1):
77
  table_html += f"""
78
- <tr>
79
- <td style="padding: 8px; border: 1px solid #ddd;">{idx}</td>
80
- <td style="padding: 8px; border: 1px solid #ddd; font-weight: bold;">{kw}</td>
81
- </tr>
82
  """
83
  table_html += "</tbody></table>"
84
  return table_html
85
 
86
- # Main processing function
87
  def process_text(text, model_choice, num_keywords):
88
  if not text.strip():
89
  return "❌ Please enter text to analyse.", "", ""
@@ -93,59 +80,62 @@ def process_text(text, model_choice, num_keywords):
93
  keywords_table_html = create_keywords_table(keywords)
94
 
95
  summary_html = f"""
96
- <div style="background-color: #f0f8ff; padding: 15px; border-radius: 8px; border: 1px solid #ddd; margin-bottom: 20px;">
97
- <h3 style="margin-top: 0; color: #1976d2;">📊 Keyword Extraction Summary</h3>
98
  <p><strong>Model Used:</strong> {model_choice}</p>
99
- <p><strong>Total Keywords Found:</strong> {len(keywords)}</p>
100
  </div>
101
  """
102
 
103
  highlighted_section = f"""
104
  <div style='padding: 15px; border: 2px solid #ddd; border-radius: 8px; background-color: #fafafa; margin: 10px 0;'>
105
- <h4 style='margin: 0 0 15px 0; color: #333;'>📝 Text with Highlighted Keywords</h4>
106
  <div style='line-height: 1.8; font-size: 16px; background-color: white; padding: 15px; border-radius: 5px;'>{highlighted_html}</div>
107
  </div>
108
  """
109
 
110
- return summary_html, highlighted_section, keywords_table_html
 
 
 
 
 
 
 
111
 
112
- # Build the Gradio interface
113
  def create_interface():
114
  with gr.Blocks(title="Keyword Explorer Tool") as demo:
115
  gr.Markdown("""
116
- # 🔑 Keyword Explorer Tool
 
 
 
 
117
 
118
- Discover the key concepts inside your text using established keyword extraction models.
119
 
120
- **How to use:**
121
- 1. Paste your text in the input box.
122
- 2. Choose a keyword extraction model.
123
- 3. Set how many keywords you want to extract.
124
- 4. Click "Extract Keywords" to explore the results.
 
125
 
126
- """)
 
 
127
 
128
  text_input = gr.Textbox(label="📝 Text to Analyse", placeholder="Paste your text here...", lines=10)
129
 
130
  with gr.Row():
131
- model_dropdown = gr.Dropdown(
132
- choices=AVAILABLE_MODELS,
133
- value=AVAILABLE_MODELS[0],
134
- label="Select Keyword Extraction Model"
135
- )
136
- num_keywords_slider = gr.Slider(
137
- minimum=5,
138
- maximum=50,
139
- value=10,
140
- step=1,
141
- label="Number of Keywords"
142
- )
143
-
144
- analyse_btn = gr.Button("🚀 Extract Keywords")
145
-
146
- summary_output = gr.HTML(label="Summary")
147
- highlighted_output = gr.HTML(label="Highlighted Text")
148
- keywords_table_output = gr.HTML(label="Keywords List")
149
 
150
  analyse_btn.click(
151
  fn=process_text,
@@ -154,18 +144,18 @@ def create_interface():
154
  )
155
 
156
  gr.HTML("""
157
- <hr style="margin-top: 40px; margin-bottom: 20px;">
158
- <div style="background-color: #f8f9fa; padding: 20px; border-radius: 8px; margin-top: 20px; text-align: center;">
159
- <p style="font-size: 14px; line-height: 1.8; margin: 0;">
160
- This <strong>Keyword Explorer Tool</strong> was created as part of the
161
- <a href="https://digitalscholarship.web.ox.ac.uk/" target="_blank" style="color: #1976d2;">
162
- Digital Scholarship at Oxford (DiSc)
163
- </a>
164
- funded research project:
165
- <em>Extracting Keywords from Crowdsourced Collections</em>.
166
- </p>
167
- </div>
168
- """)
169
 
170
  return demo
171
 
 
5
 
6
  nltk.download('stopwords')
7
 
 
8
  AVAILABLE_MODELS = [
9
  "kw_pke_multipartiterank",
10
  "kw_pke_singlerank",
 
14
  "kw_pke_positionrank"
15
  ]
16
 
 
17
  def extract_keywords_pke(text, model_choice, num_keywords):
18
  if model_choice == "kw_pke_multipartiterank":
19
  extractor = pke.unsupervised.MultipartiteRank()
 
31
  return ["Error: Unknown model"]
32
 
33
  extractor.load_document(input=text, language='en', normalization=None)
34
+ extractor.candidate_selection(n=3) if model_choice == "kw_pke_tfidf" else extractor.candidate_selection()
 
 
 
 
 
35
  extractor.candidate_weighting()
36
 
37
+ return [kw for kw, score in extractor.get_n_best(n=num_keywords)]
 
38
 
 
39
  def highlight_keywords(text, keywords):
 
 
 
40
  highlighted = text
41
  for kw in sorted(keywords, key=lambda k: -len(k)):
42
  pattern = re.compile(re.escape(kw), re.IGNORECASE)
43
  highlighted = pattern.sub(
44
+ f'<span style="background-color: #1976d2; color: white; padding: 2px 5px; '
45
+ f'border-radius: 4px; font-weight: bold;">{kw}</span>',
46
  highlighted
47
  )
48
  return highlighted
49
 
 
50
  def create_keywords_table(keywords):
51
  if not keywords:
52
  return "<p>No keywords found.</p>"
53
 
54
  table_html = """
55
+ <table style="width: 100%; border-collapse: collapse; border: 1px solid #ddd;">
56
  <thead>
57
  <tr style="background-color: #1976d2; color: white;">
58
+ <th style="padding: 10px; text-align: left;">Rank</th>
59
+ <th style="padding: 10px; text-align: left;">Keyword</th>
60
  </tr>
61
  </thead>
62
  <tbody>
63
  """
64
  for idx, kw in enumerate(keywords, 1):
65
  table_html += f"""
66
+ <tr>
67
+ <td style="padding: 10px; border: 1px solid #ddd;">{idx}</td>
68
+ <td style="padding: 10px; border: 1px solid #ddd; font-weight: bold;">{kw}</td>
69
+ </tr>
70
  """
71
  table_html += "</tbody></table>"
72
  return table_html
73
 
 
74
  def process_text(text, model_choice, num_keywords):
75
  if not text.strip():
76
  return "❌ Please enter text to analyse.", "", ""
 
80
  keywords_table_html = create_keywords_table(keywords)
81
 
82
  summary_html = f"""
83
+ <div style="background-color: #f8f9fa; padding: 15px; border-radius: 8px; border: 1px solid #ddd; margin-bottom: 20px;">
84
+ <h3 style="margin-top: 0; color: #1976d2;">📊 Analysis Summary</h3>
85
  <p><strong>Model Used:</strong> {model_choice}</p>
86
+ <p><strong>Keywords Found:</strong> {len(keywords)}</p>
87
  </div>
88
  """
89
 
90
  highlighted_section = f"""
91
  <div style='padding: 15px; border: 2px solid #ddd; border-radius: 8px; background-color: #fafafa; margin: 10px 0;'>
92
+ <h4 style='margin: 0 0 15px 0; color: #1976d2;'>📝 Text with Highlighted Keywords</h4>
93
  <div style='line-height: 1.8; font-size: 16px; background-color: white; padding: 15px; border-radius: 5px;'>{highlighted_html}</div>
94
  </div>
95
  """
96
 
97
+ table_section = f"""
98
+ <div style="margin-top: 20px;">
99
+ <h4 style="color: #1976d2; margin-bottom: 10px;">📋 Extracted Keywords</h4>
100
+ {keywords_table_html}
101
+ </div>
102
+ """
103
+
104
+ return summary_html, highlighted_section, table_section
105
 
 
106
  def create_interface():
107
  with gr.Blocks(title="Keyword Explorer Tool") as demo:
108
  gr.Markdown("""
109
+ # 🔑 Keyword Extraction (KE) Explorer Tool
110
+
111
+ See what different Keyword Extraction (KE) models can do, and if this is useful to you.
112
+
113
+ ---
114
 
115
+ ### 🛠️ How to use:
116
 
117
+ 1. **📝 Paste your text** below.
118
+ 2. **🎛️ Choose a model** and **set keyword count**.
119
+ 3. **🔍 Click "Extract Keywords"** to see:
120
+ - 📊 A summary of results.
121
+ - ✨ Highlighted keywords inside your text.
122
+ - 📋 A full keyword list.
123
 
124
+ ---
125
+
126
+ """)
127
 
128
  text_input = gr.Textbox(label="📝 Text to Analyse", placeholder="Paste your text here...", lines=10)
129
 
130
  with gr.Row():
131
+ model_dropdown = gr.Dropdown(choices=AVAILABLE_MODELS, value=AVAILABLE_MODELS[0], label="🎛️ Select Model")
132
+ num_keywords_slider = gr.Slider(minimum=5, maximum=50, value=10, step=1, label="🔢 Number of Keywords")
133
+
134
+ analyse_btn = gr.Button("🔍 Extract Keywords", variant="primary")
135
+
136
+ summary_output = gr.HTML()
137
+ highlighted_output = gr.HTML()
138
+ keywords_table_output = gr.HTML()
 
 
 
 
 
 
 
 
 
 
139
 
140
  analyse_btn.click(
141
  fn=process_text,
 
144
  )
145
 
146
  gr.HTML("""
147
+ <hr style="margin-top: 40px; margin-bottom: 20px;">
148
+ <div style="background-color: #f8f9fa; padding: 20px; border-radius: 8px; margin-top: 20px; text-align: center;">
149
+ <p style="font-size: 14px; line-height: 1.8; margin: 0;">
150
+ This <strong>Keyword Extraction Explorer Tool</strong> was created as part of the
151
+ <a href="https://digitalscholarship.web.ox.ac.uk/" target="_blank" style="color: #1976d2;">
152
+ Digital Scholarship at Oxford (DiSc)
153
+ </a>
154
+ funded research project:
155
+ <em>Extracting Keywords from Crowdsourced Collections</em>.
156
+ </p>
157
+ </div>
158
+ """)
159
 
160
  return demo
161