SorrelC committed on
Commit
3945fba
·
verified ·
1 Parent(s): 5744fd2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -32
app.py CHANGED
@@ -5,7 +5,7 @@ import re
5
 
6
  nltk.download('stopwords')
7
 
8
- # Models to offer
9
  AVAILABLE_MODELS = [
10
  "kw_pke_multipartiterank",
11
  "kw_pke_singlerank",
@@ -15,9 +15,8 @@ AVAILABLE_MODELS = [
15
  "kw_pke_positionrank"
16
  ]
17
 
 
18
  def extract_keywords_pke(text, model_choice, num_keywords):
19
- extractor = None
20
-
21
  if model_choice == "kw_pke_multipartiterank":
22
  extractor = pke.unsupervised.MultipartiteRank()
23
  elif model_choice == "kw_pke_singlerank":
@@ -43,10 +42,9 @@ def extract_keywords_pke(text, model_choice, num_keywords):
43
  extractor.candidate_weighting()
44
 
45
  keywords = [kw for kw, score in extractor.get_n_best(n=num_keywords)]
46
-
47
  return keywords
48
 
49
-
50
  def highlight_keywords(text, keywords):
51
  if not keywords:
52
  return text
@@ -55,36 +53,79 @@ def highlight_keywords(text, keywords):
55
  for kw in sorted(keywords, key=lambda k: -len(k)):
56
  pattern = re.compile(re.escape(kw), re.IGNORECASE)
57
  highlighted = pattern.sub(
58
- f'<mark style="background-color:#FFD54F; padding:2px 4px; border-radius:4px;">{kw}</mark>',
59
  highlighted
60
  )
61
  return highlighted
62
 
63
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  def process_text(text, model_choice, num_keywords):
65
  if not text.strip():
66
  return "❌ Please enter text to analyse.", "", ""
67
 
68
  keywords = extract_keywords_pke(text, model_choice, num_keywords)
69
  highlighted_html = highlight_keywords(text, keywords)
70
-
71
- summary = f"""
72
- ## 📊 Keyword Extraction Summary
73
- - **Model Used:** {model_choice}
74
- - **Keywords Found:** {len(keywords)}
75
- - **Displayed in Context Below**
 
 
76
  """
77
 
78
- keyword_list_html = "<ul>" + "".join([f"<li>{kw}</li>" for kw in keywords]) + "</ul>"
79
-
80
- return summary, highlighted_html, keyword_list_html
 
 
 
81
 
 
82
 
 
83
  def create_interface():
84
  with gr.Blocks(title="Keyword Explorer Tool") as demo:
85
- gr.Markdown("# 🔑 Keyword Explorer Tool\n\nExtract and explore keywords using multiple extraction models.")
 
 
 
86
 
87
- text_input = gr.Textbox(label="📝 Text to Analyse", placeholder="Paste your text here...", lines=8)
 
 
 
 
 
 
 
 
88
 
89
  with gr.Row():
90
  model_dropdown = gr.Dropdown(
@@ -92,7 +133,6 @@ def create_interface():
92
  value=AVAILABLE_MODELS[0],
93
  label="Select Keyword Extraction Model"
94
  )
95
-
96
  num_keywords_slider = gr.Slider(
97
  minimum=5,
98
  maximum=50,
@@ -103,20 +143,14 @@ def create_interface():
103
 
104
  analyse_btn = gr.Button("🚀 Extract Keywords")
105
 
106
- with gr.Row():
107
- summary_output = gr.Markdown(label="Summary")
108
-
109
- with gr.Row():
110
- highlighted_output = gr.HTML(label="Highlighted Text")
111
-
112
- with gr.Row():
113
- gr.Markdown("### 📋 Extracted Keywords List")
114
- keyword_list_output = gr.HTML(label="Keywords List")
115
 
116
  analyse_btn.click(
117
  fn=process_text,
118
  inputs=[text_input, model_dropdown, num_keywords_slider],
119
- outputs=[summary_output, highlighted_output, keyword_list_output]
120
  )
121
 
122
  gr.HTML("""
@@ -135,8 +169,6 @@ def create_interface():
135
 
136
  return demo
137
 
138
-
139
  if __name__ == "__main__":
140
  demo = create_interface()
141
- demo.launch()
142
-
 
5
 
6
  nltk.download('stopwords')
7
 
8
+ # Models list
9
  AVAILABLE_MODELS = [
10
  "kw_pke_multipartiterank",
11
  "kw_pke_singlerank",
 
15
  "kw_pke_positionrank"
16
  ]
17
 
18
+ # Keyword extraction using PKE
19
  def extract_keywords_pke(text, model_choice, num_keywords):
 
 
20
  if model_choice == "kw_pke_multipartiterank":
21
  extractor = pke.unsupervised.MultipartiteRank()
22
  elif model_choice == "kw_pke_singlerank":
 
42
  extractor.candidate_weighting()
43
 
44
  keywords = [kw for kw, score in extractor.get_n_best(n=num_keywords)]
 
45
  return keywords
46
 
47
+ # Highlight keywords in text with styled spans
48
  def highlight_keywords(text, keywords):
49
  if not keywords:
50
  return text
 
53
  for kw in sorted(keywords, key=lambda k: -len(k)):
54
  pattern = re.compile(re.escape(kw), re.IGNORECASE)
55
  highlighted = pattern.sub(
56
+ f'<span style="background-color: #4ECDC4; color: white; padding: 2px 5px; border-radius: 5px; font-weight: bold;">{kw}</span>',
57
  highlighted
58
  )
59
  return highlighted
60
 
61
+ # Create keyword list as HTML table
62
+ def create_keywords_table(keywords):
63
+ if not keywords:
64
+ return "<p>No keywords found.</p>"
65
+
66
+ table_html = """
67
+ <table style="width: 100%; border-collapse: collapse; border: 1px solid #ddd; font-size: 14px;">
68
+ <thead>
69
+ <tr style="background-color: #1976d2; color: white;">
70
+ <th style="padding: 8px; border: 1px solid #ddd; text-align: left;">Rank</th>
71
+ <th style="padding: 8px; border: 1px solid #ddd; text-align: left;">Keyword</th>
72
+ </tr>
73
+ </thead>
74
+ <tbody>
75
+ """
76
+ for idx, kw in enumerate(keywords, 1):
77
+ table_html += f"""
78
+ <tr>
79
+ <td style="padding: 8px; border: 1px solid #ddd;">{idx}</td>
80
+ <td style="padding: 8px; border: 1px solid #ddd; font-weight: bold;">{kw}</td>
81
+ </tr>
82
+ """
83
+ table_html += "</tbody></table>"
84
+ return table_html
85
+
86
+ # Main processing function
87
  def process_text(text, model_choice, num_keywords):
88
  if not text.strip():
89
  return "❌ Please enter text to analyse.", "", ""
90
 
91
  keywords = extract_keywords_pke(text, model_choice, num_keywords)
92
  highlighted_html = highlight_keywords(text, keywords)
93
+ keywords_table_html = create_keywords_table(keywords)
94
+
95
+ summary_html = f"""
96
+ <div style="background-color: #f0f8ff; padding: 15px; border-radius: 8px; border: 1px solid #ddd; margin-bottom: 20px;">
97
+ <h3 style="margin-top: 0; color: #1976d2;">📊 Keyword Extraction Summary</h3>
98
+ <p><strong>Model Used:</strong> {model_choice}</p>
99
+ <p><strong>Total Keywords Found:</strong> {len(keywords)}</p>
100
+ </div>
101
  """
102
 
103
+ highlighted_section = f"""
104
+ <div style='padding: 15px; border: 2px solid #ddd; border-radius: 8px; background-color: #fafafa; margin: 10px 0;'>
105
+ <h4 style='margin: 0 0 15px 0; color: #333;'>📝 Text with Highlighted Keywords</h4>
106
+ <div style='line-height: 1.8; font-size: 16px; background-color: white; padding: 15px; border-radius: 5px;'>{highlighted_html}</div>
107
+ </div>
108
+ """
109
 
110
+ return summary_html, highlighted_section, keywords_table_html
111
 
112
+ # Build the Gradio interface
113
  def create_interface():
114
  with gr.Blocks(title="Keyword Explorer Tool") as demo:
115
+ gr.Markdown("""
116
+ # 🔑 Keyword Explorer Tool
117
+
118
+ Discover the key concepts inside your text using established keyword extraction models.
119
 
120
+ **How to use:**
121
+ 1. Paste your text in the input box.
122
+ 2. Choose a keyword extraction model.
123
+ 3. Set how many keywords you want to extract.
124
+ 4. Click "Extract Keywords" to explore the results.
125
+
126
+ """)
127
+
128
+ text_input = gr.Textbox(label="📝 Text to Analyse", placeholder="Paste your text here...", lines=10)
129
 
130
  with gr.Row():
131
  model_dropdown = gr.Dropdown(
 
133
  value=AVAILABLE_MODELS[0],
134
  label="Select Keyword Extraction Model"
135
  )
 
136
  num_keywords_slider = gr.Slider(
137
  minimum=5,
138
  maximum=50,
 
143
 
144
  analyse_btn = gr.Button("🚀 Extract Keywords")
145
 
146
+ summary_output = gr.HTML(label="Summary")
147
+ highlighted_output = gr.HTML(label="Highlighted Text")
148
+ keywords_table_output = gr.HTML(label="Keywords List")
 
 
 
 
 
 
149
 
150
  analyse_btn.click(
151
  fn=process_text,
152
  inputs=[text_input, model_dropdown, num_keywords_slider],
153
+ outputs=[summary_output, highlighted_output, keywords_table_output]
154
  )
155
 
156
  gr.HTML("""
 
169
 
170
  return demo
171
 
 
172
  if __name__ == "__main__":
173
  demo = create_interface()
174
+ demo.launch()