SorrelC committed on
Commit
40280db
·
verified ·
1 Parent(s): 2b6ae9d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -38
app.py CHANGED
@@ -2,18 +2,18 @@ import gradio as gr
2
  import re
3
  import pandas as pd
4
 
5
- def build_keywords_dict(primary_inputs, synonym_inputs):
6
- """Build keyword dictionary from separate primary and synonym inputs"""
7
  keywords_dict = {}
8
 
9
- for primary, synonyms in zip(primary_inputs, synonym_inputs):
10
  if primary and primary.strip(): # Only process if primary keyword exists
11
  primary_clean = primary.strip()
12
- if synonyms and synonyms.strip():
13
- synonym_list = [s.strip() for s in synonyms.split(';') if s.strip()]
14
  else:
15
- synonym_list = []
16
- keywords_dict[primary_clean] = synonym_list
17
 
18
  return keywords_dict
19
 
@@ -24,8 +24,8 @@ def find_keywords(story, keywords_dict):
24
 
25
  found_keywords = set()
26
 
27
- # Search for each primary keyword and its synonyms
28
- for primary_keyword, synonyms in keywords_dict.items():
29
  keyword_group_found = False
30
 
31
  # Check primary keyword
@@ -37,19 +37,19 @@ def find_keywords(story, keywords_dict):
37
  if re.search(pattern, story, re.IGNORECASE):
38
  keyword_group_found = True
39
 
40
- # Check each synonym
41
- for synonym in synonyms:
42
- if synonym.upper() == "US":
43
  if ' US ' in story or story.startswith('US ') or story.endswith(' US'):
44
  keyword_group_found = True
45
  else:
46
- if re.search(r'\b' + re.escape(synonym) + r'\b', story, re.IGNORECASE):
47
  keyword_group_found = True
48
 
49
  # If any keyword from this group was found, add ALL keywords from the group
50
  if keyword_group_found:
51
  found_keywords.add(primary_keyword) # Always include the primary
52
- found_keywords.update(synonyms) # Add all synonyms
53
 
54
  return '; '.join(sorted(found_keywords))
55
 
@@ -83,15 +83,15 @@ def create_keyword_results_table(found_keywords_str, keywords_dict, input_text):
83
 
84
  # Group keywords by their primary category
85
  keyword_groups = {}
86
- for primary, synonyms in keywords_dict.items():
87
  found_in_group = []
88
  # Check if primary keyword was found
89
  if primary in found_keywords:
90
  found_in_group.append(primary)
91
- # Check if any synonyms were found
92
- for synonym in synonyms:
93
- if synonym in found_keywords:
94
- found_in_group.append(synonym)
95
 
96
  if found_in_group:
97
  keyword_groups[primary] = found_in_group
@@ -189,15 +189,15 @@ def create_keyword_results_table(found_keywords_str, keywords_dict, input_text):
189
 
190
  return table_html
191
 
192
- def process_text(input_text, primary1, synonyms1, primary2, synonyms2, primary3, synonyms3, primary4, synonyms4, primary5, synonyms5):
193
  """Main processing function with added results table"""
194
  if not input_text.strip():
195
  return "Please enter some text to analyse", "", "", "No keywords found"
196
 
197
  # Build keywords dictionary from separate inputs
198
  primary_inputs = [primary1, primary2, primary3, primary4, primary5]
199
- synonym_inputs = [synonyms1, synonyms2, synonyms3, synonyms4, synonyms5]
200
- keywords_dict = build_keywords_dict(primary_inputs, synonym_inputs)
201
 
202
  if not keywords_dict:
203
  return "Please enter at least one primary keyword", "", "", "No keyword dictionary provided"
@@ -222,7 +222,7 @@ def process_text(input_text, primary1, synonyms1, primary2, synonyms2, primary3,
222
  **Matched Keywords:** {found_keywords_str}
223
  **Keyword Dictionary Stats:**
224
  - Primary keywords loaded: {len(keywords_dict)}
225
- - Total searchable terms: {sum(len(synonyms) + 1 for synonyms in keywords_dict.values())}
226
  **Copy this result to your spreadsheet:**
227
  {found_keywords_str}
228
  """
@@ -234,15 +234,15 @@ def create_interface():
234
  # theme stays in gr.Blocks(), ssr_mode goes in launch()
235
  with gr.Blocks(title="Keyword Tagging Tool", theme=gr.themes.Soft()) as demo:
236
  gr.HTML("""
237
- <h1>Controlled Vocabluary Keyword Tagging Tool</h1>
238
 
239
- <p>This tool demonstrates how a simple python script can be used to extract keywords from text using a controlled vocabulary of primary keywords and associated keywords/synonyms.
240
  </p>
241
 
242
  <h2>How to use this tool:</h2>
243
  <ol>
244
  <li>📝 <strong>Enter your text</strong> in the left panel</li>
245
- <li>📚 <strong>Define your keyword dictionary</strong> in the right panel - enter primary keywords and their synonyms</li>
246
  <li>🔍 <strong>Click "Find Keywords"</strong> to see results</li>
247
  <li>📋 <strong>Copy the results</strong> to paste into your spreadsheet</li>
248
  </ol>
@@ -258,32 +258,32 @@ def create_interface():
258
  )
259
 
260
  with gr.Column(scale=1):
261
- gr.Markdown("**Keyword Dictionary** - Enter primary keywords and their synonyms:")
262
 
263
  # Row 1
264
  with gr.Row():
265
  primary1 = gr.Textbox(label="Primary Keyword 1", placeholder="e.g., Prisoner of War", scale=1)
266
- synonyms1 = gr.Textbox(label="Synonyms 1", placeholder="e.g., POW; POWs; prisoner of war", scale=2)
267
 
268
  # Row 2
269
  with gr.Row():
270
  primary2 = gr.Textbox(label="Primary Keyword 2", placeholder="e.g., United States", scale=1)
271
- synonyms2 = gr.Textbox(label="Synonyms 2", placeholder="e.g., USA; US; America", scale=2)
272
 
273
  # Row 3
274
  with gr.Row():
275
  primary3 = gr.Textbox(label="Primary Keyword 3", placeholder="e.g., University", scale=1)
276
- synonyms3 = gr.Textbox(label="Synonyms 3", placeholder="e.g., university; institution; college", scale=2)
277
 
278
  # Row 4
279
  with gr.Row():
280
  primary4 = gr.Textbox(label="Primary Keyword 4", placeholder="Optional", scale=1)
281
- synonyms4 = gr.Textbox(label="Synonyms 4", placeholder="Optional", scale=2)
282
 
283
  # Row 5
284
  with gr.Row():
285
  primary5 = gr.Textbox(label="Primary Keyword 5", placeholder="Optional", scale=1)
286
- synonyms5 = gr.Textbox(label="Synonyms 5", placeholder="Optional", scale=2)
287
 
288
  # Full width Find Keywords button
289
  with gr.Row():
@@ -335,7 +335,7 @@ def create_interface():
335
 
336
  gr.Examples(
337
  examples=[example1, example2],
338
- inputs=[text_input, primary1, synonyms1, primary2, synonyms2, primary3, synonyms3, primary4, synonyms4, primary5, synonyms5],
339
  label="Click an example to try it out"
340
  )
341
 
@@ -351,7 +351,7 @@ def create_interface():
351
  # Button functions
352
  find_btn.click(
353
  fn=process_text,
354
- inputs=[text_input, primary1, synonyms1, primary2, synonyms2, primary3, synonyms3, primary4, synonyms4, primary5, synonyms5],
355
  outputs=[results_output, highlighted_output, results_table_output, copy_output]
356
  )
357
 
@@ -362,7 +362,7 @@ def create_interface():
362
 
363
  clear_dict_btn.click(
364
  fn=clear_dictionary_only,
365
- outputs=[primary1, synonyms1, primary2, synonyms2, primary3, synonyms3, primary4, synonyms4, primary5, synonyms5]
366
  )
367
 
368
  # Instructions
@@ -371,13 +371,13 @@ def create_interface():
371
 
372
  **How to enter keywords:**
373
  - **Primary Keyword:** Enter the main/preferred term for a concept
374
- - **Synonyms:** Enter alternative terms separated by semicolons `;`
375
  - Leave rows blank if you don't need all 5 keyword groups
376
  - The tool will find ANY of these terms and return ALL related terms
377
 
378
  **Example:**
379
  - Primary: `Prisoner of War`
380
- - Synonyms: `POW; POWs; prisoner of war`
381
 
382
  **Special Handling:**
383
  - "US" is matched exactly to avoid confusion with the word "us"
@@ -385,7 +385,7 @@ def create_interface():
385
  - Results are alphabetised and deduplicated
386
 
387
  **How it works:**
388
- When ANY variant is found in your text (primary OR synonym), the tool returns the complete standardized set of terms for that concept.
389
  """)
390
 
391
  # Bottom horizontal line and footer
 
2
  import re
3
  import pandas as pd
4
 
5
+ def build_keywords_dict(primary_inputs, associated_inputs):
6
+ """Build keyword dictionary from separate primary and associated word inputs"""
7
  keywords_dict = {}
8
 
9
+ for primary, associated in zip(primary_inputs, associated_inputs):
10
  if primary and primary.strip(): # Only process if primary keyword exists
11
  primary_clean = primary.strip()
12
+ if associated and associated.strip():
13
+ associated_list = [s.strip() for s in associated.split(';') if s.strip()]
14
  else:
15
+ associated_list = []
16
+ keywords_dict[primary_clean] = associated_list
17
 
18
  return keywords_dict
19
 
 
24
 
25
  found_keywords = set()
26
 
27
+ # Search for each primary keyword and its associated words
28
+ for primary_keyword, associated_words in keywords_dict.items():
29
  keyword_group_found = False
30
 
31
  # Check primary keyword
 
37
  if re.search(pattern, story, re.IGNORECASE):
38
  keyword_group_found = True
39
 
40
+ # Check each associated word
41
+ for associated in associated_words:
42
+ if associated.upper() == "US":
43
  if ' US ' in story or story.startswith('US ') or story.endswith(' US'):
44
  keyword_group_found = True
45
  else:
46
+ if re.search(r'\b' + re.escape(associated) + r'\b', story, re.IGNORECASE):
47
  keyword_group_found = True
48
 
49
  # If any keyword from this group was found, add ALL keywords from the group
50
  if keyword_group_found:
51
  found_keywords.add(primary_keyword) # Always include the primary
52
+ found_keywords.update(associated_words) # Add all associated words
53
 
54
  return '; '.join(sorted(found_keywords))
55
 
 
83
 
84
  # Group keywords by their primary category
85
  keyword_groups = {}
86
+ for primary, associated_words in keywords_dict.items():
87
  found_in_group = []
88
  # Check if primary keyword was found
89
  if primary in found_keywords:
90
  found_in_group.append(primary)
91
+ # Check if any associated words were found
92
+ for associated in associated_words:
93
+ if associated in found_keywords:
94
+ found_in_group.append(associated)
95
 
96
  if found_in_group:
97
  keyword_groups[primary] = found_in_group
 
189
 
190
  return table_html
191
 
192
+ def process_text(input_text, primary1, associated1, primary2, associated2, primary3, associated3, primary4, associated4, primary5, associated5):
193
  """Main processing function with added results table"""
194
  if not input_text.strip():
195
  return "Please enter some text to analyse", "", "", "No keywords found"
196
 
197
  # Build keywords dictionary from separate inputs
198
  primary_inputs = [primary1, primary2, primary3, primary4, primary5]
199
+ associated_inputs = [associated1, associated2, associated3, associated4, associated5]
200
+ keywords_dict = build_keywords_dict(primary_inputs, associated_inputs)
201
 
202
  if not keywords_dict:
203
  return "Please enter at least one primary keyword", "", "", "No keyword dictionary provided"
 
222
  **Matched Keywords:** {found_keywords_str}
223
  **Keyword Dictionary Stats:**
224
  - Primary keywords loaded: {len(keywords_dict)}
225
+ - Total searchable terms: {sum(len(associated) + 1 for associated in keywords_dict.values())}
226
  **Copy this result to your spreadsheet:**
227
  {found_keywords_str}
228
  """
 
234
  # theme stays in gr.Blocks(), ssr_mode goes in launch()
235
  with gr.Blocks(title="Keyword Tagging Tool", theme=gr.themes.Soft()) as demo:
236
  gr.HTML("""
237
+ <h1>Controlled Vocabulary Keyword Tagging Tool</h1>
238
 
239
+ <p>This tool demonstrates how a simple python script can be used to extract keywords from text using a controlled vocabulary of primary keywords and associated words (abbreviations, alternate spellings, or related concepts).
240
  </p>
241
 
242
  <h2>How to use this tool:</h2>
243
  <ol>
244
  <li>📝 <strong>Enter your text</strong> in the left panel</li>
245
+ <li>📚 <strong>Define your keyword dictionary</strong> in the right panel - enter primary keywords and their associated words</li>
246
  <li>🔍 <strong>Click "Find Keywords"</strong> to see results</li>
247
  <li>📋 <strong>Copy the results</strong> to paste into your spreadsheet</li>
248
  </ol>
 
258
  )
259
 
260
  with gr.Column(scale=1):
261
+ gr.Markdown("**Keyword Dictionary** - Enter primary keywords and their associated words:")
262
 
263
  # Row 1
264
  with gr.Row():
265
  primary1 = gr.Textbox(label="Primary Keyword 1", placeholder="e.g., Prisoner of War", scale=1)
266
+ associated1 = gr.Textbox(label="Associated Words 1", placeholder="e.g., POW; POWs; prisoner of war", scale=2)
267
 
268
  # Row 2
269
  with gr.Row():
270
  primary2 = gr.Textbox(label="Primary Keyword 2", placeholder="e.g., United States", scale=1)
271
+ associated2 = gr.Textbox(label="Associated Words 2", placeholder="e.g., USA; US; America", scale=2)
272
 
273
  # Row 3
274
  with gr.Row():
275
  primary3 = gr.Textbox(label="Primary Keyword 3", placeholder="e.g., University", scale=1)
276
+ associated3 = gr.Textbox(label="Associated Words 3", placeholder="e.g., university; institution; college", scale=2)
277
 
278
  # Row 4
279
  with gr.Row():
280
  primary4 = gr.Textbox(label="Primary Keyword 4", placeholder="Optional", scale=1)
281
+ associated4 = gr.Textbox(label="Associated Words 4", placeholder="Optional", scale=2)
282
 
283
  # Row 5
284
  with gr.Row():
285
  primary5 = gr.Textbox(label="Primary Keyword 5", placeholder="Optional", scale=1)
286
+ associated5 = gr.Textbox(label="Associated Words 5", placeholder="Optional", scale=2)
287
 
288
  # Full width Find Keywords button
289
  with gr.Row():
 
335
 
336
  gr.Examples(
337
  examples=[example1, example2],
338
+ inputs=[text_input, primary1, associated1, primary2, associated2, primary3, associated3, primary4, associated4, primary5, associated5],
339
  label="Click an example to try it out"
340
  )
341
 
 
351
  # Button functions
352
  find_btn.click(
353
  fn=process_text,
354
+ inputs=[text_input, primary1, associated1, primary2, associated2, primary3, associated3, primary4, associated4, primary5, associated5],
355
  outputs=[results_output, highlighted_output, results_table_output, copy_output]
356
  )
357
 
 
362
 
363
  clear_dict_btn.click(
364
  fn=clear_dictionary_only,
365
+ outputs=[primary1, associated1, primary2, associated2, primary3, associated3, primary4, associated4, primary5, associated5]
366
  )
367
 
368
  # Instructions
 
371
 
372
  **How to enter keywords:**
373
  - **Primary Keyword:** Enter the main/preferred term for a concept
374
+ - **Associated Words:** Enter alternative terms separated by semicolons `;`
375
  - Leave rows blank if you don't need all 5 keyword groups
376
  - The tool will find ANY of these terms and return ALL related terms
377
 
378
  **Example:**
379
  - Primary: `Prisoner of War`
380
+ - Associated Words: `POW; POWs; prisoner of war`
381
 
382
  **Special Handling:**
383
  - "US" is matched exactly to avoid confusion with the word "us"
 
385
  - Results are alphabetised and deduplicated
386
 
387
  **How it works:**
388
+ When ANY variant is found in your text (primary OR associated word), the tool returns the complete standardised set of terms for that concept.
389
  """)
390
 
391
  # Bottom horizontal line and footer