SorrelC committed on
Commit
3295b9f
·
verified ·
1 Parent(s): d2fa60b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -133
app.py CHANGED
@@ -1,115 +1,3 @@
1
- import gradio as gr
2
- import re
3
- import pandas as pd
4
-
5
def build_keywords_dict(primary_inputs, synonym_inputs):
    """Build a keyword dictionary from parallel primary and synonym inputs.

    Args:
        primary_inputs: Iterable of primary keyword strings. Entries that are
            ``None``, empty, or whitespace-only are skipped together with
            their paired synonyms.
        synonym_inputs: Iterable of semicolon-separated synonym strings,
            paired positionally with ``primary_inputs``. ``None`` or blank
            entries mean "no synonyms".

    Returns:
        A dict mapping each stripped primary keyword to a list of its
        stripped, non-empty synonyms in first-seen order. When the same
        primary keyword appears in more than one row, the synonyms of all
        those rows are merged instead of the last row silently replacing the
        earlier ones. Exact duplicate synonyms are stored only once.
    """
    keywords_dict = {}

    for primary, synonyms in zip(primary_inputs, synonym_inputs):
        # Only rows with a real primary keyword contribute to the dictionary.
        if not primary or not primary.strip():
            continue

        # setdefault keeps synonyms collected from an earlier row that used
        # the same primary keyword.
        merged = keywords_dict.setdefault(primary.strip(), [])

        for synonym in (synonyms or '').split(';'):
            synonym = synonym.strip()
            if synonym and synonym not in merged:
                merged.append(synonym)

    return keywords_dict
19
-
20
def _term_in_text(term, text):
    """Return True when *term* occurs in *text* as a whole word or phrase."""
    if not term:
        return False

    # Lookarounds are used instead of \b so that terms starting or ending
    # with punctuation (e.g. "C++") can still match as whole tokens.
    if term.upper() == "US":
        # The abbreviation is matched case-sensitively so the pronoun "us"
        # is not tagged; punctuation around it ("the US.", "(US)") is fine.
        return re.search(r'(?<!\w)US(?!\w)', text) is not None

    pattern = r'(?<!\w)' + re.escape(term) + r'(?!\w)'
    return re.search(pattern, text, re.IGNORECASE) is not None


def find_keywords(story, keywords_dict):
    """Find which keyword groups are mentioned in the story text.

    Args:
        story: Text to search. Anything that is not a non-empty string
            yields an empty result.
        keywords_dict: Mapping of primary keyword -> list of synonyms.

    Returns:
        A ``'; '``-joined, alphabetically sorted string. When the primary
        keyword or any synonym of a group is found, the primary keyword and
        ALL synonyms of that group are included. Returns ``''`` when nothing
        matches.
    """
    if not story or not isinstance(story, str):
        return ''

    found_keywords = set()

    for primary_keyword, synonyms in keywords_dict.items():
        # One hit anywhere in the group is enough to tag the whole group.
        if any(_term_in_text(term, story) for term in (primary_keyword, *synonyms)):
            found_keywords.add(primary_keyword)
            found_keywords.update(synonyms)

    return '; '.join(sorted(found_keywords))
55
-
56
def highlight_keywords_in_text(text, keywords_list):
    """Create HTML in which every keyword occurrence is wrapped in a span.

    The text is scanned in a single pass, so markup inserted for one keyword
    is never re-matched by another keyword, and longer phrases take
    precedence over shorter keywords they contain. The highlighted fragment
    keeps the casing used in ``text``. All text placed in the result is
    HTML-escaped because the result is rendered as HTML.

    Args:
        text: Plain text to highlight. A falsy value is returned unchanged.
        keywords_list: Keywords to highlight (matched case-insensitively as
            whole words/phrases). Empty entries are ignored. A keyword's
            colour is chosen by its position in this list.

    Returns:
        An HTML string. When there are no usable keywords this is just the
        escaped text.
    """
    import html  # local import: the module is only needed by this function

    if not text:
        return text

    colors = ['#FF6B6B', '#4ECDC4', '#45B7D1', '#F9CA24', '#6C5CE7', '#A0E7E5', '#FD79A8', '#55A3FF', '#00B894', '#E17055']

    # Keywords differing only by case would match the same text, so keep the
    # first one (and its colour) and drop the rest.
    unique = {}
    for index, keyword in enumerate(keywords_list or []):
        if keyword:
            unique.setdefault(keyword.lower(), (keyword, colors[index % len(colors)]))

    if not unique:
        return html.escape(text, quote=False)

    # Longest first, so "World War II" wins over "War" at the same position.
    ordered = sorted(unique.values(), key=lambda entry: len(entry[0]), reverse=True)

    # One capturing group per keyword lets match.lastindex identify which
    # keyword (and therefore which colour) produced the match.
    alternatives = '|'.join('(' + re.escape(keyword) + ')' for keyword, _ in ordered)
    pattern = re.compile(r'(?<!\w)(?:' + alternatives + r')(?!\w)', re.IGNORECASE)

    pieces = []
    cursor = 0
    for match in pattern.finditer(text):
        color = ordered[match.lastindex - 1][1]
        pieces.append(html.escape(text[cursor:match.start()], quote=False))
        pieces.append(
            f'<span style="background-color: {color}; padding: 2px 4px; border-radius: 3px; color: white; font-weight: bold;">'
            f'{html.escape(match.group(0), quote=False)}</span>'
        )
        cursor = match.end()
    pieces.append(html.escape(text[cursor:], quote=False))

    return ''.join(pieces)
72
-
73
def process_text(input_text, primary1, synonyms1, primary2, synonyms2, primary3, synonyms3, primary4, synonyms4, primary5, synonyms5):
    """Run the keyword search for the UI and build its three outputs.

    Args:
        input_text: Text to analyse. ``None``, empty, or whitespace-only
            input produces a prompt message instead of an error.
        primary1..primary5: Primary keyword of each dictionary row.
        synonyms1..synonyms5: Semicolon-separated synonyms of each row.

    Returns:
        A 3-tuple ``(results_markdown, highlighted_html, copy_text)``:
        a Markdown summary (or a message when nothing can be done), the
        text with matched keywords highlighted, and the ``'; '``-joined
        keyword string for pasting into a spreadsheet.
    """
    # Guard against None as well as blank text; calling .strip() on None
    # would raise AttributeError.
    if not input_text or not input_text.strip():
        return "Please enter some text to analyze", "", "No keywords found"

    # Build keywords dictionary from separate inputs
    primary_inputs = [primary1, primary2, primary3, primary4, primary5]
    synonym_inputs = [synonyms1, synonyms2, synonyms3, synonyms4, synonyms5]
    keywords_dict = build_keywords_dict(primary_inputs, synonym_inputs)

    if not keywords_dict:
        return "Please enter at least one primary keyword", "", "No keyword dictionary provided"

    # Find keywords in the text
    found_keywords_str = find_keywords(input_text, keywords_dict)

    if not found_keywords_str:
        return f"No keywords found in the text.\n\nKeyword dictionary loaded: {len(keywords_dict)} primary keywords", input_text, "No matches found"

    # Create highlighted version; the keyword string is split back into the
    # individual terms it was joined from.
    keywords_list = found_keywords_str.split('; ')
    highlighted_html = highlight_keywords_in_text(input_text, keywords_list)

    # Create results summary. Each group contributes its primary keyword
    # plus its synonyms to the "searchable terms" count.
    total_terms = sum(len(synonyms) + 1 for synonyms in keywords_dict.values())
    results_summary = f"""
## Results Summary

**Keywords Found:** {len(keywords_list)}
**Matched Keywords:** {found_keywords_str}

**Keyword Dictionary Stats:**
- Primary keywords loaded: {len(keywords_dict)}
- Total searchable terms: {total_terms}

**Copy this result to your spreadsheet:**
{found_keywords_str}
"""

    return results_summary, highlighted_html, found_keywords_str
112
-
113
  # Create the Gradio interface
114
  def create_interface():
115
  with gr.Blocks(title="Keyword Tagging Tool", theme=gr.themes.Soft()) as demo:
@@ -165,7 +53,8 @@ def create_interface():
165
 
166
  with gr.Row():
167
  find_btn = gr.Button("Find Keywords", variant="primary", size="lg")
168
- clear_btn = gr.Button("Clear All", size="lg")
 
169
 
170
  with gr.Row():
171
  results_output = gr.Markdown(label="Results Summary")
@@ -173,6 +62,9 @@ def create_interface():
173
  with gr.Row():
174
  highlighted_output = gr.HTML(label="Text with Highlighted Keywords")
175
 
 
 
 
176
  with gr.Row():
177
  copy_output = gr.Textbox(
178
  label="Keywords for Spreadsheet (copy this text)",
@@ -180,47 +72,87 @@ def create_interface():
180
  max_lines=5
181
  )
182
 
183
- # Examples section
184
  gr.Markdown("### Examples")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
 
186
- example1 = [
 
187
  "During World War II, many prisoners of war were held in camps across Europe. The Geneva Convention established rules for POW treatment. American soldiers and British troops were among those captured.",
188
  "Prisoner of War", "POW; POWs; prisoner of war",
189
- "World War II", "WWII; Second World War",
190
  "United States", "USA; US; America; American",
191
  "", "", "", ""
192
  ]
193
 
194
- example2 = [
195
  "The University of Oxford is located in Oxford, England. Students from around the world study at this prestigious institution.",
196
  "University", "university; institution; college",
197
  "Oxford", "oxford",
198
- "England", "england; English",
199
  "Student", "student; students; pupils",
200
  "", ""
201
  ]
202
 
203
- gr.Examples(
204
- examples=[example1, example2],
205
- inputs=[text_input, primary1, synonyms1, primary2, synonyms2, primary3, synonyms3, primary4, synonyms4, primary5, synonyms5],
206
- label="Click an example to try it out"
207
- )
 
 
 
 
 
 
 
 
 
 
208
 
209
  # Button functions
210
  find_btn.click(
211
  fn=process_text,
212
  inputs=[text_input, primary1, synonyms1, primary2, synonyms2, primary3, synonyms3, primary4, synonyms4, primary5, synonyms5],
213
- outputs=[results_output, highlighted_output, copy_output]
214
  )
215
 
216
- def clear_all():
217
- return "", "", "", "", "", "", "", "", "", "", "", "", "", ""
 
 
218
 
219
- clear_btn.click(
220
- fn=clear_all,
221
  outputs=[text_input, primary1, synonyms1, primary2, synonyms2, primary3, synonyms3, primary4, synonyms4, primary5, synonyms5, results_output, highlighted_output, copy_output]
222
  )
223
 
 
 
 
 
 
 
 
 
 
 
224
  # Instructions
225
  gr.Markdown("""
226
  ## Format Guide
@@ -254,8 +186,4 @@ def create_interface():
254
  </div>
255
  """)
256
 
257
- return demo
258
-
259
- if __name__ == "__main__":
260
- demo = create_interface()
261
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # Create the Gradio interface
2
  def create_interface():
3
  with gr.Blocks(title="Keyword Tagging Tool", theme=gr.themes.Soft()) as demo:
 
53
 
54
  with gr.Row():
55
  find_btn = gr.Button("Find Keywords", variant="primary", size="lg")
56
+ clear_dict_btn = gr.Button("Clear Dictionary", size="lg", variant="secondary")
57
+ clear_all_btn = gr.Button("Clear All", size="lg")
58
 
59
  with gr.Row():
60
  results_output = gr.Markdown(label="Results Summary")
 
62
  with gr.Row():
63
  highlighted_output = gr.HTML(label="Text with Highlighted Keywords")
64
 
65
+ with gr.Row():
66
+ results_table_output = gr.HTML(label="Detailed Results Table")
67
+
68
  with gr.Row():
69
  copy_output = gr.Textbox(
70
  label="Keywords for Spreadsheet (copy this text)",
 
72
  max_lines=5
73
  )
74
 
75
+ # Examples section with improved layout
76
  gr.Markdown("### Examples")
77
+ gr.Markdown("Click on an example to load it into the tool:")
78
+
79
+ # Example 1
80
+ with gr.Row():
81
+ with gr.Column(scale=3):
82
+ gr.Markdown("**Example 1: WWII & POW Text**")
83
+ gr.Markdown("*During World War II, many prisoners of war were held in camps across Europe...*")
84
+ with gr.Column(scale=1):
85
+ example1_btn = gr.Button("Load Example 1", variant="secondary", size="sm")
86
+
87
+ # Example 2
88
+ with gr.Row():
89
+ with gr.Column(scale=3):
90
+ gr.Markdown("**Example 2: University Text**")
91
+ gr.Markdown("*The University of Oxford is located in Oxford, England...*")
92
+ with gr.Column(scale=1):
93
+ example2_btn = gr.Button("Load Example 2", variant="secondary", size="sm")
94
 
95
+ # Define example data
96
+ example1_data = [
97
  "During World War II, many prisoners of war were held in camps across Europe. The Geneva Convention established rules for POW treatment. American soldiers and British troops were among those captured.",
98
  "Prisoner of War", "POW; POWs; prisoner of war",
99
+ "World War II", "WWII; Second World War",
100
  "United States", "USA; US; America; American",
101
  "", "", "", ""
102
  ]
103
 
104
+ example2_data = [
105
  "The University of Oxford is located in Oxford, England. Students from around the world study at this prestigious institution.",
106
  "University", "university; institution; college",
107
  "Oxford", "oxford",
108
+ "England", "england; English",
109
  "Student", "student; students; pupils",
110
  "", ""
111
  ]
112
 
113
+ # Clear functions
114
+ def clear_dictionary_only():
115
+ """Clear only the keyword dictionary fields"""
116
+ return "", "", "", "", "", "", "", "", "", ""
117
+
118
+ def clear_everything():
119
+ """Clear all fields including text input"""
120
+ return "", "", "", "", "", "", "", "", "", "", "", "", "", ""
121
+
122
+ # Example loading functions
123
+ def load_example1():
124
+ return example1_data
125
+
126
+ def load_example2():
127
+ return example2_data
128
 
129
  # Button functions
130
  find_btn.click(
131
  fn=process_text,
132
  inputs=[text_input, primary1, synonyms1, primary2, synonyms2, primary3, synonyms3, primary4, synonyms4, primary5, synonyms5],
133
+ outputs=[results_output, highlighted_output, results_table_output, copy_output]
134
  )
135
 
136
+ clear_dict_btn.click(
137
+ fn=clear_dictionary_only,
138
+ outputs=[primary1, synonyms1, primary2, synonyms2, primary3, synonyms3, primary4, synonyms4, primary5, synonyms5]
139
+ )
140
 
141
+ clear_all_btn.click(
142
+ fn=clear_everything,
143
  outputs=[text_input, primary1, synonyms1, primary2, synonyms2, primary3, synonyms3, primary4, synonyms4, primary5, synonyms5, results_output, highlighted_output, copy_output]
144
  )
145
 
146
+ example1_btn.click(
147
+ fn=load_example1,
148
+ outputs=[text_input, primary1, synonyms1, primary2, synonyms2, primary3, synonyms3, primary4, synonyms4, primary5, synonyms5]
149
+ )
150
+
151
+ example2_btn.click(
152
+ fn=load_example2,
153
+ outputs=[text_input, primary1, synonyms1, primary2, synonyms2, primary3, synonyms3, primary4, synonyms4, primary5, synonyms5]
154
+ )
155
+
156
  # Instructions
157
  gr.Markdown("""
158
  ## Format Guide
 
186
  </div>
187
  """)
188
 
189
+ return demo