Update app.py
Browse files
app.py
CHANGED
|
@@ -1,115 +1,3 @@
|
|
| 1 |
-
import gradio as gr
|
| 2 |
-
import re
|
| 3 |
-
import pandas as pd
|
| 4 |
-
|
| 5 |
-
def build_keywords_dict(primary_inputs, synonym_inputs):
    """Build a keyword dictionary from parallel primary and synonym inputs.

    Args:
        primary_inputs: Iterable of primary keyword strings. Entries that are
            None, empty, or whitespace-only are skipped.
        synonym_inputs: Iterable of semicolon-separated synonym strings,
            paired positionally with ``primary_inputs``. Entries may be None
            or empty.

    Returns:
        A dict mapping each stripped primary keyword to a list of its
        stripped, non-empty synonyms in first-seen order. When the same
        primary keyword appears in more than one row, the synonyms of all
        those rows are merged instead of the last row replacing earlier ones.
        Duplicate synonyms within a group are kept only once.
    """
    keywords_dict = {}

    for primary, synonyms in zip(primary_inputs, synonym_inputs):
        # Only process rows that actually carry a primary keyword.
        if not primary or not primary.strip():
            continue

        # setdefault keeps synonyms gathered from an earlier row that used
        # the same primary keyword.
        merged = keywords_dict.setdefault(primary.strip(), [])

        for synonym in (synonyms or '').split(';'):
            synonym = synonym.strip()
            if synonym and synonym not in merged:
                merged.append(synonym)

    return keywords_dict
|
| 19 |
-
|
| 20 |
-
def _term_in_text(term, text):
    """Return True when ``term`` occurs in ``text`` as a standalone term."""
    if not term:
        return False

    if term.upper() == "US":
        # "US" is matched case-sensitively so the pronoun "us" is not a hit.
        # Lookarounds (rather than literal spaces) let "US," "US." "(US)" and
        # a text that is exactly "US" match as well.
        return re.search(r'(?<!\w)US(?!\w)', text) is not None

    # (?<!\w) / (?!\w) behave like \b for terms that start and end with a
    # word character, but still work for terms such as "C++" whose edges are
    # punctuation (where \b can never match).
    pattern = r'(?<!\w)' + re.escape(term) + r'(?!\w)'
    return re.search(pattern, text, re.IGNORECASE) is not None


def find_keywords(story, keywords_dict):
    """Find keyword groups that occur in the story text.

    Args:
        story: Text to search. Anything that is not a non-empty string
            yields an empty result.
        keywords_dict: Mapping of primary keyword -> list of synonyms.

    Returns:
        A '; '-joined, sorted string containing, for every group in which the
        primary keyword or any synonym was found, the primary keyword and
        ALL of that group's synonyms. Returns '' when nothing matches.
    """
    if not story or not isinstance(story, str):
        return ''

    found_keywords = set()

    for primary_keyword, synonyms in keywords_dict.items():
        # A hit on any term of the group selects the whole group.
        if any(_term_in_text(term, story) for term in (primary_keyword, *synonyms)):
            found_keywords.add(primary_keyword)
            found_keywords.update(synonyms)

    return '; '.join(sorted(found_keywords))
|
| 55 |
-
|
| 56 |
-
def highlight_keywords_in_text(text, keywords_list):
    """Create an HTML string in which every keyword occurrence is highlighted.

    The text is scanned once with a single combined pattern, so markup
    inserted for one keyword can never be re-matched by another keyword
    (e.g. a keyword "color" hitting ``background-color`` in an earlier span),
    and case variants of the same keyword do not produce nested spans.

    Args:
        text: Plain text to render. A falsy value is returned unchanged.
        keywords_list: Keywords to highlight. Matching is case-insensitive
            and limited to standalone terms. Empty or whitespace-only
            entries are ignored.

    Returns:
        HTML in which the non-keyword parts of ``text`` are HTML-escaped and
        each match is wrapped in a coloured ``<span>``. The matched text keeps
        its original casing. The colour is chosen by the keyword's position
        in ``keywords_list``, cycling through a fixed palette.
    """
    # Local import keeps this function self-contained; the escaping matters
    # because the result is rendered as HTML while the text is user input.
    import html

    if not text:
        return text

    colors = ['#FF6B6B', '#4ECDC4', '#45B7D1', '#F9CA24', '#6C5CE7',
              '#A0E7E5', '#FD79A8', '#55A3FF', '#00B894', '#E17055']

    # Remember each keyword's original index: it determines the colour.
    entries = [
        (index, keyword)
        for index, keyword in enumerate(keywords_list or [])
        if keyword and keyword.strip()
    ]
    if not entries:
        return html.escape(text)

    # Longest first so "New York City" wins over "New York" where both fit.
    # The sort is stable, so equal-length keywords keep their list order.
    entries.sort(key=lambda entry: len(entry[1]), reverse=True)

    # One capture group per keyword: match.lastindex identifies which
    # keyword matched, independent of case-folding details.
    alternatives = '|'.join('(' + re.escape(keyword) + ')' for _, keyword in entries)
    pattern = re.compile(r'(?<!\w)(?:' + alternatives + r')(?!\w)', re.IGNORECASE)

    parts = []
    cursor = 0
    for match in pattern.finditer(text):
        parts.append(html.escape(text[cursor:match.start()]))
        original_index = entries[match.lastindex - 1][0]
        color = colors[original_index % len(colors)]
        parts.append(
            f'<span style="background-color: {color}; padding: 2px 4px; border-radius: 3px; color: white; font-weight: bold;">{html.escape(match.group(0))}</span>'
        )
        cursor = match.end()
    parts.append(html.escape(text[cursor:]))

    return ''.join(parts)
|
| 72 |
-
|
| 73 |
-
def process_text(input_text, primary1, synonyms1, primary2, synonyms2, primary3, synonyms3, primary4, synonyms4, primary5, synonyms5):
    """Run keyword tagging on the input text using up to five keyword groups.

    Args:
        input_text: Text to analyse. None, non-string, or whitespace-only
            input produces the "please enter some text" response instead of
            raising.
        primary1..primary5: Primary keyword of each group (may be empty).
        synonyms1..synonyms5: Semicolon-separated synonyms of each group.

    Returns:
        A 3-tuple ``(results_markdown, highlighted_html, keywords_for_copy)``.
        On the early-exit paths the first element is a plain message and the
        third a short status string.
    """
    # Local import keeps this function self-contained.
    import html

    if not isinstance(input_text, str) or not input_text.strip():
        return "Please enter some text to analyze", "", "No keywords found"

    # Build keywords dictionary from separate inputs
    primary_inputs = [primary1, primary2, primary3, primary4, primary5]
    synonym_inputs = [synonyms1, synonyms2, synonyms3, synonyms4, synonyms5]
    keywords_dict = build_keywords_dict(primary_inputs, synonym_inputs)

    if not keywords_dict:
        return "Please enter at least one primary keyword", "", "No keyword dictionary provided"

    # Find keywords in the text
    found_keywords_str = find_keywords(input_text, keywords_dict)

    if not found_keywords_str:
        # The second element is rendered as HTML, so the user's text is
        # escaped rather than passed through raw.
        return (
            f"No keywords found in the text.\n\nKeyword dictionary loaded: {len(keywords_dict)} primary keywords",
            html.escape(input_text),
            "No matches found",
        )

    # Create highlighted version
    keywords_list = found_keywords_str.split('; ')
    highlighted_html = highlight_keywords_in_text(input_text, keywords_list)

    # Every primary keyword counts as one searchable term plus its synonyms.
    total_terms = sum(len(synonyms) + 1 for synonyms in keywords_dict.values())

    # Create results summary
    results_summary = f"""
## Results Summary

**Keywords Found:** {len(keywords_list)}
**Matched Keywords:** {found_keywords_str}

**Keyword Dictionary Stats:**
- Primary keywords loaded: {len(keywords_dict)}
- Total searchable terms: {total_terms}

**Copy this result to your spreadsheet:**
{found_keywords_str}
"""

    return results_summary, highlighted_html, found_keywords_str
|
| 112 |
-
|
| 113 |
# Create the Gradio interface
|
| 114 |
def create_interface():
|
| 115 |
with gr.Blocks(title="Keyword Tagging Tool", theme=gr.themes.Soft()) as demo:
|
|
@@ -165,7 +53,8 @@ def create_interface():
|
|
| 165 |
|
| 166 |
with gr.Row():
|
| 167 |
find_btn = gr.Button("Find Keywords", variant="primary", size="lg")
|
| 168 |
-
|
|
|
|
| 169 |
|
| 170 |
with gr.Row():
|
| 171 |
results_output = gr.Markdown(label="Results Summary")
|
|
@@ -173,6 +62,9 @@ def create_interface():
|
|
| 173 |
with gr.Row():
|
| 174 |
highlighted_output = gr.HTML(label="Text with Highlighted Keywords")
|
| 175 |
|
|
|
|
|
|
|
|
|
|
| 176 |
with gr.Row():
|
| 177 |
copy_output = gr.Textbox(
|
| 178 |
label="Keywords for Spreadsheet (copy this text)",
|
|
@@ -180,47 +72,87 @@ def create_interface():
|
|
| 180 |
max_lines=5
|
| 181 |
)
|
| 182 |
|
| 183 |
-
# Examples section
|
| 184 |
gr.Markdown("### Examples")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 185 |
|
| 186 |
-
|
|
|
|
| 187 |
"During World War II, many prisoners of war were held in camps across Europe. The Geneva Convention established rules for POW treatment. American soldiers and British troops were among those captured.",
|
| 188 |
"Prisoner of War", "POW; POWs; prisoner of war",
|
| 189 |
-
"World War II", "WWII; Second World War",
|
| 190 |
"United States", "USA; US; America; American",
|
| 191 |
"", "", "", ""
|
| 192 |
]
|
| 193 |
|
| 194 |
-
|
| 195 |
"The University of Oxford is located in Oxford, England. Students from around the world study at this prestigious institution.",
|
| 196 |
"University", "university; institution; college",
|
| 197 |
"Oxford", "oxford",
|
| 198 |
-
"England", "england; English",
|
| 199 |
"Student", "student; students; pupils",
|
| 200 |
"", ""
|
| 201 |
]
|
| 202 |
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 208 |
|
| 209 |
# Button functions
|
| 210 |
find_btn.click(
|
| 211 |
fn=process_text,
|
| 212 |
inputs=[text_input, primary1, synonyms1, primary2, synonyms2, primary3, synonyms3, primary4, synonyms4, primary5, synonyms5],
|
| 213 |
-
outputs=[results_output, highlighted_output, copy_output]
|
| 214 |
)
|
| 215 |
|
| 216 |
-
|
| 217 |
-
|
|
|
|
|
|
|
| 218 |
|
| 219 |
-
|
| 220 |
-
fn=
|
| 221 |
outputs=[text_input, primary1, synonyms1, primary2, synonyms2, primary3, synonyms3, primary4, synonyms4, primary5, synonyms5, results_output, highlighted_output, copy_output]
|
| 222 |
)
|
| 223 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 224 |
# Instructions
|
| 225 |
gr.Markdown("""
|
| 226 |
## Format Guide
|
|
@@ -254,8 +186,4 @@ def create_interface():
|
|
| 254 |
</div>
|
| 255 |
""")
|
| 256 |
|
| 257 |
-
return demo
|
| 258 |
-
|
| 259 |
-
if __name__ == "__main__":
|
| 260 |
-
demo = create_interface()
|
| 261 |
-
demo.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# Create the Gradio interface
|
| 2 |
def create_interface():
|
| 3 |
with gr.Blocks(title="Keyword Tagging Tool", theme=gr.themes.Soft()) as demo:
|
|
|
|
| 53 |
|
| 54 |
with gr.Row():
|
| 55 |
find_btn = gr.Button("Find Keywords", variant="primary", size="lg")
|
| 56 |
+
clear_dict_btn = gr.Button("Clear Dictionary", size="lg", variant="secondary")
|
| 57 |
+
clear_all_btn = gr.Button("Clear All", size="lg")
|
| 58 |
|
| 59 |
with gr.Row():
|
| 60 |
results_output = gr.Markdown(label="Results Summary")
|
|
|
|
| 62 |
with gr.Row():
|
| 63 |
highlighted_output = gr.HTML(label="Text with Highlighted Keywords")
|
| 64 |
|
| 65 |
+
with gr.Row():
|
| 66 |
+
results_table_output = gr.HTML(label="Detailed Results Table")
|
| 67 |
+
|
| 68 |
with gr.Row():
|
| 69 |
copy_output = gr.Textbox(
|
| 70 |
label="Keywords for Spreadsheet (copy this text)",
|
|
|
|
| 72 |
max_lines=5
|
| 73 |
)
|
| 74 |
|
| 75 |
+
# Examples section with improved layout
|
| 76 |
gr.Markdown("### Examples")
|
| 77 |
+
gr.Markdown("Click on an example to load it into the tool:")
|
| 78 |
+
|
| 79 |
+
# Example 1
|
| 80 |
+
with gr.Row():
|
| 81 |
+
with gr.Column(scale=3):
|
| 82 |
+
gr.Markdown("**Example 1: WWII & POW Text**")
|
| 83 |
+
gr.Markdown("*During World War II, many prisoners of war were held in camps across Europe...*")
|
| 84 |
+
with gr.Column(scale=1):
|
| 85 |
+
example1_btn = gr.Button("Load Example 1", variant="secondary", size="sm")
|
| 86 |
+
|
| 87 |
+
# Example 2
|
| 88 |
+
with gr.Row():
|
| 89 |
+
with gr.Column(scale=3):
|
| 90 |
+
gr.Markdown("**Example 2: University Text**")
|
| 91 |
+
gr.Markdown("*The University of Oxford is located in Oxford, England...*")
|
| 92 |
+
with gr.Column(scale=1):
|
| 93 |
+
example2_btn = gr.Button("Load Example 2", variant="secondary", size="sm")
|
| 94 |
|
| 95 |
+
# Define example data
|
| 96 |
+
example1_data = [
|
| 97 |
"During World War II, many prisoners of war were held in camps across Europe. The Geneva Convention established rules for POW treatment. American soldiers and British troops were among those captured.",
|
| 98 |
"Prisoner of War", "POW; POWs; prisoner of war",
|
| 99 |
+
"World War II", "WWII; Second World War",
|
| 100 |
"United States", "USA; US; America; American",
|
| 101 |
"", "", "", ""
|
| 102 |
]
|
| 103 |
|
| 104 |
+
example2_data = [
|
| 105 |
"The University of Oxford is located in Oxford, England. Students from around the world study at this prestigious institution.",
|
| 106 |
"University", "university; institution; college",
|
| 107 |
"Oxford", "oxford",
|
| 108 |
+
"England", "england; English",
|
| 109 |
"Student", "student; students; pupils",
|
| 110 |
"", ""
|
| 111 |
]
|
| 112 |
|
| 113 |
+
# Clear functions
|
| 114 |
+
def clear_dictionary_only():
    """Clear only the keyword dictionary fields."""
    # One empty string per dictionary textbox: 5 primary + 5 synonym fields.
    return ("",) * 10


def clear_everything():
    """Clear all fields including the text input and every output."""
    # 1 text input + 10 dictionary fields + 4 outputs (summary, highlighted
    # text, detailed results table, copy box). The results table is included
    # so "Clear All" does not leave a stale table on screen.
    return ("",) * 15


# Example loading functions
def load_example1():
    """Return the values for Example 1 (text followed by the ten dictionary fields)."""
    return example1_data


def load_example2():
    """Return the values for Example 2 (text followed by the ten dictionary fields)."""
    return example2_data


# Button functions
# NOTE(review): four outputs are wired here, so process_text must return four
# values; the removed process_text visible in this diff returned three. Confirm.
find_btn.click(
    fn=process_text,
    inputs=[text_input, primary1, synonyms1, primary2, synonyms2, primary3, synonyms3, primary4, synonyms4, primary5, synonyms5],
    outputs=[results_output, highlighted_output, results_table_output, copy_output]
)

clear_dict_btn.click(
    fn=clear_dictionary_only,
    outputs=[primary1, synonyms1, primary2, synonyms2, primary3, synonyms3, primary4, synonyms4, primary5, synonyms5]
)

clear_all_btn.click(
    fn=clear_everything,
    outputs=[text_input, primary1, synonyms1, primary2, synonyms2, primary3, synonyms3, primary4, synonyms4, primary5, synonyms5, results_output, highlighted_output, results_table_output, copy_output]
)

example1_btn.click(
    fn=load_example1,
    outputs=[text_input, primary1, synonyms1, primary2, synonyms2, primary3, synonyms3, primary4, synonyms4, primary5, synonyms5]
)

example2_btn.click(
    fn=load_example2,
    outputs=[text_input, primary1, synonyms1, primary2, synonyms2, primary3, synonyms3, primary4, synonyms4, primary5, synonyms5]
)
|
| 155 |
+
|
| 156 |
# Instructions
|
| 157 |
gr.Markdown("""
|
| 158 |
## Format Guide
|
|
|
|
| 186 |
</div>
|
| 187 |
""")
|
| 188 |
|
| 189 |
+
return demo
|
|
|
|
|
|
|
|
|
|
|
|