mk1985 committed on
Commit
31914d5
·
verified ·
1 Parent(s): f3e456d

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +67 -29
app.py CHANGED
@@ -8,9 +8,10 @@ import anthropic
8
  import google.generativeai as genai
9
  import gradio as gr
10
  from gliner import GLiNER
11
- import traceback
12
- from collections import defaultdict, Counter # Import Counter for counting
13
  import re
 
14
 
15
  # 🧠 Supported models and their providers
16
  MODEL_OPTIONS = {
@@ -72,9 +73,9 @@ def generate_from_prompt(prompt, provider, key_dict):
72
  return ""
73
 
74
  TRADITIONAL_NER_LABELS = [
75
- "Person", "Organisation", "Country / City / State", "Location",
76
- "Nationality or Group", "Date", "Event", "Law / Legal Document",
77
- "Product", "Facility", "Work of Art", "Language", "Time", "Percentage",
78
  "Money / Currency", "Quantity / Measurement", "Ordinal Number", "Cardinal Number"
79
  ]
80
 
@@ -83,6 +84,22 @@ MAX_CATEGORIES = 8
83
  with gr.Blocks(title="Historical Text Analysis Tool", css=".prose { word-break: break-word; }") as demo:
84
  gr.Markdown("# Historical Text Analysis Tool")
85
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
  gr.Markdown("## Step 1: Get Keyword Ideas")
87
  gr.Markdown("Start by entering a topic. The AI will populate a research framework with suggested categories and keywords to guide your analysis.")
88
  with gr.Row():
@@ -99,21 +116,26 @@ with gr.Blocks(title="Historical Text Analysis Tool", css=".prose { word-break:
99
  gr.Markdown("The AI's suggestions will appear below. Build your final list of keywords, then paste your text to find all the matches.")
100
 
101
  gr.Markdown("### 1. Review AI-Suggested Keywords")
102
- gr.Markdown("Click on a category to see its keywords. Uncheck any you do not want, or use the 'Deselect All' button for that category.")
103
 
104
- dynamic_components = []
105
  with gr.Column():
106
  for i in range(MAX_CATEGORIES):
107
  with gr.Accordion(f"Category {i+1}", visible=False) as acc:
108
  with gr.Row():
109
  cg = gr.CheckboxGroup(label="Keywords", interactive=True, container=False, scale=4)
 
 
110
  deselect_btn = gr.Button("Deselect All", size="sm", scale=1, min_width=80)
111
- dynamic_components.append((acc, cg, deselect_btn))
112
 
113
  gr.Markdown("### 2. Include Standard Keywords (Optional)")
114
  with gr.Group():
115
  ner_output = gr.CheckboxGroup(choices=TRADITIONAL_NER_LABELS, value=TRADITIONAL_NER_LABELS, label="Standard Search Terms", info="Common categories like people, places, and specific organizations.")
116
- deselect_ner_btn = gr.Button("Deselect All", size="sm")
 
 
 
117
 
118
  gr.Markdown("### 3. Add Your Own Keywords (Optional)")
119
  with gr.Group():
@@ -133,7 +155,6 @@ with gr.Blocks(title="Historical Text Analysis Tool", css=".prose { word-break:
133
  debug_output = gr.Textbox(label="Extraction Log", interactive=False, lines=8)
134
 
135
  # --- Backend Functions ---
136
- import os # Make sure this import is at the top of your file
137
 
138
  def handle_generate(topic, provider, openai_k, anthropic_k, google_k):
139
  # This function provides instant "working..." feedback
@@ -170,24 +191,25 @@ with gr.Blocks(title="Historical Text Analysis Tool", css=".prose { word-break:
170
  updates = {}
171
  categories = list(framework.items())
172
  for i in range(MAX_CATEGORIES):
173
- accordion_comp, checkbox_comp, button_comp = dynamic_components[i]
174
  if i < len(categories):
175
  category, entities = categories[i]
176
  sorted_entities = sorted(list(set(entities)))
177
  updates[accordion_comp] = gr.update(label=category, visible=True)
178
  updates[checkbox_comp] = gr.update(choices=sorted_entities, value=sorted_entities, visible=True)
179
- updates[button_comp] = gr.update(visible=True)
 
180
  else:
181
  updates[accordion_comp] = gr.update(visible=False)
182
  updates[checkbox_comp] = gr.update(visible=False)
183
- updates[button_comp] = gr.update(visible=False)
 
184
  updates[generate_btn] = gr.update(value="Suggest Categories and Keywords", interactive=True)
185
  yield updates
186
  except Exception as e:
187
  yield {generate_btn: gr.update(value="Suggest Categories and Keywords", interactive=True)}
188
  raise gr.Error(str(e))
189
 
190
- # --- THIS IS THE UPDATED FUNCTION ---
191
  def match_entities(text, ner_labels, custom_label_text, threshold, *selected_keywords):
192
  debug_info = []
193
  if gliner_model is None:
@@ -221,19 +243,16 @@ with gr.Blocks(title="Historical Text Analysis Tool", css=".prose { word-break:
221
 
222
  highlighted_entities = [{"start": ent["start"], "end": ent["end"], "entity": ent["label"]} for ent in unique_entities]
223
 
224
- # --- NEW LOGIC FOR AGGREGATED, TABLE-BASED RESULTS ---
225
- # 1. Count occurrences of each unique phrase (case-insensitively)
226
  aggregated_matches = defaultdict(Counter)
227
- original_casing_map = {} # To store the original casing of the first instance of a phrase
228
 
229
  for ent in unique_entities:
230
  match_text = text[ent['start']:ent['end']]
231
  match_text_lower = match_text.lower()
232
 
233
  aggregated_matches[ent['label']][match_text_lower] += 1
234
- original_casing_map.setdefault(match_text_lower, match_text) # Store original casing
235
 
236
- # 2. Build the new Markdown string with tables
237
  markdown_string = ""
238
  for label, counter in sorted(aggregated_matches.items()):
239
  total_matches = sum(counter.values())
@@ -242,7 +261,6 @@ with gr.Blocks(title="Historical Text Analysis Tool", css=".prose { word-break:
242
  markdown_string += "| Found Phrase | Occurrences |\n"
243
  markdown_string += "|--------------|-------------|\n"
244
 
245
- # Sort phrases by most frequent first
246
  for phrase_lower, count in counter.most_common():
247
  original_phrase = original_casing_map[phrase_lower]
248
  markdown_string += f"| {original_phrase} | {count} |\n"
@@ -254,22 +272,42 @@ with gr.Blocks(title="Historical Text Analysis Tool", css=".prose { word-break:
254
  return {"text": text, "entities": highlighted_entities}, markdown_string, "\n".join(debug_info)
255
 
256
  # --- Wire up UI events ---
257
- generate_btn.click(
258
- fn=handle_generate,
259
- inputs=[topic, provider, openai_key, anthropic_key, google_key],
260
- outputs=[generate_btn] + [comp for pair in dynamic_components for comp in pair]
261
- )
262
 
 
 
 
 
 
 
 
 
 
 
 
263
  def deselect_all():
264
  return gr.update(value=[])
 
 
 
 
 
 
 
 
 
265
  deselect_ner_btn.click(fn=deselect_all, inputs=None, outputs=[ner_output])
266
- for _, cg, btn in dynamic_components:
267
- btn.click(fn=deselect_all, inputs=None, outputs=[cg])
268
 
 
 
 
 
 
 
269
  match_btn.click(
270
  fn=match_entities,
271
- inputs=[text_input, ner_output, custom_labels, threshold_slider] + [cg for acc, cg, btn in dynamic_components],
272
- outputs=[matched_output, detailed_results_output, debug_output]
 
273
  )
274
 
275
  demo.launch(share=True, debug=True)
 
8
  import google.generativeai as genai
9
  import gradio as gr
10
  from gliner import GLiNER
11
+ import traceback
12
+ from collections import defaultdict, Counter
13
  import re
14
+ import os # Make sure this import is at the top of your file
15
 
16
  # 🧠 Supported models and their providers
17
  MODEL_OPTIONS = {
 
73
  return ""
74
 
75
  TRADITIONAL_NER_LABELS = [
76
+ "Person", "Organisation", "Country / City / State", "Location",
77
+ "Nationality or Group", "Date", "Event", "Law / Legal Document",
78
+ "Product", "Facility", "Work of Art", "Language", "Time", "Percentage",
79
  "Money / Currency", "Quantity / Measurement", "Ordinal Number", "Cardinal Number"
80
  ]
81
 
 
84
  with gr.Blocks(title="Historical Text Analysis Tool", css=".prose { word-break: break-word; }") as demo:
85
  gr.Markdown("# Historical Text Analysis Tool")
86
 
87
+ # --- NEW: Added introductory text ---
88
+ gr.Markdown(
89
+ """
90
+ **Welcome! This tool uses two different kinds of AI to help you quickly analyze documents.**
91
+
92
+ 1. **The "Creative Assistant" (Step 1: OpenAI, Anthropic, Google):**
93
+ When you enter a topic, this AI acts like a research assistant. It brainstorms and **suggests** useful categories and keywords for your analysis. It's the idea generator.
94
+
95
+ 2. **The "Expert Searcher" (Step 2: GLiNER):**
96
+ After you've chosen your keywords, this specialized AI meticulously **finds** every single match in the text you provide. It's a fast and precise search tool that runs locally.
97
+
98
+ **Pro Tip:** After the analysis, you can manually add or correct a label! In the "Highlighted Text" tab, just click on any word or phrase, type your new label, and press Enter.
99
+ """
100
+ )
101
+ gr.Markdown("---")
102
+
103
  gr.Markdown("## Step 1: Get Keyword Ideas")
104
  gr.Markdown("Start by entering a topic. The AI will populate a research framework with suggested categories and keywords to guide your analysis.")
105
  with gr.Row():
 
116
  gr.Markdown("The AI's suggestions will appear below. Build your final list of keywords, then paste your text to find all the matches.")
117
 
118
  gr.Markdown("### 1. Review AI-Suggested Keywords")
119
+ gr.Markdown("Click on a category to see its keywords. Use the buttons to select or deselect all keywords for that category.")
120
 
121
+ category_components = []
122
  with gr.Column():
123
  for i in range(MAX_CATEGORIES):
124
  with gr.Accordion(f"Category {i+1}", visible=False) as acc:
125
  with gr.Row():
126
  cg = gr.CheckboxGroup(label="Keywords", interactive=True, container=False, scale=4)
127
+ # --- NEW: Added Select All button for categories ---
128
+ select_btn = gr.Button("Select All", size="sm", scale=1, min_width=80)
129
  deselect_btn = gr.Button("Deselect All", size="sm", scale=1, min_width=80)
130
+ category_components.append((acc, cg, select_btn, deselect_btn))
131
 
132
  gr.Markdown("### 2. Include Standard Keywords (Optional)")
133
  with gr.Group():
134
  ner_output = gr.CheckboxGroup(choices=TRADITIONAL_NER_LABELS, value=TRADITIONAL_NER_LABELS, label="Standard Search Terms", info="Common categories like people, places, and specific organizations.")
135
+ # --- NEW: Added Select All button for standard keywords ---
136
+ with gr.Row():
137
+ select_ner_btn = gr.Button("Select All", size="sm")
138
+ deselect_ner_btn = gr.Button("Deselect All", size="sm")
139
 
140
  gr.Markdown("### 3. Add Your Own Keywords (Optional)")
141
  with gr.Group():
 
155
  debug_output = gr.Textbox(label="Extraction Log", interactive=False, lines=8)
156
 
157
  # --- Backend Functions ---
 
158
 
159
  def handle_generate(topic, provider, openai_k, anthropic_k, google_k):
160
  # This function provides instant "working..." feedback
 
191
  updates = {}
192
  categories = list(framework.items())
193
  for i in range(MAX_CATEGORIES):
194
+ accordion_comp, checkbox_comp, sel_btn, desel_btn = category_components[i]
195
  if i < len(categories):
196
  category, entities = categories[i]
197
  sorted_entities = sorted(list(set(entities)))
198
  updates[accordion_comp] = gr.update(label=category, visible=True)
199
  updates[checkbox_comp] = gr.update(choices=sorted_entities, value=sorted_entities, visible=True)
200
+ updates[sel_btn] = gr.update(visible=True)
201
+ updates[desel_btn] = gr.update(visible=True)
202
  else:
203
  updates[accordion_comp] = gr.update(visible=False)
204
  updates[checkbox_comp] = gr.update(visible=False)
205
+ updates[sel_btn] = gr.update(visible=False)
206
+ updates[desel_btn] = gr.update(visible=False)
207
  updates[generate_btn] = gr.update(value="Suggest Categories and Keywords", interactive=True)
208
  yield updates
209
  except Exception as e:
210
  yield {generate_btn: gr.update(value="Suggest Categories and Keywords", interactive=True)}
211
  raise gr.Error(str(e))
212
 
 
213
  def match_entities(text, ner_labels, custom_label_text, threshold, *selected_keywords):
214
  debug_info = []
215
  if gliner_model is None:
 
243
 
244
  highlighted_entities = [{"start": ent["start"], "end": ent["end"], "entity": ent["label"]} for ent in unique_entities]
245
 
 
 
246
  aggregated_matches = defaultdict(Counter)
247
+ original_casing_map = {}
248
 
249
  for ent in unique_entities:
250
  match_text = text[ent['start']:ent['end']]
251
  match_text_lower = match_text.lower()
252
 
253
  aggregated_matches[ent['label']][match_text_lower] += 1
254
+ original_casing_map.setdefault(match_text_lower, match_text)
255
 
 
256
  markdown_string = ""
257
  for label, counter in sorted(aggregated_matches.items()):
258
  total_matches = sum(counter.values())
 
261
  markdown_string += "| Found Phrase | Occurrences |\n"
262
  markdown_string += "|--------------|-------------|\n"
263
 
 
264
  for phrase_lower, count in counter.most_common():
265
  original_phrase = original_casing_map[phrase_lower]
266
  markdown_string += f"| {original_phrase} | {count} |\n"
 
272
  return {"text": text, "entities": highlighted_entities}, markdown_string, "\n".join(debug_info)
273
 
274
  # --- Wire up UI events ---
 
 
 
 
 
275
 
276
+ # NEW: Handle "Enter" key press on the topic textbox and show progress bar
277
+ submit_event_args = {
278
+ "fn": handle_generate,
279
+ "inputs": [topic, provider, openai_key, anthropic_key, google_key],
280
+ "outputs": [generate_btn] + [comp for pair in category_components for comp in pair],
281
+ "show_progress": "full"
282
+ }
283
+ generate_btn.click(**submit_event_args)
284
+ topic.submit(**submit_event_args)
285
+
286
+ # --- NEW: Helper functions for select/deselect ---
287
  def deselect_all():
288
  return gr.update(value=[])
289
+
290
+ def select_all_ner():
291
+ return gr.update(value=TRADITIONAL_NER_LABELS)
292
+
293
+ def select_all_from_group(checkbox_group_state):
294
+ return gr.update(value=checkbox_group_state.choices)
295
+
296
+ # --- NEW: Wire up select/deselect for standard keywords ---
297
+ select_ner_btn.click(fn=select_all_ner, inputs=None, outputs=[ner_output])
298
  deselect_ner_btn.click(fn=deselect_all, inputs=None, outputs=[ner_output])
 
 
299
 
300
+ # --- UPDATED: Wire up select/deselect for dynamic categories ---
301
+ for acc, cg, select_btn, deselect_btn in category_components:
302
+ select_btn.click(fn=select_all_from_group, inputs=[cg], outputs=[cg])
303
+ deselect_btn.click(fn=deselect_all, inputs=None, outputs=[cg])
304
+
305
+ # NEW: Show progress bar for the matching process
306
  match_btn.click(
307
  fn=match_entities,
308
+ inputs=[text_input, ner_output, custom_labels, threshold_slider] + [cg for acc, cg, sel, desel in category_components],
309
+ outputs=[matched_output, detailed_results_output, debug_output],
310
+ show_progress="full"
311
  )
312
 
313
  demo.launch(share=True, debug=True)