Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
# 📚 Install dependencies
|
| 2 |
# Make sure to run this in your environment if you haven't already
|
| 3 |
-
# !pip install openai anthropic google-generativeai gradio transformers torch gliner numpy --quiet
|
| 4 |
|
| 5 |
# ⚙️ Imports
|
| 6 |
import openai
|
|
@@ -10,7 +10,9 @@ import gradio as gr
|
|
| 10 |
from gliner import GLiNER
|
| 11 |
from collections import defaultdict
|
| 12 |
import numpy as np
|
|
|
|
| 13 |
import os
|
|
|
|
| 14 |
|
| 15 |
# 🧠 Supported models and their providers
|
| 16 |
MODEL_OPTIONS = {
|
|
@@ -34,15 +36,12 @@ except Exception as e:
|
|
| 34 |
# 🧠 Prompt for the Conceptual AI to generate a research framework
|
| 35 |
FRAMEWORK_PROMPT_TEMPLATE = """
|
| 36 |
You are an expert research assistant specializing in history. For the provided topic: **"{topic}"**, your task is to generate a conceptual research framework.
|
| 37 |
-
|
| 38 |
**Instructions:**
|
| 39 |
1. Identify 4-6 high-level **Conceptual Categories** relevant to analyzing this historical topic (e.g., 'Key Figures', 'Core Ideologies', 'Significant Events').
|
| 40 |
2. For each category, list specific, searchable **Labels** that would appear in a primary or secondary source document.
|
| 41 |
3. **Crucial Rule for Labels:** Use concise, singular, and fundamental terms (e.g., use `Treaty` not `Diplomatic Treaties`). Use Title Case (e.g. `Working Class`).
|
| 42 |
-
|
| 43 |
**Output Format:**
|
| 44 |
Use Markdown. Each category must be a Level 3 Header (###), followed by a comma-separated list of its labels.
|
| 45 |
-
|
| 46 |
### Example Category: Political Actions
|
| 47 |
- Petition, Charter, Protest, Rally, Legislation
|
| 48 |
### Example Category: Social Groups
|
|
@@ -51,6 +50,7 @@ Use Markdown. Each category must be a Level 3 Header (###), followed by a comma-
|
|
| 51 |
|
| 52 |
# 🧠 Generator Function (The "Conceptual AI")
|
| 53 |
def generate_from_prompt(prompt, provider, key_dict):
|
|
|
|
| 54 |
provider_id = MODEL_OPTIONS.get(provider)
|
| 55 |
api_key = key_dict.get(f"{provider_id}_key")
|
| 56 |
if not api_key:
|
|
@@ -73,7 +73,6 @@ def generate_from_prompt(prompt, provider, key_dict):
|
|
| 73 |
|
| 74 |
# --- UI Definitions ---
|
| 75 |
|
| 76 |
-
# REFORMATTED: No underscores, uses Title Case
|
| 77 |
STANDARD_LABELS = [
|
| 78 |
"Person", "Organization", "Location", "Country", "City", "State",
|
| 79 |
"Nationality", "Group", "Date", "Event", "Law", "Legal Document",
|
|
@@ -84,7 +83,11 @@ STANDARD_LABELS = [
|
|
| 84 |
MAX_CATEGORIES = 8
|
| 85 |
|
| 86 |
with gr.Blocks(title="Historical Text Analysis Tool", css=".prose { word-break: break-word; }") as demo:
|
|
|
|
|
|
|
|
|
|
| 87 |
gr.Markdown("# Historical Text Analysis Tool")
|
|
|
|
| 88 |
gr.Markdown(
|
| 89 |
"""
|
| 90 |
This tool uses two forms of AI to accelerate historical research. First, a **Conceptual AI** generates a research framework with relevant search terms for your topic. Second, an **Extraction AI** scans your source text to find and highlight those terms with high precision.
|
|
@@ -142,7 +145,6 @@ with gr.Blocks(title="Historical Text Analysis Tool", css=".prose { word-break:
|
|
| 142 |
gr.Markdown("--- \n## Step 3: Run Analysis")
|
| 143 |
threshold_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.4, step=0.05, label="Confidence Threshold", info="Controls the strictness of the Extraction AI. Lower values find more potential matches. Higher values return fewer, more precise matches.")
|
| 144 |
text_input = gr.Textbox(label="Paste Your Source Text Here for Analysis", lines=15, placeholder="Paste a historical document, an article, or a chapter...")
|
| 145 |
-
# UPDATED BUTTON TEXT
|
| 146 |
analyze_btn = gr.Button("Find Entities", variant="primary")
|
| 147 |
|
| 148 |
analysis_status = gr.Markdown(visible=False)
|
|
@@ -159,13 +161,28 @@ with gr.Blocks(title="Historical Text Analysis Tool", css=".prose { word-break:
|
|
| 159 |
with gr.TabItem("Highlighted Text"):
|
| 160 |
highlighted_text_output = gr.HighlightedText(label="Found Entities", interactive=True)
|
| 161 |
with gr.TabItem("Detailed Results"):
|
| 162 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
with gr.TabItem("Debug Log"):
|
| 164 |
debug_output = gr.Textbox(label="Extraction Process Log", interactive=False, lines=8)
|
| 165 |
|
| 166 |
# --- Backend Functions ---
|
| 167 |
|
| 168 |
def handle_generate(topic, provider, openai_k, anthropic_k, google_k):
|
|
|
|
| 169 |
yield {generate_btn: gr.update(value="Generating...", interactive=False)}
|
| 170 |
|
| 171 |
try:
|
|
@@ -214,24 +231,22 @@ with gr.Blocks(title="Historical Text Analysis Tool", css=".prose { word-break:
|
|
| 214 |
raise gr.Error(str(e))
|
| 215 |
|
| 216 |
def analyze_text(text, standard_labels, custom_label_text, threshold, *suggested_labels_from_groups):
|
| 217 |
-
# UPDATED PROGRESS MESSAGE
|
| 218 |
yield {
|
| 219 |
analyze_btn: gr.update(value="Finding Entities...", interactive=False),
|
| 220 |
-
analysis_status: gr.update(value="The Extraction AI is scanning your text.
|
| 221 |
-
highlighted_text_output: None, detailed_results_output: None, debug_output: "Starting analysis..."
|
|
|
|
| 222 |
}
|
| 223 |
|
|
|
|
| 224 |
debug_info = []
|
| 225 |
-
if gliner_model is None:
|
| 226 |
-
raise gr.Error("Extraction AI (GLiNER model) is not loaded. Cannot analyze text. Please check logs and restart.")
|
| 227 |
-
|
| 228 |
labels_to_use = set()
|
| 229 |
for group in suggested_labels_from_groups:
|
| 230 |
if group: labels_to_use.update(group)
|
| 231 |
if standard_labels: labels_to_use.update(standard_labels)
|
| 232 |
custom = {l.strip() for l in custom_label_text.split(',') if l.strip()}
|
| 233 |
if custom: labels_to_use.update(custom)
|
| 234 |
-
|
| 235 |
final_labels = sorted(list(labels_to_use))
|
| 236 |
debug_info.append(f"Searching for {len(final_labels)} unique labels.")
|
| 237 |
debug_info.append(f"Confidence Threshold set to: {threshold}")
|
|
@@ -241,11 +256,13 @@ with gr.Blocks(title="Historical Text Analysis Tool", css=".prose { word-break:
|
|
| 241 |
analyze_btn: gr.update(value="Find Entities", interactive=True),
|
| 242 |
analysis_status: gr.update(visible=False),
|
| 243 |
highlighted_text_output: {"text": text, "entities": []},
|
| 244 |
-
detailed_results_output:
|
|
|
|
| 245 |
debug_output: "Analysis stopped: No text or no labels provided."
|
| 246 |
}
|
| 247 |
return
|
| 248 |
|
|
|
|
| 249 |
all_entities = []
|
| 250 |
chunk_size, overlap = 1024, 100
|
| 251 |
for i in range(0, len(text), chunk_size - overlap):
|
|
@@ -254,14 +271,10 @@ with gr.Blocks(title="Historical Text Analysis Tool", css=".prose { word-break:
|
|
| 254 |
for ent in chunk_entities:
|
| 255 |
ent['start'] += i; ent['end'] += i
|
| 256 |
all_entities.append(ent)
|
| 257 |
-
|
| 258 |
unique_entities = [dict(t) for t in {tuple(d.items()) for d in all_entities}]
|
| 259 |
debug_info.append(f"Found {len(unique_entities)} raw entity mentions.")
|
| 260 |
|
| 261 |
-
highlighted_output_data = {
|
| 262 |
-
"text": text,
|
| 263 |
-
"entities": [{"start": ent["start"], "end": ent["end"], "entity": ent["label"]} for ent in unique_entities]
|
| 264 |
-
}
|
| 265 |
|
| 266 |
aggregated_matches = defaultdict(lambda: {'count': 0, 'scores': [], 'original_casing': ''})
|
| 267 |
for ent in unique_entities:
|
|
@@ -269,44 +282,46 @@ with gr.Blocks(title="Historical Text Analysis Tool", css=".prose { word-break:
|
|
| 269 |
key = (ent['label'], match_text.lower())
|
| 270 |
aggregated_matches[key]['count'] += 1
|
| 271 |
aggregated_matches[key]['scores'].append(ent['score'])
|
| 272 |
-
if not aggregated_matches[key]['original_casing']:
|
| 273 |
-
aggregated_matches[key]['original_casing'] = match_text
|
| 274 |
|
| 275 |
-
# --- NEW LOGIC FOR
|
| 276 |
table_rows = []
|
| 277 |
for (label, _), data in aggregated_matches.items():
|
| 278 |
avg_score = np.mean(data['scores'])
|
| 279 |
table_rows.append({
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
})
|
| 285 |
|
| 286 |
-
#
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
markdown_string = "No entities found. Consider lowering the confidence threshold or refining your labels."
|
| 292 |
-
else:
|
| 293 |
-
# Build the Markdown table string
|
| 294 |
-
markdown_string += "| Label | Text Found | Instances | Avg. Confidence Score* |\n"
|
| 295 |
-
markdown_string += "|-------|------------|-----------|--------------------------|\n"
|
| 296 |
-
for row in table_rows:
|
| 297 |
-
markdown_string += f"| {row['label']} | {row['text']} | {row['count']} | {row['avg_score']:.2f} |\n"
|
| 298 |
-
|
| 299 |
-
markdown_string += "\n---\n<small><i>*<b>Confidence Score:</b> How sure the Extraction AI is that it found the correct label (1.00 = 100% certain). The score is an average across all instances of that text.</i></small>"
|
| 300 |
-
|
| 301 |
debug_info.append("Analysis complete.")
|
| 302 |
|
| 303 |
yield {
|
| 304 |
analyze_btn: gr.update(value="Find Entities", interactive=True),
|
| 305 |
analysis_status: gr.update(visible=False),
|
| 306 |
highlighted_text_output: highlighted_output_data,
|
| 307 |
-
|
|
|
|
|
|
|
|
|
|
| 308 |
debug_output: "\n".join(debug_info)
|
| 309 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 310 |
|
| 311 |
# --- Wire up UI events ---
|
| 312 |
generate_btn.click(
|
|
@@ -315,24 +330,28 @@ with gr.Blocks(title="Historical Text Analysis Tool", css=".prose { word-break:
|
|
| 315 |
outputs=[generate_btn] + [comp for pair in dynamic_components for comp in pair]
|
| 316 |
)
|
| 317 |
|
| 318 |
-
def deselect_all():
|
| 319 |
-
|
| 320 |
-
def select_all(choices):
    """Build a component update whose value is the given list of choices."""
    selected = choices
    return gr.update(value=selected)
|
| 322 |
|
| 323 |
deselect_all_std_btn.click(fn=deselect_all, inputs=None, outputs=[standard_labels_checkbox])
|
| 324 |
select_all_std_btn.click(lambda: select_all(STANDARD_LABELS), inputs=None, outputs=[standard_labels_checkbox])
|
| 325 |
|
| 326 |
-
# Wire up the dynamic select/deselect buttons
|
| 327 |
for _, cg, sel_btn, desel_btn in dynamic_components:
|
| 328 |
-
# BUG FIX: Use a lambda to capture the component `cg` itself, allowing `cg.choices` to provide the full list of options.
|
| 329 |
sel_btn.click(fn=lambda c=cg: gr.update(value=c.choices), inputs=None, outputs=[cg])
|
| 330 |
desel_btn.click(fn=deselect_all, inputs=None, outputs=[cg])
|
| 331 |
|
| 332 |
analyze_btn.click(
|
| 333 |
fn=analyze_text,
|
| 334 |
inputs=[text_input, standard_labels_checkbox, custom_labels_textbox, threshold_slider] + [cg for acc, cg, sel, desel in dynamic_components],
|
| 335 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 336 |
)
|
| 337 |
|
| 338 |
demo.launch(share=True, debug=True)
|
|
|
|
| 1 |
# 📚 Install dependencies
|
| 2 |
# Make sure to run this in your environment if you haven't already
|
| 3 |
+
# !pip install openai anthropic google-generativeai gradio transformers torch gliner numpy pandas --quiet
|
| 4 |
|
| 5 |
# ⚙️ Imports
|
| 6 |
import openai
|
|
|
|
| 10 |
from gliner import GLiNER
|
| 11 |
from collections import defaultdict
|
| 12 |
import numpy as np
|
| 13 |
+
import pandas as pd # Import pandas for DataFrame
|
| 14 |
import os
|
| 15 |
+
import tempfile # For creating temporary CSV files
|
| 16 |
|
| 17 |
# 🧠 Supported models and their providers
|
| 18 |
MODEL_OPTIONS = {
|
|
|
|
| 36 |
# 🧠 Prompt for the Conceptual AI to generate a research framework
|
| 37 |
FRAMEWORK_PROMPT_TEMPLATE = """
|
| 38 |
You are an expert research assistant specializing in history. For the provided topic: **"{topic}"**, your task is to generate a conceptual research framework.
|
|
|
|
| 39 |
**Instructions:**
|
| 40 |
1. Identify 4-6 high-level **Conceptual Categories** relevant to analyzing this historical topic (e.g., 'Key Figures', 'Core Ideologies', 'Significant Events').
|
| 41 |
2. For each category, list specific, searchable **Labels** that would appear in a primary or secondary source document.
|
| 42 |
3. **Crucial Rule for Labels:** Use concise, singular, and fundamental terms (e.g., use `Treaty` not `Diplomatic Treaties`). Use Title Case (e.g. `Working Class`).
|
|
|
|
| 43 |
**Output Format:**
|
| 44 |
Use Markdown. Each category must be a Level 3 Header (###), followed by a comma-separated list of its labels.
|
|
|
|
| 45 |
### Example Category: Political Actions
|
| 46 |
- Petition, Charter, Protest, Rally, Legislation
|
| 47 |
### Example Category: Social Groups
|
|
|
|
| 50 |
|
| 51 |
# 🧠 Generator Function (The "Conceptual AI")
|
| 52 |
def generate_from_prompt(prompt, provider, key_dict):
|
| 53 |
+
# (This function remains unchanged)
|
| 54 |
provider_id = MODEL_OPTIONS.get(provider)
|
| 55 |
api_key = key_dict.get(f"{provider_id}_key")
|
| 56 |
if not api_key:
|
|
|
|
| 73 |
|
| 74 |
# --- UI Definitions ---
|
| 75 |
|
|
|
|
| 76 |
STANDARD_LABELS = [
|
| 77 |
"Person", "Organization", "Location", "Country", "City", "State",
|
| 78 |
"Nationality", "Group", "Date", "Event", "Law", "Legal Document",
|
|
|
|
| 83 |
MAX_CATEGORIES = 8
|
| 84 |
|
| 85 |
with gr.Blocks(title="Historical Text Analysis Tool", css=".prose { word-break: break-word; }") as demo:
|
| 86 |
+
# Invisible component to store the results DataFrame for later use (like exporting)
|
| 87 |
+
results_state = gr.State()
|
| 88 |
+
|
| 89 |
gr.Markdown("# Historical Text Analysis Tool")
|
| 90 |
+
# ... (Introduction and Step 1-3 UI remains the same)
|
| 91 |
gr.Markdown(
|
| 92 |
"""
|
| 93 |
This tool uses two forms of AI to accelerate historical research. First, a **Conceptual AI** generates a research framework with relevant search terms for your topic. Second, an **Extraction AI** scans your source text to find and highlight those terms with high precision.
|
|
|
|
| 145 |
gr.Markdown("--- \n## Step 3: Run Analysis")
|
| 146 |
threshold_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.4, step=0.05, label="Confidence Threshold", info="Controls the strictness of the Extraction AI. Lower values find more potential matches. Higher values return fewer, more precise matches.")
|
| 147 |
text_input = gr.Textbox(label="Paste Your Source Text Here for Analysis", lines=15, placeholder="Paste a historical document, an article, or a chapter...")
|
|
|
|
| 148 |
analyze_btn = gr.Button("Find Entities", variant="primary")
|
| 149 |
|
| 150 |
analysis_status = gr.Markdown(visible=False)
|
|
|
|
| 161 |
with gr.TabItem("Highlighted Text"):
|
| 162 |
highlighted_text_output = gr.HighlightedText(label="Found Entities", interactive=True)
|
| 163 |
with gr.TabItem("Detailed Results"):
|
| 164 |
+
# NEW: Helper text explaining how to sort, filter, and export the results table
|
| 165 |
+
gr.Markdown("You can sort the table by clicking on column headers or filter by typing in the search box below. Use the button to export the full table to a CSV file.")
|
| 166 |
+
with gr.Row():
|
| 167 |
+
export_btn = gr.Button("Export Results to CSV")
|
| 168 |
+
|
| 169 |
+
# NEW: Switched to gr.DataFrame for interactive results
|
| 170 |
+
detailed_results_output = gr.DataFrame(
|
| 171 |
+
headers=["Label", "Text Found", "Instances", "Confidence Score"],
|
| 172 |
+
datatype=["str", "str", "number", "number"],
|
| 173 |
+
label="Aggregated List of Found Entities"
|
| 174 |
+
)
|
| 175 |
+
|
| 176 |
+
# NEW: File output component for the download link
|
| 177 |
+
csv_file_output = gr.File(label="Download CSV", visible=False)
|
| 178 |
+
|
| 179 |
with gr.TabItem("Debug Log"):
|
| 180 |
debug_output = gr.Textbox(label="Extraction Process Log", interactive=False, lines=8)
|
| 181 |
|
| 182 |
# --- Backend Functions ---
|
| 183 |
|
| 184 |
def handle_generate(topic, provider, openai_k, anthropic_k, google_k):
|
| 185 |
+
# (This function remains unchanged)
|
| 186 |
yield {generate_btn: gr.update(value="Generating...", interactive=False)}
|
| 187 |
|
| 188 |
try:
|
|
|
|
| 231 |
raise gr.Error(str(e))
|
| 232 |
|
| 233 |
def analyze_text(text, standard_labels, custom_label_text, threshold, *suggested_labels_from_groups):
|
|
|
|
| 234 |
yield {
|
| 235 |
analyze_btn: gr.update(value="Finding Entities...", interactive=False),
|
| 236 |
+
analysis_status: gr.update(value="The Extraction AI is scanning your text...", visible=True),
|
| 237 |
+
highlighted_text_output: None, detailed_results_output: None, debug_output: "Starting analysis...",
|
| 238 |
+
csv_file_output: gr.update(visible=False) # Hide old CSV link
|
| 239 |
}
|
| 240 |
|
| 241 |
+
# ... (Label collection logic is the same)
|
| 242 |
debug_info = []
|
| 243 |
+
if gliner_model is None: raise gr.Error("Extraction AI (GLiNER model) is not loaded.")
|
|
|
|
|
|
|
| 244 |
labels_to_use = set()
|
| 245 |
for group in suggested_labels_from_groups:
|
| 246 |
if group: labels_to_use.update(group)
|
| 247 |
if standard_labels: labels_to_use.update(standard_labels)
|
| 248 |
custom = {l.strip() for l in custom_label_text.split(',') if l.strip()}
|
| 249 |
if custom: labels_to_use.update(custom)
|
|
|
|
| 250 |
final_labels = sorted(list(labels_to_use))
|
| 251 |
debug_info.append(f"Searching for {len(final_labels)} unique labels.")
|
| 252 |
debug_info.append(f"Confidence Threshold set to: {threshold}")
|
|
|
|
| 256 |
analyze_btn: gr.update(value="Find Entities", interactive=True),
|
| 257 |
analysis_status: gr.update(visible=False),
|
| 258 |
highlighted_text_output: {"text": text, "entities": []},
|
| 259 |
+
detailed_results_output: None,
|
| 260 |
+
results_state: None, # Clear state
|
| 261 |
debug_output: "Analysis stopped: No text or no labels provided."
|
| 262 |
}
|
| 263 |
return
|
| 264 |
|
| 265 |
+
# ... (GLiNER prediction logic is the same)
|
| 266 |
all_entities = []
|
| 267 |
chunk_size, overlap = 1024, 100
|
| 268 |
for i in range(0, len(text), chunk_size - overlap):
|
|
|
|
| 271 |
for ent in chunk_entities:
|
| 272 |
ent['start'] += i; ent['end'] += i
|
| 273 |
all_entities.append(ent)
|
|
|
|
| 274 |
unique_entities = [dict(t) for t in {tuple(d.items()) for d in all_entities}]
|
| 275 |
debug_info.append(f"Found {len(unique_entities)} raw entity mentions.")
|
| 276 |
|
| 277 |
+
highlighted_output_data = {"text": text, "entities": [{"start": ent["start"], "end": ent["end"], "entity": ent["label"]} for ent in unique_entities]}
|
|
|
|
|
|
|
|
|
|
| 278 |
|
| 279 |
aggregated_matches = defaultdict(lambda: {'count': 0, 'scores': [], 'original_casing': ''})
|
| 280 |
for ent in unique_entities:
|
|
|
|
| 282 |
key = (ent['label'], match_text.lower())
|
| 283 |
aggregated_matches[key]['count'] += 1
|
| 284 |
aggregated_matches[key]['scores'].append(ent['score'])
|
| 285 |
+
if not aggregated_matches[key]['original_casing']: aggregated_matches[key]['original_casing'] = match_text
|
|
|
|
| 286 |
|
| 287 |
+
# --- NEW LOGIC FOR PANDAS DATAFRAME ---
|
| 288 |
table_rows = []
|
| 289 |
for (label, _), data in aggregated_matches.items():
|
| 290 |
avg_score = np.mean(data['scores'])
|
| 291 |
table_rows.append({
|
| 292 |
+
"Label": label,
|
| 293 |
+
"Text Found": data['original_casing'],
|
| 294 |
+
"Instances": data['count'],
|
| 295 |
+
"Confidence Score": round(avg_score, 2)
|
| 296 |
})
|
| 297 |
|
| 298 |
+
# Create DataFrame and sort it
|
| 299 |
+
results_df = pd.DataFrame(table_rows)
|
| 300 |
+
if not results_df.empty:
|
| 301 |
+
results_df = results_df.sort_values(by=["Label", "Instances"], ascending=[True, False])
|
| 302 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 303 |
debug_info.append("Analysis complete.")
|
| 304 |
|
| 305 |
yield {
|
| 306 |
analyze_btn: gr.update(value="Find Entities", interactive=True),
|
| 307 |
analysis_status: gr.update(visible=False),
|
| 308 |
highlighted_text_output: highlighted_output_data,
|
| 309 |
+
# Output the DataFrame to the gr.DataFrame component
|
| 310 |
+
detailed_results_output: results_df,
|
| 311 |
+
# Store the DataFrame in the invisible gr.State component
|
| 312 |
+
results_state: results_df,
|
| 313 |
debug_output: "\n".join(debug_info)
|
| 314 |
}
|
| 315 |
+
|
| 316 |
+
# --- NEW FUNCTION TO HANDLE CSV EXPORT ---
|
| 317 |
+
def export_to_csv(df):
    """Write the stored results table to a temporary CSV file for download.

    Args:
        df: The value held in ``results_state`` -- the results DataFrame
            stored by the last analysis, or ``None`` when nothing is stored.

    Returns:
        A Gradio update for the file component: empty and hidden when there
        is nothing to export, otherwise visible and pointing at the CSV path.
    """
    if df is None or df.empty:
        gr.Info("No data to export. Please run 'Find Entities' first.")
        # Hide the download component explicitly instead of relying on a bare None.
        return gr.update(value=None, visible=False)

    # delete=False: the file has to outlive this function so it can be served.
    # newline='' is what pandas expects when handed a text-mode file object.
    with tempfile.NamedTemporaryFile(
        delete=False, mode='w', suffix='.csv', encoding='utf-8', newline=''
    ) as tmpfile:
        # Write through the open handle; reopening the file by name while it
        # is still open is not portable (it fails on Windows).
        df.to_csv(tmpfile, index=False)
        csv_path = tmpfile.name

    # Returning after the `with` guarantees the data is flushed and closed.
    return gr.update(value=csv_path, visible=True)
|
| 325 |
|
| 326 |
# --- Wire up UI events ---
|
| 327 |
generate_btn.click(
|
|
|
|
| 330 |
outputs=[generate_btn] + [comp for pair in dynamic_components for comp in pair]
|
| 331 |
)
|
| 332 |
|
| 333 |
+
def deselect_all():
    """Build a component update whose value is an empty list (clears a selection)."""
    cleared = []
    return gr.update(value=cleared)
|
| 334 |
+
def select_all(choices):
    """Build a component update whose value is the given list of choices."""
    selected = choices
    return gr.update(value=selected)
|
|
|
|
|
|
|
| 335 |
|
| 336 |
deselect_all_std_btn.click(fn=deselect_all, inputs=None, outputs=[standard_labels_checkbox])
|
| 337 |
select_all_std_btn.click(lambda: select_all(STANDARD_LABELS), inputs=None, outputs=[standard_labels_checkbox])
|
| 338 |
|
|
|
|
| 339 |
for _, cg, sel_btn, desel_btn in dynamic_components:
|
|
|
|
| 340 |
sel_btn.click(fn=lambda c=cg: gr.update(value=c.choices), inputs=None, outputs=[cg])
|
| 341 |
desel_btn.click(fn=deselect_all, inputs=None, outputs=[cg])
|
| 342 |
|
| 343 |
analyze_btn.click(
|
| 344 |
fn=analyze_text,
|
| 345 |
inputs=[text_input, standard_labels_checkbox, custom_labels_textbox, threshold_slider] + [cg for acc, cg, sel, desel in dynamic_components],
|
| 346 |
+
# Add results_state to the outputs list
|
| 347 |
+
outputs=[analyze_btn, analysis_status, highlighted_text_output, detailed_results_output, results_state, debug_output, csv_file_output]
|
| 348 |
+
)
|
| 349 |
+
|
| 350 |
+
# Wire up the new export button
|
| 351 |
+
export_btn.click(
|
| 352 |
+
fn=export_to_csv,
|
| 353 |
+
inputs=[results_state],
|
| 354 |
+
outputs=[csv_file_output]
|
| 355 |
)
|
| 356 |
|
| 357 |
demo.launch(share=True, debug=True)
|