Spaces:

mk1985
/

Historical-Text-Analyser

Sleeping

App Files Files Community

mk1985 committed on Jul 23, 2025

Commit

5f5b923

verified ·

1 Parent(s): e9738aa

Update app.py

Browse files

Files changed (1) hide show

app.py +94 -130

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 # 📚 Install dependencies
 # Make sure to run this in your environment if you haven't already
-# !pip install openai anthropic google-generativeai gradio transformers torch gliner --quiet
 # ⚙️ Imports
 import openai
@@ -8,9 +8,8 @@ import anthropic
 import google.generativeai as genai
 import gradio as gr
 from gliner import GLiNER
-import traceback
 from collections import defaultdict, Counter
-import numpy as np # For calculating average score
 import os
 # 🧠 Supported models and their providers
@@ -25,32 +24,32 @@ GLINER_MODEL_NAME = "urchade/gliner_large-v2.1"
 # --- Load the model only once at startup ---
 try:
-    print("Loading AI Detective (GLiNER model)... This may take a moment.")
     gliner_model = GLiNER.from_pretrained(GLINER_MODEL_NAME)
-    print("AI Detective loaded successfully.")
 except Exception as e:
     print(f"FATAL ERROR: Could not load GLiNER model. The app will not be able to find entities. Error: {e}")
     gliner_model = None
-# 🧠 Prompt for the Creative AI to generate label ideas
-HIERARCHICAL_PROMPT_TEMPLATE = """
-You are a helpful research assistant. For the historical topic: **"{topic}"**, your job is to suggest a research framework.
 **Instructions:**
-1.  First, think of 4-6 **Conceptual Categories** that are useful for analyzing this topic (e.g., 'Forms of Protest', 'Key Demands'). These will become the labels.
-2.  For each category, list specific **Examples** someone could search for in a text.
-3.  **Crucial Rule for Labels:** Use the most basic, fundamental form (e.g., `Petition`, not `Political Petition`).
 **Output Format:**
-Use Markdown. Each category must be a Level 3 Header (###), followed by a comma-separated list of its examples.
-### Example Category 1
-- Example A, Example B, Example C
-### Example Category 2
-- Example D, Example E
 """
-# 🧠 Generator Function (The "Creative Brain")
 def generate_from_prompt(prompt, provider, key_dict):
     provider_id = MODEL_OPTIONS.get(provider)
     api_key = key_dict.get(f"{provider_id}_key")
@@ -74,7 +73,6 @@ def generate_from_prompt(prompt, provider, key_dict):
 # --- UI Definitions ---
-# A list of standard, common labels the user can always choose from
 STANDARD_LABELS = [
     "PERSON", "ORGANIZATION", "LOCATION", "COUNTRY", "CITY", "STATE",
     "NATIONALITY", "GROUP", "DATE", "EVENT", "LAW", "LEGAL_DOCUMENT",
@@ -82,81 +80,76 @@ STANDARD_LABELS = [
     "MONEY", "CURRENCY", "QUANTITY", "ORDINAL_NUMBER", "CARDINAL_NUMBER"
 ]
-MAX_CATEGORIES = 8 # The maximum number of AI-suggested categories to show
-with gr.Blocks(title="Smart Text Analyzer", css=".prose { word-break: break-word; }") as demo:
-    gr.Markdown("# Smart Text Analyzer")
     gr.Markdown(
         """
-        Welcome! Paste your text below to automatically find and highlight key information. It's like having two smart assistants read your document for you.
-        ### How It Works: Two Brains are Better Than One!
-        We use two different types of AI to give you the best results.
-        🧠 **1. The Creative Brain (Generative AI - like GPT)**
-        This AI is a brainstormer. It reads your topic to understand the context, then *imagines* and *suggests* useful labels that fit your document. It helps you discover what to look for!
-        🕵️ **2. The Detective (Extractive AI - GLiNER)**
-        This AI is a precise detective. Once you give it a list of labels, it meticulously scans the text and *pulls out* (extracts) the exact words that match. It's fantastic at finding specific information with high accuracy.
         """
     )
-    gr.Markdown("--- \n## Step 1: Get Label Ideas from the Creative AI")
     with gr.Row():
-        topic = gr.Textbox(label="Enter a Topic", placeholder="e.g., The Chartist Movement, The Protestant Reformation")
-        provider = gr.Dropdown(choices=list(MODEL_OPTIONS.keys()), label="Choose Creative AI Model")
     with gr.Row():
         openai_key = gr.Textbox(label="OpenAI API Key", type="password")
         anthropic_key = gr.Textbox(label="Anthropic API Key", type="password")
         google_key = gr.Textbox(label="Google API Key", type="password")
-    generate_btn = gr.Button("Generate Label Suggestions", variant="primary")
-    gr.Markdown("--- \n## Step 2: Build Your Search & Analyze Text")
-    gr.Markdown(
-        """
-        ### What are Entities or Labels?
-        Think of them as special highlighters! They find and color-code specific types of information in your text, like `PERSON`, `DATE`, `LOCATION`, or custom things you define.
-        """
-    )
-    gr.Markdown("#### 1. Review AI-Suggested Labels")
-    gr.Markdown("The AI's suggestions appear below. Uncheck any you don't want.")
     dynamic_components = []
     with gr.Column():
         for i in range(MAX_CATEGORIES):
-            with gr.Accordion(f"Suggested Label Category {i+1}", visible=False) as acc:
                 with gr.Row():
-                    # The CheckboxGroup holds the actual labels (e.g., "Protest", "Petition")
-                    cg = gr.CheckboxGroup(label="Labels in this category", interactive=True, container=False, scale=4)
-                    deselect_btn = gr.Button("Deselect All", size="sm", scale=1, min_width=80)
-                dynamic_components.append((acc, cg, deselect_btn))
-    gr.Markdown("#### 2. Include Standard Labels (Optional)")
     with gr.Group():
         standard_labels_checkbox = gr.CheckboxGroup(choices=STANDARD_LABELS, value=STANDARD_LABELS, label="Standard Entity Labels", info="Common categories like people, places, and dates.")
         with gr.Row():
             select_all_std_btn = gr.Button("Select All", size="sm")
             deselect_all_std_btn = gr.Button("Deselect All", size="sm")
-    gr.Markdown("#### 3. Add Your Own Custom Labels (Optional)")
     with gr.Group():
         custom_labels_textbox = gr.Textbox(label="Enter Custom Labels (comma-separated)", placeholder="e.g., Technology, Weapon, Secret Society...")
-    gr.Markdown("--- \n## Step 3: Analyze Your Document")
-    threshold_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.4, step=0.05, label="Confidence Threshold", info="Controls how strict the AI Detective is. Lower to find more matches. Higher for fewer, more precise matches.")
-    text_input = gr.Textbox(label="Paste Your Full Text Here for Analysis", lines=10, placeholder="Paste a historical document, an article, or a chapter...")
-    analyze_btn = gr.Button("Analyze Text & Find Entities", variant="primary")
-    analysis_status = gr.Markdown(visible=False) # For the "Analyzing..." message
-    gr.Markdown("--- \n## Step 4: Review Your Results")
     gr.Markdown(
         """
-        ✨ **Pro Tip: Create Your Own Labels!**
-        Did our AI miss something? In the **"Highlighted Text"** view below, simply **click and drag to highlight any piece of text**. A small box will appear, allowing you to name and add your own custom label!
         """
     )
@@ -164,32 +157,26 @@ with gr.Blocks(title="Smart Text Analyzer", css=".prose { word-break: break-word
         with gr.TabItem("Highlighted Text"):
             highlighted_text_output = gr.HighlightedText(label="Found Entities", interactive=True)
         with gr.TabItem("Detailed Results"):
-            detailed_results_output = gr.Markdown(label="List of Found Entities by Label")
-        with gr.TabItem("Debug Info"):
-            debug_output = gr.Textbox(label="Extraction Log", interactive=False, lines=8)
     # --- Backend Functions ---
     def handle_generate(topic, provider, openai_k, anthropic_k, google_k):
         yield {
-            generate_btn: gr.update(value="🧠 Generating suggestions...", interactive=False)
         }
         try:
-            key_dict = {
-                "openai_key": os.environ.get("OPENAI_API_KEY", openai_k),
-                "anthropic_key": os.environ.get("ANTHROPIC_API_KEY", anthropic_k),
-                "google_key": os.environ.get("GOOGLE_API_KEY", google_k)
-            }
             provider_id = MODEL_OPTIONS.get(provider)
             if not topic or not provider or not key_dict.get(f"{provider_id}_key"):
-                raise gr.Error("Topic, Provider, and the correct API Key are required.")
-            prompt = HIERARCHICAL_PROMPT_TEMPLATE.format(topic=topic)
             raw_framework = generate_from_prompt(prompt, provider, key_dict)
-            # This parsing is simplified for the new structure
             framework = defaultdict(list)
             current_category = None
             for line in raw_framework.split('\n'):
@@ -201,137 +188,113 @@ with gr.Blocks(title="Smart Text Analyzer", css=".prose { word-break: break-word
                     framework[current_category].extend([e.strip() for e in entities.split(',') if e.strip()])
             if not framework:
-                raise gr.Error("AI failed to generate categories. Please try again or rephrase your topic.")
             updates = {}
             categories = list(framework.items())
             for i in range(MAX_CATEGORIES):
-                accordion_comp, checkbox_comp, button_comp = dynamic_components[i]
                 if i < len(categories):
                     category_name, entities = categories[i]
-                    # The labels are the entities themselves, grouped by the category name
                     sorted_entities = sorted(list(set(entities)))
                     updates[accordion_comp] = gr.update(label=f"Category: {category_name}", visible=True)
                     updates[checkbox_comp] = gr.update(choices=sorted_entities, value=sorted_entities, label="Suggested Labels", visible=True)
-                    updates[button_comp] = gr.update(visible=True)
                 else:
                     updates[accordion_comp] = gr.update(visible=False)
-                    updates[checkbox_comp] = gr.update(visible=False)
-                    updates[button_comp] = gr.update(visible=False)
-            updates[generate_btn] = gr.update(value="Generate Label Suggestions", interactive=True)
             yield updates
         except Exception as e:
-            yield {generate_btn: gr.update(value="Generate Label Suggestions", interactive=True)}
             raise gr.Error(str(e))
-    def analyze_text_and_find_entities(text, standard_labels, custom_label_text, threshold, *suggested_labels_from_groups):
-        # --- 1. Show Progress to User ---
         yield {
-            analyze_btn: gr.update(value="🕵️ Analyzing...", interactive=False),
-            analysis_status: gr.update(value="Our AI Detective is scanning your text. This may take a moment...", visible=True),
-            highlighted_text_output: None,
-            detailed_results_output: None,
-            debug_output: "Starting analysis..."
         }
         debug_info = []
         if gliner_model is None:
-            raise gr.Error("GLiNER model failed to load at startup. Cannot analyze text. Please check logs.")
-        # --- 2. Collect All Labels from UI ---
         labels_to_use = set()
-        # Add labels from the dynamically generated suggestion groups
         for group in suggested_labels_from_groups:
             if group: labels_to_use.update(group)
-        # Add labels from the standard list
         if standard_labels: labels_to_use.update(standard_labels)
-        # Add labels from the custom textbox
         custom = {l.strip() for l in custom_label_text.split(',') if l.strip()}
         if custom: labels_to_use.update(custom)
         final_labels = sorted(list(labels_to_use))
-        debug_info.append(f"🧠 Searching for {len(final_labels)} unique labels.")
-        debug_info.append(f"⚙️ Confidence Threshold: {threshold}")
         if not text or not final_labels:
             yield {
-                analyze_btn: gr.update(value="Analyze Text & Find Entities", interactive=True),
                 analysis_status: gr.update(visible=False),
                 highlighted_text_output: {"text": text, "entities": []},
-                detailed_results_output: "Please provide text and select at least one label to search for.",
                 debug_output: "Analysis stopped: No text or no labels provided."
             }
             return
-        # --- 3. Run the GLiNER Model (The "Detective") ---
         all_entities = []
-        # Process text in chunks to handle very long documents
         chunk_size, overlap = 1024, 100
         for i in range(0, len(text), chunk_size - overlap):
             chunk = text[i : i + chunk_size]
             chunk_entities = gliner_model.predict_entities(chunk, final_labels, threshold=threshold)
             for ent in chunk_entities:
-                ent['start'] += i
-                ent['end'] += i
                 all_entities.append(ent)
-        # Deduplicate entities that might span across chunk overlaps
         unique_entities = [dict(t) for t in {tuple(d.items()) for d in all_entities}]
-        debug_info.append(f"📊 Found {len(unique_entities)} raw entity mentions.")
-        # --- 4. Prepare Highlighted Text Output ---
         highlighted_output_data = {
             "text": text,
-            "entities": [{"start": ent["start"], "end": ent["end"], "label": ent["label"]} for ent in unique_entities]
         }
-        # --- 5. Prepare Detailed Table-Based Results ---
         aggregated_matches = defaultdict(lambda: {'count': 0, 'scores': [], 'original_casing': ''})
         for ent in unique_entities:
             match_text = text[ent['start']:ent['end']]
-            # Use a key of (label, lowercase_text) to group similar items
             key = (ent['label'], match_text.lower())
             aggregated_matches[key]['count'] += 1
             aggregated_matches[key]['scores'].append(ent['score'])
-            # Store the first-seen casing of the text
             if not aggregated_matches[key]['original_casing']:
                 aggregated_matches[key]['original_casing'] = match_text
-        # Group aggregated results by label for final display
         results_by_label = defaultdict(list)
         for (label, _), data in aggregated_matches.items():
             avg_score = np.mean(data['scores'])
-            results_by_label[label].append({
-                'text': data['original_casing'],
-                'count': data['count'],
-                'avg_score': avg_score
-            })
-        # --- 6. Build the Markdown String for the Detailed Table ---
         markdown_string = ""
         for label, items in sorted(results_by_label.items()):
             markdown_string += f"### {label}\n"
-            markdown_string += "| Text Found | Instances Found | Avg. Confidence Score* |\n"
-            markdown_string += "|------------|-----------------|--------------------------|\n"
-            # Sort items by count (most frequent first)
             for item in sorted(items, key=lambda x: x['count'], reverse=True):
                 markdown_string += f"| {item['text']} | {item['count']} | {item['avg_score']:.2f} |\n"
             markdown_string += "\n"
         if not markdown_string:
-            markdown_string = "No entities found. Try lowering the confidence threshold or changing your labels."
         else:
-            markdown_string += "\n---\n<small><i>*<b>Confidence Score:</b> How sure the AI Detective (GLiNER) is that it found the correct label (1.00 = 100% certain). The score shown is the average across all instances of that text.</i></small>"
-        debug_info.append("✅ Analysis complete.")
-        # --- 7. Yield Final Results to UI ---
         yield {
-            analyze_btn: gr.update(value="Analyze Text & Find Entities", interactive=True),
             analysis_status: gr.update(visible=False),
             highlighted_text_output: highlighted_output_data,
             detailed_results_output: markdown_string,
@@ -345,7 +308,6 @@ with gr.Blocks(title="Smart Text Analyzer", css=".prose { word-break: break-word
         outputs=[generate_btn] + [comp for pair in dynamic_components for comp in pair]
     )
-    # Functions for Select/Deselect All buttons
     def deselect_all():
         return gr.update(value=[])
     def select_all(choices):
@@ -354,12 +316,14 @@ with gr.Blocks(title="Smart Text Analyzer", css=".prose { word-break: break-word
     deselect_all_std_btn.click(fn=deselect_all, inputs=None, outputs=[standard_labels_checkbox])
     select_all_std_btn.click(lambda: select_all(STANDARD_LABELS), inputs=None, outputs=[standard_labels_checkbox])
-    for _, cg, btn in dynamic_components:
-        btn.click(fn=deselect_all, inputs=None, outputs=[cg])
     analyze_btn.click(
-        fn=analyze_text_and_find_entities,
-        inputs=[text_input, standard_labels_checkbox, custom_labels_textbox, threshold_slider] + [cg for acc, cg, btn in dynamic_components],
         outputs=[analyze_btn, analysis_status, highlighted_text_output, detailed_results_output, debug_output]
     )

 # 📚 Install dependencies
 # Make sure to run this in your environment if you haven't already
+# !pip install openai anthropic google-generativeai gradio transformers torch gliner numpy --quiet
 # ⚙️ Imports
 import openai
 import google.generativeai as genai
 import gradio as gr
 from gliner import GLiNER
 from collections import defaultdict, Counter
+import numpy as np
 import os
 # 🧠 Supported models and their providers
 # --- Load the model only once at startup ---
 try:
+    print("Loading Extraction AI (GLiNER model)... This may take a moment.")
     gliner_model = GLiNER.from_pretrained(GLINER_MODEL_NAME)
+    print("Extraction AI loaded successfully.")
 except Exception as e:
     print(f"FATAL ERROR: Could not load GLiNER model. The app will not be able to find entities. Error: {e}")
     gliner_model = None
+# 🧠 Prompt for the Conceptual AI to generate a research framework
+FRAMEWORK_PROMPT_TEMPLATE = """
+You are an expert research assistant specializing in history. For the provided topic: **"{topic}"**, your task is to generate a conceptual research framework.
 **Instructions:**
+1.  Identify 4-6 high-level **Conceptual Categories** relevant to analyzing this historical topic (e.g., 'Key Figures', 'Core Ideologies', 'Significant Events').
+2.  For each category, list specific, searchable **Labels** that would appear in a primary or secondary source document.
+3.  **Crucial Rule for Labels:** Use concise, singular, and fundamental terms (e.g., use `Treaty` not `Diplomatic Treaties`).
 **Output Format:**
+Use Markdown. Each category must be a Level 3 Header (###), followed by a comma-separated list of its labels.
+### Example Category: Political Actions
+- Petition, Charter, Protest, Rally, Legislation
+### Example Category: Social Groups
+- Working Class, Aristocracy, Clergy
 """
+# 🧠 Generator Function (The "Conceptual AI")
 def generate_from_prompt(prompt, provider, key_dict):
     provider_id = MODEL_OPTIONS.get(provider)
     api_key = key_dict.get(f"{provider_id}_key")
 # --- UI Definitions ---
 STANDARD_LABELS = [
     "PERSON", "ORGANIZATION", "LOCATION", "COUNTRY", "CITY", "STATE",
     "NATIONALITY", "GROUP", "DATE", "EVENT", "LAW", "LEGAL_DOCUMENT",
     "MONEY", "CURRENCY", "QUANTITY", "ORDINAL_NUMBER", "CARDINAL_NUMBER"
 ]
+MAX_CATEGORIES = 8
+with gr.Blocks(title="Historical Text Analysis Tool", css=".prose { word-break: break-word; }") as demo:
+    gr.Markdown("# Historical Text Analysis Tool")
     gr.Markdown(
         """
+        This tool uses two forms of AI to accelerate historical research. First, a **Conceptual AI** generates a research framework with relevant search terms for your topic. Second, an **Extraction AI** scans your source text to find and highlight those terms with high precision.
+        """
+    )
+    gr.Markdown(
+        """
+        ### Understanding "Entities" and "Labels"
+        In text analysis, this process is often called "Named Entity Recognition" (NER).
+        - An **Entity** is a specific piece of text in your document, like a name, a place, or a date (e.g., `Queen Victoria`, `1848`, `London`).
+        - A **Label** is the category that entity belongs to (e.g., `PERSON`, `DATE`, `LOCATION`).
+        This tool helps you define your labels and then automatically finds the corresponding entities in your text.
         """
     )
+    gr.Markdown("--- \n## Step 1: Generate a Research Framework")
+    gr.Markdown("Enter a historical topic to get AI-suggested categories and labels for your analysis.")
     with gr.Row():
+        topic = gr.Textbox(label="Enter a Historical Topic", placeholder="e.g., The Chartist Movement, The Protestant Reformation")
+        provider = gr.Dropdown(choices=list(MODEL_OPTIONS.keys()), label="Choose Conceptual AI Model")
     with gr.Row():
         openai_key = gr.Textbox(label="OpenAI API Key", type="password")
         anthropic_key = gr.Textbox(label="Anthropic API Key", type="password")
         google_key = gr.Textbox(label="Google API Key", type="password")
+    generate_btn = gr.Button("Generate Framework", variant="primary")
+    gr.Markdown("--- \n## Step 2: Define Labels and Analyze Source Text")
+    gr.Markdown("#### 1. AI-Suggested Labels")
+    gr.Markdown("Review the suggested labels below. Select or deselect them as needed for your specific research goals.")
     dynamic_components = []
     with gr.Column():
         for i in range(MAX_CATEGORIES):
+            with gr.Accordion(f"Suggested Category {i+1}", visible=False) as acc:
+                cg = gr.CheckboxGroup(label="Labels in this category", interactive=True)
                 with gr.Row():
+                    select_btn = gr.Button("Select All", size="sm")
+                    deselect_btn = gr.Button("Deselect All", size="sm")
+                dynamic_components.append((acc, cg, select_btn, deselect_btn))
+    gr.Markdown("#### 2. Standard Labels (Optional)")
     with gr.Group():
         standard_labels_checkbox = gr.CheckboxGroup(choices=STANDARD_LABELS, value=STANDARD_LABELS, label="Standard Entity Labels", info="Common categories like people, places, and dates.")
         with gr.Row():
             select_all_std_btn = gr.Button("Select All", size="sm")
             deselect_all_std_btn = gr.Button("Deselect All", size="sm")
+    gr.Markdown("#### 3. Custom Labels (Optional)")
     with gr.Group():
         custom_labels_textbox = gr.Textbox(label="Enter Custom Labels (comma-separated)", placeholder="e.g., Technology, Weapon, Secret Society...")
+    gr.Markdown("--- \n## Step 3: Run Analysis")
+    threshold_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.4, step=0.05, label="Confidence Threshold", info="Controls the strictness of the Extraction AI. Lower values find more potential matches (less strict). Higher values return fewer, more precise matches (more strict).")
+    text_input = gr.Textbox(label="Paste Your Source Text Here for Analysis", lines=15, placeholder="Paste a historical document, an article, or a chapter...")
+    analyze_btn = gr.Button("Analyze Text", variant="primary")
+    analysis_status = gr.Markdown(visible=False)
+    gr.Markdown("--- \n## Step 4: Review Results")
     gr.Markdown(
         """
+        ✨ **Pro Tip: Add Labels Manually.**
+        If the AI missed an entity, you can add it yourself. In the **"Highlighted Text"** view, simply **click and drag to highlight any piece of text**. A dialog will appear, allowing you to assign it a new or existing label.
         """
     )
         with gr.TabItem("Highlighted Text"):
             highlighted_text_output = gr.HighlightedText(label="Found Entities", interactive=True)
         with gr.TabItem("Detailed Results"):
+            detailed_results_output = gr.Markdown(label="Aggregated List of Found Entities")
+        with gr.TabItem("Debug Log"):
+            debug_output = gr.Textbox(label="Extraction Process Log", interactive=False, lines=8)
     # --- Backend Functions ---
     def handle_generate(topic, provider, openai_k, anthropic_k, google_k):
         yield {
+            generate_btn: gr.update(value="Generating...", interactive=False)
         }
         try:
+            key_dict = {"openai_key": os.environ.get("OPENAI_API_KEY", openai_k), "anthropic_key": os.environ.get("ANTHROPIC_API_KEY", anthropic_k), "google_key": os.environ.get("GOOGLE_API_KEY", google_k)}
             provider_id = MODEL_OPTIONS.get(provider)
             if not topic or not provider or not key_dict.get(f"{provider_id}_key"):
+                raise gr.Error("A topic, provider, and valid API Key for that provider are required.")
+            prompt = FRAMEWORK_PROMPT_TEMPLATE.format(topic=topic)
             raw_framework = generate_from_prompt(prompt, provider, key_dict)
             framework = defaultdict(list)
             current_category = None
             for line in raw_framework.split('\n'):
                     framework[current_category].extend([e.strip() for e in entities.split(',') if e.strip()])
             if not framework:
+                raise gr.Error("The AI failed to generate categories. Please try again or rephrase your topic.")
             updates = {}
             categories = list(framework.items())
             for i in range(MAX_CATEGORIES):
+                accordion_comp, checkbox_comp, sel_btn, desel_btn = dynamic_components[i]
                 if i < len(categories):
                     category_name, entities = categories[i]
                     sorted_entities = sorted(list(set(entities)))
                     updates[accordion_comp] = gr.update(label=f"Category: {category_name}", visible=True)
                     updates[checkbox_comp] = gr.update(choices=sorted_entities, value=sorted_entities, label="Suggested Labels", visible=True)
+                    updates[sel_btn] = gr.update(visible=True)
+                    updates[desel_btn] = gr.update(visible=True)
                 else:
                     updates[accordion_comp] = gr.update(visible=False)
+                    updates[checkbox_comp] = gr.update(choices=[], value=[], visible=False)
+                    updates[sel_btn] = gr.update(visible=False)
+                    updates[desel_btn] = gr.update(visible=False)
+            updates[generate_btn] = gr.update(value="Generate Framework", interactive=True)
             yield updates
         except Exception as e:
+            yield {generate_btn: gr.update(value="Generate Framework", interactive=True)}
             raise gr.Error(str(e))
+    def analyze_text(text, standard_labels, custom_label_text, threshold, *suggested_labels_from_groups):
         yield {
+            analyze_btn: gr.update(value="Analyzing...", interactive=False),
+            analysis_status: gr.update(value="The Extraction AI is scanning your text. This may take a moment...", visible=True),
+            highlighted_text_output: None, detailed_results_output: None, debug_output: "Starting analysis..."
         }
         debug_info = []
         if gliner_model is None:
+            raise gr.Error("Extraction AI (GLiNER model) is not loaded. Cannot analyze text. Please check logs and restart.")
         labels_to_use = set()
         for group in suggested_labels_from_groups:
             if group: labels_to_use.update(group)
         if standard_labels: labels_to_use.update(standard_labels)
         custom = {l.strip() for l in custom_label_text.split(',') if l.strip()}
         if custom: labels_to_use.update(custom)
         final_labels = sorted(list(labels_to_use))
+        debug_info.append(f"Searching for {len(final_labels)} unique labels.")
+        debug_info.append(f"Confidence Threshold set to: {threshold}")
         if not text or not final_labels:
             yield {
+                analyze_btn: gr.update(value="Analyze Text", interactive=True),
                 analysis_status: gr.update(visible=False),
                 highlighted_text_output: {"text": text, "entities": []},
+                detailed_results_output: "Analysis stopped: Please provide text and select at least one label to search for.",
                 debug_output: "Analysis stopped: No text or no labels provided."
             }
             return
         all_entities = []
         chunk_size, overlap = 1024, 100
         for i in range(0, len(text), chunk_size - overlap):
             chunk = text[i : i + chunk_size]
             chunk_entities = gliner_model.predict_entities(chunk, final_labels, threshold=threshold)
             for ent in chunk_entities:
+                ent['start'] += i; ent['end'] += i
                 all_entities.append(ent)
         unique_entities = [dict(t) for t in {tuple(d.items()) for d in all_entities}]
+        debug_info.append(f"Found {len(unique_entities)} raw entity mentions.")
+        # --- BUG FIX: Map 'label' to 'entity' for Gradio's HighlightedText component ---
         highlighted_output_data = {
             "text": text,
+            "entities": [{"start": ent["start"], "end": ent["end"], "entity": ent["label"]} for ent in unique_entities]
         }
         aggregated_matches = defaultdict(lambda: {'count': 0, 'scores': [], 'original_casing': ''})
         for ent in unique_entities:
             match_text = text[ent['start']:ent['end']]
             key = (ent['label'], match_text.lower())
             aggregated_matches[key]['count'] += 1
             aggregated_matches[key]['scores'].append(ent['score'])
             if not aggregated_matches[key]['original_casing']:
                 aggregated_matches[key]['original_casing'] = match_text
         results_by_label = defaultdict(list)
         for (label, _), data in aggregated_matches.items():
             avg_score = np.mean(data['scores'])
+            results_by_label[label].append({'text': data['original_casing'], 'count': data['count'], 'avg_score': avg_score})
         markdown_string = ""
         for label, items in sorted(results_by_label.items()):
             markdown_string += f"### {label}\n"
+            markdown_string += "| Text Found | Instances | Avg. Confidence Score* |\n"
+            markdown_string += "|------------|-----------|--------------------------|\n"
             for item in sorted(items, key=lambda x: x['count'], reverse=True):
                 markdown_string += f"| {item['text']} | {item['count']} | {item['avg_score']:.2f} |\n"
             markdown_string += "\n"
         if not markdown_string:
+            markdown_string = "No entities found. Consider lowering the confidence threshold or refining your labels."
         else:
+            markdown_string += "\n---\n<small><i>*<b>Confidence Score:</b> How sure the Extraction AI is that it found the correct label (1.00 = 100% certain). The score is an average across all instances of that text.</i></small>"
+        debug_info.append("Analysis complete.")
         yield {
+            analyze_btn: gr.update(value="Analyze Text", interactive=True),
             analysis_status: gr.update(visible=False),
             highlighted_text_output: highlighted_output_data,
             detailed_results_output: markdown_string,
         outputs=[generate_btn] + [comp for pair in dynamic_components for comp in pair]
     )
     def deselect_all():
         return gr.update(value=[])
     def select_all(choices):
     deselect_all_std_btn.click(fn=deselect_all, inputs=None, outputs=[standard_labels_checkbox])
     select_all_std_btn.click(lambda: select_all(STANDARD_LABELS), inputs=None, outputs=[standard_labels_checkbox])
+    # Wire up the dynamic select/deselect buttons
+    for _, cg, sel_btn, desel_btn in dynamic_components:
+        sel_btn.click(fn=select_all, inputs=[cg], outputs=[cg])
+        desel_btn.click(fn=deselect_all, inputs=None, outputs=[cg])
     analyze_btn.click(
+        fn=analyze_text,
+        inputs=[text_input, standard_labels_checkbox, custom_labels_textbox, threshold_slider] + [cg for acc, cg, sel, desel in dynamic_components],
         outputs=[analyze_btn, analysis_status, highlighted_text_output, detailed_results_output, debug_output]
     )