Spaces:

mk1985
/

Historical-Text-Analyser

Sleeping

App Files Files Community

mk1985 committed on Jul 23

Commit

35ef54e

verified ·

1 Parent(s): 80cecba

Update app.py

Browse files

Files changed (1) hide show

app.py +74 -6

app.py CHANGED Viewed

@@ -1,3 +1,5 @@
 # 📚 Install dependencies
 # Make sure to run this in your environment if you haven't already
 # !pip install openai anthropic google-generativeai gradio transformers torch gliner pandas --quiet
@@ -35,14 +37,38 @@ except Exception as e:
     gliner_model = None
 # --- Prompt and other constants remain the same ---
-HIERARCHICAL_PROMPT_TEMPLATE = "..." # (Keeping this collapsed for brevity, no changes needed)
-TRADITIONAL_NER_LABELS = ["..."] # (Keeping this collapsed for brevity, no changes needed)
 MAX_CATEGORIES = 8
 with gr.Blocks(title="Historical Text Analysis Tool", css=".prose { word-break: break-word; }") as demo:
     # --- UI remains the same up to the output tabs ---
     gr.Markdown("# Historical Text Analysis Tool")
-    gr.Markdown("...") # Welcome text collapsed for brevity
     gr.Markdown("---")
     gr.Markdown("## Step 1: Get Keyword Ideas")
     with gr.Row():
@@ -101,6 +127,38 @@ with gr.Blocks(title="Historical Text Analysis Tool", css=".prose { word-break:
             debug_output = gr.Textbox(label="Extraction Log", interactive=False, lines=8)
     # --- Backend Functions ---
     def handle_generate(topic, provider, openai_k, anthropic_k, google_k):
         # ... (This function remains unchanged) ...
         yield {generate_btn: gr.update(value="Consulting the Archives...", interactive=False)}
@@ -196,7 +254,7 @@ with gr.Blocks(title="Historical Text Analysis Tool", css=".prose { word-break:
         unique_entities = [dict(t) for t in {tuple(d.items()) for d in all_entities}]
         debug_info.append(f"📊 Found {len(unique_entities)} unique matches.")
-        highlighted_entities = [{"start": ent["start"], "end": ent["end"], "entity": ent["label"]} for ent in unique_entities]
         # --- NEW: Use helper to create DataFrame ---
         results_df = process_entities_to_df(highlighted_entities, text)
@@ -218,6 +276,7 @@ with gr.Blocks(title="Historical Text Analysis Tool", css=".prose { word-break:
         It re-calculates the DataFrame and updates the UI.
         """
         # new_highlighted_entities is the full value of the component, not just a diff
         results_df = process_entities_to_df(new_highlighted_entities, original_text)
         return {
@@ -248,10 +307,19 @@ with gr.Blocks(title="Historical Text Analysis Tool", css=".prose { word-break:
     ner_output.change(fn=update_button_on_check, inputs=[ner_output], outputs=[toggle_ner_btn])
     def create_toggle_handler(cg_component):
-        def handler(button_text): return handle_toggle_click(button_text, cg_component.choices)
         return handler
     for acc, cg, toggle_btn in category_components:
-        toggle_btn.click(fn=create_toggle_handler(cg), inputs=[toggle_btn], outputs=[cg, toggle_btn])
         cg.change(fn=update_button_on_check, inputs=[cg], outputs=[toggle_btn])
     match_btn.click(

+--- START OF FILE app.py ---
 # 📚 Install dependencies
 # Make sure to run this in your environment if you haven't already
 # !pip install openai anthropic google-generativeai gradio transformers torch gliner pandas --quiet
     gliner_model = None
 # --- Prompt and other constants remain the same ---
+HIERARCHICAL_PROMPT_TEMPLATE = """You are a helpful research assistant specializing in history. Your task is to brainstorm a hierarchical set of keywords and named entities related to a historical topic.
+The user will provide a topic. You should generate a structured list of categories and, for each category, a comma-separated list of relevant keywords or phrases. These keywords should be things a researcher might want to search for in a historical text.
+Rules:
+1.  Structure your response using Markdown.
+2.  Use '###' for each category title (e.g., '### Key Figures').
+3.  Beneath each category, provide a single bullet point '-' followed by a comma-separated list of 5-10 specific keywords or entities.
+4.  Do not add any introductory or concluding sentences. Just provide the structured list.
+5.  The keywords should be specific and likely to appear in primary or secondary source documents.
+Example for the topic "The Protestant Reformation":
+### Key Figures
+- Martin Luther, John Calvin, Huldrych Zwingli, Henry VIII, Charles V, Pope Leo X
+### Core Theological Concepts
+- Sola Scriptura, Sola Fide, Indulgences, Priesthood of all believers, Justification by faith
+### Key Events
+- Diet of Worms, Ninety-five Theses, Marburg Colloquy, Council of Trent, Edict of Worms
+### Important Locations
+- Wittenberg, Geneva, Rome, Wartburg Castle, Augsburg
+### Associated Groups
+- Protestants, Lutherans, Calvinists, Anabaptists, Huguenots, Catholic Church
+Now, generate the framework for the following topic:
+Topic: {topic}"""
+TRADITIONAL_NER_LABELS = ["PERSON", "ORGANIZATION", "LOCATION", "DATE", "EVENT", "WORK_OF_ART", "LAW"]
 MAX_CATEGORIES = 8
 with gr.Blocks(title="Historical Text Analysis Tool", css=".prose { word-break: break-word; }") as demo:
     # --- UI remains the same up to the output tabs ---
     gr.Markdown("# Historical Text Analysis Tool")
+    gr.Markdown("A tool to help historians and researchers quickly identify key terms and concepts in texts. Start by generating keyword ideas for a topic, then paste your text to find all occurrences.") # Welcome text
     gr.Markdown("---")
     gr.Markdown("## Step 1: Get Keyword Ideas")
     with gr.Row():
             debug_output = gr.Textbox(label="Extraction Log", interactive=False, lines=8)
     # --- Backend Functions ---
+    # --- THIS IS THE MISSING FUNCTION THAT WAS ADDED ---
+    def generate_from_prompt(prompt, provider, key_dict):
+        """Calls the appropriate LLM API based on the selected provider."""
+        provider_id = MODEL_OPTIONS.get(provider)
+        if provider_id == "openai":
+            client = openai.OpenAI(api_key=key_dict["openai_key"])
+            response = client.chat.completions.create(
+                model="gpt-4o",
+                messages=[{"role": "user", "content": prompt}]
+            )
+            return response.choices[0].message.content
+        elif provider_id == "anthropic":
+            client = anthropic.Anthropic(api_key=key_dict["anthropic_key"])
+            response = client.messages.create(
+                model="claude-3-opus-20240229",
+                max_tokens=1024,
+                messages=[{"role": "user", "content": prompt}]
+            )
+            return response.content[0].text
+        elif provider_id == "google":
+            genai.configure(api_key=key_dict["google_key"])
+            model = genai.GenerativeModel('gemini-1.5-pro-latest')
+            response = model.generate_content(prompt)
+            return response.text
+        else:
+            raise ValueError("Invalid provider selected")
     def handle_generate(topic, provider, openai_k, anthropic_k, google_k):
         # ... (This function remains unchanged) ...
         yield {generate_btn: gr.update(value="Consulting the Archives...", interactive=False)}
         unique_entities = [dict(t) for t in {tuple(d.items()) for d in all_entities}]
         debug_info.append(f"📊 Found {len(unique_entities)} unique matches.")
+        highlighted_entities = [{"start": ent["start"], "end": ent["end"], "label": ent["label"]} for ent in unique_entities]
         # --- NEW: Use helper to create DataFrame ---
         results_df = process_entities_to_df(highlighted_entities, text)
         It re-calculates the DataFrame and updates the UI.
         """
         # new_highlighted_entities is the full value of the component, not just a diff
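+        # NOTE(review): presumably a list of dicts with 'entity', 'start', 'end' keys (Gradio 4+).
+        # The entity dicts built for highlighting in this file use a 'label' key instead; confirm which key process_entities_to_df expects.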
         results_df = process_entities_to_df(new_highlighted_entities, original_text)
         return {
     ner_output.change(fn=update_button_on_check, inputs=[ner_output], outputs=[toggle_ner_btn])
     def create_toggle_handler(cg_component):
+        # We need a closure to capture the correct cg_component for each button
+        def handler(button_text):
+            # Gradio provides the component's choices at runtime, so we can access them here
+            return handle_toggle_click(button_text, cg_component.choices)
         return handler
     for acc, cg, toggle_btn in category_components:
+        # Note: gr.State(cg.choices) wraps the choices list as it exists when this loop runs (not the component itself); it reaches the handler as its second input
+        toggle_btn.click(
+            fn=lambda btn_txt, choices: handle_toggle_click(btn_txt, choices),
+            inputs=[toggle_btn, gr.State(cg.choices)],
+            outputs=[cg, toggle_btn]
+        )
         cg.change(fn=update_button_on_check, inputs=[cg], outputs=[toggle_btn])
     match_btn.click(