Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
|
|
|
|
|
| 1 |
# Install dependencies
|
| 2 |
# Make sure to run this in your environment if you haven't already
|
| 3 |
# !pip install openai anthropic google-generativeai gradio transformers torch gliner pandas --quiet
|
|
@@ -35,14 +37,38 @@ except Exception as e:
|
|
| 35 |
gliner_model = None
|
| 36 |
|
| 37 |
# --- Prompt and other constants remain the same ---
|
| 38 |
-
HIERARCHICAL_PROMPT_TEMPLATE = "
|
| 39 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
MAX_CATEGORIES = 8
|
| 41 |
|
| 42 |
with gr.Blocks(title="Historical Text Analysis Tool", css=".prose { word-break: break-word; }") as demo:
|
| 43 |
# --- UI remains the same up to the output tabs ---
|
| 44 |
gr.Markdown("# Historical Text Analysis Tool")
|
| 45 |
-
gr.Markdown("
|
| 46 |
gr.Markdown("---")
|
| 47 |
gr.Markdown("## Step 1: Get Keyword Ideas")
|
| 48 |
with gr.Row():
|
|
@@ -101,6 +127,38 @@ with gr.Blocks(title="Historical Text Analysis Tool", css=".prose { word-break:
|
|
| 101 |
debug_output = gr.Textbox(label="Extraction Log", interactive=False, lines=8)
|
| 102 |
|
| 103 |
# --- Backend Functions ---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
def handle_generate(topic, provider, openai_k, anthropic_k, google_k):
|
| 105 |
# ... (This function remains unchanged) ...
|
| 106 |
yield {generate_btn: gr.update(value="Consulting the Archives...", interactive=False)}
|
|
@@ -196,7 +254,7 @@ with gr.Blocks(title="Historical Text Analysis Tool", css=".prose { word-break:
|
|
| 196 |
unique_entities = [dict(t) for t in {tuple(d.items()) for d in all_entities}]
|
| 197 |
debug_info.append(f"π Found {len(unique_entities)} unique matches.")
|
| 198 |
|
| 199 |
-
highlighted_entities = [{"start": ent["start"], "end": ent["end"], "
|
| 200 |
|
| 201 |
# --- NEW: Use helper to create DataFrame ---
|
| 202 |
results_df = process_entities_to_df(highlighted_entities, text)
|
|
@@ -218,6 +276,7 @@ with gr.Blocks(title="Historical Text Analysis Tool", css=".prose { word-break:
|
|
| 218 |
It re-calculates the DataFrame and updates the UI.
|
| 219 |
"""
|
| 220 |
# new_highlighted_entities is the full value of the component, not just a diff
|
|
|
|
| 221 |
results_df = process_entities_to_df(new_highlighted_entities, original_text)
|
| 222 |
|
| 223 |
return {
|
|
@@ -248,10 +307,19 @@ with gr.Blocks(title="Historical Text Analysis Tool", css=".prose { word-break:
|
|
| 248 |
ner_output.change(fn=update_button_on_check, inputs=[ner_output], outputs=[toggle_ner_btn])
|
| 249 |
|
| 250 |
def create_toggle_handler(cg_component):
|
| 251 |
-
|
|
|
|
|
|
|
|
|
|
| 252 |
return handler
|
|
|
|
| 253 |
for acc, cg, toggle_btn in category_components:
|
| 254 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 255 |
cg.change(fn=update_button_on_check, inputs=[cg], outputs=[toggle_btn])
|
| 256 |
|
| 257 |
match_btn.click(
|
|
|
|
| 1 |
+
# --- START OF FILE app.py ---
|
| 2 |
+
|
| 3 |
# Install dependencies
|
| 4 |
# Make sure to run this in your environment if you haven't already
|
| 5 |
# !pip install openai anthropic google-generativeai gradio transformers torch gliner pandas --quiet
|
|
|
|
| 37 |
gliner_model = None
|
| 38 |
|
| 39 |
# --- Prompt and other constants remain the same ---
|
| 40 |
+
HIERARCHICAL_PROMPT_TEMPLATE = """You are a helpful research assistant specializing in history. Your task is to brainstorm a hierarchical set of keywords and named entities related to a historical topic.
|
| 41 |
+
|
| 42 |
+
The user will provide a topic. You should generate a structured list of categories and, for each category, a comma-separated list of relevant keywords or phrases. These keywords should be things a researcher might want to search for in a historical text.
|
| 43 |
+
|
| 44 |
+
Rules:
|
| 45 |
+
1. Structure your response using Markdown.
|
| 46 |
+
2. Use '###' for each category title (e.g., '### Key Figures').
|
| 47 |
+
3. Beneath each category, provide a single bullet point '-' followed by a comma-separated list of 5-10 specific keywords or entities.
|
| 48 |
+
4. Do not add any introductory or concluding sentences. Just provide the structured list.
|
| 49 |
+
5. The keywords should be specific and likely to appear in primary or secondary source documents.
|
| 50 |
+
|
| 51 |
+
Example for the topic "The Protestant Reformation":
|
| 52 |
+
### Key Figures
|
| 53 |
+
- Martin Luther, John Calvin, Huldrych Zwingli, Henry VIII, Charles V, Pope Leo X
|
| 54 |
+
### Core Theological Concepts
|
| 55 |
+
- Sola Scriptura, Sola Fide, Indulgences, Priesthood of all believers, Justification by faith
|
| 56 |
+
### Key Events
|
| 57 |
+
- Diet of Worms, Ninety-five Theses, Marburg Colloquy, Council of Trent, Edict of Worms
|
| 58 |
+
### Important Locations
|
| 59 |
+
- Wittenberg, Geneva, Rome, Wartburg Castle, Augsburg
|
| 60 |
+
### Associated Groups
|
| 61 |
+
- Protestants, Lutherans, Calvinists, Anabaptists, Huguenots, Catholic Church
|
| 62 |
+
|
| 63 |
+
Now, generate the framework for the following topic:
|
| 64 |
+
Topic: {topic}"""
|
| 65 |
+
TRADITIONAL_NER_LABELS = ["PERSON", "ORGANIZATION", "LOCATION", "DATE", "EVENT", "WORK_OF_ART", "LAW"]
|
| 66 |
MAX_CATEGORIES = 8
|
| 67 |
|
| 68 |
with gr.Blocks(title="Historical Text Analysis Tool", css=".prose { word-break: break-word; }") as demo:
|
| 69 |
# --- UI remains the same up to the output tabs ---
|
| 70 |
gr.Markdown("# Historical Text Analysis Tool")
|
| 71 |
+
gr.Markdown("A tool to help historians and researchers quickly identify key terms and concepts in texts. Start by generating keyword ideas for a topic, then paste your text to find all occurrences.") # Welcome text collapsed for brevity
|
| 72 |
gr.Markdown("---")
|
| 73 |
gr.Markdown("## Step 1: Get Keyword Ideas")
|
| 74 |
with gr.Row():
|
|
|
|
| 127 |
debug_output = gr.Textbox(label="Extraction Log", interactive=False, lines=8)
|
| 128 |
|
| 129 |
# --- Backend Functions ---
|
| 130 |
+
|
| 131 |
+
# --- THIS IS THE MISSING FUNCTION THAT WAS ADDED ---
|
| 132 |
+
def generate_from_prompt(prompt, provider, key_dict, *, model=None, max_tokens=1024):
    """Send ``prompt`` to the LLM backend selected by ``provider`` and return its reply text.

    Args:
        prompt: Prompt text; it is sent as a single ``user`` message
            (OpenAI / Anthropic) or as the content to generate from (Google).
        provider: Key looked up in ``MODEL_OPTIONS``; the mapped value must be
            ``"openai"``, ``"anthropic"`` or ``"google"``.
        key_dict: Mapping with the API keys under ``"openai_key"``,
            ``"anthropic_key"`` and ``"google_key"``. Only the entry for the
            selected backend is read.
        model: Optional model name. When ``None`` (or empty) the per-backend
            default is used: ``gpt-4o``, ``claude-3-opus-20240229`` or
            ``gemini-1.5-pro-latest``.
        max_tokens: Reply-length cap. It is forwarded to the Anthropic call
            only; the other two calls are made without an explicit cap.

    Returns:
        The text of the reply: the first choice's message content (OpenAI),
        the first content block's text (Anthropic), or ``response.text``
        (Google).

    Raises:
        ValueError: If ``provider`` does not map to a supported backend.
        KeyError: If ``key_dict`` lacks the key for the selected backend.
    """
    provider_id = MODEL_OPTIONS.get(provider)

    if provider_id == "openai":
        client = openai.OpenAI(api_key=key_dict["openai_key"])
        response = client.chat.completions.create(
            model=model or "gpt-4o",
            messages=[{"role": "user", "content": prompt}],
        )
        return response.choices[0].message.content

    if provider_id == "anthropic":
        client = anthropic.Anthropic(api_key=key_dict["anthropic_key"])
        response = client.messages.create(
            model=model or "claude-3-opus-20240229",
            max_tokens=max_tokens,
            messages=[{"role": "user", "content": prompt}],
        )
        return response.content[0].text

    if provider_id == "google":
        # genai keeps the API key as module-level configuration, so it is
        # (re)configured on every call with the key supplied by the caller.
        genai.configure(api_key=key_dict["google_key"])
        gemini = genai.GenerativeModel(model or "gemini-1.5-pro-latest")
        response = gemini.generate_content(prompt)
        return response.text

    # Name the offending value so a bad dropdown entry is easy to diagnose.
    raise ValueError(f"Invalid provider selected: {provider!r}")
|
| 161 |
+
|
| 162 |
def handle_generate(topic, provider, openai_k, anthropic_k, google_k):
|
| 163 |
# ... (This function remains unchanged) ...
|
| 164 |
yield {generate_btn: gr.update(value="Consulting the Archives...", interactive=False)}
|
|
|
|
| 254 |
unique_entities = [dict(t) for t in {tuple(d.items()) for d in all_entities}]
|
| 255 |
debug_info.append(f"π Found {len(unique_entities)} unique matches.")
|
| 256 |
|
| 257 |
+
highlighted_entities = [{"start": ent["start"], "end": ent["end"], "label": ent["label"]} for ent in unique_entities]
|
| 258 |
|
| 259 |
# --- NEW: Use helper to create DataFrame ---
|
| 260 |
results_df = process_entities_to_df(highlighted_entities, text)
|
|
|
|
| 276 |
It re-calculates the DataFrame and updates the UI.
|
| 277 |
"""
|
| 278 |
# new_highlighted_entities is the full value of the component, not just a diff
|
| 279 |
+
# In Gradio > 4, the format is a list of dictionaries with 'entity', 'start', 'end'
|
| 280 |
results_df = process_entities_to_df(new_highlighted_entities, original_text)
|
| 281 |
|
| 282 |
return {
|
|
|
|
| 307 |
ner_output.change(fn=update_button_on_check, inputs=[ner_output], outputs=[toggle_ner_btn])
|
| 308 |
|
| 309 |
def create_toggle_handler(cg_component):
    """Return a click handler bound to ``cg_component``.

    A factory is used so that every returned handler closes over its own
    component instead of sharing one loop variable between all buttons.
    """
    def handler(button_text):
        # ``choices`` is read from the captured component each time the
        # handler runs, then forwarded together with the button's text.
        available_choices = cg_component.choices
        return handle_toggle_click(button_text, available_choices)

    return handler
|
| 315 |
+
|
| 316 |
for acc, cg, toggle_btn in category_components:
|
| 317 |
+
# Note: We pass the component's choices (cg.choices), wrapped in gr.State, so the handler receives them as its second input
|
| 318 |
+
toggle_btn.click(
|
| 319 |
+
fn=lambda btn_txt, choices: handle_toggle_click(btn_txt, choices),
|
| 320 |
+
inputs=[toggle_btn, gr.State(cg.choices)],
|
| 321 |
+
outputs=[cg, toggle_btn]
|
| 322 |
+
)
|
| 323 |
cg.change(fn=update_button_on_check, inputs=[cg], outputs=[toggle_btn])
|
| 324 |
|
| 325 |
match_btn.click(
|