mk1985 committed on
Commit
35ef54e
·
verified ·
1 Parent(s): 80cecba

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -6
app.py CHANGED
@@ -1,3 +1,5 @@
 
 
1
  # 📚 Install dependencies
2
  # Make sure to run this in your environment if you haven't already
3
  # !pip install openai anthropic google-generativeai gradio transformers torch gliner pandas --quiet
@@ -35,14 +37,38 @@ except Exception as e:
35
  gliner_model = None
36
 
37
  # --- Prompt and other constants remain the same ---
38
- HIERARCHICAL_PROMPT_TEMPLATE = "..." # (Keeping this collapsed for brevity, no changes needed)
39
- TRADITIONAL_NER_LABELS = ["..."] # (Keeping this collapsed for brevity, no changes needed)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  MAX_CATEGORIES = 8
41
 
42
  with gr.Blocks(title="Historical Text Analysis Tool", css=".prose { word-break: break-word; }") as demo:
43
  # --- UI remains the same up to the output tabs ---
44
  gr.Markdown("# Historical Text Analysis Tool")
45
- gr.Markdown("...") # Welcome text collapsed for brevity
46
  gr.Markdown("---")
47
  gr.Markdown("## Step 1: Get Keyword Ideas")
48
  with gr.Row():
@@ -101,6 +127,38 @@ with gr.Blocks(title="Historical Text Analysis Tool", css=".prose { word-break:
101
  debug_output = gr.Textbox(label="Extraction Log", interactive=False, lines=8)
102
 
103
  # --- Backend Functions ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  def handle_generate(topic, provider, openai_k, anthropic_k, google_k):
105
  # ... (This function remains unchanged) ...
106
  yield {generate_btn: gr.update(value="Consulting the Archives...", interactive=False)}
@@ -196,7 +254,7 @@ with gr.Blocks(title="Historical Text Analysis Tool", css=".prose { word-break:
196
  unique_entities = [dict(t) for t in {tuple(d.items()) for d in all_entities}]
197
  debug_info.append(f"📊 Found {len(unique_entities)} unique matches.")
198
 
199
- highlighted_entities = [{"start": ent["start"], "end": ent["end"], "entity": ent["label"]} for ent in unique_entities]
200
 
201
  # --- NEW: Use helper to create DataFrame ---
202
  results_df = process_entities_to_df(highlighted_entities, text)
@@ -218,6 +276,7 @@ with gr.Blocks(title="Historical Text Analysis Tool", css=".prose { word-break:
218
  It re-calculates the DataFrame and updates the UI.
219
  """
220
  # new_highlighted_entities is the full value of the component, not just a diff
 
221
  results_df = process_entities_to_df(new_highlighted_entities, original_text)
222
 
223
  return {
@@ -248,10 +307,19 @@ with gr.Blocks(title="Historical Text Analysis Tool", css=".prose { word-break:
248
  ner_output.change(fn=update_button_on_check, inputs=[ner_output], outputs=[toggle_ner_btn])
249
 
250
  def create_toggle_handler(cg_component):
251
- def handler(button_text): return handle_toggle_click(button_text, cg_component.choices)
 
 
 
252
  return handler
 
253
  for acc, cg, toggle_btn in category_components:
254
- toggle_btn.click(fn=create_toggle_handler(cg), inputs=[toggle_btn], outputs=[cg, toggle_btn])
 
 
 
 
 
255
  cg.change(fn=update_button_on_check, inputs=[cg], outputs=[toggle_btn])
256
 
257
  match_btn.click(
 
1
+ --- START OF FILE app.py ---
2
+
3
  # 📚 Install dependencies
4
  # Make sure to run this in your environment if you haven't already
5
  # !pip install openai anthropic google-generativeai gradio transformers torch gliner pandas --quiet
 
37
  gliner_model = None
38
 
39
  # --- Prompt and other constants remain the same ---
40
+ HIERARCHICAL_PROMPT_TEMPLATE = """You are a helpful research assistant specializing in history. Your task is to brainstorm a hierarchical set of keywords and named entities related to a historical topic.
41
+
42
+ The user will provide a topic. You should generate a structured list of categories and, for each category, a comma-separated list of relevant keywords or phrases. These keywords should be things a researcher might want to search for in a historical text.
43
+
44
+ Rules:
45
+ 1. Structure your response using Markdown.
46
+ 2. Use '###' for each category title (e.g., '### Key Figures').
47
+ 3. Beneath each category, provide a single bullet point '-' followed by a comma-separated list of 5-10 specific keywords or entities.
48
+ 4. Do not add any introductory or concluding sentences. Just provide the structured list.
49
+ 5. The keywords should be specific and likely to appear in primary or secondary source documents.
50
+
51
+ Example for the topic "The Protestant Reformation":
52
+ ### Key Figures
53
+ - Martin Luther, John Calvin, Huldrych Zwingli, Henry VIII, Charles V, Pope Leo X
54
+ ### Core Theological Concepts
55
+ - Sola Scriptura, Sola Fide, Indulgences, Priesthood of all believers, Justification by faith
56
+ ### Key Events
57
+ - Diet of Worms, Ninety-five Theses, Marburg Colloquy, Council of Trent, Edict of Worms
58
+ ### Important Locations
59
+ - Wittenberg, Geneva, Rome, Wartburg Castle, Augsburg
60
+ ### Associated Groups
61
+ - Protestants, Lutherans, Calvinists, Anabaptists, Huguenots, Catholic Church
62
+
63
+ Now, generate the framework for the following topic:
64
+ Topic: {topic}"""
65
+ TRADITIONAL_NER_LABELS = ["PERSON", "ORGANIZATION", "LOCATION", "DATE", "EVENT", "WORK_OF_ART", "LAW"]
66
  MAX_CATEGORIES = 8
67
 
68
  with gr.Blocks(title="Historical Text Analysis Tool", css=".prose { word-break: break-word; }") as demo:
69
  # --- UI remains the same up to the output tabs ---
70
  gr.Markdown("# Historical Text Analysis Tool")
71
+ gr.Markdown("A tool to help historians and researchers quickly identify key terms and concepts in texts. Start by generating keyword ideas for a topic, then paste your text to find all occurrences.") # Welcome text collapsed for brevity
72
  gr.Markdown("---")
73
  gr.Markdown("## Step 1: Get Keyword Ideas")
74
  with gr.Row():
 
127
  debug_output = gr.Textbox(label="Extraction Log", interactive=False, lines=8)
128
 
129
  # --- Backend Functions ---
130
+
131
+ # --- THIS IS THE MISSING FUNCTION THAT WAS ADDED ---
132
+ def generate_from_prompt(prompt, provider, key_dict):
133
+ """Calls the appropriate LLM API based on the selected provider."""
134
+ provider_id = MODEL_OPTIONS.get(provider)
135
+
136
+ if provider_id == "openai":
137
+ client = openai.OpenAI(api_key=key_dict["openai_key"])
138
+ response = client.chat.completions.create(
139
+ model="gpt-4o",
140
+ messages=[{"role": "user", "content": prompt}]
141
+ )
142
+ return response.choices[0].message.content
143
+
144
+ elif provider_id == "anthropic":
145
+ client = anthropic.Anthropic(api_key=key_dict["anthropic_key"])
146
+ response = client.messages.create(
147
+ model="claude-3-opus-20240229",
148
+ max_tokens=1024,
149
+ messages=[{"role": "user", "content": prompt}]
150
+ )
151
+ return response.content[0].text
152
+
153
+ elif provider_id == "google":
154
+ genai.configure(api_key=key_dict["google_key"])
155
+ model = genai.GenerativeModel('gemini-1.5-pro-latest')
156
+ response = model.generate_content(prompt)
157
+ return response.text
158
+
159
+ else:
160
+ raise ValueError("Invalid provider selected")
161
+
162
  def handle_generate(topic, provider, openai_k, anthropic_k, google_k):
163
  # ... (This function remains unchanged) ...
164
  yield {generate_btn: gr.update(value="Consulting the Archives...", interactive=False)}
 
254
  unique_entities = [dict(t) for t in {tuple(d.items()) for d in all_entities}]
255
  debug_info.append(f"📊 Found {len(unique_entities)} unique matches.")
256
 
257
+ highlighted_entities = [{"start": ent["start"], "end": ent["end"], "label": ent["label"]} for ent in unique_entities]
258
 
259
  # --- NEW: Use helper to create DataFrame ---
260
  results_df = process_entities_to_df(highlighted_entities, text)
 
276
  It re-calculates the DataFrame and updates the UI.
277
  """
278
  # new_highlighted_entities is the full value of the component, not just a diff
279
+ # In Gradio > 4, the format is a list of dictionaries with 'entity', 'start', 'end'
280
  results_df = process_entities_to_df(new_highlighted_entities, original_text)
281
 
282
  return {
 
307
  ner_output.change(fn=update_button_on_check, inputs=[ner_output], outputs=[toggle_ner_btn])
308
 
309
  def create_toggle_handler(cg_component):
310
+ # We need a closure to capture the correct cg_component for each button
311
+ def handler(button_text):
312
+ # Gradio provides the component's choices at runtime, so we can access them here
313
+ return handle_toggle_click(button_text, cg_component.choices)
314
  return handler
315
+
316
  for acc, cg, toggle_btn in category_components:
317
+ # Note: We pass the component itself to gr.State to get its properties in the handler
318
+ toggle_btn.click(
319
+ fn=lambda btn_txt, choices: handle_toggle_click(btn_txt, choices),
320
+ inputs=[toggle_btn, gr.State(cg.choices)],
321
+ outputs=[cg, toggle_btn]
322
+ )
323
  cg.change(fn=update_button_on_check, inputs=[cg], outputs=[toggle_btn])
324
 
325
  match_btn.click(