Update app.py
app.py CHANGED

@@ -6,7 +6,7 @@ import json
import re
from uuid import uuid4
from datetime import datetime
-from duckduckgo_search import
+from duckduckgo_search import DDGS # Corrected import
from sentence_transformers import SentenceTransformer, util
from typing import List, Dict, Any, Optional, Union, Tuple
import logging

@@ -24,7 +24,7 @@ if not HF_API_KEY:
    raise ValueError("Please set the HF_API_KEY environment variable.")

# You can use different models for different tasks
-MAIN_LLM_ENDPOINT = "your-main-llm-endpoint"
+MAIN_LLM_ENDPOINT = "your-main-llm-endpoint" # Replace with your actual endpoint
REASONING_LLM_ENDPOINT = "your-reasoning-llm-endpoint" # Can be the same as main if needed
CRITIC_LLM_ENDPOINT = "your-critic-llm-endpoint" # Can be the same as main if needed

@@ -48,7 +48,7 @@ except Exception as e:
def hf_inference(endpoint, inputs, parameters=None, retries=5):
    headers = {"Authorization": f"Bearer {HF_API_KEY}"}
    payload = {"inputs": inputs, "parameters": parameters or {}}

    for attempt in range(retries):
        try:
            response = requests.post(endpoint, headers=headers, json=payload, timeout=TIMEOUT)

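For orientation, a minimal usage sketch of hf_inference as defined above; the endpoint constant is the placeholder from this file, and the generation parameters are illustrative assumptions rather than values taken from the commit.

# Hypothetical usage sketch -- generation parameters are placeholders.
output = hf_inference(
    MAIN_LLM_ENDPOINT,
    "Summarize the key findings on lithium mining impacts.",
    parameters={"max_new_tokens": 256, "temperature": 0.7},
)
if isinstance(output, dict) and "generated_text" in output:
    print(output["generated_text"])
else:
    print("Inference failed:", output)
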
@@ -61,40 +61,41 @@ def hf_inference(endpoint, inputs, parameters=None, retries=5):
            time.sleep(RETRY_DELAY * (1 + attempt))  # Exponential backoff
    return {"error": "Request failed after multiple retries."}

def tool_search_web(query: str, num_results: int = NUM_RESULTS, safesearch: str = "moderate",
                    time_filter: str = "", region: str = "wt-wt", language: str = "en-us") -> list:
    try:
+        with DDGS() as ddgs: # Use the DDGS context manager
+            results = [r for r in ddgs.text(query, max_results=num_results, safesearch=safesearch,
+                                            time=time_filter, region=region, hreflang=language)] # Simplified call
+            if results:
+                return [{"title": r["title"], "snippet": r["body"], "url": r["href"]} for r in results]
+            else:
+                return []
    except Exception as e:
        logger.error(f"DuckDuckGo search error: {e}")
        return []

def tool_reason(prompt: str, search_results: list, reasoning_context: list = [],
                critique: str = "", focus_areas: list = []) -> str:
    if not search_results:
        return "No search results to reason about."

    reasoning_input = "Reason about the following search results in relation to the prompt:\n\n"
    reasoning_input += f"Prompt: {prompt}\n\n"

    if focus_areas:
        reasoning_input += f"Focus particularly on these aspects: {', '.join(focus_areas)}\n\n"

    for i, result in enumerate(search_results):
        reasoning_input += f"- Result {i + 1}: Title: {result['title']}, Snippet: {result['snippet']}\n"

    if reasoning_context:
        recent_context = reasoning_context[-MAX_HISTORY_ITEMS:]
        reasoning_input += "\nPrevious Reasoning Context:\n" + "\n".join(recent_context)

    if critique:
        reasoning_input += f"\n\nRecent critique to address: {critique}\n"

    reasoning_input += "\nProvide a thorough, nuanced analysis that builds upon previous reasoning if applicable. Consider multiple perspectives and potential contradictions in the search results."

    reasoning_output = hf_inference(REASONING_LLM_ENDPOINT, reasoning_input)

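As a side note, a self-contained sketch of the duckduckgo_search call that the corrected import enables. Keyword arguments differ across versions of the library (recent releases use timelimit rather than time and have no hreflang parameter), so the exact signature used in the diff above should be checked against the installed version; the snippet below sticks to arguments known to exist in current releases.

# Hedged sketch: assumes a duckduckgo_search version that provides the DDGS class.
from duckduckgo_search import DDGS

def quick_search(query: str, max_results: int = 5) -> list:
    # Mirrors tool_search_web's output shape: title / snippet / url dictionaries.
    with DDGS() as ddgs:
        hits = ddgs.text(query, region="wt-wt", safesearch="moderate", max_results=max_results)
        return [{"title": h["title"], "snippet": h["body"], "url": h["href"]} for h in hits or []]

print(quick_search("lithium mining environmental impact", max_results=3))
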
@@ -108,15 +109,15 @@ def tool_reason(prompt: str, search_results: list, reasoning_context: list = [],
def tool_summarize(insights: list, prompt: str, contradictions: list = []) -> str:
    if not insights:
        return "No insights to summarize."

    summarization_input = f"Synthesize the following insights into a cohesive and comprehensive summary regarding: '{prompt}'\n\n"
    summarization_input += "\n\n".join(insights[-MAX_HISTORY_ITEMS:])  # Only use most recent insights

    if contradictions:
        summarization_input += "\n\nAddress these specific contradictions:\n" + "\n".join(contradictions)

    summarization_input += "\n\nProvide a well-structured summary that:\n1. Presents the main findings\n2. Acknowledges limitations and uncertainties\n3. Highlights areas of consensus and disagreement\n4. Suggests potential directions for further inquiry"

    summarization_output = hf_inference(MAIN_LLM_ENDPOINT, summarization_input)

    if isinstance(summarization_output, dict) and "generated_text" in summarization_output:

@@ -125,120 +126,120 @@ def tool_summarize(insights: list, prompt: str, contradictions: list = []) -> str:
    logger.error(f"Failed to generate summary: {summarization_output}")
    return "Could not generate a summary due to an error."

def tool_generate_search_query(prompt: str, previous_queries: list = [],
                               failed_queries: list = [], focus_areas: list = []) -> str:
    query_gen_input = f"Generate an effective search query for the following prompt: {prompt}\n"

    if previous_queries:
        recent_queries = previous_queries[-MAX_HISTORY_ITEMS:]
        query_gen_input += "Previous search queries:\n" + "\n".join(recent_queries) + "\n"

    if failed_queries:
        query_gen_input += "These queries didn't yield useful results:\n" + "\n".join(failed_queries) + "\n"

    if focus_areas:
        query_gen_input += f"Focus particularly on these aspects: {', '.join(focus_areas)}\n"

    query_gen_input += "Refine the search query based on previous queries, aiming for more precise results.\n"
    query_gen_input += "Search Query:"

    query_gen_output = hf_inference(MAIN_LLM_ENDPOINT, query_gen_input)

    if isinstance(query_gen_output, dict) and 'generated_text' in query_gen_output:
        return query_gen_output['generated_text'].strip()

    logger.error(f"Failed to generate search query: {query_gen_output}")
    return ""

def tool_critique_reasoning(reasoning_output: str, prompt: str,
                            previous_critiques: list = []) -> str:
    critique_input = f"Critically evaluate the following reasoning output in relation to the prompt:\n\nPrompt: {prompt}\n\nReasoning: {reasoning_output}\n\n"

    if previous_critiques:
        critique_input += "Previous critiques that should be addressed:\n" + "\n".join(previous_critiques[-MAX_HISTORY_ITEMS:]) + "\n\n"

    critique_input += "Identify any flaws, biases, logical fallacies, unsupported claims, or areas for improvement. Be specific and constructive. Suggest concrete ways to enhance the reasoning."

    critique_output = hf_inference(CRITIC_LLM_ENDPOINT, critique_input)

    if isinstance(critique_output, dict) and "generated_text" in critique_output:
        return critique_output["generated_text"].strip()

    logger.error(f"Failed to generate critique: {critique_output}")
    return "Could not generate a critique due to an error."

def tool_identify_contradictions(insights: list) -> list:
    if len(insights) < 2:
        return []

    contradiction_input = "Identify specific contradictions in these insights:\n\n" + "\n\n".join(insights[-MAX_HISTORY_ITEMS:])
    contradiction_input += "\n\nList each contradiction as a separate numbered point. If no contradictions exist, respond with 'No contradictions found.'"

    contradiction_output = hf_inference(CRITIC_LLM_ENDPOINT, contradiction_input)

    if isinstance(contradiction_output, dict) and "generated_text" in contradiction_output:
        result = contradiction_output["generated_text"].strip()
        if result == "No contradictions found.":
            return []

        # Extract numbered contradictions
        contradictions = re.findall(r'\d+\.\s+(.*?)(?=\d+\.|$)', result, re.DOTALL)
        return [c.strip() for c in contradictions if c.strip()]

    logger.error(f"Failed to identify contradictions: {contradiction_output}")
    return []

def tool_identify_focus_areas(prompt: str, insights: list = [],
                              failed_areas: list = []) -> list:
    focus_input = f"Based on this research prompt: '{prompt}'\n\n"

    if insights:
        focus_input += "And these existing insights:\n" + "\n".join(insights[-3:]) + "\n\n"  # Last 3 insights

    if failed_areas:
        focus_input += f"These focus areas didn't yield useful results: {', '.join(failed_areas)}\n\n"

    focus_input += "Identify 2-3 specific aspects that should be investigated further to get a complete understanding. Be precise and prioritize underexplored areas."

    focus_output = hf_inference(MAIN_LLM_ENDPOINT, focus_input)

    if isinstance(focus_output, dict) and "generated_text" in focus_output:
        result = focus_output["generated_text"].strip()
        # Extract areas, assuming they're listed with numbers, bullets, or in separate lines
        areas = re.findall(r'(?:^|\n)(?:\d+\.|\*|\-)\s*(.*?)(?=(?:\n(?:\d+\.|\*|\-|$))|$)', result)
        return [area.strip() for area in areas if area.strip()][:3]  # Limit to top 3

    logger.error(f"Failed to identify focus areas: {focus_output}")
    return []

def filter_results(search_results, prompt, previous_snippets=None):
    if not main_similarity_model or not search_results:
        return search_results

    try:
        prompt_embedding = main_similarity_model.encode(prompt, convert_to_tensor=True)
        filtered_results = []

        # Keep track of snippets we've already seen
        seen_snippets = set()
        if previous_snippets:
            seen_snippets.update(previous_snippets)

        for result in search_results:
            combined_text = result['title'] + " " + result['snippet']

            # Skip if we've seen this exact snippet before
            if result['snippet'] in seen_snippets:
                continue

            result_embedding = main_similarity_model.encode(combined_text, convert_to_tensor=True)
            cosine_score = util.pytorch_cos_sim(prompt_embedding, result_embedding)[0][0].item()

            if cosine_score >= SIMILARITY_THRESHOLD:
                result['relevance_score'] = cosine_score
                filtered_results.append(result)
                seen_snippets.add(result['snippet'])

        # Sort by relevance score
        filtered_results.sort(key=lambda x: x.get('relevance_score', 0), reverse=True)
        return filtered_results

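To illustrate the relevance filter above, a small standalone sketch of the sentence-transformers cosine scoring that filter_results relies on; the model name is an assumption, since the Space initializes main_similarity_model elsewhere in the file.

# Minimal sketch of the scoring used in filter_results; "all-MiniLM-L6-v2" is an assumed model.
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("all-MiniLM-L6-v2")
prompt_emb = model.encode("impacts of lithium mining", convert_to_tensor=True)
snippet_emb = model.encode("Lithium extraction strains local water supplies", convert_to_tensor=True)
score = util.pytorch_cos_sim(prompt_emb, snippet_emb)[0][0].item()
print(f"relevance score: {score:.2f}")  # compared against SIMILARITY_THRESHOLD in the app
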
@@ -250,15 +251,15 @@ def filter_results(search_results, prompt, previous_snippets=None):
# New tool: Extract entities for focused research
def tool_extract_key_entities(prompt: str) -> list:
    entity_input = f"Extract the key entities (people, organizations, concepts, technologies, etc.) from this research prompt that should be investigated individually:\n\n{prompt}\n\nList only the most important 3-5 entities, one per line."

    entity_output = hf_inference(MAIN_LLM_ENDPOINT, entity_input)

    if isinstance(entity_output, dict) and "generated_text" in entity_output:
        result = entity_output["generated_text"].strip()
        # Split by lines and clean up
        entities = [e.strip() for e in result.split('\n') if e.strip()]
        return entities[:5]  # Limit to 5 entities

    logger.error(f"Failed to extract key entities: {entity_output}")
    return []

@@ -266,20 +267,20 @@ def tool_extract_key_entities(prompt: str) -> list:
def tool_meta_analyze(entity_insights: Dict[str, list], prompt: str) -> str:
    if not entity_insights:
        return "No entity insights to analyze."

    meta_input = f"Perform a meta-analysis across these different entities related to the prompt: '{prompt}'\n\n"

    for entity, insights in entity_insights.items():
        if insights:
            meta_input += f"\n--- {entity} ---\n" + insights[-1] + "\n"  # Just use the latest insight for each entity

    meta_input += "\nProvide a high-level synthesis that identifies:\n1. Common themes across entities\n2. Important differences\n3. How these entities interact or influence each other\n4. The broader implications for the original research question"

    meta_output = hf_inference(MAIN_LLM_ENDPOINT, meta_input)

    if isinstance(meta_output, dict) and "generated_text" in meta_output:
        return meta_output["generated_text"].strip()

    logger.error(f"Failed to perform meta-analysis: {meta_output}")
    return "Could not generate a meta-analysis due to an error."

@@ -384,7 +385,7 @@ Available Tools:

    # Only include most recent context items to avoid exceeding context limits
    recent_context = context[-MAX_CONTEXT_ITEMS:] if len(context) > MAX_CONTEXT_ITEMS else context

    prompt += "\nContext (most recent items):\n"
    for item in recent_context:
        prompt += f"- {item}\n"

@@ -416,17 +417,17 @@ def deep_research(prompt):
    seen_snippets = set()
    contradictions = []
    research_session_id = str(uuid4())

    # Start with entity extraction for multi-pronged research
    key_entities = tool_extract_key_entities(prompt=prompt)
    if key_entities:
        context.append(f"Identified key entities: {key_entities}")
        intermediate_output += f"Identified key entities for focused research: {key_entities}\n"

    # Tracking progress for each entity
    entity_progress = {entity: {'queries': [], 'insights': []} for entity in key_entities}
    entity_progress['general'] = {'queries': [], 'insights': []}  # For general research not tied to specific entities

    for i in range(MAX_ITERATIONS):
        # Decide which entity to focus on this iteration, or general research
        if key_entities and i > 0:

@@ -435,9 +436,9 @@ def deep_research(prompt):
            current_entity = entities_to_process[i % len(entities_to_process)]
        else:
            current_entity = 'general'

        context.append(f"Current focus: {current_entity}")

        # First iteration: general query and initial research
        if i == 0:
            initial_query = tool_generate_search_query(prompt=prompt)

@@ -446,10 +447,10 @@ def deep_research(prompt):
            entity_progress['general']['queries'].append(initial_query)
            search_results = tool_search_web(query=initial_query)
            filtered_search_results = filter_results(search_results, prompt)

            for result in filtered_search_results:
                seen_snippets.add(result['snippet'])

            if filtered_search_results:
                context.append(f"Initial Search Results: {len(filtered_search_results)} items found")
                reasoning_output = tool_reason(prompt, filtered_search_results)

@@ -461,7 +462,7 @@ def deep_research(prompt):
            else:
                failed_queries.append(initial_query)
                context.append(f"Initial query yielded no relevant results: {initial_query}")

        # Generate current entity-specific query if applicable
        elif current_entity != 'general':
            entity_query = tool_generate_search_query(

@@ -469,24 +470,24 @@ def deep_research(prompt):
                previous_queries=entity_progress[current_entity]['queries'],
                focus_areas=focus_areas
            )

            if entity_query:
                previous_queries.append(entity_query)
                entity_progress[current_entity]['queries'].append(entity_query)

                # Search with entity focus
                search_results = tool_search_web(query=entity_query)
                filtered_search_results = filter_results(search_results,
                                                         f"{prompt} {current_entity}",
                                                         previous_snippets=seen_snippets)

                # Update seen snippets
                for result in filtered_search_results:
                    seen_snippets.add(result['snippet'])

                if filtered_search_results:
                    context.append(f"Entity Search for {current_entity}: {len(filtered_search_results)} results")

                    # Get entity-specific reasoning
                    entity_reasoning = tool_reason(
                        prompt=f"{prompt} focusing on {current_entity}",

@@ -494,29 +495,29 @@ def deep_research(prompt):
                        reasoning_context=entity_progress[current_entity]['insights'],
                        focus_areas=focus_areas
                    )

                    if entity_reasoning:
                        all_insights.append(entity_reasoning)
                        entity_progress[current_entity]['insights'].append(entity_reasoning)

                        # Store in entity-specific insights dictionary for meta-analysis
                        if current_entity not in entity_specific_insights:
                            entity_specific_insights[current_entity] = []
                        entity_specific_insights[current_entity].append(entity_reasoning)

                        context.append(f"Reasoning about {current_entity}: {entity_reasoning[:200]}...")
                else:
                    failed_queries.append(entity_query)
                    context.append(f"Entity query for {current_entity} yielded no relevant results")

        # Generate LLM decision for next tool
        llm_prompt = create_prompt(task_description, prompt, tools, context)
        llm_response = hf_inference(MAIN_LLM_ENDPOINT, llm_prompt)

        if isinstance(llm_response, dict) and "error" in llm_response:
            intermediate_output += f"LLM Error: {llm_response['error']}\n"
            continue

        if not isinstance(llm_response, dict) or "generated_text" not in llm_response:
            intermediate_output += "Error: Invalid LLM response.\n"
            continue

@@ -554,32 +555,32 @@ def deep_research(prompt):
                parameters['failed_queries'] = failed_queries
                parameters['focus_areas'] = focus_areas
                result = tool["function"](**parameters)

                if current_entity != 'general':
                    entity_progress[current_entity]['queries'].append(result)

                previous_queries.append(result)

            elif tool_name == "reason":
                if current_entity != 'general' and 'reasoning_context' not in parameters:
                    parameters['reasoning_context'] = entity_progress[current_entity]['insights']
                elif 'reasoning_context' not in parameters:
                    parameters['reasoning_context'] = reasoning_context[:]

                if 'prompt' not in parameters:
                    if current_entity != 'general':
                        parameters['prompt'] = f"{prompt} focusing on {current_entity}"
                    else:
                        parameters['prompt'] = prompt

                if 'search_results' not in parameters:
                    parameters['search_results'] = []

                if 'focus_areas' not in parameters and focus_areas:
                    parameters['focus_areas'] = focus_areas

                result = tool["function"](**parameters)

                if current_entity != 'general':
                    entity_progress[current_entity]['insights'].append(result)
                    if current_entity not in entity_specific_insights:

@@ -587,48 +588,48 @@ def deep_research(prompt):
                    entity_specific_insights[current_entity].append(result)
                else:
                    reasoning_context.append(result)

                all_insights.append(result)

            elif tool_name == "search_web":
                result = tool_search_web(**parameters)
                filtered_result = filter_results(result,
                                                 prompt if current_entity == 'general' else f"{prompt} {current_entity}",
                                                 previous_snippets=seen_snippets)

                # Update seen snippets
                for r in filtered_result:
                    seen_snippets.add(r['snippet'])

                result = filtered_result

                if not result:
                    query = parameters.get('query', '')
                    if query:
                        failed_queries.append(query)

            elif tool_name == "critique_reasoning":
                if 'previous_critiques' not in parameters:
                    parameters['previous_critiques'] = previous_critiques

                if all_insights:
                    if 'reasoning_output' not in parameters:
                        parameters['reasoning_output'] = all_insights[-1]
                    if 'prompt' not in parameters:
                        parameters['prompt'] = prompt

                    result = tool["function"](**parameters)
                    previous_critiques.append(result)
                    context.append(f"Critique: {result[:200]}...")
                else:
                    result = "No reasoning to critique yet."

            elif tool_name == "identify_contradictions":
                result = tool["function"](**parameters)
                if result:
                    contradictions = result  # Store for later use in summarization
                    context.append(f"Identified contradictions: {result}")

            elif tool_name == "identify_focus_areas":
                if 'failed_areas' not in parameters:
                    parameters['failed_areas'] = failed_areas

@@ -639,7 +640,7 @@ def deep_research(prompt):
                    focus_areas = result
                    failed_areas.extend([area for area in old_focus if area not in result])
                    context.append(f"New focus areas: {result}")

            elif tool_name == "meta_analyze":
                if 'entity_insights' not in parameters:
                    parameters['entity_insights'] = entity_specific_insights

@@ -649,7 +650,7 @@ def deep_research(prompt):
                if result:
                    all_insights.append(result)  # Add meta-analysis to insights
                    context.append(f"Meta-analysis across entities: {result[:200]}...")

            else:
                result = tool["function"](**parameters)

@@ -657,9 +658,9 @@ def deep_research(prompt):
            result_str = str(result)
            if len(result_str) > 500:
                result_str = result_str[:500] + "..."

            intermediate_output += f"Iteration {i+1} - Result: {result_str}\n"

            # Add truncated result to context
            result_context = result_str
            if len(result_str) > 300:  # Even shorter for context

@@ -669,8 +670,7 @@ def deep_research(prompt):
        except Exception as e:
            logger.error(f"Error with {tool_name}: {str(e)}")
            context.append(f"Error with {tool_name}: {str(e)}")
-            intermediate_output += f"Iteration {i+1} - Error: {str(e)}\
-            continue
+            intermediate_output += f"Iteration {i+1} - Error: {str(e)}\n"
+            continue

    # Perform final meta-analysis if we have entity-specific insights
    if len(entity_specific_insights) > 1 and len(all_insights) > 2:

@@ -687,26 +687,26 @@ def deep_research(prompt):

    # Prepare the full output with detailed tracking
    full_output = f"**Research Prompt:** {prompt}\n\n"

    if key_entities:
        full_output += f"**Key Entities Identified:** {', '.join(key_entities)}\n\n"

    full_output += "**Research Process:**\n" + intermediate_output + "\n"

    if contradictions:
        full_output += "**Contradictions Identified:**\n"
        for i, contradiction in enumerate(contradictions, 1):
            full_output += f"{i}. {contradiction}\n"
        full_output += "\n"

    full_output += f"**Final Analysis:**\n{final_result}\n\n"

    # Add session info for potential follow-up
    full_output += f"Research Session ID: {research_session_id}\n"
    full_output += f"Completed at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n"
    full_output += f"Total iterations: {i+1}\n"
    full_output += f"Total insights generated: {len(all_insights)}\n"

    return full_output

# Create CSS for a more professional look

@@ -750,19 +750,20 @@ iface = gr.Interface(
        ["Analyze the environmental and social impacts of lithium mining for electric vehicle batteries."],
        ["How has artificial intelligence influenced medical diagnostics in the past five years, and what are the ethical considerations?"]
    ],
-    theme="default",
+    theme="default",  # gr.themes.Base() is more explicit, but "default" also works
    css=custom_css,
    allow_flagging=False,
    analytics_enabled=False,
)

-# Add footer with additional information
+# Add footer with additional information (Optional, good for context)
footer_html = """
<div class="footer">
    <p>This research assistant performs advanced multi-stage analysis using natural language processing and web search.</p>
    <p>Results should be verified with additional sources. Not suitable for medical, legal, or emergency use.</p>
</div>
"""
+#iface = iface.add_html(footer_html) #gr.Interface object has no attribute add_html

# Launch the interface
iface.launch(share=False)

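The commented-out add_html call above notes that gr.Interface has no such method. One hedged alternative, assuming a Gradio version that provides gr.Blocks and gr.HTML, is to render the interface and the footer together in a Blocks container; this sketch is not part of the commit.

# Hypothetical alternative: wrap the existing Interface and the footer in gr.Blocks.
import gradio as gr

with gr.Blocks(css=custom_css) as demo:
    iface.render()        # embed the Interface defined above
    gr.HTML(footer_html)  # footer shown below it

demo.launch(share=False)

This keeps iface itself unchanged and only wraps it for display.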