pradyML committed on
Commit
384d517
·
1 Parent(s): b34c33b

Final Submission

Browse files
Files changed (3) hide show
  1. .gitignore +3 -0
  2. app.py +729 -225
  3. key.txt +0 -2
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ key.txt
2
+ *.key
3
+ .env
app.py CHANGED
@@ -9,13 +9,15 @@ from docling.document_converter import DocumentConverter
9
  import tqdm as notebook_tqdm
10
  from pydantic import BaseModel, Field
11
  import os
12
- from typing import Optional, Any, Literal, Dict, List, Tuple
 
13
  from typing_extensions import TypedDict
14
  from langgraph.graph import StateGraph, START, END
15
  from langgraph.types import Command
16
  from langchain_openai import ChatOpenAI
17
  from langchain_google_genai import ChatGoogleGenerativeAI
18
  from langchain_core.prompts import ChatPromptTemplate
 
19
  # from langfuse.callback import CallbackHandler
20
  import gradio as gr
21
  import contextlib
@@ -26,36 +28,149 @@ import re
26
  from typing import Union
27
  from dotenv import load_dotenv
28
 
 
29
  load_dotenv()
30
 
31
- # %% [markdown]
32
- # ## Telemetry/ Observability
33
- # - Used for debugging, disabled on prod
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
- # %%
36
# Observability hook. Leave as None to run without tracing; assign a langfuse
# CallbackHandler() instance while debugging.
langfuse_handler = None
# langfuse_handler = CallbackHandler()

# Always bind TRACING. Previously the name only existed when a handler was
# configured, so any reader of the flag hit a NameError in the default setup.
TRACING = bool(langfuse_handler)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
- # %% [markdown]
42
- # ## API Key
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
- # %%
45
# Pick the LLM provider from the environment. Nebius is preferred; Google is
# the fallback and needs both its key and its deployment name. Presence is
# tested explicitly instead of with bare `except:` clauses, which would also
# have swallowed KeyboardInterrupt/SystemExit and unrelated errors.
USE_GOOGLE = False
ENDPOINT_URL = None  # only the OpenAI-compatible (Nebius) client takes a base URL

if "NEBIUS_KEY" in os.environ:
    API_KEY = os.environ["NEBIUS_KEY"]
    MODEL_NAME = "Qwen/Qwen3-30B-A3B-fast"
    ENDPOINT_URL = "https://api.studio.nebius.com/v1/"
    print("Using Nebius API for Research Assistant")
elif "GOOGLE_API_KEY" in os.environ and "GOOGLE_DEPLOYMENT_NAME" in os.environ:
    API_KEY = os.environ["GOOGLE_API_KEY"]
    MODEL_NAME = os.environ["GOOGLE_DEPLOYMENT_NAME"]
    USE_GOOGLE = True
    print("Using Google API for Research Assistant")
else:
    # Fail fast at import: nothing downstream can work without a provider.
    raise ValueError("No NEBIUS or GOOGLE API Key was found")
59
 
60
  # %% [markdown]
61
  # ## Structured outputs
@@ -75,6 +190,20 @@ class MultiStepPlan(BaseModel):
75
  reasoning : str = Field("", description="The multi-step reasoning required to break down the user query in a plan.")
76
  plan : List[Literal["summary_agent", "synthesis_agent", "future_scope_agent", "critique_agent"]] = Field("END", description="The list of agents required to fulfill the user request determined by the Orchestrator.")
77
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  # %% [markdown]
79
  # ## Agent state
80
 
@@ -86,20 +215,19 @@ class AgentDescription(TypedDict):
86
  system_prompt : str
87
 
88
class ResearchAgentState(BaseModel):
    """State of the Academic Research Assistant."""

    # --- Request and loop control -------------------------------------
    user_query: Optional[str] = Field(
        default="",
        description="User task for the agents to fulfill.",
    )
    iterations: Optional[int] = Field(
        default=0,
        description="Counter for iterative refinement.",
    )
    max_iterations: Optional[int] = Field(
        default=2,
        description="Maximum number of refinement iterations.",
    )

    # --- Inputs --------------------------------------------------------
    # No default is given, so callers must supply the agent registry.
    available_agents: Dict[str, AgentDescription] = Field(
        description="A dictionary of the available agents.",
    )
    research_papers: List[Tuple[str, str]] = Field(
        default=[],
        description="List of tuples containing (filename, content) of uploaded research papers.",
    )

    # --- Orchestration -------------------------------------------------
    phase: Literal["PLAN", "EXECUTE", "ANSWER"] = Field(
        default="PLAN",
        description="Current phase of the agent",
    )
    messages: List[Tuple[str, str]] = Field(
        default=[],
        description="List of agent thoughts (agent, agent response).",
    )
    final_answer: str = Field(
        default="",
        description="Final answer generated after task execution.",
    )
    plan: List[Literal["summary_agent", "synthesis_agent", "future_scope_agent", "critique_agent"]] = Field(
        default=[],
        description="The current list of tasks to execute",
    )

    # --- Agent outputs -------------------------------------------------
    synthesis_of_findings: Optional[str] = Field(
        default="",
        description="A synthesized summary of findings from all provided papers.",
    )
    identified_gaps: Optional[str] = Field(
        default="",
        description="A consolidated list of research gaps.",
    )
    future_directions_report: Optional[str] = Field(
        default="",
        description="The final report on future research directions.",
    )
    critique: str = Field(
        default="",
        description="Written feedback from the critique agent regarding the generated report.",
    )
103
 
104
  # %% [markdown]
105
  # ## System prompts
@@ -220,48 +348,94 @@ def read_file_content(file: Union[str, Path]) -> str:
220
  else:
221
  return ""
222
 
223
def call_llm(system_prompt, user_prompt, response_format : Any = None) -> Any:
    """
    Send one system/user prompt pair to the configured chat model.

    The provider is chosen by the module-level USE_GOOGLE flag. When
    response_format is given, the model is wrapped with
    with_structured_output(response_format); otherwise the raw model reply
    is returned.
    """
    if not USE_GOOGLE:
        model = ChatOpenAI(
            model=MODEL_NAME,
            api_key=API_KEY,
            base_url=ENDPOINT_URL,
            max_completion_tokens=None,
            timeout=60,
            max_retries=0,
            temperature=0,
        )
    else:
        model = ChatGoogleGenerativeAI(
            model=MODEL_NAME,
            google_api_key=API_KEY,
            temperature=0,
            max_tokens=None,
            timeout=None,
            max_retries=2,
        )

    # Only constrain the output when the caller asked for a schema.
    runnable = model if response_format is None else model.with_structured_output(response_format)

    # The prompts are passed as template variables rather than baked into the
    # template text.
    template = ChatPromptTemplate.from_messages([
        ("system", "{system_prompt}"),
        ("user", "{user_prompt}"),
    ])

    return (template | runnable).invoke({
        "system_prompt": system_prompt,
        "user_prompt": user_prompt,
    })
 
 
 
 
264
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
265
def serialize_messages(messages : List[Tuple[str,str]]) -> str:
    """Render (role, content) pairs as a markdown transcript.

    Each pair becomes a bold role header followed by its content; the result
    always starts with a newline, so an empty history yields just that newline.
    """
    rendered = []
    for role, content in messages:
        rendered.append(f"**{role}:**\n{content}")
    return "\n" + "\n".join(rendered)
@@ -302,9 +476,11 @@ def orchestrator_agent(state: ResearchAgentState) -> Command:
302
  """Central orchestration logic to determine the next agent to call."""
303
 
304
  if not state.research_papers:
305
- state.final_answer = "### ❗️ The research assistant needs at least one research paper to begin.\n" \
306
- "πŸ‘ˆπŸ½ Please upload one or more research papers in the 'πŸ“š Research Materials' tab."
307
- return Command(goto=END, update=state)
 
 
308
 
309
  if state.phase == "PLAN":
310
  agent_descriptions = "\n".join([
@@ -317,93 +493,216 @@ def orchestrator_agent(state: ResearchAgentState) -> Command:
317
  **Agents:**
318
  {agent_descriptions}
319
 
320
- Based on the user's query, create a logical sequence of agents to call. For example, to find future scope, you should first summarize the papers, then synthesize them, and finally call the future_scope_agent.
321
 
322
  **IMPORTANT:** Always include the summary_agent as the first step when working with research papers. Every task requires proper paper summaries before analysis can begin.
323
-
324
- **[EXAMPLE]**
325
- **USER QUERY:** Summarize these papers and tell me what to research next.
326
- **REASONING:** The user wants a summary and future research directions. I need to first run the summary_agent on all papers, then the synthesis_agent, and finally the future_scope_agent.
327
- **PLAN:** ["summary_agent", "synthesis_agent", "future_scope_agent"]
328
  """
329
 
330
  user_prompt = state.user_query
331
- state.messages.append(("user_query", state.user_query))
332
 
333
  response = call_llm(system_prompt, user_prompt, MultiStepPlan)
334
 
335
- # Enforce summary_agent as the first step if not already included
336
- if not response.plan or response.plan[0] != "summary_agent":
337
- print("⚠️ Enforcing summary_agent as first step in the plan")
338
- response.plan.insert(0, "summary_agent")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
339
 
340
- print("="*40)
341
- print("πŸ€– ORCHESTRATOR PLAN")
342
- print("="*40)
343
- print(f"\nπŸ“ Reasoning:\n{response.reasoning}\n")
344
- print("πŸ”— Planned Steps:")
345
- for i, step in enumerate(response.plan, 1):
346
- print(f" {i}. {step}")
347
- print("="*40)
348
- print("βš™οΈ EXECUTE PLAN")
349
- print("="*40 + "\n")
350
- state.plan = response.plan
351
- state.phase = "EXECUTE"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
352
 
353
  if len(state.plan) == 0 and state.phase == "EXECUTE":
354
- state.phase = "ANSWER"
355
  return Command(
356
  goto="final_answer_tool",
357
- update=state
358
  )
 
359
  if state.phase == "EXECUTE":
360
- agent = state.plan.pop(0)
 
361
  return Command(
362
- goto=agent,
363
- update=state)
 
364
 
365
  if state.phase == "ANSWER":
366
- state.phase = "PLAN"
367
- state.messages = [("orchestrator_agent", f"\n{state.final_answer}")]
 
 
 
 
 
368
 
369
- return Command(goto=END, update=state)
370
 
371
  # %% [markdown]
372
  # ### Research Agents
373
 
374
  # %%
375
def summary_agent(state : ResearchAgentState) -> Command:
    """Summarize every uploaded paper and hand control back to the orchestrator.

    One structured ResearchSummary is requested per paper through call_llm.
    The per-paper markdown sections are joined with horizontal rules and
    appended to state.messages as a single ("summary_agent", text) entry.
    """
    system_prompt = state.available_agents.get("summary_agent", {}).get("system_prompt")

    print(f"The summary agent is processing {len(state.research_papers)} paper(s) πŸ“š")

    summaries = []
    for filename, content in state.research_papers:
        print(f" - Summarizing {filename}...")
        user_prompt = f"[RESEARCH PAPER: {filename}]\n{content}\n[END RESEARCH PAPER]"
        response = call_llm(system_prompt, user_prompt, ResearchSummary)

        # Build the section from parts and join once instead of growing a string.
        parts = [f"### Summary for {filename}\n\n**Key Findings:**\n"]
        parts.extend(f"- {finding}\n" for finding in response.key_findings)
        parts.append(f"\n**Methodology:**\n{response.methodology}\n")
        parts.append("\n**Limitations:**\n")
        parts.extend(f"- {limitation}\n" for limitation in response.limitations)
        summaries.append("".join(parts))

    state.messages.append(("summary_agent", "\n\n---\n\n".join(summaries)))
    print("The summary agent has finished.")

    return Command(goto="orchestrator_agent", update=state)
403
-
404
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
405
  def synthesis_agent(state : ResearchAgentState) -> Command:
406
- "Synthesizes the summaries into a cohesive narrative."
407
 
408
  agent_description = state.available_agents.get("synthesis_agent", {})
409
  system_prompt = agent_description.get("system_prompt")
@@ -412,17 +711,27 @@ def synthesis_agent(state : ResearchAgentState) -> Command:
412
  user_prompt = f"Please synthesize the following research summaries:\n{previous_messages}"
413
 
414
  print("The synthesis agent is creating a literature review...")
415
- response = call_llm(system_prompt, user_prompt).content
 
 
 
 
 
 
 
 
416
  print("Synthesis complete.")
417
 
418
- state.synthesis_of_findings = response
419
- agent_contribution = ("synthesis_agent", response)
420
- state.messages.append(agent_contribution)
421
-
422
- return Command(goto="orchestrator_agent", update=state)
 
 
423
 
424
  def future_scope_agent(state : ResearchAgentState) -> Command:
425
- "Identifies research gaps and suggests future work."
426
 
427
  agent_description = state.available_agents.get("future_scope_agent", {})
428
  system_prompt = agent_description.get("system_prompt")
@@ -433,25 +742,35 @@ def future_scope_agent(state : ResearchAgentState) -> Command:
433
  print("The future scope agent is identifying research gaps...")
434
  response = call_llm(system_prompt, user_prompt, FutureScope)
435
 
436
- report_text = "### Identified Research Gaps\n"
437
- for gap in response.identified_gaps:
438
- report_text += f"- {gap}\n"
439
- report_text += "\n### Suggested Future Directions\n"
440
- for direction in response.suggested_directions:
441
- report_text += f"- {direction}\n"
442
- report_text += f"\n### Concluding Synthesis\n{response.synthesis}"
 
 
 
 
 
 
 
 
 
443
 
444
  print("Future scope analysis complete.")
445
 
446
- state.future_directions_report = report_text
447
- agent_contribution = ("future_scope_agent", report_text)
448
- state.messages.append(agent_contribution)
449
-
450
- return Command(goto="orchestrator_agent", update=state)
451
-
 
452
 
453
  def critique_agent(state: ResearchAgentState) -> Command:
454
- "Provides feedback on the generated analysis."
455
 
456
  agent_description = state.available_agents.get("critique_agent", {})
457
  system_prompt = agent_description.get("system_prompt")
@@ -460,16 +779,25 @@ def critique_agent(state: ResearchAgentState) -> Command:
460
  user_prompt = f"Please critique the following research analysis:\n{previous_messages}"
461
 
462
  print("The critique agent is reviewing the analysis... πŸ”Ž")
463
- response = call_llm(system_prompt, user_prompt).content
 
 
 
 
 
 
 
 
464
  print("Critique complete.")
465
 
466
- state.critique = response
467
- agent_contribution = ("critique_agent", response)
468
- state.messages.append(agent_contribution)
469
-
470
- return Command(goto="orchestrator_agent", update=state)
471
-
472
-
 
473
  def final_answer_tool(state : ResearchAgentState) -> Command[Literal["orchestrator_agent"]]:
474
  "Final answer tool is invoked to formulate a final answer based on the agent message history"
475
 
@@ -492,34 +820,62 @@ def final_answer_tool(state : ResearchAgentState) -> Command[Literal["orchestrat
492
  Compile the final, comprehensive answer for the user based on the history.
493
  """
494
 
495
- final_answer = call_llm(system_prompt, user_prompt).content
 
 
 
 
 
 
 
496
 
497
  if isinstance(final_answer, str):
498
  final_answer = strip_think_blocks(final_answer)
499
- state.final_answer = final_answer
500
 
 
501
  return Command(
502
  goto="orchestrator_agent",
503
- update=state
504
  )
505
 
506
 
507
  # %% [markdown]
508
- # ## Build Graph
509
 
510
  # %%
511
# Assemble the workflow graph over the shared ResearchAgentState.
builder = StateGraph(ResearchAgentState)

# Register the node functions in the same order as before.
for _node in (
    orchestrator_agent,
    summary_agent,
    synthesis_agent,
    future_scope_agent,
    critique_agent,
    final_answer_tool,
):
    builder.add_node(_node)
del _node  # keep the module namespace free of the loop variable

# Only the entry edge is declared statically; every node returns a
# Command(goto=...) that names the next hop.
builder.add_edge(START, "orchestrator_agent")

graph = builder.compile()
 
523
 
524
  # %% [markdown]
525
  # ## Gradio functions
@@ -530,31 +886,70 @@ def extract_research_papers(
530
  paper_files,
531
  max_iterations: int
532
  ) -> tuple[str, Dict, bool]:
533
- """
534
- Run the extraction pipeline and return output logs + state as a dict.
535
- """
536
- output_text = ""
537
- if not paper_files:
538
- return "Please upload at least one paper.", state_dict, False
539
-
540
- papers = []
541
- try:
542
- for file in paper_files:
543
- # Gradio uploads files to a temp directory, file.name gives the full path
544
- content = read_file_content(file.name)
545
- papers.append((os.path.basename(file.name), content))
546
- output_text += f"Successfully processed {len(papers)} paper(s)."
547
- except Exception as e:
548
- return f"Reading input files failed: {str(e)}", state_dict, False
549
-
550
- state = ResearchAgentState.model_validate(state_dict)
551
- state.research_papers = papers
552
  state.max_iterations = max_iterations
553
 
554
- state_dict = type_conversion(state, ResearchAgentState)
 
555
 
556
- return output_text, state_dict, True
557
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
558
  def call_orchestrator(state_dict : Dict, user_query : str):
559
  "Function prototype to call the orchestrator agent"
560
  state = ResearchAgentState.model_validate(state_dict)
@@ -562,10 +957,17 @@ def call_orchestrator(state_dict : Dict, user_query : str):
562
  state.user_query = user_query
563
  buffer = StringIO()
564
  with contextlib.redirect_stdout(buffer):
565
- if langfuse_handler: # Use langfuse_handler instead of TRACING
566
- result = graph.invoke(input=state, config={"callbacks": [langfuse_handler]})
567
- else:
568
- result = graph.invoke(input=state)
 
 
 
 
 
 
 
569
 
570
  result_dict = type_conversion(result, ResearchAgentState)
571
 
@@ -593,8 +995,80 @@ with gr.Blocks() as research_assistant_server:
593
 
594
  state_dict = gr.State(value=ResearchAgentState(available_agents=available_agents).model_dump())
595
  extraction_successful = gr.State(value=False)
 
596
 
597
  with gr.Tabs():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
598
  with gr.TabItem("πŸ“š Research Materials"):
599
  gr.Markdown("### πŸ‰ Feed the assistant with the research papers you want to analyze.")
600
 
@@ -612,13 +1086,20 @@ with gr.Blocks() as research_assistant_server:
612
  extract_button = gr.Button("Process Papers", variant="primary")
613
 
614
  extract_console_output = gr.Textbox(label="Logs / Console Output")
 
 
 
 
 
 
615
 
616
  extract_button.click(
617
- fn=extract_research_papers,
618
- inputs=[state_dict, research_papers_files, max_iterations],
619
  outputs=[extract_console_output, state_dict, extraction_successful]
620
  )
621
 
 
622
  with gr.TabItem("πŸ€– Q&A Chatbot"):
623
  examples = """ℹ️ **Example Queries**
624
  - Summarize the key findings from these papers.
@@ -628,28 +1109,51 @@ with gr.Blocks() as research_assistant_server:
628
  gr.Markdown(examples)
629
  user_query = gr.Textbox(label="Ask your research question", value="Identify the main gaps and suggest future work.", interactive=True)
630
  button = gr.Button("Ask the Research Assistant πŸ”¬πŸ§ ", variant="primary")
631
- qa_orchestrator_completed = gr.State(value=False)
632
-
633
- @gr.render(inputs=[qa_orchestrator_completed,state_dict])
634
- def show_qa_results(qa_flag, state_dict):
635
- if qa_flag:
636
- gr.Markdown(state_dict.get("final_answer", "❗️ No final answer provided: please check the logs."))
637
- else:
638
- gr.Markdown("### Upload papers and ask a question to get started.")
639
 
640
  output_logs = gr.Textbox(label="Logs/ Console Output", lines=10)
641
 
642
def reset_elements(qa_flag : bool, output_logs : str) -> Tuple[bool, str]:
    """Return the values that reset the Q&A view before a new run.

    Both arguments are ignored; they exist only because the click handler
    wires the same two components as inputs and outputs.

    Returns:
        (False, "Generating response...") - the cleared completion flag and
        the placeholder text for the log box. The annotation previously
        claimed a bare ``bool`` although a 2-tuple is returned.
    """
    return False, "Generating response..."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
644
 
645
  button.click(
646
- fn=reset_elements,
647
- inputs=[qa_orchestrator_completed, output_logs],
648
- outputs=[qa_orchestrator_completed, output_logs]
649
  ).then(
650
- fn=call_orchestrator,
651
- inputs=[state_dict, user_query],
652
- outputs=[output_logs, state_dict, qa_orchestrator_completed]
653
  )
654
 
655
  with gr.TabItem("πŸ”Ž What's under the hood?"):
 
9
  import tqdm as notebook_tqdm
10
  from pydantic import BaseModel, Field
11
  import os
12
+ from typing import Optional, Any, Literal, Dict, List, Tuple, Type, Annotated
13
+ from operator import add
14
  from typing_extensions import TypedDict
15
  from langgraph.graph import StateGraph, START, END
16
  from langgraph.types import Command
17
  from langchain_openai import ChatOpenAI
18
  from langchain_google_genai import ChatGoogleGenerativeAI
19
  from langchain_core.prompts import ChatPromptTemplate
20
+ from langchain_core.output_parsers import PydanticOutputParser # Add this import
21
  # from langfuse.callback import CallbackHandler
22
  import gradio as gr
23
  import contextlib
 
28
  from typing import Union
29
  from dotenv import load_dotenv
30
 
31
+ # Load environment variables from .env file
32
  load_dotenv()
33
 
34
+ # Use environment variables for API keys
35
# Provider configuration shared by the functions below. The key is seeded from
# the NEBIUS_KEY environment variable; the model and endpoint stay unset until
# setup_api_key() fills them in.
USE_GOOGLE = False
API_KEY = os.getenv("NEBIUS_KEY")
MODEL_NAME = None
ENDPOINT_URL = None

# Candidate model identifiers, in the order they are meant to be tried.
NEBIUS_MODELS = [
    "meta-llama/Llama-2-7b-chat-hf",
    "mistralai/Mistral-7B-Instruct-v0.2",
    "microsoft/DialoGPT-medium",
    "openai/gpt-3.5-turbo",
    "Qwen2.5-Coder-7B",
    "QwQ-32B",
]
49
+
50
def list_nebius_models():
    """Return the model ids advertised by the configured endpoint.

    Sends a GET to ``{ENDPOINT_URL}models`` with ``API_KEY`` as a bearer token
    and prints every advertised id.

    Returns:
        list: ids taken from the ``data`` array of the JSON response. Entries
        without an ``id`` are skipped so callers never receive ``None`` as a
        model name. An empty list is returned on a non-200 status or on any
        error (missing ``requests`` package, network failure, malformed
        payload).
    """
    try:
        # Imported inside the try so a missing package degrades to [].
        import requests

        headers = {
            "Authorization": f"Bearer {API_KEY}",
            "Content-Type": "application/json",
        }

        response = requests.get(
            f"{ENDPOINT_URL}models",
            headers=headers,
            timeout=10,
        )

        if response.status_code != 200:
            print(f"Failed to fetch models: {response.status_code}")
            print(f"Response: {response.text}")
            return []

        # Read the listing once and reuse it for both printing and the result.
        entries = response.json().get('data', [])
        print("Available models:")
        for entry in entries:
            print(f" - {entry.get('id', 'Unknown')}")
        return [entry['id'] for entry in entries if entry.get('id')]

    except Exception as e:
        print(f"Error fetching models: {str(e)}")
        return []
81
 
82
def test_available_models():
    """Probe candidate models and return the first one that answers.

    Candidates are the first six ids reported by list_nebius_models(), or a
    built-in list of common names when that listing is empty. Each candidate
    is sent a one-word prompt through ChatOpenAI using the module-level
    API_KEY and ENDPOINT_URL.

    Returns:
        The name of the first model whose probe call succeeds, else None.

    Side effects:
        On success the global MODEL_NAME is set to the returned model. It is
        no longer overwritten by candidates that fail, so a run that finds
        nothing leaves the previous value intact.
    """
    # NOTE(review): the test_ prefix makes pytest collect this function if the
    # module is ever imported from a test file; the name is kept for callers.
    global MODEL_NAME

    # First try to get the actual model list.
    available_models = list_nebius_models()

    if available_models:
        print(f"Found {len(available_models)} models from API")
        candidates = available_models[:6]  # probe at most six models
    else:
        # Fall back to common model names that might work.
        candidates = [
            "gpt-3.5-turbo",
            "gpt-4",
            "claude-3-haiku",
            "llama-2-7b-chat",
            "mistral-7b-instruct",
            "qwen-7b-chat",
        ]

    for model in candidates:
        print(f"Testing model: {model}")
        try:
            llm = ChatOpenAI(
                model=model,
                api_key=API_KEY,
                base_url=ENDPOINT_URL,
                max_completion_tokens=50,
                timeout=10,
                temperature=0,
            )
            # The reply itself is irrelevant; only success or failure matters.
            llm.invoke("Hello")
        except Exception as e:
            print(f"❌ {model} failed: {str(e)}")
            continue

        MODEL_NAME = model
        print(f"✅ {model} works!")
        return model

    print("⚠️ No working models found")
    return None
128
 
129
+ # Call this function when setting up API key
130
def setup_api_key(nebius_key=None, model_name=None):
    """Configure the Nebius credentials, endpoint and model used for LLM calls.

    Args:
        nebius_key: Key supplied by the user. When truthy it replaces the
            module-level API_KEY; otherwise the key already held in API_KEY
            is used.
        model_name: Model to use. When falsy, test_available_models() is
            asked to find a working one.

    Returns:
        True when a key and a model are in place, False when no key is
        available or no working model could be found.
    """
    global API_KEY, MODEL_NAME, ENDPOINT_URL, USE_GOOGLE

    # An explicit key wins over whatever API_KEY already holds.
    if nebius_key:
        API_KEY = nebius_key
        key_from_user = True
    elif API_KEY:
        key_from_user = False
    else:
        print("No API key found. Please provide a Nebius API key.")
        return False

    # Set the endpoint before probing, because the probe reads it.
    ENDPOINT_URL = "https://api.studio.nebius.com/v1/"

    chosen_model = model_name if model_name else test_available_models()
    if not chosen_model:
        print("No working models found")
        return False
    MODEL_NAME = chosen_model

    if key_from_user:
        print(f"Using user-provided Nebius API key with model: {MODEL_NAME}")
    else:
        print(f"Using Nebius API from environment variable with model: {MODEL_NAME}")
    return True
171
 
172
+ # Import-time bootstrap: called with no arguments, so setup_api_key() can only use a key already held in API_KEY (seeded from NEBIUS_KEY); its boolean result is ignored here.
173
+ setup_api_key()
 
 
 
 
 
 
 
 
 
 
 
 
 
174
 
175
  # %% [markdown]
176
  # ## Structured outputs
 
190
  reasoning : str = Field("", description="The multi-step reasoning required to break down the user query in a plan.")
191
  plan : List[Literal["summary_agent", "synthesis_agent", "future_scope_agent", "critique_agent"]] = Field("END", description="The list of agents required to fulfill the user request determined by the Orchestrator.")
192
 
193
class PaperSummary(BaseModel):
    # Structured summary of a single paper. Every field has a placeholder
    # default, so PaperSummary() can be built with no arguments when
    # processing fails.
    # (Plain comments are used instead of a class docstring so the model's
    # generated schema is left unchanged.)

    key_findings: List[str] = Field(
        description="List of key findings from the paper",
        default_factory=lambda: ["No key findings available due to processing error"],
    )
    methodology: str = Field(
        description="Summary of the methodology used in the paper",
        default="Methodology not available due to processing error",
    )
    conclusion: str = Field(
        description="Summary of the paper's conclusion",
        default="Conclusion not available due to processing error",
    )
206
+
207
  # %% [markdown]
208
  # ## Agent state
209
 
 
215
  system_prompt : str
216
 
217
class ResearchAgentState(BaseModel):
    """State for the research agent."""

    # Fields annotated with `add` (operator.add) carry list concatenation as
    # metadata - presumably consumed by LangGraph as the merge reducer, so
    # node updates extend these lists instead of replacing them (confirm).

    # (filename, content) tuples for the uploaded papers.
    research_papers: Annotated[List[Tuple[str, str]], add] = Field(default_factory=list)
    # One dict per summarized paper.
    summary: Annotated[List[Dict], add] = Field(default_factory=list)
    # The user's request.
    user_query: str = ""
    # One of "PLAN", "EXECUTE", "ANSWER".
    phase: str = "PLAN"
    # Agent names still to be called, in order.
    plan: List[str] = Field(default_factory=list)
    # (agent, message) tuples forming the shared transcript.
    messages: Annotated[List[Tuple[str, str]], add] = Field(default_factory=list)
    # Feedback on the analysis, if any was produced.
    critique: Optional[str] = None
    # Agent name -> agent description.
    available_agents: Dict[str, Dict] = Field(default_factory=dict)
    # Final answer to the user's query.
    final_answer: Optional[str] = None
    # Maximum number of processing iterations.
    max_iterations: int = 1
    # Cross-paper synthesis, if produced.
    synthesis_of_findings: Optional[str] = None
    # Future-directions report, if produced.
    future_directions_report: Optional[str] = None
 
231
 
232
  # %% [markdown]
233
  # ## System prompts
 
348
  else:
349
  return ""
350
 
351
+ # %% [markdown]
352
+ # ## LLM call
 
 
353
 
354
+ # %%
355
def _fallback_response(response_format, plan_reasoning):
    """Build a placeholder instance of *response_format*, or return None.

    MultiStepPlan gets the default three-step plan with *plan_reasoning* as
    its explanation. Any other class (PaperSummary included) is instantiated
    with no arguments and so relies on its own field defaults. None is
    returned when no class was requested or construction fails.
    """
    if not (response_format and hasattr(response_format, "__name__")):
        return None
    try:
        if response_format.__name__ == "MultiStepPlan":
            return MultiStepPlan(
                reasoning=plan_reasoning,
                plan=["summary_agent", "synthesis_agent", "future_scope_agent"],
            )
        return response_format()
    except Exception as e:
        print(f"Failed to create default instance: {str(e)}")
        return None


def call_llm(system_prompt, user_prompt, response_format=None):
    """Call the configured chat model with a system and a user prompt.

    Args:
        system_prompt: Text for the system message.
        user_prompt: Text for the user message.
        response_format: Optional schema class. When given, the model is
            wrapped with ``with_structured_output(response_format)``.

    Returns:
        The chain's response on success. If the API key is missing or the
        call raises, a placeholder instance of ``response_format`` is
        returned when one can be built, otherwise ``None``. Callers that
        request plain text must therefore handle ``None``.
    """
    if not API_KEY:
        print("Error: API key is not set")
        return _fallback_response(response_format, "Error occurred: API key not set")

    try:
        if USE_GOOGLE:
            llm = ChatGoogleGenerativeAI(
                model=MODEL_NAME,
                google_api_key=API_KEY,
                temperature=0,
            )
        else:
            llm = ChatOpenAI(
                model=MODEL_NAME,
                api_key=API_KEY,
                base_url=ENDPOINT_URL,
                max_completion_tokens=None,
                timeout=60,
                max_retries=2,
                temperature=0,
            )

        if response_format is not None:
            llm = llm.with_structured_output(response_format)

        prompt = ChatPromptTemplate.from_messages([
            ("system", "{system_prompt}"),
            ("user", "{user_prompt}"),
        ])

        chain = prompt | llm

        print(f"Calling model: {MODEL_NAME}")
        return chain.invoke({
            "system_prompt": system_prompt,
            "user_prompt": user_prompt,
        })

    except Exception as e:
        print(f"Error in call_llm: {str(e)}")

        # Best-effort diagnostics: some client errors carry an HTTP response
        # whose JSON body explains the failure.
        error_response = getattr(e, "response", None)
        if hasattr(error_response, "json"):
            try:
                print(f"API Error details: {error_response.json()}")
            except Exception:
                # The body was not JSON; the message printed above suffices.
                pass

        return _fallback_response(
            response_format,
            "Error occurred while calling the LLM API. Using default plan.",
        )
439
def serialize_messages(messages : List[Tuple[str,str]]) -> str:
    """Render (role, content) pairs as a markdown transcript.

    Each pair becomes a bold role header followed by its content; the result
    always starts with a newline, so an empty history yields just that newline.
    """
    rendered = []
    for role, content in messages:
        rendered.append(f"**{role}:**\n{content}")
    return "\n" + "\n".join(rendered)
 
476
  """Central orchestration logic to determine the next agent to call."""
477
 
478
  if not state.research_papers:
479
+ return Command(
480
+ goto=END,
481
+ update={"final_answer": "### ❗️ The research assistant needs at least one research paper to begin.\n" \
482
+ "πŸ‘ˆπŸ½ Please upload one or more research papers in the 'πŸ“š Research Materials' tab."}
483
+ )
484
 
485
  if state.phase == "PLAN":
486
  agent_descriptions = "\n".join([
 
493
  **Agents:**
494
  {agent_descriptions}
495
 
496
+ Based on the user's query, create a logical sequence of agents to call. For example, to find future scope, you should first summarize the papers, then synthesize them, and then call the future_scope_agent.
497
 
498
  **IMPORTANT:** Always include the summary_agent as the first step when working with research papers. Every task requires proper paper summaries before analysis can begin.
 
 
 
 
 
499
  """
500
 
501
  user_prompt = state.user_query
 
502
 
503
  response = call_llm(system_prompt, user_prompt, MultiStepPlan)
504
 
505
+ # Handle None response by providing a default plan
506
+ if response is None:
507
+ print("⚠️ Failed to get response from LLM. Using default plan.")
508
+ plan = ["summary_agent", "synthesis_agent", "future_scope_agent"]
509
+ print("="*40)
510
+ print("πŸ€– DEFAULT ORCHESTRATOR PLAN (LLM call failed)")
511
+ print("="*40)
512
+ print("\nπŸ“ Reasoning: Default plan due to LLM call failure\n")
513
+ print("πŸ”— Planned Steps:")
514
+ for i, step in enumerate(plan, 1):
515
+ print(f" {i}. {step}")
516
+ print("="*40)
517
+ print("βš™οΈ EXECUTE PLAN")
518
+ print("="*40 + "\n")
519
+
520
+ # Create update dict that only modifies necessary fields
521
+ updates = {
522
+ "plan": plan,
523
+ "phase": "EXECUTE"
524
+ }
525
+
526
+ # Only add user_query to messages if it's not already there
527
+ if not any(msg[0] == "user_query" for msg in state.messages):
528
+ updates["messages"] = [("user_query", state.user_query)]
529
+
530
+ return Command(goto=plan[0], update=updates)
531
 
532
+ # If response exists but plan is empty, use default plan
533
+ try:
534
+ # Enforce summary_agent as the first step if not already included
535
+ if not hasattr(response, 'plan') or not response.plan:
536
+ print("⚠️ Response from LLM did not contain a valid plan. Using default plan.")
537
+ response.plan = ["summary_agent", "synthesis_agent", "future_scope_agent"]
538
+ elif response.plan[0] != "summary_agent":
539
+ print("⚠️ Enforcing summary_agent as first step in the plan")
540
+ response.plan.insert(0, "summary_agent")
541
+
542
+ print("="*40)
543
+ print("πŸ€– ORCHESTRATOR PLAN")
544
+ print("="*40)
545
+ print(f"\nπŸ“ Reasoning:\n{getattr(response, 'reasoning', 'No reasoning provided')}\n")
546
+ print("πŸ”— Planned Steps:")
547
+ for i, step in enumerate(response.plan, 1):
548
+ print(f" {i}. {step}")
549
+ print("="*40)
550
+ print("βš™οΈ EXECUTE PLAN")
551
+ print("="*40 + "\n")
552
+
553
+ # Create update dict that only modifies necessary fields
554
+ updates = {
555
+ "plan": response.plan,
556
+ "phase": "EXECUTE"
557
+ }
558
+
559
+ # Only add user_query to messages if it's not already there
560
+ if not any(msg[0] == "user_query" for msg in state.messages):
561
+ updates["messages"] = [("user_query", state.user_query)]
562
+
563
+ return Command(goto=response.plan[0], update=updates)
564
+
565
+ except Exception as e:
566
+ # Final fallback if response processing fails
567
+ print(f"⚠️ Error processing LLM response: {str(e)}. Using default plan.")
568
+ plan = ["summary_agent", "synthesis_agent", "future_scope_agent"]
569
+
570
+ # Create update dict that only modifies necessary fields
571
+ updates = {
572
+ "plan": plan,
573
+ "phase": "EXECUTE"
574
+ }
575
+
576
+ # Only add user_query to messages if it's not already there
577
+ if not any(msg[0] == "user_query" for msg in state.messages):
578
+ updates["messages"] = [("user_query", state.user_query)]
579
+
580
+ return Command(goto=plan[0], update=updates)
581
 
582
  if len(state.plan) == 0 and state.phase == "EXECUTE":
 
583
  return Command(
584
  goto="final_answer_tool",
585
+ update={"phase": "ANSWER"}
586
  )
587
+
588
  if state.phase == "EXECUTE":
589
+ next_agent = state.plan[0]
590
+ remaining_plan = state.plan[1:]
591
  return Command(
592
+ goto=next_agent,
593
+ update={"plan": remaining_plan}
594
+ )
595
 
596
  if state.phase == "ANSWER":
597
+ return Command(
598
+ goto=END,
599
+ update={
600
+ "phase": "PLAN",
601
+ "messages": [("orchestrator_agent", f"\n{state.final_answer}")]
602
+ }
603
+ )
604
 
605
+ return Command(goto=END, update={})
606
 
607
  # %% [markdown]
608
  # ### Research Agents
609
 
610
  # %%
611
def summary_agent(state : ResearchAgentState) -> Command:
    """Create a structured summary for every uploaded research paper.

    When ``state.summary`` is empty, each ``(filename, content)`` pair in
    ``state.research_papers`` is sent to the LLM (only the first
    ``_SUMMARY_CONTENT_CHAR_LIMIT`` characters of the content are used) and
    the answers are collected into a list of summary records. A ``None``
    response from ``call_llm`` is treated as a failed call and replaced by a
    placeholder record so the rest of the plan can still run.

    Args:
        state: Current graph state; ``summary`` and ``research_papers`` are read.

    Returns:
        A ``Command`` that routes back to ``orchestrator_agent``. On the first
        visit it carries the records under ``summary`` plus one
        ``("summary_agent", markdown_report)`` message; on later visits it
        carries an empty update.
    """
    if state.summary:
        # Summaries already exist, just proceed. Only an empty update is sent:
        # like the other nodes in this file, this node must not hand the whole
        # state object back as its update.
        return Command(goto="orchestrator_agent", update={})

    print("The summary agent is processing the papers... 📝")

    system_prompt = (
        "You are a research summarization expert. Please read the provided "
        "research paper content and create a clear, concise, and structured summary.\n"
        "Focus on extracting key findings, methodology, and conclusions.\n"
    )

    research_findings = []
    for filename, content in state.research_papers:
        # Truncate here, in code, so the note about the limit never leaks
        # into the prompt text itself.
        excerpt = content[:_SUMMARY_CONTENT_CHAR_LIMIT]
        user_prompt = (
            f"\nPaper: {filename}\n\n"
            f"Content:\n{excerpt}\n\n"
            "Please provide a structured summary with key findings, "
            "methodology, and conclusions.\n"
        )
        response = call_llm(system_prompt, user_prompt, PaperSummary)
        research_findings.append(_finding_from_response(filename, response))

    print("Paper summaries complete.")

    # One Markdown section per paper, joined into a single message.
    combined_summary = "\n\n".join(
        _render_paper_summary(paper) for paper in research_findings
    )

    return Command(
        goto="orchestrator_agent",
        update={
            "summary": research_findings,
            "messages": [("summary_agent", combined_summary)],
        },
    )


# Number of leading characters of each paper that is sent to the LLM, to
# avoid context limits.
_SUMMARY_CONTENT_CHAR_LIMIT = 5000


def _finding_from_response(filename, response):
    """Turn one LLM response (``None`` when the call failed) into a summary record."""
    if response is None:
        print(f"⚠️ Failed to summarize paper {filename}. Creating default summary.")
        return {
            "title": filename,
            "key_findings": ["Error: Could not summarize this paper due to API issues."],
            "methodology": "Not available due to API error",
            "conclusion": "Not available due to API error",
            "source": filename,
        }

    # Missing or empty fields fall back to explicit placeholders so the
    # rendered report never contains "None" or an empty bullet list.
    return {
        "title": filename,
        "key_findings": getattr(response, "key_findings", None) or ["No key findings extracted"],
        "methodology": getattr(response, "methodology", None) or "Not provided",
        "conclusion": getattr(response, "conclusion", None) or "Not provided",
        "source": filename,
    }


def _render_paper_summary(paper):
    """Render one summary record as a Markdown section."""
    findings_text = "\n".join(f"- {finding}" for finding in paper["key_findings"])
    return (
        f"\n## {paper['title']}\n\n"
        f"### Key Findings:\n{findings_text}\n\n"
        f"### Methodology:\n{paper['methodology']}\n\n"
        f"### Conclusion:\n{paper['conclusion']}\n"
    )
704
  def synthesis_agent(state : ResearchAgentState) -> Command:
705
+ """Synthesizes the summaries into a cohesive narrative."""
706
 
707
  agent_description = state.available_agents.get("synthesis_agent", {})
708
  system_prompt = agent_description.get("system_prompt")
 
711
  user_prompt = f"Please synthesize the following research summaries:\n{previous_messages}"
712
 
713
  print("The synthesis agent is creating a literature review...")
714
+ response = call_llm(system_prompt, user_prompt)
715
+
716
+ # Handle None response
717
+ if response is None:
718
+ response_text = "Error: Could not generate synthesis due to API issues."
719
+ print("⚠️ Synthesis agent failed - using default response")
720
+ else:
721
+ response_text = response.content if hasattr(response, 'content') else str(response)
722
+
723
  print("Synthesis complete.")
724
 
725
+ # Only update messages, don't update synthesis_of_findings
726
+ return Command(
727
+ goto="orchestrator_agent",
728
+ update={
729
+ "messages": [("synthesis_agent", response_text)]
730
+ }
731
+ )
732
 
733
  def future_scope_agent(state : ResearchAgentState) -> Command:
734
+ """Identifies research gaps and suggests future work."""
735
 
736
  agent_description = state.available_agents.get("future_scope_agent", {})
737
  system_prompt = agent_description.get("system_prompt")
 
742
  print("The future scope agent is identifying research gaps...")
743
  response = call_llm(system_prompt, user_prompt, FutureScope)
744
 
745
+ # Handle None response
746
+ if response is None:
747
+ print("⚠️ Future scope agent failed - using default response")
748
+ report_text = "### Identified Research Gaps\n- Error: Could not identify gaps due to API issues.\n\n### Suggested Future Directions\n- Error: Could not suggest directions due to API issues.\n\n### Concluding Synthesis\nError: Could not generate synthesis due to API issues."
749
+ else:
750
+ try:
751
+ report_text = "### Identified Research Gaps\n"
752
+ for gap in response.identified_gaps:
753
+ report_text += f"- {gap}\n"
754
+ report_text += "\n### Suggested Future Directions\n"
755
+ for direction in response.suggested_directions:
756
+ report_text += f"- {direction}\n"
757
+ report_text += f"\n### Concluding Synthesis\n{response.synthesis}"
758
+ except Exception as e:
759
+ print(f"⚠️ Error processing future scope response: {str(e)}")
760
+ report_text = "### Error\nCould not process future scope analysis due to response format issues."
761
 
762
  print("Future scope analysis complete.")
763
 
764
+ # Only update messages, don't update future_directions_report
765
+ return Command(
766
+ goto="orchestrator_agent",
767
+ update={
768
+ "messages": [("future_scope_agent", report_text)]
769
+ }
770
+ )
771
 
772
  def critique_agent(state: ResearchAgentState) -> Command:
773
+ """Provides feedback on the generated analysis."""
774
 
775
  agent_description = state.available_agents.get("critique_agent", {})
776
  system_prompt = agent_description.get("system_prompt")
 
779
  user_prompt = f"Please critique the following research analysis:\n{previous_messages}"
780
 
781
  print("The critique agent is reviewing the analysis... πŸ”Ž")
782
+ response = call_llm(system_prompt, user_prompt)
783
+
784
+ # Handle None response
785
+ if response is None:
786
+ response_text = "Error: Could not generate critique due to API issues."
787
+ print("⚠️ Critique agent failed - using default response")
788
+ else:
789
+ response_text = response.content if hasattr(response, 'content') else str(response)
790
+
791
  print("Critique complete.")
792
 
793
+ # Only update the fields that need updating - avoid updating user_query
794
+ return Command(
795
+ goto="orchestrator_agent",
796
+ update={
797
+ "critique": response_text,
798
+ "messages": [("critique_agent", response_text)]
799
+ }
800
+ )
801
  def final_answer_tool(state : ResearchAgentState) -> Command[Literal["orchestrator_agent"]]:
802
  "Final answer tool is invoked to formulate a final answer based on the agent message history"
803
 
 
820
  Compile the final, comprehensive answer for the user based on the history.
821
  """
822
 
823
+ response = call_llm(system_prompt, user_prompt)
824
+
825
+ # Handle None response
826
+ if response is None:
827
+ final_answer = "Error: Could not generate final answer due to API issues. Please check the logs and try again."
828
+ print("⚠️ Final answer tool failed - using default response")
829
+ else:
830
+ final_answer = response.content if hasattr(response, 'content') else str(response)
831
 
832
  if isinstance(final_answer, str):
833
  final_answer = strip_think_blocks(final_answer)
 
834
 
835
+ # Only update the final_answer field, not the entire state
836
  return Command(
837
  goto="orchestrator_agent",
838
+ update={"final_answer": final_answer}
839
  )
840
 
841
 
842
  # %% [markdown]
843
+ # ## Graph Definition
844
 
845
  # %%
846
def init_state():
    """Return a fresh ``ResearchAgentState`` seeded with the module's ``available_agents``."""
    fresh_state = ResearchAgentState(available_agents=available_agents)
    return fresh_state
849
+
850
# Build the workflow graph over the shared ResearchAgentState.
graph = StateGraph(ResearchAgentState)

# Register each node under the name used in plans and ``goto`` targets.
for node_name, node_fn in (
    ("orchestrator_agent", orchestrator_agent),
    ("summary_agent", summary_agent),
    ("synthesis_agent", synthesis_agent),
    ("future_scope_agent", future_scope_agent),
    ("critique_agent", critique_agent),
    ("final_answer_tool", final_answer_tool),
):
    graph.add_node(node_name, node_fn)

# Every run enters through the orchestrator.
graph.add_edge(START, "orchestrator_agent")


def _route_from_orchestrator(state):
    """Choose the orchestrator's successor from the current phase and plan."""
    if state.phase == "EXECUTE" and state.plan:
        # Next planned agent.
        return state.plan[0]
    if state.phase == "ANSWER":
        return "final_answer_tool"
    return END


# NOTE(review): the node functions in this file also return
# ``Command(goto=...)``; confirm that these conditional/static edges are
# still wanted alongside that routing.
graph.add_conditional_edges("orchestrator_agent", _route_from_orchestrator)

# Every worker node reports back to the orchestrator.
for worker_name in (
    "summary_agent",
    "synthesis_agent",
    "future_scope_agent",
    "critique_agent",
    "final_answer_tool",
):
    graph.add_edge(worker_name, "orchestrator_agent")

# Compile the graph; the name ``graph`` is rebound to the compiled, invokable form.
graph = graph.compile()
879
 
880
  # %% [markdown]
881
  # ## Gradio functions
 
886
  paper_files,
887
  max_iterations: int
888
  ) -> tuple[str, Dict, bool]:
889
+ """Extract text from research papers and update state."""
890
+
891
+ # Create a new ResearchAgentState or update existing one
892
+ if isinstance(state_dict, dict):
893
+ state = ResearchAgentState(**state_dict)
894
+ else:
895
+ state = ResearchAgentState()
896
+
897
+ # Set max_iterations safely
 
 
 
 
 
 
 
 
 
 
898
  state.max_iterations = max_iterations
899
 
900
+ if not paper_files:
901
+ return "Please upload at least one research paper to analyze.", state.model_dump(), False
902
 
903
+ console_output = StringIO()
904
+ with contextlib.redirect_stdout(console_output):
905
+ papers = []
906
+
907
+ for file in paper_files:
908
+ try:
909
+ filename = file.name.split("/")[-1]
910
+ print(f"πŸ“„ Processing {filename}...")
911
+
912
+ if filename.lower().endswith(".pdf"):
913
+ # Fix DocumentConverter usage - it likely uses a different method name
914
+ try:
915
+ converter = DocumentConverter()
916
+ # Try different method names that might exist
917
+ if hasattr(converter, 'pdf_to_text'):
918
+ content = converter.pdf_to_text(file.name)
919
+ elif hasattr(converter, 'extract_text'):
920
+ content = converter.extract_text(file.name)
921
+ else:
922
+ # Fallback to PyPDF2 if available
923
+ import PyPDF2
924
+ content = ""
925
+ with open(file.name, 'rb') as pdf_file:
926
+ pdf_reader = PyPDF2.PdfReader(pdf_file)
927
+ for page_num in range(len(pdf_reader.pages)):
928
+ content += pdf_reader.pages[page_num].extract_text()
929
+ except ImportError:
930
+ print("⚠️ PDF conversion libraries not available. Please install PyPDF2.")
931
+ continue
932
+
933
+ elif filename.lower().endswith(".docx"):
934
+ doc = docx.Document(file.name)
935
+ content = "\n".join([p.text for p in doc.paragraphs])
936
+ elif filename.lower().endswith((".txt", ".md")):
937
+ with open(file.name, "r") as f:
938
+ content = f.read()
939
+ else:
940
+ print(f"⚠️ Unsupported file format: {filename}")
941
+ continue
942
+
943
+ papers.append((filename, content))
944
+ print(f"βœ… Successfully extracted {len(content)} characters from {filename}")
945
+
946
+ except Exception as e:
947
+ print(f"❌ Error processing {file.name}: {str(e)}")
948
+
949
+ state.research_papers = papers
950
+ print(f"πŸ“Š Extracted content from {len(papers)} files.")
951
+
952
+ return console_output.getvalue(), state.model_dump(), len(papers) > 0
953
  def call_orchestrator(state_dict : Dict, user_query : str):
954
  "Function prototype to call the orchestrator agent"
955
  state = ResearchAgentState.model_validate(state_dict)
 
957
  state.user_query = user_query
958
  buffer = StringIO()
959
  with contextlib.redirect_stdout(buffer):
960
+ config = {} # Use empty config for now
961
+
962
+ try:
963
+ result = graph.invoke(input=state, config=config)
964
+ output_text = buffer.getvalue()
965
+ result_dict = type_conversion(result, ResearchAgentState)
966
+ return output_text, result_dict, True
967
+ except Exception as e:
968
+ error_msg = f"An error occurred during processing: {str(e)}"
969
+ output_text = buffer.getvalue() + "\n" + error_msg
970
+ return output_text, state_dict, False
971
 
972
  result_dict = type_conversion(result, ResearchAgentState)
973
 
 
995
 
996
  state_dict = gr.State(value=ResearchAgentState(available_agents=available_agents).model_dump())
997
  extraction_successful = gr.State(value=False)
998
+ api_key_set = gr.State(value=API_KEY is not None)
999
 
1000
  with gr.Tabs():
1001
+ with gr.TabItem("πŸ”‘ API Key Setup"):
1002
+ gr.Markdown("### Set up your Nebius API Key")
1003
+ gr.Markdown("A valid API key is required to use this research assistant. You can either provide it here or set it as an environment variable.")
1004
+
1005
+ with gr.Row():
1006
+ nebius_key_input = gr.Textbox(
1007
+ label="Nebius API Key",
1008
+ placeholder="Enter your Nebius API key here...",
1009
+ type="password",
1010
+ value=""
1011
+ )
1012
+
1013
+ # Add model discovery section
1014
+ with gr.Row():
1015
+ discover_models_button = gr.Button("πŸ” Discover Available Models", variant="secondary")
1016
+ test_model_input = gr.Textbox(
1017
+ label="Or manually test a model name:",
1018
+ placeholder="e.g., gpt-3.5-turbo"
1019
+ )
1020
+
1021
+ available_models_display = gr.Textbox(
1022
+ label="Available Models",
1023
+ lines=5,
1024
+ interactive=False
1025
+ )
1026
+
1027
+ with gr.Row():
1028
+ model_dropdown = gr.Dropdown(
1029
+ choices=NEBIUS_MODELS,
1030
+ value=MODEL_NAME or NEBIUS_MODELS[0],
1031
+ label="Select Nebius Model",
1032
+ allow_custom_value=True
1033
+ )
1034
+
1035
+ api_key_status = gr.Markdown("⚠️ **No API key detected.** Please enter your Nebius API key." if API_KEY is None else "βœ… **API key configured.** You're ready to use the assistant.")
1036
+
1037
+ save_key_button = gr.Button("Save API Key", variant="primary")
1038
+
1039
def discover_models(key):
    """List the models available for the API key typed into the setup tab.

    The key is written to the module-level ``API_KEY`` (and ``ENDPOINT_URL``
    is pointed at the Nebius endpoint) before ``list_nebius_models()`` is
    called; presumably that helper reads those globals — confirm against its
    definition. If discovery fails, the previous values are put back so an
    unverified key does not stay configured.

    Args:
        key: API key text from the input box; surrounding whitespace is ignored.

    Returns:
        A human-readable string: the model list, or a message explaining why
        no models could be shown.
    """
    global API_KEY, ENDPOINT_URL

    cleaned_key = key.strip() if isinstance(key, str) else ""
    if not cleaned_key:
        # Covers None, "", and whitespace-only input.
        return "Please enter an API key first."

    # Remember the current configuration so a failed lookup can be undone.
    module_globals = globals()
    previous_key = module_globals.get("API_KEY")
    previous_endpoint = module_globals.get("ENDPOINT_URL")

    API_KEY = cleaned_key
    ENDPOINT_URL = "https://api.studio.nebius.com/v1/"

    models = None
    try:
        models = list_nebius_models()
    finally:
        if not models:
            # No models (or an exception): do not leave a bad key active.
            API_KEY = previous_key
            ENDPOINT_URL = previous_endpoint

    if models:
        return "Available models:\n" + "\n".join(f"- {model}" for model in models)
    return "Could not fetch models. Please check your API key."
1052
+
1053
+ discover_models_button.click(
1054
+ fn=discover_models,
1055
+ inputs=[nebius_key_input],
1056
+ outputs=[available_models_display]
1057
+ )
1058
+
1059
def save_api_key(key, model):
    """Store the API key and model chosen in the setup tab.

    Delegates to ``setup_api_key(key, model)`` and turns its truthy/falsy
    result into the values bound to the click handler's outputs.

    Args:
        key: API key text from the input box.
        model: Model name selected in the dropdown.

    Returns:
        A ``(status_markdown, api_key_set)`` tuple: the status text to show
        and a boolean recording whether a key is now configured.
    """
    if setup_api_key(key, model):
        # MODEL_NAME is read after setup so the message shows the active model.
        return f"✅ **API key saved successfully!** Using model: {MODEL_NAME}", True
    return "❌ **Invalid API key.** Please check and try again.", False
1065
+
1066
+ save_key_button.click(
1067
+ fn=save_api_key,
1068
+ inputs=[nebius_key_input, model_dropdown],
1069
+ outputs=[api_key_status, api_key_set]
1070
+ )
1071
+
1072
  with gr.TabItem("πŸ“š Research Materials"):
1073
  gr.Markdown("### πŸ‰ Feed the assistant with the research papers you want to analyze.")
1074
 
 
1086
  extract_button = gr.Button("Process Papers", variant="primary")
1087
 
1088
  extract_console_output = gr.Textbox(label="Logs / Console Output")
1089
+
1090
+ # Wrap extract_research_papers so papers are only processed once an API key is set
1091
def extract_with_api_check(state_dict, paper_files, max_iterations, api_key_set):
    """Run paper extraction only when an API key has been configured.

    Returns the ``(log_text, state_dict, extraction_successful)`` triple bound
    to the extract button's outputs; without a key the state is passed back
    untouched and the success flag is ``False``.
    """
    if api_key_set:
        return extract_research_papers(state_dict, paper_files, max_iterations)

    missing_key_notice = "⚠️ Please set up your API key in the 'API Key Setup' tab first."
    return missing_key_notice, state_dict, False
1095
 
1096
  extract_button.click(
1097
+ fn=extract_with_api_check,
1098
+ inputs=[state_dict, research_papers_files, max_iterations, api_key_set],
1099
  outputs=[extract_console_output, state_dict, extraction_successful]
1100
  )
1101
 
1102
+ # Q&A tab: the query handler below checks for a configured API key before running
1103
  with gr.TabItem("πŸ€– Q&A Chatbot"):
1104
  examples = """ℹ️ **Example Queries**
1105
  - Summarize the key findings from these papers.
 
1109
  gr.Markdown(examples)
1110
  user_query = gr.Textbox(label="Ask your research question", value="Identify the main gaps and suggest future work.", interactive=True)
1111
  button = gr.Button("Ask the Research Assistant πŸ”¬πŸ§ ", variant="primary")
1112
+
1113
+ # Replace the @gr.render with a proper output textbox
1114
+ qa_output = gr.Markdown(
1115
+ label="Research Assistant Response",
1116
+ value="### πŸ“ Upload papers and ask a question to get started.",
1117
+ elem_id="qa_output"
1118
+ )
 
1119
 
1120
  output_logs = gr.Textbox(label="Logs/ Console Output", lines=10)
1121
 
1122
def call_with_api_check(state_dict, user_query, api_key_set):
    """Check the preconditions, then run the orchestrator for one query.

    Args:
        state_dict: Serialized agent state held by the UI.
        user_query: The question typed by the user.
        api_key_set: Session flag recording whether an API key was configured.

    Returns:
        A ``(answer_markdown, log_text, state_dict)`` triple for the answer
        panel, the log box, and the stored state. The incoming ``state_dict``
        is returned unchanged whenever the query did not produce a final
        answer.
    """
    # Honour the session flag as well as the module-level key, matching the
    # check used for paper extraction.
    if not api_key_set or not API_KEY:
        error_msg = "⚠️ Please set up your API key in the 'API Key Setup' tab first."
        return error_msg, error_msg, state_dict

    if not state_dict.get("research_papers"):
        error_msg = (
            "### ❗️ No Research Papers Found\n\n"
            "👈🏽 Please upload research papers in the '📚 Research Materials' tab first."
        )
        return error_msg, error_msg, state_dict

    try:
        logs, updated_state, success = call_orchestrator(state_dict, user_query)
    except Exception as e:
        # UI boundary: show the failure instead of crashing the handler.
        error_msg = (
            f"### ❗️ An Error Occurred\n\n```\n{str(e)}\n```\n\n"
            "Please check your API key and try again."
        )
        return error_msg, f"Error: {str(e)}", state_dict

    final_answer = updated_state.get("final_answer") if success else None
    if final_answer:
        return final_answer, logs, updated_state

    error_msg = f"### ❗️ Processing Failed\n\n{logs}\n\nPlease check the logs above for details."
    return error_msg, logs, state_dict
1145
+
1146
def reset_output():
    """Return placeholder content shown while a new query is being processed.

    Returns:
        A ``(answer_markdown, log_text)`` pair for the answer panel and the
        log box.
    """
    return (
        "### 🤖 Processing your request...\n\n"
        "Please wait while the research assistant analyzes your papers and generates a response.",
        "Generating response...",
    )
1149
 
1150
  button.click(
1151
+ fn=reset_output,
1152
+ outputs=[qa_output, output_logs]
 
1153
  ).then(
1154
+ fn=call_with_api_check,
1155
+ inputs=[state_dict, user_query, api_key_set],
1156
+ outputs=[qa_output, output_logs, state_dict]
1157
  )
1158
 
1159
  with gr.TabItem("πŸ”Ž What's under the hood?"):
key.txt DELETED
@@ -1,2 +0,0 @@
1
- eyJhbGciOiJIUzI1NiIsImtpZCI6IlV6SXJWd1h0dnprLVRvdzlLZWstc0M1akptWXBvX1VaVkxUZlpnMDRlOFUiLCJ0eXAiOiJKV1QifQ.eyJzdWIiOiJnaXRodWJ8Mzc1MzgyNTIiLCJzY29wZSI6Im9wZW5pZCBvZmZsaW5lX2FjY2VzcyIsImlzcyI6ImFwaV9rZXlfaXNzdWVyIiwiYXVkIjpbImh0dHBzOi8vbmViaXVzLWluZmVyZW5jZS5ldS5hdXRoMC5jb20vYXBpL3YyLyJdLCJleHAiOjE5MDcyMjQ2NTcsInV1aWQiOiI0NGQyOGU3ZC0xMjRmLTQ1ZjgtYTczMS0yNWRmY2Q1NTkyZTgiLCJuYW1lIjoiTkVCSVVTX0tFWSIsImV4cGlyZXNfYXQiOiIyMDMwLTA2LTA5VDA4OjM3OjM3KzAwMDAifQ.djouDFHucL8mm3NVfDU0VTfsPjuDGx7LcOzWRu8isl8
2
- eyJhbGciOiJIUzI1NiIsImtpZCI6IlV6SXJWd1h0dnprLVRvdzlLZWstc0M1akptWXBvX1VaVkxUZlpnMDRlOFUiLCJ0eXAiOiJKV1QifQ.eyJzdWIiOiJnaXRodWJ8Mzc1MzgyNTIiLCJzY29wZSI6Im9wZW5pZCBvZmZsaW5lX2FjY2VzcyIsImlzcyI6ImFwaV9rZXlfaXNzdWVyIiwiYXVkIjpbImh0dHBzOi8vbmViaXVzLWluZmVyZW5jZS5ldS5hdXRoMC5jb20vYXBpL3YyLyJdLCJleHAiOjE5MDcyNDI2NzYsInV1aWQiOiIxY2NkMDBlMy01NmNkLTRlMGMtODE3MS1lZmQ5MWQ2NmI5YWMiLCJuYW1lIjoiTkVCSVVTX0tFWSIsImV4cGlyZXNfYXQiOiIyMDMwLTA2LTA5VDEzOjM3OjU2KzAwMDAifQ.Olq0zvLp8X-F9fmOKdcZjX0WpeHwmURQOagTQJt5Mts