harvesthealth committed on
Commit
f7bd4df
·
verified ·
1 Parent(s): f5d996e

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +90 -89
app.py CHANGED
@@ -142,49 +142,27 @@ async def refresh_logs_ui():
142
 
143
  HELMHOLTZ_BASE_URL = "https://api.helmholtz-blablador.fz-juelich.de/v1"
144
 
145
- # Initialize LLMs (Helmholtz Blablador)
146
- # We use a placeholder if the API key is missing to allow the app to start and show status.
147
- api_key = os.environ.get("BLABLADOR_API_KEY") or "EMPTY_KEY"
148
-
149
- chat_llm = ChatOpenAI(
150
- model="alias-fast",
151
- base_url=HELMHOLTZ_BASE_URL,
152
- api_key=api_key,
153
- max_tokens=2048,
154
- max_retries=0
155
- )
156
-
157
- code_llm = ChatOpenAI(
158
- model="alias-fast",
159
- base_url=HELMHOLTZ_BASE_URL,
160
- api_key=api_key,
161
- max_tokens=1024,
162
- max_retries=0
163
- )
164
-
165
- fast_llm = ChatOpenAI(
166
- model="alias-fast",
167
- base_url=HELMHOLTZ_BASE_URL,
168
- api_key=api_key,
169
- max_tokens=2048,
170
- max_retries=0
171
- )
172
-
173
- huge_llm = ChatOpenAI(
174
- model="alias-huge",
175
- base_url=HELMHOLTZ_BASE_URL,
176
- api_key=api_key,
177
- max_tokens=2048,
178
- max_retries=0
179
- )
180
 
181
- large_llm = ChatOpenAI(
182
- model="alias-large",
183
- base_url=HELMHOLTZ_BASE_URL,
184
- api_key=api_key,
185
- max_tokens=2048,
186
- max_retries=0
187
- )
 
 
 
 
188
 
189
  async def ainvoke_with_retry(llm, prompt, max_retries=5, delay=30):
190
  """Wrapper for LLM calls with retry logic to handle intermittent provider errors."""
@@ -206,8 +184,41 @@ async def ainvoke_with_retry(llm, prompt, max_retries=5, delay=30):
206
  checkpointer = MemorySaver()
207
  long_term_store = InMemoryStore()
208
 
209
- # Global MCP Client to avoid process leaks
210
- GLOBAL_MCP_CLIENT = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
211
 
212
  # Global dictionary to store active sessions
213
  ACTIVE_SESSIONS = {}
@@ -277,7 +288,9 @@ async def update_memory(store, namespace, messages, current_profile):
277
 
278
  try:
279
  logger.info(f"Updating memory for {namespace}...")
280
- response = await ainvoke_with_retry(huge_llm, prompt)
 
 
281
  content = response.content
282
 
283
  # Clean up <think> tags if present
@@ -480,7 +493,6 @@ async def sequential_thinking_tool(thought: str = "", nextThoughtNeeded: bool =
480
  return f"Error: {last_err}"
481
 
482
  async def get_all_tools(include_jules=True):
483
- global GLOBAL_MCP_CLIENT
484
  # 1. Custom Tools
485
  custom_tools = [
486
  Tool(
@@ -501,42 +513,18 @@ async def get_all_tools(include_jules=True):
501
  sequential_thinking_tool
502
  ]
503
 
504
- # 2. MCP Tools using MultiServerMCPClient
505
- if GLOBAL_MCP_CLIENT is None:
506
- github_token = get_github_token()
507
- mcp_env = os.environ.copy()
508
- if github_token:
509
- mcp_env["GITHUB_TOKEN"] = github_token
510
- logger.info("Explicitly set GITHUB_TOKEN for MCP environment.")
511
-
512
- mcp_config = {
513
- "jules": {
514
- "transport": "stdio",
515
- "command": "python3",
516
- "args": ["mcp/mcp_jules.py"],
517
- "env": mcp_env
518
- },
519
- "github": {
520
- "transport": "stdio",
521
- "command": "npx",
522
- "args": ["-y", "@modelcontextprotocol/server-github"],
523
- "env": mcp_env
524
- }
525
- }
526
- try:
527
- logger.info("Initializing GLOBAL MCP client...")
528
- GLOBAL_MCP_CLIENT = MultiServerMCPClient(mcp_config)
529
- # Short wait to let servers start
530
- await asyncio.sleep(1)
531
- except Exception as e:
532
- logger.error(f"Failed to initialize MCP client: {e}")
533
- return custom_tools
534
 
535
  try:
536
- mcp_tools = await GLOBAL_MCP_CLIENT.get_tools()
537
- logger.info(f"Successfully retrieved {len(mcp_tools)} tools from GLOBAL MCP client.")
538
  if not include_jules:
539
- # Filter out Jules tools
540
  jules_tool_names = [
541
  "list_sources", "get_source", "create_session", "list_sessions",
542
  "get_session", "sendMessage", "approve_plan", "list_activities",
@@ -664,8 +652,11 @@ async def agent_node(state: State, config: Any, store: BaseStore):
664
  final_content = ""
665
  # Note: StateGraph nodes don't easily support yielding directly to Gradio history
666
  # without a custom streaming implementation. We'll handle streaming in handle_chat.
 
 
 
667
 
668
- async for content_chunk, _ in run_manual_agent(chat_llm, tools, state["messages"], system_message, persona=persona):
669
  final_content = content_chunk
670
  # In a standard graph, we can't easily stream out of a node to a global history
671
  # unless we use a side-channel or just wait for completion.
@@ -778,14 +769,15 @@ async def handle_chat(message, history, persona="planning", readme_content=""):
778
  all_tools = await get_all_tools(include_jules=False)
779
 
780
  # Selection of LLM and Tools based on Persona
781
- current_llm = chat_llm
 
 
 
 
 
782
  if persona == "mentor":
783
- # Mentor uses alias-fast as requested
784
- current_llm = fast_llm
785
  tools = all_tools
786
  elif persona == "planning":
787
- # Planning session agent only gets sequential thinking
788
- current_llm = chat_llm
789
  tools = [t for t in all_tools if t.name == "sequentialthinking"]
790
  logger.info(f"Persona {persona} detected: Filtering tools to only sequentialthinking.")
791
  else:
@@ -876,7 +868,12 @@ curl -N \
876
  }
877
 
878
  extracted = {}
879
- extraction_llm = huge_llm
 
 
 
 
 
880
 
881
  for key, instruction in fields_config.items():
882
  try:
@@ -1046,8 +1043,10 @@ async def handle_github_prep(desc, tasks, repos, expect, api, hf_profile, hf_spa
1046
  3. Tips for achieving the best results.
1047
 
1048
  Format it in Markdown."""
1049
-
1050
- agents_response = await ainvoke_with_retry(code_llm, agents_md_prompt)
 
 
1051
 
1052
  mandatory_instruction = """
1053
  ## important!
@@ -1164,8 +1163,10 @@ async def handle_supervisor_nudge(session_id, log_file=""):
1164
  "Identify if Jules is stuck or needs specific implementation advice. "
1165
  "Respond with the EXACT message you want to send to Jules."
1166
  )
 
 
1167
 
1168
- response = await ainvoke_with_retry(fast_llm, [
1169
  {"role": "system", "content": system_msg},
1170
  {"role": "user", "content": prompt}
1171
  ])
 
142
 
143
  HELMHOLTZ_BASE_URL = "https://api.helmholtz-blablador.fz-juelich.de/v1"
144
 
145
+ _llm_clients = {}
146
+ def get_llm(model_alias: str):
147
+ if model_alias in _llm_clients:
148
+ return _llm_clients[model_alias]
149
+
150
+ api_key = os.environ.get("BLABLADOR_API_KEY")
151
+ if not api_key or api_key == "EMPTY_KEY":
152
+ logger.error("BLABLADOR_API_KEY is not set. Please add it to your Space secrets.")
153
+ return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154
 
155
+ max_tokens = 1024 if "code" in model_alias else 2048  # NOTE(review): no alias used here contains "code", so the former code_llm 1024-token cap never applies — confirm intended
156
+
157
+ llm = ChatOpenAI(
158
+ model=model_alias,
159
+ base_url=HELMHOLTZ_BASE_URL,
160
+ api_key=api_key,
161
+ max_tokens=max_tokens,
162
+ max_retries=0
163
+ )
164
+ _llm_clients[model_alias] = llm
165
+ return llm
166
 
167
  async def ainvoke_with_retry(llm, prompt, max_retries=5, delay=30):
168
  """Wrapper for LLM calls with retry logic to handle intermittent provider errors."""
 
184
  checkpointer = MemorySaver()
185
  long_term_store = InMemoryStore()
186
 
187
+ _mcp_client = None
188
+ def get_mcp_client():
189
+ global _mcp_client
190
+ if _mcp_client:
191
+ return _mcp_client
192
+
193
+ github_token = get_github_token()
194
+ mcp_env = os.environ.copy()
195
+ if github_token:
196
+ mcp_env["GITHUB_TOKEN"] = github_token
197
+ logger.info("Explicitly set GITHUB_TOKEN for MCP environment.")
198
+
199
+ mcp_config = {
200
+ "jules": {
201
+ "transport": "stdio",
202
+ "command": "python3",
203
+ "args": ["mcp/mcp_jules.py"],
204
+ "env": mcp_env
205
+ },
206
+ "github": {
207
+ "transport": "stdio",
208
+ "command": "npx",
209
+ "args": ["-y", "@modelcontextprotocol/server-github"],
210
+ "env": mcp_env
211
+ }
212
+ }
213
+ try:
214
+ logger.info("Initializing GLOBAL MCP client...")
215
+ _mcp_client = MultiServerMCPClient(mcp_config)
216
+ # Short wait to let servers start
217
+ # await asyncio.sleep(1) # Cannot do async in sync function
218
+ except Exception as e:
219
+ logger.error(f"Failed to initialize MCP client: {e}")
220
+ return None
221
+ return _mcp_client
222
 
223
  # Global dictionary to store active sessions
224
  ACTIVE_SESSIONS = {}
 
288
 
289
  try:
290
  logger.info(f"Updating memory for {namespace}...")
291
+ llm = get_llm("alias-huge")
292
+ if not llm: return current_profile
293
+ response = await ainvoke_with_retry(llm, prompt)
294
  content = response.content
295
 
296
  # Clean up <think> tags if present
 
493
  return f"Error: {last_err}"
494
 
495
  async def get_all_tools(include_jules=True):
 
496
  # 1. Custom Tools
497
  custom_tools = [
498
  Tool(
 
513
  sequential_thinking_tool
514
  ]
515
 
516
+ mcp_client = get_mcp_client()
517
+ if not mcp_client:
518
+ logger.error("MCP client could not be initialized. Returning only custom tools.")
519
+ return custom_tools
520
+
521
+ # A short delay might still be beneficial for servers to start on first call
522
+ await asyncio.sleep(0.5)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
523
 
524
  try:
525
+ mcp_tools = await mcp_client.get_tools()
526
+ logger.info(f"Successfully retrieved {len(mcp_tools)} tools from MCP client.")
527
  if not include_jules:
 
528
  jules_tool_names = [
529
  "list_sources", "get_source", "create_session", "list_sessions",
530
  "get_session", "sendMessage", "approve_plan", "list_activities",
 
652
  final_content = ""
653
  # Note: StateGraph nodes don't easily support yielding directly to Gradio history
654
  # without a custom streaming implementation. We'll handle streaming in handle_chat.
655
+ llm = get_llm("alias-fast")
656
+ if not llm:
657
+ return {"messages": [AIMessage(content="LLM client is not available. Check API keys.")]}
658
 
659
+ async for content_chunk, _ in run_manual_agent(llm, tools, state["messages"], system_message, persona=persona):
660
  final_content = content_chunk
661
  # In a standard graph, we can't easily stream out of a node to a global history
662
  # unless we use a side-channel or just wait for completion.
 
769
  all_tools = await get_all_tools(include_jules=False)
770
 
771
  # Selection of LLM and Tools based on Persona
772
+ model_alias = "alias-fast"  # both mentor and planning personas use the fast model
773
+ current_llm = get_llm(model_alias)
774
+ if not current_llm:
775
+ yield history + [{"role": "assistant", "content": f"LLM '{model_alias}' is not available. Check API keys."}]
776
+ return
777
+
778
  if persona == "mentor":
 
 
779
  tools = all_tools
780
  elif persona == "planning":
 
 
781
  tools = [t for t in all_tools if t.name == "sequentialthinking"]
782
  logger.info(f"Persona {persona} detected: Filtering tools to only sequentialthinking.")
783
  else:
 
868
  }
869
 
870
  extracted = {}
871
+ extraction_llm = get_llm("alias-huge")
872
+ if not extraction_llm:
873
+ # Gracefully handle missing LLM
874
+ error_message = "LLM client 'alias-huge' is not available. Check API keys."
875
+ folders = get_ideation_logs()
876
+ return (gr.update(visible=False), gr.update(visible=True), error_message, "", "", "", "", "", "", "", error_message, gr.update(choices=folders), gr.update(choices=folders), error_message, "", "", "", "", "", "", "")
877
 
878
  for key, instruction in fields_config.items():
879
  try:
 
1043
  3. Tips for achieving the best results.
1044
 
1045
  Format it in Markdown."""
1046
+
1047
+ llm = get_llm("alias-fast")
1048
+ if not llm: return "Could not generate AGENTS.md content; LLM not available."
1049
+ agents_response = await ainvoke_with_retry(llm, agents_md_prompt)
1050
 
1051
  mandatory_instruction = """
1052
  ## important!
 
1163
  "Identify if Jules is stuck or needs specific implementation advice. "
1164
  "Respond with the EXACT message you want to send to Jules."
1165
  )
1166
+ llm = get_llm("alias-fast")
1167
+ if not llm: return "Supervisor LLM not available."
1168
 
1169
+ response = await ainvoke_with_retry(llm, [
1170
  {"role": "system", "content": system_msg},
1171
  {"role": "user", "content": prompt}
1172
  ])