harvesthealth committed on
Commit
f7bd4df
·
verified ·
1 Parent(s): f5d996e

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +90 -89
app.py CHANGED
@@ -142,49 +142,27 @@ async def refresh_logs_ui():
142
 
143
  HELMHOLTZ_BASE_URL = "https://api.helmholtz-blablador.fz-juelich.de/v1"
144
 
145
- # Initialize LLMs (Helmholtz Blablador)
146
- # We use a placeholder if the API key is missing to allow the app to start and show status.
147
- api_key = os.environ.get("BLABLADOR_API_KEY") or "EMPTY_KEY"
148
-
149
- chat_llm = ChatOpenAI(
150
- model="alias-fast",
151
- base_url=HELMHOLTZ_BASE_URL,
152
- api_key=api_key,
153
- max_tokens=2048,
154
- max_retries=0
155
- )
156
-
157
- code_llm = ChatOpenAI(
158
- model="alias-fast",
159
- base_url=HELMHOLTZ_BASE_URL,
160
- api_key=api_key,
161
- max_tokens=1024,
162
- max_retries=0
163
- )
164
-
165
- fast_llm = ChatOpenAI(
166
- model="alias-fast",
167
- base_url=HELMHOLTZ_BASE_URL,
168
- api_key=api_key,
169
- max_tokens=2048,
170
- max_retries=0
171
- )
172
-
173
- huge_llm = ChatOpenAI(
174
- model="alias-huge",
175
- base_url=HELMHOLTZ_BASE_URL,
176
- api_key=api_key,
177
- max_tokens=2048,
178
- max_retries=0
179
- )
180
 
181
- large_llm = ChatOpenAI(
182
- model="alias-large",
183
- base_url=HELMHOLTZ_BASE_URL,
184
- api_key=api_key,
185
- max_tokens=2048,
186
- max_retries=0
187
- )
 
 
 
 
188
 
189
  async def ainvoke_with_retry(llm, prompt, max_retries=5, delay=30):
190
  """Wrapper for LLM calls with retry logic to handle intermittent provider errors."""
@@ -206,8 +184,41 @@ async def ainvoke_with_retry(llm, prompt, max_retries=5, delay=30):
206
  checkpointer = MemorySaver()
207
  long_term_store = InMemoryStore()
208
 
209
- # Global MCP Client to avoid process leaks
210
- GLOBAL_MCP_CLIENT = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
211
 
212
  # Global dictionary to store active sessions
213
  ACTIVE_SESSIONS = {}
@@ -277,7 +288,9 @@ async def update_memory(store, namespace, messages, current_profile):
277
 
278
  try:
279
  logger.info(f"Updating memory for {namespace}...")
280
- response = await ainvoke_with_retry(huge_llm, prompt)
 
 
281
  content = response.content
282
 
283
  # Clean up <think> tags if present
@@ -480,7 +493,6 @@ async def sequential_thinking_tool(thought: str = "", nextThoughtNeeded: bool =
480
  return f"Error: {last_err}"
481
 
482
  async def get_all_tools(include_jules=True):
483
- global GLOBAL_MCP_CLIENT
484
  # 1. Custom Tools
485
  custom_tools = [
486
  Tool(
@@ -501,42 +513,18 @@ async def get_all_tools(include_jules=True):
501
  sequential_thinking_tool
502
  ]
503
 
504
- # 2. MCP Tools using MultiServerMCPClient
505
- if GLOBAL_MCP_CLIENT is None:
506
- github_token = get_github_token()
507
- mcp_env = os.environ.copy()
508
- if github_token:
509
- mcp_env["GITHUB_TOKEN"] = github_token
510
- logger.info("Explicitly set GITHUB_TOKEN for MCP environment.")
511
-
512
- mcp_config = {
513
- "jules": {
514
- "transport": "stdio",
515
- "command": "python3",
516
- "args": ["mcp/mcp_jules.py"],
517
- "env": mcp_env
518
- },
519
- "github": {
520
- "transport": "stdio",
521
- "command": "npx",
522
- "args": ["-y", "@modelcontextprotocol/server-github"],
523
- "env": mcp_env
524
- }
525
- }
526
- try:
527
- logger.info("Initializing GLOBAL MCP client...")
528
- GLOBAL_MCP_CLIENT = MultiServerMCPClient(mcp_config)
529
- # Short wait to let servers start
530
- await asyncio.sleep(1)
531
- except Exception as e:
532
- logger.error(f"Failed to initialize MCP client: {e}")
533
- return custom_tools
534
 
535
  try:
536
- mcp_tools = await GLOBAL_MCP_CLIENT.get_tools()
537
- logger.info(f"Successfully retrieved {len(mcp_tools)} tools from GLOBAL MCP client.")
538
  if not include_jules:
539
- # Filter out Jules tools
540
  jules_tool_names = [
541
  "list_sources", "get_source", "create_session", "list_sessions",
542
  "get_session", "sendMessage", "approve_plan", "list_activities",
@@ -664,8 +652,11 @@ async def agent_node(state: State, config: Any, store: BaseStore):
664
  final_content = ""
665
  # Note: StateGraph nodes don't easily support yielding directly to Gradio history
666
  # without a custom streaming implementation. We'll handle streaming in handle_chat.
 
 
 
667
 
668
- async for content_chunk, _ in run_manual_agent(chat_llm, tools, state["messages"], system_message, persona=persona):
669
  final_content = content_chunk
670
  # In a standard graph, we can't easily stream out of a node to a global history
671
  # unless we use a side-channel or just wait for completion.
@@ -778,14 +769,15 @@ async def handle_chat(message, history, persona="planning", readme_content=""):
778
  all_tools = await get_all_tools(include_jules=False)
779
 
780
  # Selection of LLM and Tools based on Persona
781
- current_llm = chat_llm
 
 
 
 
 
782
  if persona == "mentor":
783
- # Mentor uses alias-fast as requested
784
- current_llm = fast_llm
785
  tools = all_tools
786
  elif persona == "planning":
787
- # Planning session agent only gets sequential thinking
788
- current_llm = chat_llm
789
  tools = [t for t in all_tools if t.name == "sequentialthinking"]
790
  logger.info(f"Persona {persona} detected: Filtering tools to only sequentialthinking.")
791
  else:
@@ -876,7 +868,12 @@ curl -N \
876
  }
877
 
878
  extracted = {}
879
- extraction_llm = huge_llm
 
 
 
 
 
880
 
881
  for key, instruction in fields_config.items():
882
  try:
@@ -1046,8 +1043,10 @@ async def handle_github_prep(desc, tasks, repos, expect, api, hf_profile, hf_spa
1046
  3. Tips for achieving the best results.
1047
 
1048
  Format it in Markdown."""
1049
-
1050
- agents_response = await ainvoke_with_retry(code_llm, agents_md_prompt)
 
 
1051
 
1052
  mandatory_instruction = """
1053
  ## important!
@@ -1164,8 +1163,10 @@ async def handle_supervisor_nudge(session_id, log_file=""):
1164
  "Identify if Jules is stuck or needs specific implementation advice. "
1165
  "Respond with the EXACT message you want to send to Jules."
1166
  )
 
 
1167
 
1168
- response = await ainvoke_with_retry(fast_llm, [
1169
  {"role": "system", "content": system_msg},
1170
  {"role": "user", "content": prompt}
1171
  ])
 
142
 
143
  HELMHOLTZ_BASE_URL = "https://api.helmholtz-blablador.fz-juelich.de/v1"
144
 
145
+ _llm_clients = {}
146
+ def get_llm(model_alias: str):
147
+ if model_alias in _llm_clients:
148
+ return _llm_clients[model_alias]
149
+
150
+ api_key = os.environ.get("BLABLADOR_API_KEY")
151
+ if not api_key or api_key == "EMPTY_KEY":
152
+ logger.error("BLABLADOR_API_KEY is not set. Please add it to your Space secrets.")
153
+ return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154
 
155
+ max_tokens = 1024 if "code" in model_alias else 2048  # NOTE(review): no alias used here contains "code", so the former code_llm 1024-token cap never applies — confirm intended
156
+
157
+ llm = ChatOpenAI(
158
+ model=model_alias,
159
+ base_url=HELMHOLTZ_BASE_URL,
160
+ api_key=api_key,
161
+ max_tokens=max_tokens,
162
+ max_retries=0
163
+ )
164
+ _llm_clients[model_alias] = llm
165
+ return llm
166
 
167
  async def ainvoke_with_retry(llm, prompt, max_retries=5, delay=30):
168
  """Wrapper for LLM calls with retry logic to handle intermittent provider errors."""
 
184
  checkpointer = MemorySaver()
185
  long_term_store = InMemoryStore()
186
 
187
+ _mcp_client = None
188
+ def get_mcp_client():
189
+ global _mcp_client
190
+ if _mcp_client:
191
+ return _mcp_client
192
+
193
+ github_token = get_github_token()
194
+ mcp_env = os.environ.copy()
195
+ if github_token:
196
+ mcp_env["GITHUB_TOKEN"] = github_token
197
+ logger.info("Explicitly set GITHUB_TOKEN for MCP environment.")
198
+
199
+ mcp_config = {
200
+ "jules": {
201
+ "transport": "stdio",
202
+ "command": "python3",
203
+ "args": ["mcp/mcp_jules.py"],
204
+ "env": mcp_env
205
+ },
206
+ "github": {
207
+ "transport": "stdio",
208
+ "command": "npx",
209
+ "args": ["-y", "@modelcontextprotocol/server-github"],
210
+ "env": mcp_env
211
+ }
212
+ }
213
+ try:
214
+ logger.info("Initializing GLOBAL MCP client...")
215
+ _mcp_client = MultiServerMCPClient(mcp_config)
216
+ # Short wait to let servers start
217
+ # await asyncio.sleep(1) # Cannot do async in sync function
218
+ except Exception as e:
219
+ logger.error(f"Failed to initialize MCP client: {e}")
220
+ return None
221
+ return _mcp_client
222
 
223
  # Global dictionary to store active sessions
224
  ACTIVE_SESSIONS = {}
 
288
 
289
  try:
290
  logger.info(f"Updating memory for {namespace}...")
291
+ llm = get_llm("alias-huge")
292
+ if not llm: return current_profile
293
+ response = await ainvoke_with_retry(llm, prompt)
294
  content = response.content
295
 
296
  # Clean up <think> tags if present
 
493
  return f"Error: {last_err}"
494
 
495
  async def get_all_tools(include_jules=True):
 
496
  # 1. Custom Tools
497
  custom_tools = [
498
  Tool(
 
513
  sequential_thinking_tool
514
  ]
515
 
516
+ mcp_client = get_mcp_client()
517
+ if not mcp_client:
518
+ logger.error("MCP client could not be initialized. Returning only custom tools.")
519
+ return custom_tools
520
+
521
+ # A short delay might still be beneficial for servers to start on first call
522
+ await asyncio.sleep(0.5)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
523
 
524
  try:
525
+ mcp_tools = await mcp_client.get_tools()
526
+ logger.info(f"Successfully retrieved {len(mcp_tools)} tools from MCP client.")
527
  if not include_jules:
 
528
  jules_tool_names = [
529
  "list_sources", "get_source", "create_session", "list_sessions",
530
  "get_session", "sendMessage", "approve_plan", "list_activities",
 
652
  final_content = ""
653
  # Note: StateGraph nodes don't easily support yielding directly to Gradio history
654
  # without a custom streaming implementation. We'll handle streaming in handle_chat.
655
+ llm = get_llm("alias-fast")
656
+ if not llm:
657
+ return {"messages": [AIMessage(content="LLM client is not available. Check API keys.")]}
658
 
659
+ async for content_chunk, _ in run_manual_agent(llm, tools, state["messages"], system_message, persona=persona):
660
  final_content = content_chunk
661
  # In a standard graph, we can't easily stream out of a node to a global history
662
  # unless we use a side-channel or just wait for completion.
 
769
  all_tools = await get_all_tools(include_jules=False)
770
 
771
  # Selection of LLM and Tools based on Persona
772
+ model_alias = "alias-fast"  # both mentor and planning personas use the fast model
773
+ current_llm = get_llm(model_alias)
774
+ if not current_llm:
775
+ yield history + [{"role": "assistant", "content": f"LLM '{model_alias}' is not available. Check API keys."}]
776
+ return
777
+
778
  if persona == "mentor":
 
 
779
  tools = all_tools
780
  elif persona == "planning":
 
 
781
  tools = [t for t in all_tools if t.name == "sequentialthinking"]
782
  logger.info(f"Persona {persona} detected: Filtering tools to only sequentialthinking.")
783
  else:
 
868
  }
869
 
870
  extracted = {}
871
+ extraction_llm = get_llm("alias-huge")
872
+ if not extraction_llm:
873
+ # Gracefully handle missing LLM
874
+ error_message = "LLM client 'alias-huge' is not available. Check API keys."
875
+ folders = get_ideation_logs()
876
+ return (gr.update(visible=False), gr.update(visible=True), error_message, "", "", "", "", "", "", "", error_message, gr.update(choices=folders), gr.update(choices=folders), error_message, "", "", "", "", "", "", "")
877
 
878
  for key, instruction in fields_config.items():
879
  try:
 
1043
  3. Tips for achieving the best results.
1044
 
1045
  Format it in Markdown."""
1046
+
1047
+ llm = get_llm("alias-fast")
1048
+ if not llm: return "Could not generate AGENTS.md content; LLM not available."
1049
+ agents_response = await ainvoke_with_retry(llm, agents_md_prompt)
1050
 
1051
  mandatory_instruction = """
1052
  ## important!
 
1163
  "Identify if Jules is stuck or needs specific implementation advice. "
1164
  "Respond with the EXACT message you want to send to Jules."
1165
  )
1166
+ llm = get_llm("alias-fast")
1167
+ if not llm: return "Supervisor LLM not available."
1168
 
1169
+ response = await ainvoke_with_retry(llm, [
1170
  {"role": "system", "content": system_msg},
1171
  {"role": "user", "content": prompt}
1172
  ])