Spaces:

Pulastya0
/

Data-Science-Agent

Running

App Files Files Community

Pulastya B committed on Jan 31

Commit

e29cf28

1 Parent(s): a8b0cca

Fixed slow agent loading, which caused the agent to take too long to respond

Browse files

Files changed (2) hide show

src/api/app.py +56 -51
src/orchestrator.py +4 -3

src/api/app.py CHANGED Viewed

@@ -28,6 +28,7 @@ import numpy as np
 # Import from parent package
 from src.orchestrator import DataScienceCopilot
 from src.progress_manager import progress_manager
 # Configure logging
 logging.basicConfig(level=logging.INFO)
@@ -78,10 +79,6 @@ app.add_middleware(
     allow_headers=["*"],
 )
-# Initialize agent once (singleton pattern for stateless service)
-# Agent itself is stateless - no conversation memory between requests
-agent: Optional[DataScienceCopilot] = None
 # SSE event queues for real-time streaming
 class ProgressEventManager:
     """Manages SSE connections and progress events for real-time updates."""
@@ -151,69 +148,80 @@ class ProgressEventManager:
         if session_id in self.session_status:
             del self.session_status[session_id]
-# 👥 MULTI-USER SUPPORT: Per-session agent instances
-# Instead of one global agent, create isolated instances per session
-# This prevents users from interfering with each other's workflows
-agent_cache: Dict[str, DataScienceCopilot] = {}  # session_id -> agent instance
 agent_cache_lock = asyncio.Lock()
-MAX_CACHED_AGENTS = 10  # Limit memory usage
-logger.info("👥 Multi-user agent cache initialized")
-# Legacy global agent for backward compatibility (will be deprecated)
 agent = None
-# 👥 MULTI-USER SUPPORT: Per-session agent instances
-# Instead of one global agent, create isolated instances per session
-# This prevents users from interfering with each other's workflows
-agent_cache: Dict[str, DataScienceCopilot] = {}  # session_id -> agent instance
-agent_cache_lock = asyncio.Lock()
-MAX_CACHED_AGENTS = 10  # Limit memory usage
-logger.info("👥 Multi-user agent cache initialized")
-# Legacy global agent for backward compatibility (will be deprecated)
-agent = None
 async def get_agent_for_session(session_id: str) -> DataScienceCopilot:
     """
-    Get or create an isolated agent instance for a session.
-    This ensures each user gets their own agent with isolated state,
-    preventing session collisions and race conditions.
     Args:
         session_id: Unique session identifier
     Returns:
-        DataScienceCopilot instance for this session
     """
     async with agent_cache_lock:
-        # Return existing agent if cached
-        if session_id in agent_cache:
-            logger.info(f"[♻️] Reusing cached agent for session {session_id[:8]}...")
-            return agent_cache[session_id]
-        # Create new agent instance
-        logger.info(f"[🆕] Creating new agent for session {session_id[:8]}...")
-        provider = os.getenv("LLM_PROVIDER", "mistral")
-        new_agent = DataScienceCopilot(
-            reasoning_effort="medium",
-            provider=provider,
-            use_compact_prompts=False,  # Multi-agent architecture
-            session_id=session_id  # Pass session_id for isolation
-        )
         # Cache management: Remove oldest if cache is full
-        if len(agent_cache) >= MAX_CACHED_AGENTS:
-            oldest_session = next(iter(agent_cache))
             logger.info(f"[🗑️] Cache full, removing session {oldest_session[:8]}...")
-            del agent_cache[oldest_session]
-        agent_cache[session_id] = new_agent
-        logger.info(f"✅ Agent created for session {session_id[:8]} (cache: {len(agent_cache)}/{MAX_CACHED_AGENTS})")
-        return new_agent
 # 🔒 REQUEST QUEUING: Global lock to prevent concurrent workflows
 # This ensures only one analysis runs at a time, preventing:
@@ -460,15 +468,12 @@ async def run_analysis_async(
         logger.info(f"[ASYNC] File saved: {file.filename}")
     else:
-        # 🛡️ VALIDATION: For follow-up queries, check if any cached agent has dataset
-        # Note: In true multi-user setup, you'd need session_id from frontend to match exact session
         has_dataset = False
         async with agent_cache_lock:
-            for cached_agent in agent_cache.values():
-                if hasattr(cached_agent, 'session') and cached_agent.session and cached_agent.session.last_dataset:
-                    has_dataset = True
-                    logger.info(f"[ASYNC] Follow-up query using cached session data")
-                    break
         if not has_dataset:
             logger.warning("[ASYNC] No file uploaded and no session dataset available")

 # Import from parent package
 from src.orchestrator import DataScienceCopilot
 from src.progress_manager import progress_manager
+from src.session_memory import SessionMemory
 # Configure logging
 logging.basicConfig(level=logging.INFO)
     allow_headers=["*"],
 )
 # SSE event queues for real-time streaming
 class ProgressEventManager:
     """Manages SSE connections and progress events for real-time updates."""
         if session_id in self.session_status:
             del self.session_status[session_id]
+# 👥 MULTI-USER SUPPORT: Session state isolation
+# Heavy components (SBERT, tools, LLM client) are shared via global 'agent'
+# Only session memory is isolated per user for fast initialization
+session_states: Dict[str, Any] = {}  # session_id -> SessionMemory
 agent_cache_lock = asyncio.Lock()
+MAX_CACHED_AGENTS = 10  # Limit memory usage (session states are lightweight)
+logger.info("👥 Multi-user session isolation initialized (fast mode)")
+# Global agent - Heavy components loaded ONCE at startup
+# SBERT model, tool functions, LLM client are shared across all users
+agent: Optional[DataScienceCopilot] = None
 agent = None
+# Session state isolation (lightweight - just session memory)
+session_states: Dict[str, any] = {}  # session_id -> session memory only
 async def get_agent_for_session(session_id: str) -> DataScienceCopilot:
     """
+    Get agent with isolated session state.
+    OPTIMIZATION: Instead of creating a full new agent per session (slow!),
+    we reuse the global agent but swap session memory per request.
+    Heavy components (SBERT, tools, LLM client) are shared.
+    This reduces per-user initialization from 20s to <1s.
     Args:
         session_id: Unique session identifier
     Returns:
+        DataScienceCopilot instance with isolated session for this user
     """
+    global agent
     async with agent_cache_lock:
+        # Ensure base agent exists (heavy components loaded once at startup)
+        if agent is None:
+            logger.warning("Base agent not initialized - this shouldn't happen after startup")
+            provider = os.getenv("LLM_PROVIDER", "mistral")
+            agent = DataScienceCopilot(
+                reasoning_effort="medium",
+                provider=provider,
+                use_compact_prompts=False
+            )
+        # Check if we have cached session memory for this session
+        if session_id in session_states:
+            logger.info(f"[♻️] Reusing session state for {session_id[:8]}...")
+            agent.session = session_states[session_id]
+            agent.http_session_key = session_id
+            return agent
+        # 🚀 FAST PATH: Create new session memory only (no SBERT reload!)
+        logger.info(f"[🆕] Creating lightweight session for {session_id[:8]}...")
+        # Create isolated session memory for this user
+        new_session = SessionMemory(session_id=session_id)
+        # Cache session memory (lightweight)
         # Cache management: Remove oldest if cache is full
+        if len(session_states) >= MAX_CACHED_AGENTS:
+            oldest_session = next(iter(session_states))
             logger.info(f"[🗑️] Cache full, removing session {oldest_session[:8]}...")
+            del session_states[oldest_session]
+        session_states[session_id] = new_session
+        # Set session on shared agent
+        agent.session = new_session
+        agent.http_session_key = session_id
+        logger.info(f"✅ Session created for {session_id[:8]} (cache: {len(session_states)}/{MAX_CACHED_AGENTS}) - <1s init")
+        return agent
 # 🔒 REQUEST QUEUING: Global lock to prevent concurrent workflows
 # This ensures only one analysis runs at a time, preventing:
         logger.info(f"[ASYNC] File saved: {file.filename}")
     else:
+        # 🛡️ VALIDATION: Check if agent's current session has dataset
         has_dataset = False
         async with agent_cache_lock:
+            if agent and hasattr(agent, 'session') and agent.session and hasattr(agent.session, 'last_dataset') and agent.session.last_dataset:
+                has_dataset = True
+                logger.info(f"[ASYNC] Follow-up query using session data")
         if not has_dataset:
             logger.warning("[ASYNC] No file uploaded and no session dataset available")

src/orchestrator.py CHANGED Viewed

@@ -269,8 +269,9 @@ class DataScienceCopilot:
         max_context = provider_max_tokens.get(self.provider, 128000)
         self.token_manager = get_token_manager(model=self.model, max_tokens=max_context)
-        # ⚡ Initialize parallel executor
-        self.parallel_executor = get_parallel_executor()
         # 🧠 Initialize session memory
         self.use_session_memory = use_session_memory
@@ -3438,7 +3439,7 @@ You receive quality reports from EDA agent and deliver clean data to modeling ag
                         print(f"   These will run SEQUENTIALLY to prevent resource exhaustion")
                         print(f"   Heavy tools: {', '.join(heavy_tools)}")
                         # Fall through to sequential execution
-                    elif len(tool_executions) > 1 and len(heavy_tools) <= 1:
                         try:
                             results = asyncio.run(self.parallel_executor.execute_all(
                                 tool_executions=tool_executions,

         max_context = provider_max_tokens.get(self.provider, 128000)
         self.token_manager = get_token_manager(model=self.model, max_tokens=max_context)
+        # ⚡ Parallel executor DISABLED - running tools sequentially for stability
+        # self.parallel_executor = get_parallel_executor()
+        self.parallel_executor = None  # Disabled for scale optimization
         # 🧠 Initialize session memory
         self.use_session_memory = use_session_memory
                         print(f"   These will run SEQUENTIALLY to prevent resource exhaustion")
                         print(f"   Heavy tools: {', '.join(heavy_tools)}")
                         # Fall through to sequential execution
+                    elif len(tool_executions) > 1 and len(heavy_tools) <= 1 and self.parallel_executor is not None:
                         try:
                             results = asyncio.run(self.parallel_executor.execute_all(
                                 tool_executions=tool_executions,