Commit
·
cc5dfc8
1
Parent(s):
e6163d4
fix(perf): Implement P2 Phases 2 & 3 (Pre-warming + Gradio Progress)
Browse files- docs/bugs/ACTIVE_BUGS.md +7 -4
- docs/bugs/P2_ADVANCED_MODE_COLD_START_NO_FEEDBACK.md +4 -4
- src/app.py +63 -25
- src/utils/service_loader.py +23 -0
docs/bugs/ACTIVE_BUGS.md
CHANGED
|
@@ -13,22 +13,25 @@ _No active P0 bugs._
|
|
| 13 |
|
| 14 |
## P2 - UX Friction
|
| 15 |
|
| 16 |
-
### P2 - Advanced Mode Cold Start Has No User Feedback (
|
| 17 |
**File:** `docs/bugs/P2_ADVANCED_MODE_COLD_START_NO_FEEDBACK.md`
|
| 18 |
**Issue:** [#108](https://github.com/The-Obstacle-Is-The-Way/DeepBoner/issues/108)
|
| 19 |
**Found:** 2025-12-01 (Gradio Testing)
|
| 20 |
|
| 21 |
**Problem:** Three "dead zones" with no visual feedback during Advanced Mode startup:
|
| 22 |
1. **Dead Zone #1** (5-15s): Between STARTED → THINKING ✅ FIXED (granular events)
|
| 23 |
-
2. **Dead Zone #2** (10-30s): Between THINKING → PROGRESS (first LLM call)
|
| 24 |
-
3. **Dead Zone #3** (30-90s): After PROGRESS (SearchAgent executing)
|
| 25 |
|
| 26 |
**Phase 1 Fix (commit dbf888c):**
|
| 27 |
- Added granular progress events during initialization
|
| 28 |
- Users now see "Loading embedding service...", "Initializing research memory...", "Building agent team..."
|
| 29 |
- Significantly improves perceived responsiveness
|
| 30 |
|
| 31 |
-
**
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
---
|
| 34 |
|
|
|
|
| 13 |
|
| 14 |
## P2 - UX Friction
|
| 15 |
|
| 16 |
+
### P2 - Advanced Mode Cold Start Has No User Feedback (✅ FIXED)
|
| 17 |
**File:** `docs/bugs/P2_ADVANCED_MODE_COLD_START_NO_FEEDBACK.md`
|
| 18 |
**Issue:** [#108](https://github.com/The-Obstacle-Is-The-Way/DeepBoner/issues/108)
|
| 19 |
**Found:** 2025-12-01 (Gradio Testing)
|
| 20 |
|
| 21 |
**Problem:** Three "dead zones" with no visual feedback during Advanced Mode startup:
|
| 22 |
1. **Dead Zone #1** (5-15s): Between STARTED → THINKING ✅ FIXED (granular events)
|
| 23 |
+
2. **Dead Zone #2** (10-30s): Between THINKING → PROGRESS (first LLM call) ✅ FIXED (Progress Bar)
|
| 24 |
+
3. **Dead Zone #3** (30-90s): After PROGRESS (SearchAgent executing) ✅ FIXED (Pre-warming + Progress Bar)
|
| 25 |
|
| 26 |
**Phase 1 Fix (commit dbf888c):**
|
| 27 |
- Added granular progress events during initialization
|
| 28 |
- Users now see "Loading embedding service...", "Initializing research memory...", "Building agent team..."
|
| 29 |
- Significantly improves perceived responsiveness
|
| 30 |
|
| 31 |
+
**Phase 2/3 Fix (Latest):**
|
| 32 |
+
- Implemented service pre-warming (`service_loader.warmup_services`)
|
| 33 |
+
- Added native Gradio progress bar (`gr.Progress`) to `research_agent`
|
| 34 |
+
- Visual feedback is now continuous throughout the entire lifecycle
|
| 35 |
|
| 36 |
---
|
| 37 |
|
docs/bugs/P2_ADVANCED_MODE_COLD_START_NO_FEEDBACK.md
CHANGED
|
@@ -2,7 +2,7 @@
|
|
| 2 |
|
| 3 |
**Priority**: P2 (UX Friction)
|
| 4 |
**Component**: `src/orchestrators/advanced.py`
|
| 5 |
-
**Status**:
|
| 6 |
**Issue**: [#108](https://github.com/The-Obstacle-Is-The-Way/DeepBoner/issues/108)
|
| 7 |
**Created**: 2025-12-01
|
| 8 |
|
|
@@ -199,9 +199,9 @@ with gr.Blocks() as demo:
|
|
| 199 |
|
| 200 |
## Recommended Approach
|
| 201 |
|
| 202 |
-
**Phase 1 (Quick Win)**: Option A - Add granular events ✅ COMPLETE
|
| 203 |
-
**Phase 2 (Performance)**: Option C - Pre-warm services at startup
|
| 204 |
-
**Phase 3 (Polish)**: Option D - Gradio progress bar
|
| 205 |
|
| 206 |
## Related Considerations
|
| 207 |
|
|
|
|
| 2 |
|
| 3 |
**Priority**: P2 (UX Friction)
|
| 4 |
**Component**: `src/orchestrators/advanced.py`
|
| 5 |
+
**Status**: ✅ FIXED (All Phases Complete)
|
| 6 |
**Issue**: [#108](https://github.com/The-Obstacle-Is-The-Way/DeepBoner/issues/108)
|
| 7 |
**Created**: 2025-12-01
|
| 8 |
|
|
|
|
| 199 |
|
| 200 |
## Recommended Approach
|
| 201 |
|
| 202 |
+
**Phase 1 (Quick Win)**: Option A - Add granular events ✅ COMPLETE
|
| 203 |
+
**Phase 2 (Performance)**: Option C - Pre-warm services at startup ✅ COMPLETE
|
| 204 |
+
**Phase 3 (Polish)**: Option D - Gradio progress bar ✅ COMPLETE
|
| 205 |
|
| 206 |
## Related Considerations
|
| 207 |
|
src/app.py
CHANGED
|
@@ -21,6 +21,7 @@ from src.tools.search_handler import SearchHandler
|
|
| 21 |
from src.utils.config import settings
|
| 22 |
from src.utils.exceptions import ConfigurationError
|
| 23 |
from src.utils.models import OrchestratorConfig
|
|
|
|
| 24 |
|
| 25 |
OrchestratorMode = Literal["simple", "magentic", "advanced", "hierarchical"]
|
| 26 |
|
|
@@ -137,6 +138,38 @@ def configure_orchestrator(
|
|
| 137 |
return orchestrator, backend_info
|
| 138 |
|
| 139 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 140 |
async def research_agent(
|
| 141 |
message: str,
|
| 142 |
history: list[dict[str, Any]],
|
|
@@ -144,6 +177,7 @@ async def research_agent(
|
|
| 144 |
domain: str = "sexual_health",
|
| 145 |
api_key: str = "",
|
| 146 |
api_key_state: str = "",
|
|
|
|
| 147 |
) -> AsyncGenerator[str, None]:
|
| 148 |
"""
|
| 149 |
Gradio chat function that runs the research agent.
|
|
@@ -155,6 +189,7 @@ async def research_agent(
|
|
| 155 |
domain: Research domain
|
| 156 |
api_key: Optional user-provided API key (BYOK - auto-detects provider)
|
| 157 |
api_key_state: Persistent API key state (survives example clicks)
|
|
|
|
| 158 |
|
| 159 |
Yields:
|
| 160 |
Markdown-formatted responses for streaming
|
|
@@ -164,38 +199,19 @@ async def research_agent(
|
|
| 164 |
return
|
| 165 |
|
| 166 |
# BUG FIX: Handle None values from Gradio example caching
|
| 167 |
-
# Gradio passes None for missing example columns, overriding defaults
|
| 168 |
-
api_key_str = api_key or ""
|
| 169 |
-
api_key_state_str = api_key_state or ""
|
| 170 |
domain_str = domain or "sexual_health"
|
| 171 |
|
| 172 |
-
# Validate
|
| 173 |
-
|
| 174 |
-
mode_validated: OrchestratorMode = mode if mode in valid_modes else "simple" # type: ignore[assignment]
|
| 175 |
|
| 176 |
-
#
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
# Check available keys
|
| 180 |
-
has_openai = settings.has_openai_key
|
| 181 |
-
has_anthropic = settings.has_anthropic_key
|
| 182 |
-
# Check for OpenAI user key
|
| 183 |
-
is_openai_user_key = (
|
| 184 |
-
user_api_key and user_api_key.startswith("sk-") and not user_api_key.startswith("sk-ant-")
|
| 185 |
-
)
|
| 186 |
-
has_paid_key = has_openai or has_anthropic or bool(user_api_key)
|
| 187 |
-
|
| 188 |
-
# Advanced mode requires OpenAI specifically (due to agent-framework binding)
|
| 189 |
-
if mode_validated == "advanced" and not (has_openai or is_openai_user_key):
|
| 190 |
yield (
|
| 191 |
"⚠️ **Warning**: Advanced mode currently requires OpenAI API key. "
|
| 192 |
"Anthropic keys only work in Simple mode. Falling back to Simple.\n\n"
|
| 193 |
)
|
| 194 |
-
mode_validated = "simple"
|
| 195 |
|
| 196 |
-
# Inform user about fallback if no keys
|
| 197 |
if not has_paid_key:
|
| 198 |
-
# No paid keys - will use FREE HuggingFace Inference
|
| 199 |
yield (
|
| 200 |
"🤖 **Free Tier**: Using HuggingFace Inference (Llama 3.1 / Mistral) for AI analysis.\n"
|
| 201 |
"For premium models, enter an OpenAI or Anthropic API key below.\n\n"
|
|
@@ -207,9 +223,8 @@ async def research_agent(
|
|
| 207 |
|
| 208 |
try:
|
| 209 |
# use_mock=False - let configure_orchestrator decide based on available keys
|
| 210 |
-
# It will use: Paid API > HF Inference (free tier)
|
| 211 |
orchestrator, backend_name = configure_orchestrator(
|
| 212 |
-
use_mock=False,
|
| 213 |
mode=mode_validated,
|
| 214 |
user_api_key=user_api_key,
|
| 215 |
domain=domain_str,
|
|
@@ -224,6 +239,28 @@ async def research_agent(
|
|
| 224 |
)
|
| 225 |
|
| 226 |
async for event in orchestrator.run(message):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 227 |
# BUG FIX: Handle streaming events separately to avoid token-by-token spam
|
| 228 |
if event.type == "streaming":
|
| 229 |
# Accumulate streaming tokens without emitting individual events
|
|
@@ -349,6 +386,7 @@ def create_demo() -> tuple[gr.ChatInterface, gr.Accordion]:
|
|
| 349 |
|
| 350 |
def main() -> None:
|
| 351 |
"""Run the Gradio app with MCP server enabled."""
|
|
|
|
| 352 |
demo, _ = create_demo()
|
| 353 |
demo.launch(
|
| 354 |
server_name=os.getenv("GRADIO_SERVER_NAME", "0.0.0.0"), # nosec B104
|
|
|
|
| 21 |
from src.utils.config import settings
|
| 22 |
from src.utils.exceptions import ConfigurationError
|
| 23 |
from src.utils.models import OrchestratorConfig
|
| 24 |
+
from src.utils.service_loader import warmup_services
|
| 25 |
|
| 26 |
OrchestratorMode = Literal["simple", "magentic", "advanced", "hierarchical"]
|
| 27 |
|
|
|
|
| 138 |
return orchestrator, backend_info
|
| 139 |
|
| 140 |
|
| 141 |
+
def _validate_inputs(
|
| 142 |
+
mode: str,
|
| 143 |
+
api_key: str | None,
|
| 144 |
+
api_key_state: str | None,
|
| 145 |
+
) -> tuple[OrchestratorMode, str | None, bool]:
|
| 146 |
+
"""Validate inputs and determine mode/key status.
|
| 147 |
+
|
| 148 |
+
Returns:
|
| 149 |
+
Tuple of (validated_mode, effective_user_key, has_paid_key)
|
| 150 |
+
"""
|
| 151 |
+
# Validate mode
|
| 152 |
+
valid_modes: set[str] = {"simple", "magentic", "advanced", "hierarchical"}
|
| 153 |
+
mode_validated: OrchestratorMode = mode if mode in valid_modes else "simple" # type: ignore[assignment]
|
| 154 |
+
|
| 155 |
+
# Determine effective key
|
| 156 |
+
user_api_key = (api_key or api_key_state or "").strip() or None
|
| 157 |
+
|
| 158 |
+
# Check available keys
|
| 159 |
+
has_openai = settings.has_openai_key
|
| 160 |
+
has_anthropic = settings.has_anthropic_key
|
| 161 |
+
is_openai_user_key = (
|
| 162 |
+
user_api_key and user_api_key.startswith("sk-") and not user_api_key.startswith("sk-ant-")
|
| 163 |
+
)
|
| 164 |
+
has_paid_key = has_openai or has_anthropic or bool(user_api_key)
|
| 165 |
+
|
| 166 |
+
# Fallback logic for Advanced mode
|
| 167 |
+
if mode_validated == "advanced" and not (has_openai or is_openai_user_key):
|
| 168 |
+
mode_validated = "simple"
|
| 169 |
+
|
| 170 |
+
return mode_validated, user_api_key, has_paid_key
|
| 171 |
+
|
| 172 |
+
|
| 173 |
async def research_agent(
|
| 174 |
message: str,
|
| 175 |
history: list[dict[str, Any]],
|
|
|
|
| 177 |
domain: str = "sexual_health",
|
| 178 |
api_key: str = "",
|
| 179 |
api_key_state: str = "",
|
| 180 |
+
progress: gr.Progress = gr.Progress(), # noqa: B008
|
| 181 |
) -> AsyncGenerator[str, None]:
|
| 182 |
"""
|
| 183 |
Gradio chat function that runs the research agent.
|
|
|
|
| 189 |
domain: Research domain
|
| 190 |
api_key: Optional user-provided API key (BYOK - auto-detects provider)
|
| 191 |
api_key_state: Persistent API key state (survives example clicks)
|
| 192 |
+
progress: Gradio progress tracker
|
| 193 |
|
| 194 |
Yields:
|
| 195 |
Markdown-formatted responses for streaming
|
|
|
|
| 199 |
return
|
| 200 |
|
| 201 |
# BUG FIX: Handle None values from Gradio example caching
|
|
|
|
|
|
|
|
|
|
| 202 |
domain_str = domain or "sexual_health"
|
| 203 |
|
| 204 |
+
# Validate inputs using helper to reduce complexity
|
| 205 |
+
mode_validated, user_api_key, has_paid_key = _validate_inputs(mode, api_key, api_key_state)
|
|
|
|
| 206 |
|
| 207 |
+
# Inform user about fallback/tier status
|
| 208 |
+
if mode == "advanced" and mode_validated == "simple":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 209 |
yield (
|
| 210 |
"⚠️ **Warning**: Advanced mode currently requires OpenAI API key. "
|
| 211 |
"Anthropic keys only work in Simple mode. Falling back to Simple.\n\n"
|
| 212 |
)
|
|
|
|
| 213 |
|
|
|
|
| 214 |
if not has_paid_key:
|
|
|
|
| 215 |
yield (
|
| 216 |
"🤖 **Free Tier**: Using HuggingFace Inference (Llama 3.1 / Mistral) for AI analysis.\n"
|
| 217 |
"For premium models, enter an OpenAI or Anthropic API key below.\n\n"
|
|
|
|
| 223 |
|
| 224 |
try:
|
| 225 |
# use_mock=False - let configure_orchestrator decide based on available keys
|
|
|
|
| 226 |
orchestrator, backend_name = configure_orchestrator(
|
| 227 |
+
use_mock=False,
|
| 228 |
mode=mode_validated,
|
| 229 |
user_api_key=user_api_key,
|
| 230 |
domain=domain_str,
|
|
|
|
| 239 |
)
|
| 240 |
|
| 241 |
async for event in orchestrator.run(message):
|
| 242 |
+
# Update progress bar
|
| 243 |
+
if event.type == "started":
|
| 244 |
+
progress(0, desc="Starting research...")
|
| 245 |
+
elif event.type == "thinking":
|
| 246 |
+
progress(0.1, desc="Multi-agent reasoning...")
|
| 247 |
+
elif event.type == "progress":
|
| 248 |
+
# Try to calculate percentage based on max rounds/iterations
|
| 249 |
+
p = None
|
| 250 |
+
max_iters = 10 # default
|
| 251 |
+
if hasattr(orchestrator, "_max_rounds"):
|
| 252 |
+
max_iters = orchestrator._max_rounds
|
| 253 |
+
elif hasattr(orchestrator, "config") and hasattr(
|
| 254 |
+
orchestrator.config, "max_iterations"
|
| 255 |
+
):
|
| 256 |
+
max_iters = orchestrator.config.max_iterations
|
| 257 |
+
|
| 258 |
+
if event.iteration:
|
| 259 |
+
# Map 0..max to 0.2..0.9
|
| 260 |
+
p = 0.2 + (0.7 * (min(event.iteration, max_iters) / max_iters))
|
| 261 |
+
|
| 262 |
+
progress(p, desc=event.message)
|
| 263 |
+
|
| 264 |
# BUG FIX: Handle streaming events separately to avoid token-by-token spam
|
| 265 |
if event.type == "streaming":
|
| 266 |
# Accumulate streaming tokens without emitting individual events
|
|
|
|
| 386 |
|
| 387 |
def main() -> None:
|
| 388 |
"""Run the Gradio app with MCP server enabled."""
|
| 389 |
+
warmup_services() # Phase 2: Pre-warm services
|
| 390 |
demo, _ = create_demo()
|
| 391 |
demo.launch(
|
| 392 |
server_name=os.getenv("GRADIO_SERVER_NAME", "0.0.0.0"), # nosec B104
|
src/utils/service_loader.py
CHANGED
|
@@ -9,6 +9,7 @@ Design Patterns:
|
|
| 9 |
- Strategy Pattern: Selects between EmbeddingService and LlamaIndexRAGService
|
| 10 |
"""
|
| 11 |
|
|
|
|
| 12 |
from typing import TYPE_CHECKING
|
| 13 |
|
| 14 |
import structlog
|
|
@@ -22,6 +23,28 @@ if TYPE_CHECKING:
|
|
| 22 |
logger = structlog.get_logger()
|
| 23 |
|
| 24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
def get_embedding_service() -> "EmbeddingServiceProtocol":
|
| 26 |
"""Get the best available embedding service.
|
| 27 |
|
|
|
|
| 9 |
- Strategy Pattern: Selects between EmbeddingService and LlamaIndexRAGService
|
| 10 |
"""
|
| 11 |
|
| 12 |
+
import threading
|
| 13 |
from typing import TYPE_CHECKING
|
| 14 |
|
| 15 |
import structlog
|
|
|
|
| 23 |
logger = structlog.get_logger()
|
| 24 |
|
| 25 |
|
| 26 |
+
def warmup_services() -> None:
|
| 27 |
+
"""Pre-warm expensive services in a background thread.
|
| 28 |
+
|
| 29 |
+
This reduces the "cold start" latency for the first user request by
|
| 30 |
+
loading heavy models (like SentenceTransformer or LlamaIndex) into memory
|
| 31 |
+
during application startup.
|
| 32 |
+
"""
|
| 33 |
+
|
| 34 |
+
def _warmup() -> None:
|
| 35 |
+
logger.info("🔥 Warmup: Starting background service initialization...")
|
| 36 |
+
try:
|
| 37 |
+
# Trigger model loading (cached globally)
|
| 38 |
+
get_embedding_service_if_available()
|
| 39 |
+
logger.info("🔥 Warmup: Embedding service ready")
|
| 40 |
+
except Exception as e:
|
| 41 |
+
logger.warning("🔥 Warmup: Failed to warm up services", error=str(e))
|
| 42 |
+
|
| 43 |
+
# Run in daemon thread so it doesn't block shutdown
|
| 44 |
+
thread = threading.Thread(target=_warmup, daemon=True)
|
| 45 |
+
thread.start()
|
| 46 |
+
|
| 47 |
+
|
| 48 |
def get_embedding_service() -> "EmbeddingServiceProtocol":
|
| 49 |
"""Get the best available embedding service.
|
| 50 |
|