# AjinkyaPagare's picture
# Commit message: fix: python3.10, disable flash-attn, reduce memory for HF Spaces CPU
# Commit: 043a495
"""
Fast Path Optimizer — identifies and executes optimized execution paths.
Analyzes goals to determine if they can be handled via a "fast path"
that bypasses the full DAG mesh. Fast paths are pre-compiled execution
strategies for common goal types.
Ultra-lightweight: pure Python pattern matching, no ML overhead.
"""
import ast
import logging
import operator
import os
import re
from typing import Optional, Callable, Awaitable
class FastPathOptimizer:
    """
    Identifies and executes optimized fast paths for common goal types.

    Fast paths bypass the full DAG-based execution mesh for simple
    goals, reducing latency from seconds to milliseconds.

    Registered fast paths:
    - direct_reply: Simple Q&A and short definition/explanation requests
    - quick_calc: Mathematical calculations

    Any other path name passed to ``execute_fast_path`` yields ``None``.
    """

    # A maximal run of characters that can appear in an arithmetic expression.
    _MATH_RUN = re.compile(r"[0-9+\-*/().%\s]+")
    # "operand, binary operator, operand" -- a real calculation, not a bare
    # hyphen or slash inside ordinary prose.
    _ARITHMETIC = re.compile(r"[\d)]\s*(?:\*\*|//|[-+*/%])\s*[-+(\s]*\.?\d")
    # Whole-word match so that "computer" / "computed" do not trigger.
    _CALC_KEYWORD = re.compile(r"\b(?:calculate|compute)\b")
    # Characters hinting at structured/multi-line input that is not a quick question.
    _STRUCTURAL_CHARS = ("\n", "{", "}", "[", "]")
    # Upper bound (in bits) on the result of an integer power, so inputs such
    # as "9**9**9**9" are rejected instead of exhausting CPU and memory.
    _MAX_POWER_BITS = 10_000

    _BINARY_OPS = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Mod: operator.mod,
        ast.Pow: operator.pow,
    }
    _UNARY_OPS = {
        ast.UAdd: operator.pos,
        ast.USub: operator.neg,
    }

    def __init__(
        self,
        llm_call_fn: Optional[Callable[..., Awaitable[str]]] = None,
    ):
        """
        Args:
            llm_call_fn: Optional async callable invoked as
                ``llm_call_fn(prompt, model_hint=..., max_tokens=...)``.
                When omitted, the fast paths return placeholder strings
                instead of model output.
        """
        self._llm = llm_call_fn
        self._fast_paths = {
            "direct_reply": self._fast_direct,
            "quick_calc": self._fast_calc,
        }

    def detect_fast_path(self, goal: str) -> Optional[str]:
        """
        Detect if a goal can be handled via a fast path.

        Checks, in order:
        1. goals under 80 chars containing one evaluable arithmetic
           expression -> "quick_calc" (checked first so that short sums
           such as "2 + 2" are computed locally rather than sent to the LLM)
        2. goals under 40 chars without structural characters -> "direct_reply"
        3. goals under 80 chars containing the word "calculate"/"compute"
           -> "quick_calc"
        4. goals under 60 chars starting with a definition/explanation
           phrase -> "direct_reply"

        Returns the fast path name, or None for empty/whitespace-only goals
        and for goals that match none of the rules.
        """
        if not goal:
            return None
        text = goal.strip()
        if not text:
            return None
        lowered = text.lower()
        length = len(lowered)

        # Real arithmetic (operand-operator-operand) wins over everything else.
        expression = self._extract_expression(text)
        if expression and length < 80 and self._ARITHMETIC.search(expression):
            return "quick_calc"

        # Very short queries with no structural characters.
        if length < 40 and not any(ch in text for ch in self._STRUCTURAL_CHARS):
            return "direct_reply"

        # Explicit calculation requests phrased in words.
        if length < 80 and self._CALC_KEYWORD.search(lowered):
            return "quick_calc"

        # Definition/explanation requests.
        if length < 60 and lowered.startswith(
            ("what is", "define", "what does", "explain")
        ):
            return "direct_reply"

        return None

    async def execute_fast_path(self, path_name: str, goal: str) -> Optional[str]:
        """
        Execute a fast path and return the result.

        Returns None if ``path_name`` is not registered or if the handler
        raises; the failure is logged so it is not lost silently.
        """
        handler = self._fast_paths.get(path_name)
        if not handler:
            return None
        try:
            return await handler(goal)
        except Exception:
            # Deliberately best-effort: the caller treats None as "use the
            # regular execution path", so log and carry on.
            logging.getLogger(__name__).warning(
                "Fast path %r failed", path_name, exc_info=True
            )
            return None

    async def _fast_direct(self, goal: str) -> str:
        """Fast path for direct Q&A: forward the goal to the LLM if one is set."""
        if self._llm:
            return await self._llm(goal, model_hint="fast", max_tokens=500)
        return f"Response to: {goal}"

    async def _fast_calc(self, goal: str) -> str:
        """
        Fast path for mathematical calculations.

        Evaluates the arithmetic expression embedded in ``goal`` locally.
        If there is no unambiguous expression, or it cannot be evaluated,
        falls back to the LLM, and finally to a "Cannot calculate" message.
        """
        expression = self._extract_expression(goal)
        if expression:
            try:
                # Formatting stays inside the try: converting a very large
                # int to text can itself raise ValueError.
                return f"Result: {self._safe_eval(expression)}"
            except (
                SyntaxError,
                ValueError,
                TypeError,
                ArithmeticError,
                RecursionError,
                MemoryError,
            ):
                # Not a valid/safe expression -- fall through to the LLM.
                pass
        if self._llm:
            return await self._llm(
                f"Calculate: {goal}", model_hint="fast", max_tokens=100
            )
        return f"Cannot calculate: {goal}"

    @classmethod
    def _extract_expression(cls, goal: str) -> Optional[str]:
        """
        Return the single arithmetic expression embedded in ``goal``.

        Returns None when the goal has no run of math characters containing
        a digit, or more than one: words sitting between numbers
        ("15% of 80") carry meaning that would be lost by gluing the
        numbers together.
        """
        numeric_runs = []
        for match in cls._MATH_RUN.finditer(goal):
            # A trailing "." is treated as sentence punctuation, not "3." float syntax.
            candidate = match.group().strip().rstrip(".").rstrip()
            if any(ch.isdigit() for ch in candidate):
                numeric_runs.append(candidate)
        if len(numeric_runs) != 1:
            return None
        return numeric_runs[0]

    @classmethod
    def _safe_eval(cls, expression: str):
        """
        Evaluate a pure arithmetic expression without using ``eval``.

        Raises SyntaxError for unparsable input and ValueError for any
        construct other than numeric literals and arithmetic operators.
        """
        tree = ast.parse(expression, mode="eval")
        return cls._eval_node(tree.body)

    @classmethod
    def _eval_node(cls, node):
        """Recursively evaluate one AST node of an arithmetic expression."""
        if isinstance(node, ast.Constant):
            value = node.value
            if isinstance(value, bool) or not isinstance(value, (int, float)):
                raise ValueError("unsupported literal")
            return value
        if isinstance(node, ast.UnaryOp):
            unary = cls._UNARY_OPS.get(type(node.op))
            if unary is None:
                raise ValueError("unsupported unary operator")
            return unary(cls._eval_node(node.operand))
        if isinstance(node, ast.BinOp):
            binary = cls._BINARY_OPS.get(type(node.op))
            if binary is None:
                raise ValueError("unsupported binary operator")
            left = cls._eval_node(node.left)
            right = cls._eval_node(node.right)
            if isinstance(node.op, ast.Pow):
                cls._check_power(left, right)
            return binary(left, right)
        raise ValueError("unsupported expression")

    @classmethod
    def _check_power(cls, base, exponent) -> None:
        """Raise ValueError if ``base ** exponent`` would be an enormous integer."""
        if isinstance(base, int) and isinstance(exponent, int) and exponent > 0:
            if base.bit_length() * exponent > cls._MAX_POWER_BITS:
                raise ValueError("exponentiation result too large")