Spaces:
Running
Running
File size: 4,735 Bytes
0157ac7 ebba9d6 0157ac7 ebba9d6 0157ac7 ebba9d6 0157ac7 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 | """Optimization handlers for fast-path API responses.
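Each handler returns a MessagesResponse when the request matches and the
corresponding optimization is enabled in Settings, otherwise None. The one
exception is try_trivial_text, which has no settings flag and is always active.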
"""
import uuid
from loguru import logger
from config.settings import Settings
from .command_utils import extract_command_prefix, extract_filepaths_from_command
from .detection import (
is_filepath_extraction_request,
is_prefix_detection_request,
is_quota_check_request,
is_suggestion_mode_request,
is_title_generation_request,
is_trivial_text_request,
)
from .models.anthropic import MessagesRequest
from .models.responses import MessagesResponse, Usage
def _text_response(
    request_data: MessagesRequest,
    text: str,
    *,
    input_tokens: int,
    output_tokens: int,
) -> MessagesResponse:
    """Build a synthetic response that carries a single text block.

    The response gets a fresh ``msg_<uuid4>`` id, echoes the model named in
    ``request_data``, always stops with ``end_turn`` and reports the
    caller-supplied token counts as its usage.

    Args:
        request_data: Incoming request; only its ``model`` attribute is read.
        text: Body of the single text content block.
        input_tokens: Input token count to report in ``usage``.
        output_tokens: Output token count to report in ``usage``.

    Returns:
        The assembled ``MessagesResponse``.
    """
    message_id = f"msg_{uuid.uuid4()}"
    model_name = request_data.model
    text_block = {"type": "text", "text": text}
    reported_usage = Usage(input_tokens=input_tokens, output_tokens=output_tokens)
    return MessagesResponse(
        id=message_id,
        model=model_name,
        content=[text_block],
        stop_reason="end_turn",
        usage=reported_usage,
    )
def try_prefix_detection(
    request_data: MessagesRequest, settings: Settings
) -> MessagesResponse | None:
    """Answer a command-prefix detection request locally.

    Returns None when ``settings.fast_prefix_detection`` is falsy or when
    ``is_prefix_detection_request`` does not flag the request. Otherwise
    returns a synthetic response whose text is
    ``extract_command_prefix(command)``.
    """
    if settings.fast_prefix_detection:
        matched, command = is_prefix_detection_request(request_data)
        if matched:
            logger.info("Optimization: Fast prefix detection request")
            prefix = extract_command_prefix(command)
            # Token counts are hard-coded constants, not measured usage.
            return _text_response(
                request_data, prefix, input_tokens=100, output_tokens=5
            )
    return None
def try_quota_mock(
    request_data: MessagesRequest, settings: Settings
) -> MessagesResponse | None:
    """Reply to a quota probe with a canned message.

    Returns None unless ``settings.enable_network_probe_mock`` is truthy and
    ``is_quota_check_request`` flags the request. The detector is not called
    at all when the setting is off.
    """
    # `and` short-circuits, so detection only runs when the mock is enabled.
    should_mock = settings.enable_network_probe_mock and is_quota_check_request(
        request_data
    )
    if not should_mock:
        return None

    logger.info("Optimization: Intercepted and mocked quota probe")
    canned_reply = "Quota check passed."
    return _text_response(
        request_data, canned_reply, input_tokens=10, output_tokens=5
    )
def try_title_skip(
    request_data: MessagesRequest, settings: Settings
) -> MessagesResponse | None:
    """Short-circuit a title generation request with a fixed title.

    When ``settings.enable_title_generation_skip`` is truthy and
    ``is_title_generation_request`` flags the request, the literal text
    ``"Conversation"`` is returned as the response. In every other case the
    result is None.
    """
    if settings.enable_title_generation_skip and is_title_generation_request(
        request_data
    ):
        logger.info("Optimization: Skipped title generation request")
        placeholder_title = "Conversation"
        return _text_response(
            request_data, placeholder_title, input_tokens=100, output_tokens=5
        )
    return None
def try_suggestion_skip(
    request_data: MessagesRequest, settings: Settings
) -> MessagesResponse | None:
    """Short-circuit a suggestion-mode request with an empty text reply.

    Returns None when ``settings.enable_suggestion_mode_skip`` is falsy or
    ``is_suggestion_mode_request`` does not flag the request. Otherwise the
    response carries a single text block whose text is the empty string.
    """
    # `or` short-circuits: the detector is skipped when the setting is off.
    if not settings.enable_suggestion_mode_skip or not is_suggestion_mode_request(
        request_data
    ):
        return None

    logger.info("Optimization: Skipped suggestion mode request")
    empty_suggestion = ""
    return _text_response(
        request_data, empty_suggestion, input_tokens=100, output_tokens=1
    )
def try_filepath_mock(
    request_data: MessagesRequest, settings: Settings
) -> MessagesResponse | None:
    """Answer a filepath extraction request locally.

    Returns None when ``settings.enable_filepath_extraction_mock`` is falsy
    or ``is_filepath_extraction_request`` does not flag the request.
    Otherwise the command and output reported by the detector are passed to
    ``extract_filepaths_from_command`` and its result becomes the response
    text.
    """
    if settings.enable_filepath_extraction_mock:
        matched, command, command_output = is_filepath_extraction_request(
            request_data
        )
        if matched:
            # Extraction runs before the log line, as in the original flow.
            extracted = extract_filepaths_from_command(command, command_output)
            logger.info("Optimization: Mocked filepath extraction")
            token_counts = {"input_tokens": 100, "output_tokens": 10}
            return _text_response(request_data, extracted, **token_counts)
    return None
def try_trivial_text(
    request_data: MessagesRequest, settings: Settings
) -> MessagesResponse | None:
    """Fast-path trivial text requests (hi, ok, status checks) without API call.

    ``settings`` is accepted so the signature matches the other handlers but
    is never read here: unlike its siblings, this handler has no enabling
    flag and always runs its detector. Returns None when
    ``is_trivial_text_request`` does not flag the request; otherwise the text
    it supplies becomes the response body.
    """
    matched, reply = is_trivial_text_request(request_data)
    if matched:
        logger.info("Optimization: Fast-path trivial text request")
        return _text_response(
            request_data, reply, input_tokens=5, output_tokens=3
        )
    return None
# Registry of fast-path handlers. try_optimizations walks this list in order
# and returns the first non-None result, so position decides which handler
# wins when more than one could match a request.
# Cheapest/most common optimizations first for faster short-circuit.
# NOTE: try_trivial_text (first entry) does not consult any settings flag.
OPTIMIZATION_HANDLERS = [
try_trivial_text,
try_quota_mock,
try_prefix_detection,
try_title_skip,
try_suggestion_skip,
try_filepath_mock,
]
def try_optimizations(
    request_data: MessagesRequest, settings: Settings
) -> MessagesResponse | None:
    """Return the first fast-path response any registered handler produces.

    Handlers in ``OPTIMIZATION_HANDLERS`` are called in list order with
    ``(request_data, settings)``. The first result that is not None is
    returned immediately and later handlers are not invoked. Returns None
    when every handler declines.
    """
    for attempt in OPTIMIZATION_HANDLERS:
        if (response := attempt(request_data, settings)) is not None:
            return response
    return None
|