File size: 4,735 Bytes
0157ac7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ebba9d6
0157ac7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ebba9d6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0157ac7
 
ebba9d6
0157ac7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
"""Optimization handlers for fast-path API responses.

Each handler returns a MessagesResponse when the request matches (and, for
handlers gated by a settings flag, that flag is enabled); otherwise it
returns None.
"""

import uuid

from loguru import logger

from config.settings import Settings

from .command_utils import extract_command_prefix, extract_filepaths_from_command
from .detection import (
    is_filepath_extraction_request,
    is_prefix_detection_request,
    is_quota_check_request,
    is_suggestion_mode_request,
    is_title_generation_request,
    is_trivial_text_request,
)
from .models.anthropic import MessagesRequest
from .models.responses import MessagesResponse, Usage


def _text_response(
    request_data: MessagesRequest,
    text: str,
    *,
    input_tokens: int,
    output_tokens: int,
) -> MessagesResponse:
    """Build a ``MessagesResponse`` holding one text block.

    The response gets a fresh ``msg_<uuid4>`` id, reuses the model named in
    *request_data*, carries *text* as its only content block, and always
    stops with ``"end_turn"``. The token counts are passed straight through
    to ``Usage`` as given by the caller.
    """
    message_id = f"msg_{uuid.uuid4()}"
    model_name = request_data.model
    text_block = {"type": "text", "text": text}
    token_usage = Usage(input_tokens=input_tokens, output_tokens=output_tokens)
    return MessagesResponse(
        id=message_id,
        model=model_name,
        content=[text_block],
        stop_reason="end_turn",
        usage=token_usage,
    )


def try_prefix_detection(
    request_data: MessagesRequest, settings: Settings
) -> MessagesResponse | None:
    """Answer a prefix detection request locally.

    Returns None when ``settings.fast_prefix_detection`` is off or the
    request is not recognised as a prefix detection request. Otherwise the
    reply text is the result of ``extract_command_prefix`` on the detected
    command, with hard-coded usage figures (100 input / 5 output tokens).
    """
    if not settings.fast_prefix_detection:
        return None

    matched, detected_command = is_prefix_detection_request(request_data)
    if matched:
        logger.info("Optimization: Fast prefix detection request")
        prefix = extract_command_prefix(detected_command)
        return _text_response(
            request_data, prefix, input_tokens=100, output_tokens=5
        )
    return None


def try_quota_mock(
    request_data: MessagesRequest, settings: Settings
) -> MessagesResponse | None:
    """Answer quota probe requests with a canned reply.

    Returns None unless ``settings.enable_network_probe_mock`` is on and the
    request is detected as a quota check. The detector is only consulted
    when the flag is enabled. On a match the reply text is the fixed string
    ``"Quota check passed."`` with hard-coded usage figures.
    """
    mock_enabled = settings.enable_network_probe_mock
    if not (mock_enabled and is_quota_check_request(request_data)):
        return None

    logger.info("Optimization: Intercepted and mocked quota probe")
    canned_reply = "Quota check passed."
    return _text_response(
        request_data, canned_reply, input_tokens=10, output_tokens=5
    )


def try_title_skip(
    request_data: MessagesRequest, settings: Settings
) -> MessagesResponse | None:
    """Short-circuit title generation requests with a fixed title.

    Returns None unless ``settings.enable_title_generation_skip`` is on and
    the request is detected as a title generation request. The detector is
    only consulted when the flag is enabled. On a match the reply text is
    the fixed string ``"Conversation"`` with hard-coded usage figures.
    """
    skip_enabled = settings.enable_title_generation_skip
    if not (skip_enabled and is_title_generation_request(request_data)):
        return None

    logger.info("Optimization: Skipped title generation request")
    placeholder_title = "Conversation"
    return _text_response(
        request_data, placeholder_title, input_tokens=100, output_tokens=5
    )


def try_suggestion_skip(
    request_data: MessagesRequest, settings: Settings
) -> MessagesResponse | None:
    """Short-circuit suggestion mode requests with an empty reply.

    Returns None when ``settings.enable_suggestion_mode_skip`` is off or the
    request is not detected as a suggestion mode request. On a match the
    reply text is the empty string, reported with hard-coded usage figures
    (100 input / 1 output token).
    """
    if not settings.enable_suggestion_mode_skip:
        return None

    if is_suggestion_mode_request(request_data):
        logger.info("Optimization: Skipped suggestion mode request")
        return _text_response(
            request_data, "", input_tokens=100, output_tokens=1
        )
    return None


def try_filepath_mock(
    request_data: MessagesRequest, settings: Settings
) -> MessagesResponse | None:
    """Answer filepath extraction requests locally.

    Returns None when ``settings.enable_filepath_extraction_mock`` is off or
    the request is not detected as a filepath extraction request. On a match
    the reply text is whatever ``extract_filepaths_from_command`` returns
    for the detected command and its output, with hard-coded usage figures.
    """
    if not settings.enable_filepath_extraction_mock:
        return None

    matched, command, command_output = is_filepath_extraction_request(
        request_data
    )
    if matched:
        # Extraction runs before the log line, as in the original ordering.
        extracted = extract_filepaths_from_command(command, command_output)
        logger.info("Optimization: Mocked filepath extraction")
        return _text_response(
            request_data, extracted, input_tokens=100, output_tokens=10
        )
    return None


def try_trivial_text(
    request_data: MessagesRequest, settings: Settings
) -> MessagesResponse | None:
    """Fast-path trivial text requests (hi, ok, status checks) locally.

    Returns None when the request is not detected as trivial. On a match the
    reply text is the second value returned by ``is_trivial_text_request``,
    with hard-coded usage figures (5 input / 3 output tokens).

    *settings* is accepted so the signature matches the other handlers; it
    is not read here, so this fast path has no enable flag.
    """
    matched, reply_text = is_trivial_text_request(request_data)
    if matched:
        logger.info("Optimization: Fast-path trivial text request")
        return _text_response(
            request_data, reply_text, input_tokens=5, output_tokens=3
        )
    return None


# Handlers are tried in list order by try_optimizations, which returns the
# first non-None result; position here therefore decides which handler wins
# when more than one could match the same request.
# Cheapest/most common optimizations first for faster short-circuit.
OPTIMIZATION_HANDLERS = [
    try_trivial_text,
    try_quota_mock,
    try_prefix_detection,
    try_title_skip,
    try_suggestion_skip,
    try_filepath_mock,
]


def try_optimizations(
    request_data: MessagesRequest, settings: Settings
) -> MessagesResponse | None:
    """Return the first fast-path response produced for this request.

    Each entry of ``OPTIMIZATION_HANDLERS`` is called in list order with
    *request_data* and *settings*. The first result that is not None is
    returned immediately and later handlers are not called. If every
    handler declines, None is returned.
    """
    for candidate in OPTIMIZATION_HANDLERS:
        if (response := candidate(request_data, settings)) is not None:
            return response
    return None