Spaces:
Paused
Paused
Mirrowel committed on
Commit ·
19af906
1
Parent(s): 760ee6b
refactor(core): review fixes + improve error handling and execution logic
Browse files
Reorganized request handling in proxy_app/main.py for better logging and error management.
Updated launcher.bat to streamline execution modes and remove redundant code.
Enhanced condition checks in gemini_cli_provider.py for more reliable processing.
Improved cost calculation in usage_manager.py for embeddings.
launcher.bat
CHANGED
|
@@ -182,8 +182,13 @@ if "%LOGGING%"=="true" (
|
|
| 182 |
echo Starting Proxy...
|
| 183 |
echo Arguments: %ARGS%
|
| 184 |
echo.
|
| 185 |
-
|
| 186 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 187 |
|
| 188 |
:AddCredentials
|
| 189 |
cls
|
|
@@ -206,18 +211,9 @@ echo ==================================================
|
|
| 206 |
echo.
|
| 207 |
echo The build process will start in a new window.
|
| 208 |
start "Build Process" cmd /c "pip install -r requirements.txt && pip install pyinstaller && python src/proxy_app/build.py && echo Build finished. && pause"
|
| 209 |
-
|
| 210 |
|
| 211 |
:: --- Helper Functions ---
|
| 212 |
-
:Execute
|
| 213 |
-
set "COMMAND=%~1"
|
| 214 |
-
set "ARGS=%~2"
|
| 215 |
-
if "%EXECUTION_MODE%"=="exe" (
|
| 216 |
-
start "LLM API Proxy" %EXE_NAME% %COMMAND% %ARGS%
|
| 217 |
-
) else (
|
| 218 |
-
set "PYTHONPATH=%~dp0src;%PYTHONPATH%"
|
| 219 |
-
start "LLM API Proxy" python %SOURCE_PATH% %COMMAND% %ARGS%
|
| 220 |
-
)
|
| 221 |
|
| 222 |
:SelectModeMenu
|
| 223 |
cls
|
|
|
|
| 182 |
echo Starting Proxy...
|
| 183 |
echo Arguments: %ARGS%
|
| 184 |
echo.
|
| 185 |
+
if "%EXECUTION_MODE%"=="exe" (
|
| 186 |
+
start "LLM API Proxy" %EXE_NAME% %ARGS%
|
| 187 |
+
) else (
|
| 188 |
+
set "PYTHONPATH=%~dp0src;%PYTHONPATH%"
|
| 189 |
+
start "LLM API Proxy" python %SOURCE_PATH% %ARGS%
|
| 190 |
+
)
|
| 191 |
+
exit /b 0
|
| 192 |
|
| 193 |
:AddCredentials
|
| 194 |
cls
|
|
|
|
| 211 |
echo.
|
| 212 |
echo The build process will start in a new window.
|
| 213 |
start "Build Process" cmd /c "pip install -r requirements.txt && pip install pyinstaller && python src/proxy_app/build.py && echo Build finished. && pause"
|
| 214 |
+
exit /b
|
| 215 |
|
| 216 |
:: --- Helper Functions ---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 217 |
|
| 218 |
:SelectModeMenu
|
| 219 |
cls
|
src/proxy_app/main.py
CHANGED
|
@@ -130,6 +130,8 @@ load_dotenv()
|
|
| 130 |
# --- Configuration ---
|
| 131 |
USE_EMBEDDING_BATCHER = False
|
| 132 |
ENABLE_REQUEST_LOGGING = args.enable_request_logging
|
|
|
|
|
|
|
| 133 |
PROXY_API_KEY = os.getenv("PROXY_API_KEY")
|
| 134 |
if not PROXY_API_KEY:
|
| 135 |
raise ValueError("PROXY_API_KEY environment variable not set.")
|
|
@@ -489,31 +491,29 @@ async def chat_completions(
|
|
| 489 |
OpenAI-compatible endpoint powered by the RotatingClient.
|
| 490 |
Handles both streaming and non-streaming responses and logs them.
|
| 491 |
"""
|
| 492 |
-
if ENABLE_REQUEST_LOGGING:
|
| 493 |
-
# Preserve and re-use the request body so downstream code can still call request.json()
|
| 494 |
-
raw_body = await request.body()
|
| 495 |
-
try:
|
| 496 |
-
parsed_body = json.loads(raw_body.decode("utf-8")) if raw_body else {}
|
| 497 |
-
except Exception:
|
| 498 |
-
parsed_body = {}
|
| 499 |
-
# Reattach the raw body for later reads
|
| 500 |
-
request._body = raw_body
|
| 501 |
-
|
| 502 |
-
# Extract the fields we want to log (supporting possible nesting in generationConfig)
|
| 503 |
-
model = parsed_body.get("model")
|
| 504 |
-
generation_cfg = parsed_body.get("generationConfig", {}) or parsed_body.get("generation_config", {}) or {}
|
| 505 |
-
reasoning_effort = parsed_body.get("reasoning_effort") or generation_cfg.get("reasoning_effort")
|
| 506 |
-
custom_reasoning_budget = parsed_body.get("custom_reasoning_budget") or generation_cfg.get("custom_reasoning_budget", False)
|
| 507 |
-
|
| 508 |
-
logging.getLogger("rotator_library").info(
|
| 509 |
-
f"Handling reasoning parameters: model={model}, reasoning_effort={reasoning_effort}, custom_reasoning_budget={custom_reasoning_budget}"
|
| 510 |
-
)
|
| 511 |
logger = DetailedLogger() if ENABLE_REQUEST_LOGGING else None
|
| 512 |
try:
|
| 513 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 514 |
if logger:
|
| 515 |
logger.log_request(headers=request.headers, body=request_data)
|
| 516 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 517 |
log_request_to_console(
|
| 518 |
url=str(request.url),
|
| 519 |
headers=dict(request.headers),
|
|
|
|
| 130 |
# --- Configuration ---
|
| 131 |
USE_EMBEDDING_BATCHER = False
|
| 132 |
ENABLE_REQUEST_LOGGING = args.enable_request_logging
|
| 133 |
+
if ENABLE_REQUEST_LOGGING:
|
| 134 |
+
logging.info("Request logging is enabled.")
|
| 135 |
PROXY_API_KEY = os.getenv("PROXY_API_KEY")
|
| 136 |
if not PROXY_API_KEY:
|
| 137 |
raise ValueError("PROXY_API_KEY environment variable not set.")
|
|
|
|
| 491 |
OpenAI-compatible endpoint powered by the RotatingClient.
|
| 492 |
Handles both streaming and non-streaming responses and logs them.
|
| 493 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 494 |
logger = DetailedLogger() if ENABLE_REQUEST_LOGGING else None
|
| 495 |
try:
|
| 496 |
+
# Read and parse the request body only once at the beginning.
|
| 497 |
+
try:
|
| 498 |
+
request_data = await request.json()
|
| 499 |
+
except json.JSONDecodeError:
|
| 500 |
+
raise HTTPException(status_code=400, detail="Invalid JSON in request body.")
|
| 501 |
+
|
| 502 |
+
# If logging is enabled, perform all logging operations using the parsed data.
|
| 503 |
if logger:
|
| 504 |
logger.log_request(headers=request.headers, body=request_data)
|
| 505 |
|
| 506 |
+
# Extract and log specific reasoning parameters for monitoring.
|
| 507 |
+
model = request_data.get("model")
|
| 508 |
+
generation_cfg = request_data.get("generationConfig", {}) or request_data.get("generation_config", {}) or {}
|
| 509 |
+
reasoning_effort = request_data.get("reasoning_effort") or generation_cfg.get("reasoning_effort")
|
| 510 |
+
custom_reasoning_budget = request_data.get("custom_reasoning_budget") or generation_cfg.get("custom_reasoning_budget", False)
|
| 511 |
+
|
| 512 |
+
logging.getLogger("rotator_library").info(
|
| 513 |
+
f"Handling reasoning parameters: model={model}, reasoning_effort={reasoning_effort}, custom_reasoning_budget={custom_reasoning_budget}"
|
| 514 |
+
)
|
| 515 |
+
|
| 516 |
+
# Log basic request info to console (this is a separate, simpler logger).
|
| 517 |
log_request_to_console(
|
| 518 |
url=str(request.url),
|
| 519 |
headers=dict(request.headers),
|
src/rotator_library/providers/gemini_cli_provider.py
CHANGED
|
@@ -231,9 +231,7 @@ class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
|
|
| 231 |
if parts:
|
| 232 |
gemini_contents.append({"role": gemini_role, "parts": parts})
|
| 233 |
|
| 234 |
-
if not
|
| 235 |
-
gemini_contents.insert(0, {"role": "user", "parts": [{"text": ""}]})
|
| 236 |
-
elif gemini_contents and gemini_contents[0]["role"] == "model":
|
| 237 |
gemini_contents.insert(0, {"role": "user", "parts": [{"text": ""}]})
|
| 238 |
|
| 239 |
return system_instruction, gemini_contents
|
|
@@ -303,8 +301,9 @@ class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
|
|
| 303 |
}
|
| 304 |
}]
|
| 305 |
elif 'text' in part:
|
| 306 |
-
# Use
|
| 307 |
-
|
|
|
|
| 308 |
delta['reasoning_content'] = part['text']
|
| 309 |
else:
|
| 310 |
delta['content'] = part['text']
|
|
|
|
| 231 |
if parts:
|
| 232 |
gemini_contents.append({"role": gemini_role, "parts": parts})
|
| 233 |
|
| 234 |
+
if not gemini_contents or gemini_contents[0]['role'] != 'user':
|
|
|
|
|
|
|
| 235 |
gemini_contents.insert(0, {"role": "user", "parts": [{"text": ""}]})
|
| 236 |
|
| 237 |
return system_instruction, gemini_contents
|
|
|
|
| 301 |
}
|
| 302 |
}]
|
| 303 |
elif 'text' in part:
|
| 304 |
+
# Use an explicit check for the 'thought' flag, as its type can be inconsistent
|
| 305 |
+
thought = part.get('thought')
|
| 306 |
+
if thought is True or (isinstance(thought, str) and thought.lower() == 'true'):
|
| 307 |
delta['reasoning_content'] = part['text']
|
| 308 |
else:
|
| 309 |
delta['content'] = part['text']
|
src/rotator_library/usage_manager.py
CHANGED
|
@@ -268,7 +268,13 @@ class UsageManager:
|
|
| 268 |
else:
|
| 269 |
# Differentiate cost calculation based on response type
|
| 270 |
if isinstance(completion_response, litellm.EmbeddingResponse):
|
| 271 |
-
cost
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 272 |
else:
|
| 273 |
cost = litellm.completion_cost(completion_response=completion_response, model=model)
|
| 274 |
|
|
@@ -276,7 +282,7 @@ class UsageManager:
|
|
| 276 |
daily_model_data["approx_cost"] += cost
|
| 277 |
except Exception as e:
|
| 278 |
lib_logger.warning(f"Could not calculate cost for model {model}: {e}")
|
| 279 |
-
elif
|
| 280 |
# This is an unconsumed stream object. Do not log a warning, as usage will be recorded from the chunks.
|
| 281 |
pass
|
| 282 |
else:
|
|
|
|
| 268 |
else:
|
| 269 |
# Differentiate cost calculation based on response type
|
| 270 |
if isinstance(completion_response, litellm.EmbeddingResponse):
|
| 271 |
+
# Manually calculate cost for embeddings
|
| 272 |
+
model_info = litellm.get_model_info(model)
|
| 273 |
+
input_cost = model_info.get("input_cost_per_token")
|
| 274 |
+
if input_cost:
|
| 275 |
+
cost = completion_response.usage.prompt_tokens * input_cost
|
| 276 |
+
else:
|
| 277 |
+
cost = None
|
| 278 |
else:
|
| 279 |
cost = litellm.completion_cost(completion_response=completion_response, model=model)
|
| 280 |
|
|
|
|
| 282 |
daily_model_data["approx_cost"] += cost
|
| 283 |
except Exception as e:
|
| 284 |
lib_logger.warning(f"Could not calculate cost for model {model}: {e}")
|
| 285 |
+
elif isinstance(completion_response, asyncio.Future) or hasattr(completion_response, '__aiter__'):
|
| 286 |
# This is an unconsumed stream object. Do not log a warning, as usage will be recorded from the chunks.
|
| 287 |
pass
|
| 288 |
else:
|