Mirrowel committed on
Commit
19af906
·
1 Parent(s): 760ee6b

refactor(core): review fixes + improve error handling and execution logic

Browse files

Reorganized request handling in proxy_app/main.py for better logging and error management.
Updated launcher.bat to streamline execution modes and remove redundant code.
Enhanced condition checks in gemini_cli_provider.py for more reliable processing.
Improved cost calculation in usage_manager.py for embeddings.

launcher.bat CHANGED
@@ -182,8 +182,13 @@ if "%LOGGING%"=="true" (
182
  echo Starting Proxy...
183
  echo Arguments: %ARGS%
184
  echo.
185
- call :Execute "" "%ARGS%"
186
- goto :eof
 
 
 
 
 
187
 
188
  :AddCredentials
189
  cls
@@ -206,18 +211,9 @@ echo ==================================================
206
  echo.
207
  echo The build process will start in a new window.
208
  start "Build Process" cmd /c "pip install -r requirements.txt && pip install pyinstaller && python src/proxy_app/build.py && echo Build finished. && pause"
209
- goto :eof
210
 
211
  :: --- Helper Functions ---
212
- :Execute
213
- set "COMMAND=%~1"
214
- set "ARGS=%~2"
215
- if "%EXECUTION_MODE%"=="exe" (
216
- start "LLM API Proxy" %EXE_NAME% %COMMAND% %ARGS%
217
- ) else (
218
- set "PYTHONPATH=%~dp0src;%PYTHONPATH%"
219
- start "LLM API Proxy" python %SOURCE_PATH% %COMMAND% %ARGS%
220
- )
221
 
222
  :SelectModeMenu
223
  cls
 
182
  echo Starting Proxy...
183
  echo Arguments: %ARGS%
184
  echo.
185
+ if "%EXECUTION_MODE%"=="exe" (
186
+ start "LLM API Proxy" %EXE_NAME% %ARGS%
187
+ ) else (
188
+ set "PYTHONPATH=%~dp0src;%PYTHONPATH%"
189
+ start "LLM API Proxy" python %SOURCE_PATH% %ARGS%
190
+ )
191
+ exit /b 0
192
 
193
  :AddCredentials
194
  cls
 
211
  echo.
212
  echo The build process will start in a new window.
213
  start "Build Process" cmd /c "pip install -r requirements.txt && pip install pyinstaller && python src/proxy_app/build.py && echo Build finished. && pause"
214
+ exit /b
215
 
216
  :: --- Helper Functions ---
 
 
 
 
 
 
 
 
 
217
 
218
  :SelectModeMenu
219
  cls
src/proxy_app/main.py CHANGED
@@ -130,6 +130,8 @@ load_dotenv()
130
  # --- Configuration ---
131
  USE_EMBEDDING_BATCHER = False
132
  ENABLE_REQUEST_LOGGING = args.enable_request_logging
 
 
133
  PROXY_API_KEY = os.getenv("PROXY_API_KEY")
134
  if not PROXY_API_KEY:
135
  raise ValueError("PROXY_API_KEY environment variable not set.")
@@ -489,31 +491,29 @@ async def chat_completions(
489
  OpenAI-compatible endpoint powered by the RotatingClient.
490
  Handles both streaming and non-streaming responses and logs them.
491
  """
492
- if ENABLE_REQUEST_LOGGING:
493
- # Preserve and re-use the request body so downstream code can still call request.json()
494
- raw_body = await request.body()
495
- try:
496
- parsed_body = json.loads(raw_body.decode("utf-8")) if raw_body else {}
497
- except Exception:
498
- parsed_body = {}
499
- # Reattach the raw body for later reads
500
- request._body = raw_body
501
-
502
- # Extract the fields we want to log (supporting possible nesting in generationConfig)
503
- model = parsed_body.get("model")
504
- generation_cfg = parsed_body.get("generationConfig", {}) or parsed_body.get("generation_config", {}) or {}
505
- reasoning_effort = parsed_body.get("reasoning_effort") or generation_cfg.get("reasoning_effort")
506
- custom_reasoning_budget = parsed_body.get("custom_reasoning_budget") or generation_cfg.get("custom_reasoning_budget", False)
507
-
508
- logging.getLogger("rotator_library").info(
509
- f"Handling reasoning parameters: model={model}, reasoning_effort={reasoning_effort}, custom_reasoning_budget={custom_reasoning_budget}"
510
- )
511
  logger = DetailedLogger() if ENABLE_REQUEST_LOGGING else None
512
  try:
513
- request_data = await request.json()
 
 
 
 
 
 
514
  if logger:
515
  logger.log_request(headers=request.headers, body=request_data)
516
 
 
 
 
 
 
 
 
 
 
 
 
517
  log_request_to_console(
518
  url=str(request.url),
519
  headers=dict(request.headers),
 
130
  # --- Configuration ---
131
  USE_EMBEDDING_BATCHER = False
132
  ENABLE_REQUEST_LOGGING = args.enable_request_logging
133
+ if ENABLE_REQUEST_LOGGING:
134
+ logging.info("Request logging is enabled.")
135
  PROXY_API_KEY = os.getenv("PROXY_API_KEY")
136
  if not PROXY_API_KEY:
137
  raise ValueError("PROXY_API_KEY environment variable not set.")
 
491
  OpenAI-compatible endpoint powered by the RotatingClient.
492
  Handles both streaming and non-streaming responses and logs them.
493
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
494
  logger = DetailedLogger() if ENABLE_REQUEST_LOGGING else None
495
  try:
496
+ # Read and parse the request body only once at the beginning.
497
+ try:
498
+ request_data = await request.json()
499
+ except json.JSONDecodeError:
500
+ raise HTTPException(status_code=400, detail="Invalid JSON in request body.")
501
+
502
+ # If logging is enabled, perform all logging operations using the parsed data.
503
  if logger:
504
  logger.log_request(headers=request.headers, body=request_data)
505
 
506
+ # Extract and log specific reasoning parameters for monitoring.
507
+ model = request_data.get("model")
508
+ generation_cfg = request_data.get("generationConfig", {}) or request_data.get("generation_config", {}) or {}
509
+ reasoning_effort = request_data.get("reasoning_effort") or generation_cfg.get("reasoning_effort")
510
+ custom_reasoning_budget = request_data.get("custom_reasoning_budget") or generation_cfg.get("custom_reasoning_budget", False)
511
+
512
+ logging.getLogger("rotator_library").info(
513
+ f"Handling reasoning parameters: model={model}, reasoning_effort={reasoning_effort}, custom_reasoning_budget={custom_reasoning_budget}"
514
+ )
515
+
516
+ # Log basic request info to console (this is a separate, simpler logger).
517
  log_request_to_console(
518
  url=str(request.url),
519
  headers=dict(request.headers),
src/rotator_library/providers/gemini_cli_provider.py CHANGED
@@ -231,9 +231,7 @@ class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
231
  if parts:
232
  gemini_contents.append({"role": gemini_role, "parts": parts})
233
 
234
- if not any(c['role'] == 'user' for c in gemini_contents):
235
- gemini_contents.insert(0, {"role": "user", "parts": [{"text": ""}]})
236
- elif gemini_contents and gemini_contents[0]["role"] == "model":
237
  gemini_contents.insert(0, {"role": "user", "parts": [{"text": ""}]})
238
 
239
  return system_instruction, gemini_contents
@@ -303,8 +301,9 @@ class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
303
  }
304
  }]
305
  elif 'text' in part:
306
- # Use a lenient check for the 'thought' flag, as its type can be inconsistent
307
- if str(part.get('thought')).lower() == 'true':
 
308
  delta['reasoning_content'] = part['text']
309
  else:
310
  delta['content'] = part['text']
 
231
  if parts:
232
  gemini_contents.append({"role": gemini_role, "parts": parts})
233
 
234
+ if not gemini_contents or gemini_contents[0]['role'] != 'user':
 
 
235
  gemini_contents.insert(0, {"role": "user", "parts": [{"text": ""}]})
236
 
237
  return system_instruction, gemini_contents
 
301
  }
302
  }]
303
  elif 'text' in part:
304
+ # Use an explicit check for the 'thought' flag, as its type can be inconsistent
305
+ thought = part.get('thought')
306
+ if thought is True or (isinstance(thought, str) and thought.lower() == 'true'):
307
  delta['reasoning_content'] = part['text']
308
  else:
309
  delta['content'] = part['text']
src/rotator_library/usage_manager.py CHANGED
@@ -268,7 +268,13 @@ class UsageManager:
268
  else:
269
  # Differentiate cost calculation based on response type
270
  if isinstance(completion_response, litellm.EmbeddingResponse):
271
- cost = litellm.embedding_cost(embedding_response=completion_response)
 
 
 
 
 
 
272
  else:
273
  cost = litellm.completion_cost(completion_response=completion_response, model=model)
274
 
@@ -276,7 +282,7 @@ class UsageManager:
276
  daily_model_data["approx_cost"] += cost
277
  except Exception as e:
278
  lib_logger.warning(f"Could not calculate cost for model {model}: {e}")
279
- elif asyncio.iscoroutine(completion_response) or isinstance(completion_response, asyncio.Future) or hasattr(completion_response, '__aiter__'):
280
  # This is an unconsumed stream object. Do not log a warning, as usage will be recorded from the chunks.
281
  pass
282
  else:
 
268
  else:
269
  # Differentiate cost calculation based on response type
270
  if isinstance(completion_response, litellm.EmbeddingResponse):
271
+ # Manually calculate cost for embeddings
272
+ model_info = litellm.get_model_info(model)
273
+ input_cost = model_info.get("input_cost_per_token")
274
+ if input_cost:
275
+ cost = completion_response.usage.prompt_tokens * input_cost
276
+ else:
277
+ cost = None
278
  else:
279
  cost = litellm.completion_cost(completion_response=completion_response, model=model)
280
 
 
282
  daily_model_data["approx_cost"] += cost
283
  except Exception as e:
284
  lib_logger.warning(f"Could not calculate cost for model {model}: {e}")
285
+ elif isinstance(completion_response, asyncio.Future) or hasattr(completion_response, '__aiter__'):
286
  # This is an unconsumed stream object. Do not log a warning, as usage will be recorded from the chunks.
287
  pass
288
  else: