Spaces:
Paused
Paused
Mirrowel committed on
Commit ·
19af906
1
Parent(s): 760ee6b
refactor(core): review fixes + improve error handling and execution logic
Browse files
Reorganized request handling in proxy_app/main.py for better logging and error management.
Updated launcher.bat to streamline execution modes and remove redundant code.
Enhanced condition checks in gemini_cli_provider.py for more reliable processing.
Improved cost calculation in usage_manager.py for embeddings.
launcher.bat
CHANGED
|
@@ -182,8 +182,13 @@ if "%LOGGING%"=="true" (
|
|
| 182 |
echo Starting Proxy...
|
| 183 |
echo Arguments: %ARGS%
|
| 184 |
echo.
|
| 185 |
-
|
| 186 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 187 |
|
| 188 |
:AddCredentials
|
| 189 |
cls
|
|
@@ -206,18 +211,9 @@ echo ==================================================
|
|
| 206 |
echo.
|
| 207 |
echo The build process will start in a new window.
|
| 208 |
start "Build Process" cmd /c "pip install -r requirements.txt && pip install pyinstaller && python src/proxy_app/build.py && echo Build finished. && pause"
|
| 209 |
-
|
| 210 |
|
| 211 |
:: --- Helper Functions ---
|
| 212 |
-
:Execute
|
| 213 |
-
set "COMMAND=%~1"
|
| 214 |
-
set "ARGS=%~2"
|
| 215 |
-
if "%EXECUTION_MODE%"=="exe" (
|
| 216 |
-
start "LLM API Proxy" %EXE_NAME% %COMMAND% %ARGS%
|
| 217 |
-
) else (
|
| 218 |
-
set "PYTHONPATH=%~dp0src;%PYTHONPATH%"
|
| 219 |
-
start "LLM API Proxy" python %SOURCE_PATH% %COMMAND% %ARGS%
|
| 220 |
-
)
|
| 221 |
|
| 222 |
:SelectModeMenu
|
| 223 |
cls
|
|
|
|
| 182 |
echo Starting Proxy...
|
| 183 |
echo Arguments: %ARGS%
|
| 184 |
echo.
|
| 185 |
+
if "%EXECUTION_MODE%"=="exe" (
|
| 186 |
+
start "LLM API Proxy" %EXE_NAME% %ARGS%
|
| 187 |
+
) else (
|
| 188 |
+
set "PYTHONPATH=%~dp0src;%PYTHONPATH%"
|
| 189 |
+
start "LLM API Proxy" python %SOURCE_PATH% %ARGS%
|
| 190 |
+
)
|
| 191 |
+
exit /b 0
|
| 192 |
|
| 193 |
:AddCredentials
|
| 194 |
cls
|
|
|
|
| 211 |
echo.
|
| 212 |
echo The build process will start in a new window.
|
| 213 |
start "Build Process" cmd /c "pip install -r requirements.txt && pip install pyinstaller && python src/proxy_app/build.py && echo Build finished. && pause"
|
| 214 |
+
exit /b
|
| 215 |
|
| 216 |
:: --- Helper Functions ---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 217 |
|
| 218 |
:SelectModeMenu
|
| 219 |
cls
|
src/proxy_app/main.py
CHANGED
|
@@ -130,6 +130,8 @@ load_dotenv()
|
|
| 130 |
# --- Configuration ---
|
| 131 |
USE_EMBEDDING_BATCHER = False
|
| 132 |
ENABLE_REQUEST_LOGGING = args.enable_request_logging
|
|
|
|
|
|
|
| 133 |
PROXY_API_KEY = os.getenv("PROXY_API_KEY")
|
| 134 |
if not PROXY_API_KEY:
|
| 135 |
raise ValueError("PROXY_API_KEY environment variable not set.")
|
|
@@ -489,31 +491,29 @@ async def chat_completions(
|
|
| 489 |
OpenAI-compatible endpoint powered by the RotatingClient.
|
| 490 |
Handles both streaming and non-streaming responses and logs them.
|
| 491 |
"""
|
| 492 |
-
if ENABLE_REQUEST_LOGGING:
|
| 493 |
-
# Preserve and re-use the request body so downstream code can still call request.json()
|
| 494 |
-
raw_body = await request.body()
|
| 495 |
-
try:
|
| 496 |
-
parsed_body = json.loads(raw_body.decode("utf-8")) if raw_body else {}
|
| 497 |
-
except Exception:
|
| 498 |
-
parsed_body = {}
|
| 499 |
-
# Reattach the raw body for later reads
|
| 500 |
-
request._body = raw_body
|
| 501 |
-
|
| 502 |
-
# Extract the fields we want to log (supporting possible nesting in generationConfig)
|
| 503 |
-
model = parsed_body.get("model")
|
| 504 |
-
generation_cfg = parsed_body.get("generationConfig", {}) or parsed_body.get("generation_config", {}) or {}
|
| 505 |
-
reasoning_effort = parsed_body.get("reasoning_effort") or generation_cfg.get("reasoning_effort")
|
| 506 |
-
custom_reasoning_budget = parsed_body.get("custom_reasoning_budget") or generation_cfg.get("custom_reasoning_budget", False)
|
| 507 |
-
|
| 508 |
-
logging.getLogger("rotator_library").info(
|
| 509 |
-
f"Handling reasoning parameters: model={model}, reasoning_effort={reasoning_effort}, custom_reasoning_budget={custom_reasoning_budget}"
|
| 510 |
-
)
|
| 511 |
logger = DetailedLogger() if ENABLE_REQUEST_LOGGING else None
|
| 512 |
try:
|
| 513 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 514 |
if logger:
|
| 515 |
logger.log_request(headers=request.headers, body=request_data)
|
| 516 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 517 |
log_request_to_console(
|
| 518 |
url=str(request.url),
|
| 519 |
headers=dict(request.headers),
|
|
|
|
| 130 |
# --- Configuration ---
|
| 131 |
USE_EMBEDDING_BATCHER = False
|
| 132 |
ENABLE_REQUEST_LOGGING = args.enable_request_logging
|
| 133 |
+
if ENABLE_REQUEST_LOGGING:
|
| 134 |
+
logging.info("Request logging is enabled.")
|
| 135 |
PROXY_API_KEY = os.getenv("PROXY_API_KEY")
|
| 136 |
if not PROXY_API_KEY:
|
| 137 |
raise ValueError("PROXY_API_KEY environment variable not set.")
|
|
|
|
| 491 |
OpenAI-compatible endpoint powered by the RotatingClient.
|
| 492 |
Handles both streaming and non-streaming responses and logs them.
|
| 493 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 494 |
logger = DetailedLogger() if ENABLE_REQUEST_LOGGING else None
|
| 495 |
try:
|
| 496 |
+
# Read and parse the request body only once at the beginning.
|
| 497 |
+
try:
|
| 498 |
+
request_data = await request.json()
|
| 499 |
+
except json.JSONDecodeError:
|
| 500 |
+
raise HTTPException(status_code=400, detail="Invalid JSON in request body.")
|
| 501 |
+
|
| 502 |
+
# If logging is enabled, perform all logging operations using the parsed data.
|
| 503 |
if logger:
|
| 504 |
logger.log_request(headers=request.headers, body=request_data)
|
| 505 |
|
| 506 |
+
# Extract and log specific reasoning parameters for monitoring.
|
| 507 |
+
model = request_data.get("model")
|
| 508 |
+
generation_cfg = request_data.get("generationConfig", {}) or request_data.get("generation_config", {}) or {}
|
| 509 |
+
reasoning_effort = request_data.get("reasoning_effort") or generation_cfg.get("reasoning_effort")
|
| 510 |
+
custom_reasoning_budget = request_data.get("custom_reasoning_budget") or generation_cfg.get("custom_reasoning_budget", False)
|
| 511 |
+
|
| 512 |
+
logging.getLogger("rotator_library").info(
|
| 513 |
+
f"Handling reasoning parameters: model={model}, reasoning_effort={reasoning_effort}, custom_reasoning_budget={custom_reasoning_budget}"
|
| 514 |
+
)
|
| 515 |
+
|
| 516 |
+
# Log basic request info to console (this is a separate, simpler logger).
|
| 517 |
log_request_to_console(
|
| 518 |
url=str(request.url),
|
| 519 |
headers=dict(request.headers),
|
src/rotator_library/providers/gemini_cli_provider.py
CHANGED
|
@@ -231,9 +231,7 @@ class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
|
|
| 231 |
if parts:
|
| 232 |
gemini_contents.append({"role": gemini_role, "parts": parts})
|
| 233 |
|
| 234 |
-
if not
|
| 235 |
-
gemini_contents.insert(0, {"role": "user", "parts": [{"text": ""}]})
|
| 236 |
-
elif gemini_contents and gemini_contents[0]["role"] == "model":
|
| 237 |
gemini_contents.insert(0, {"role": "user", "parts": [{"text": ""}]})
|
| 238 |
|
| 239 |
return system_instruction, gemini_contents
|
|
@@ -303,8 +301,9 @@ class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
|
|
| 303 |
}
|
| 304 |
}]
|
| 305 |
elif 'text' in part:
|
| 306 |
-
# Use
|
| 307 |
-
|
|
|
|
| 308 |
delta['reasoning_content'] = part['text']
|
| 309 |
else:
|
| 310 |
delta['content'] = part['text']
|
|
|
|
| 231 |
if parts:
|
| 232 |
gemini_contents.append({"role": gemini_role, "parts": parts})
|
| 233 |
|
| 234 |
+
if not gemini_contents or gemini_contents[0]['role'] != 'user':
|
|
|
|
|
|
|
| 235 |
gemini_contents.insert(0, {"role": "user", "parts": [{"text": ""}]})
|
| 236 |
|
| 237 |
return system_instruction, gemini_contents
|
|
|
|
| 301 |
}
|
| 302 |
}]
|
| 303 |
elif 'text' in part:
|
| 304 |
+
# Use an explicit check for the 'thought' flag, as its type can be inconsistent
|
| 305 |
+
thought = part.get('thought')
|
| 306 |
+
if thought is True or (isinstance(thought, str) and thought.lower() == 'true'):
|
| 307 |
delta['reasoning_content'] = part['text']
|
| 308 |
else:
|
| 309 |
delta['content'] = part['text']
|
src/rotator_library/usage_manager.py
CHANGED
|
@@ -268,7 +268,13 @@ class UsageManager:
|
|
| 268 |
else:
|
| 269 |
# Differentiate cost calculation based on response type
|
| 270 |
if isinstance(completion_response, litellm.EmbeddingResponse):
|
| 271 |
-
cost
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 272 |
else:
|
| 273 |
cost = litellm.completion_cost(completion_response=completion_response, model=model)
|
| 274 |
|
|
@@ -276,7 +282,7 @@ class UsageManager:
|
|
| 276 |
daily_model_data["approx_cost"] += cost
|
| 277 |
except Exception as e:
|
| 278 |
lib_logger.warning(f"Could not calculate cost for model {model}: {e}")
|
| 279 |
-
elif
|
| 280 |
# This is an unconsumed stream object. Do not log a warning, as usage will be recorded from the chunks.
|
| 281 |
pass
|
| 282 |
else:
|
|
|
|
| 268 |
else:
|
| 269 |
# Differentiate cost calculation based on response type
|
| 270 |
if isinstance(completion_response, litellm.EmbeddingResponse):
|
| 271 |
+
# Manually calculate cost for embeddings
|
| 272 |
+
model_info = litellm.get_model_info(model)
|
| 273 |
+
input_cost = model_info.get("input_cost_per_token")
|
| 274 |
+
if input_cost:
|
| 275 |
+
cost = completion_response.usage.prompt_tokens * input_cost
|
| 276 |
+
else:
|
| 277 |
+
cost = None
|
| 278 |
else:
|
| 279 |
cost = litellm.completion_cost(completion_response=completion_response, model=model)
|
| 280 |
|
|
|
|
| 282 |
daily_model_data["approx_cost"] += cost
|
| 283 |
except Exception as e:
|
| 284 |
lib_logger.warning(f"Could not calculate cost for model {model}: {e}")
|
| 285 |
+
elif isinstance(completion_response, asyncio.Future) or hasattr(completion_response, '__aiter__'):
|
| 286 |
# This is an unconsumed stream object. Do not log a warning, as usage will be recorded from the chunks.
|
| 287 |
pass
|
| 288 |
else:
|