Mirrowel committed on
Commit
3981e3a
·
1 Parent(s): 62ed41b

feat(logging): sanitize Litellm logs even more

Browse files

This change enhances the `_sanitize_litellm_log` function to ensure comprehensive removal of sensitive and large data fields from Litellm log dictionaries.

- Implements a recursive cleaning mechanism to handle deeply nested log structures.
- Expands the list of keys to remove, including `api_key`, `api_base`, `original_response`, and `additional_args`.
- Prevents mutation of the original log object by performing a deep copy before sanitization.
- Improves log cleanliness and security by ensuring all relevant sensitive information is scrubbed.

Files changed (1) hide show
  1. src/rotator_library/client.py +30 -12
src/rotator_library/client.py CHANGED
@@ -64,25 +64,43 @@ class RotatingClient:
64
 
65
  def _sanitize_litellm_log(self, log_data: dict) -> dict:
66
  """
67
- Removes large data fields from litellm log dictionaries to keep debug logs clean.
 
68
  """
69
  if not isinstance(log_data, dict):
70
  return log_data
71
 
72
- clean_data = log_data.copy()
73
-
74
- # These keys often contain the full request/response payload.
75
- keys_to_pop = ["messages", "input", "response", "data"]
 
76
 
77
- # The actual log data from litellm is often nested inside 'kwargs'
78
- if 'kwargs' in clean_data and isinstance(clean_data['kwargs'], dict):
 
 
 
 
 
 
 
 
 
79
  for key in keys_to_pop:
80
- clean_data['kwargs'].pop(key, None)
81
-
82
- # Sometimes they are at the top level
83
- for key in keys_to_pop:
84
- clean_data.pop(key, None)
 
 
 
 
 
 
85
 
 
86
  return clean_data
87
 
88
  def _litellm_logger_callback(self, log_data: dict):
 
64
 
65
  def _sanitize_litellm_log(self, log_data: dict) -> dict:
66
  """
67
+ Recursively removes large data fields and sensitive information from litellm log
68
+ dictionaries to keep debug logs clean and secure.
69
  """
70
  if not isinstance(log_data, dict):
71
  return log_data
72
 
73
+ # Keys to remove at any level of the dictionary
74
+ keys_to_pop = [
75
+ "messages", "input", "response", "data", "api_key",
76
+ "api_base", "original_response", "additional_args"
77
+ ]
78
 
79
+ # Keys that might contain nested dictionaries to clean
80
+ nested_keys = ["kwargs", "litellm_params", "model_info", "proxy_server_request"]
81
+
82
+ # Create a deep copy to avoid modifying the original log object in memory
83
+ clean_data = json.loads(json.dumps(log_data, default=str))
84
+
85
+ def clean_recursively(data_dict):
86
+ if not isinstance(data_dict, dict):
87
+ return
88
+
89
+ # Remove sensitive/large keys
90
  for key in keys_to_pop:
91
+ data_dict.pop(key, None)
92
+
93
+ # Recursively clean nested dictionaries
94
+ for key in nested_keys:
95
+ if key in data_dict and isinstance(data_dict[key], dict):
96
+ clean_recursively(data_dict[key])
97
+
98
+ # Also iterate through all values to find any other nested dicts
99
+ for key, value in list(data_dict.items()):
100
+ if isinstance(value, dict):
101
+ clean_recursively(value)
102
 
103
+ clean_recursively(clean_data)
104
  return clean_data
105
 
106
  def _litellm_logger_callback(self, log_data: dict):