Spaces:
Paused
Paused
Add debug logging for LLM responses
Browse files- llm/oai.py +10 -0
llm/oai.py
CHANGED
|
@@ -128,6 +128,7 @@ class TextChatAtOAI(BaseFnCallModel):
|
|
| 128 |
generate_cfg: dict,
|
| 129 |
) -> Iterator[List[Message]]:
|
| 130 |
messages = self.convert_messages_to_dicts(messages)
|
|
|
|
| 131 |
try:
|
| 132 |
MAX_RETRIES = 5
|
| 133 |
INITIAL_DELAY = 2
|
|
@@ -137,10 +138,12 @@ class TextChatAtOAI(BaseFnCallModel):
|
|
| 137 |
response = None
|
| 138 |
for attempt in range(MAX_RETRIES):
|
| 139 |
try:
|
|
|
|
| 140 |
response = self._chat_complete_create(model=self.model,
|
| 141 |
messages=messages,
|
| 142 |
stream=True,
|
| 143 |
**generate_cfg)
|
|
|
|
| 144 |
break
|
| 145 |
except RateLimitError as ex:
|
| 146 |
if attempt == MAX_RETRIES - 1:
|
|
@@ -156,7 +159,10 @@ class TextChatAtOAI(BaseFnCallModel):
|
|
| 156 |
raise ModelServiceError(exception=ex) from ex
|
| 157 |
|
| 158 |
if delta_stream:
|
|
|
|
| 159 |
for chunk in response:
|
|
|
|
|
|
|
| 160 |
if chunk.choices:
|
| 161 |
choice = chunk.choices[0]
|
| 162 |
if hasattr(choice.delta, 'reasoning_content') and choice.delta.reasoning_content:
|
|
@@ -183,8 +189,12 @@ class TextChatAtOAI(BaseFnCallModel):
|
|
| 183 |
full_reasoning_content = ''
|
| 184 |
content_buffer = ''
|
| 185 |
reasoning_content_buffer = ''
|
|
|
|
| 186 |
|
| 187 |
for chunk in response:
|
|
|
|
|
|
|
|
|
|
| 188 |
if not chunk.choices:
|
| 189 |
continue
|
| 190 |
choice = chunk.choices[0]
|
|
|
|
| 128 |
generate_cfg: dict,
|
| 129 |
) -> Iterator[List[Message]]:
|
| 130 |
messages = self.convert_messages_to_dicts(messages)
|
| 131 |
+
logger.info(f'LLM _chat_stream called with model={self.model}, messages count={len(messages)}')
|
| 132 |
try:
|
| 133 |
MAX_RETRIES = 5
|
| 134 |
INITIAL_DELAY = 2
|
|
|
|
| 138 |
response = None
|
| 139 |
for attempt in range(MAX_RETRIES):
|
| 140 |
try:
|
| 141 |
+
logger.info(f'Attempting LLM call, attempt {attempt + 1}/{MAX_RETRIES}')
|
| 142 |
response = self._chat_complete_create(model=self.model,
|
| 143 |
messages=messages,
|
| 144 |
stream=True,
|
| 145 |
**generate_cfg)
|
| 146 |
+
logger.info(f'LLM call successful, got response object: {type(response)}')
|
| 147 |
break
|
| 148 |
except RateLimitError as ex:
|
| 149 |
if attempt == MAX_RETRIES - 1:
|
|
|
|
| 159 |
raise ModelServiceError(exception=ex) from ex
|
| 160 |
|
| 161 |
if delta_stream:
|
| 162 |
+
chunk_count = 0
|
| 163 |
for chunk in response:
|
| 164 |
+
chunk_count += 1
|
| 165 |
+
logger.info(f'delta_stream chunk {chunk_count}: {chunk}')
|
| 166 |
if chunk.choices:
|
| 167 |
choice = chunk.choices[0]
|
| 168 |
if hasattr(choice.delta, 'reasoning_content') and choice.delta.reasoning_content:
|
|
|
|
| 189 |
full_reasoning_content = ''
|
| 190 |
content_buffer = ''
|
| 191 |
reasoning_content_buffer = ''
|
| 192 |
+
chunk_count = 0
|
| 193 |
|
| 194 |
for chunk in response:
|
| 195 |
+
chunk_count += 1
|
| 196 |
+
if chunk_count <= 3:
|
| 197 |
+
logger.info(f'non-delta chunk {chunk_count}: {chunk}')
|
| 198 |
if not chunk.choices:
|
| 199 |
continue
|
| 200 |
choice = chunk.choices[0]
|