Spaces:

pjpjq
/

DeepResearch

Paused

App Files Files Community

pjpjq committed on Jan 9

Commit

c78586b

1 Parent(s): b386b30

Add debug logging for LLM responses

Browse files

Files changed (1) hide show

llm/oai.py +10 -0

llm/oai.py CHANGED Viewed

@@ -128,6 +128,7 @@ class TextChatAtOAI(BaseFnCallModel):
         generate_cfg: dict,
     ) -> Iterator[List[Message]]:
         messages = self.convert_messages_to_dicts(messages)
         try:
             MAX_RETRIES = 5
             INITIAL_DELAY = 2
@@ -137,10 +138,12 @@ class TextChatAtOAI(BaseFnCallModel):
             response = None
             for attempt in range(MAX_RETRIES):
                 try:
                     response = self._chat_complete_create(model=self.model,
                                                           messages=messages,
                                                           stream=True,
                                                           **generate_cfg)
                     break
                 except RateLimitError as ex:
                     if attempt == MAX_RETRIES - 1:
@@ -156,7 +159,10 @@ class TextChatAtOAI(BaseFnCallModel):
                     raise ModelServiceError(exception=ex) from ex
             if delta_stream:
                 for chunk in response:
                     if chunk.choices:
                         choice = chunk.choices[0]
                         if hasattr(choice.delta, 'reasoning_content') and choice.delta.reasoning_content:
@@ -183,8 +189,12 @@ class TextChatAtOAI(BaseFnCallModel):
                 full_reasoning_content = ''
                 content_buffer = ''
                 reasoning_content_buffer = ''
                 for chunk in response:
                     if not chunk.choices:
                         continue
                     choice = chunk.choices[0]

         generate_cfg: dict,
     ) -> Iterator[List[Message]]:
         messages = self.convert_messages_to_dicts(messages)
+        logger.info(f'LLM _chat_stream called with model={self.model}, messages count={len(messages)}')
         try:
             MAX_RETRIES = 5
             INITIAL_DELAY = 2
             response = None
             for attempt in range(MAX_RETRIES):
                 try:
+                    logger.info(f'Attempting LLM call, attempt {attempt + 1}/{MAX_RETRIES}')
                     response = self._chat_complete_create(model=self.model,
                                                           messages=messages,
                                                           stream=True,
                                                           **generate_cfg)
+                    logger.info(f'LLM call successful, got response object: {type(response)}')
                     break
                 except RateLimitError as ex:
                     if attempt == MAX_RETRIES - 1:
                     raise ModelServiceError(exception=ex) from ex
             if delta_stream:
+                chunk_count = 0
                 for chunk in response:
+                    chunk_count += 1
+                    logger.info(f'delta_stream chunk {chunk_count}: {chunk}')
                     if chunk.choices:
                         choice = chunk.choices[0]
                         if hasattr(choice.delta, 'reasoning_content') and choice.delta.reasoning_content:
                 full_reasoning_content = ''
                 content_buffer = ''
                 reasoning_content_buffer = ''
+                chunk_count = 0
                 for chunk in response:
+                    chunk_count += 1
+                    if chunk_count <= 3:
+                        logger.info(f'non-delta chunk {chunk_count}: {chunk}')
                     if not chunk.choices:
                         continue
                     choice = chunk.choices[0]