Spaces:
Paused
Paused
Commit
·
a03de74
1
Parent(s):
ba2538c
fixed cot parsing bugs
Browse files- app/api_helpers.py +35 -19
- app/message_processing.py +22 -7
- app/routes/chat_api.py +79 -72
app/api_helpers.py
CHANGED
|
@@ -19,7 +19,8 @@ from message_processing import (
|
|
| 19 |
convert_chunk_to_openai,
|
| 20 |
create_final_chunk,
|
| 21 |
split_text_by_completion_tokens,
|
| 22 |
-
parse_gemini_response_for_reasoning_and_content # Added import
|
|
|
|
| 23 |
)
|
| 24 |
import config as app_config
|
| 25 |
|
|
@@ -235,16 +236,14 @@ async def gemini_fake_stream_generator( # Changed to async
|
|
| 235 |
# Consider re-raising if auto-mode needs to catch this: raise e_outer_gemini
|
| 236 |
|
| 237 |
|
| 238 |
-
async def openai_fake_stream_generator(
|
| 239 |
openai_client: AsyncOpenAI,
|
| 240 |
-
openai_params: Dict[str, Any],
|
| 241 |
openai_extra_body: Dict[str, Any],
|
| 242 |
request_obj: OpenAIRequest,
|
| 243 |
-
is_auto_attempt: bool
|
| 244 |
-
|
| 245 |
-
gcp_project_id
|
| 246 |
-
gcp_location: str,
|
| 247 |
-
base_model_id_for_tokenizer: str
|
| 248 |
):
|
| 249 |
api_model_name = openai_params.get("model", "unknown-openai-model")
|
| 250 |
print(f"FAKE STREAMING (OpenAI): Prep for '{request_obj.model}' (API model: '{api_model_name}') with reasoning split.")
|
|
@@ -254,8 +253,16 @@ async def openai_fake_stream_generator(
|
|
| 254 |
params_for_non_stream_call = openai_params.copy()
|
| 255 |
params_for_non_stream_call['stream'] = False
|
| 256 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 257 |
_api_call_task = asyncio.create_task(
|
| 258 |
-
openai_client.chat.completions.create(**params_for_non_stream_call, extra_body=
|
| 259 |
)
|
| 260 |
raw_response = await _api_call_task
|
| 261 |
full_content_from_api = ""
|
|
@@ -264,18 +271,27 @@ async def openai_fake_stream_generator(
|
|
| 264 |
vertex_completion_tokens = 0
|
| 265 |
if raw_response.usage and raw_response.usage.completion_tokens is not None:
|
| 266 |
vertex_completion_tokens = raw_response.usage.completion_tokens
|
|
|
|
| 267 |
reasoning_text = ""
|
| 268 |
-
actual_content_text
|
| 269 |
-
if full_content_from_api
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
)
|
| 277 |
if reasoning_text:
|
| 278 |
-
print(f"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 279 |
return raw_response, reasoning_text, actual_content_text
|
| 280 |
|
| 281 |
temp_task_for_keepalive_check = asyncio.create_task(_openai_api_call_and_split_task_creator_wrapper())
|
|
|
|
| 19 |
convert_chunk_to_openai,
|
| 20 |
create_final_chunk,
|
| 21 |
split_text_by_completion_tokens,
|
| 22 |
+
parse_gemini_response_for_reasoning_and_content, # Added import
|
| 23 |
+
extract_reasoning_by_tags # Added for new OpenAI direct reasoning logic
|
| 24 |
)
|
| 25 |
import config as app_config
|
| 26 |
|
|
|
|
| 236 |
# Consider re-raising if auto-mode needs to catch this: raise e_outer_gemini
|
| 237 |
|
| 238 |
|
| 239 |
+
async def openai_fake_stream_generator( # Reverted signature: removed thought_tag_marker
|
| 240 |
openai_client: AsyncOpenAI,
|
| 241 |
+
openai_params: Dict[str, Any],
|
| 242 |
openai_extra_body: Dict[str, Any],
|
| 243 |
request_obj: OpenAIRequest,
|
| 244 |
+
is_auto_attempt: bool
|
| 245 |
+
# Removed thought_tag_marker as parsing uses a fixed tag now
|
| 246 |
+
# Removed gcp_credentials, gcp_project_id, gcp_location, base_model_id_for_tokenizer previously
|
|
|
|
|
|
|
| 247 |
):
|
| 248 |
api_model_name = openai_params.get("model", "unknown-openai-model")
|
| 249 |
print(f"FAKE STREAMING (OpenAI): Prep for '{request_obj.model}' (API model: '{api_model_name}') with reasoning split.")
|
|
|
|
| 253 |
params_for_non_stream_call = openai_params.copy()
|
| 254 |
params_for_non_stream_call['stream'] = False
|
| 255 |
|
| 256 |
+
# Add the tag marker specifically for the internal non-streaming call in fake streaming
|
| 257 |
+
extra_body_for_internal_call = openai_extra_body.copy() # Avoid modifying the original dict
|
| 258 |
+
if 'google' not in extra_body_for_internal_call.get('extra_body', {}):
|
| 259 |
+
if 'extra_body' not in extra_body_for_internal_call: extra_body_for_internal_call['extra_body'] = {}
|
| 260 |
+
extra_body_for_internal_call['extra_body']['google'] = {}
|
| 261 |
+
extra_body_for_internal_call['extra_body']['google']['thought_tag_marker'] = 'vertex_think_tag'
|
| 262 |
+
print("DEBUG: Adding 'thought_tag_marker' for fake-streaming internal call.")
|
| 263 |
+
|
| 264 |
_api_call_task = asyncio.create_task(
|
| 265 |
+
openai_client.chat.completions.create(**params_for_non_stream_call, extra_body=extra_body_for_internal_call) # Use modified extra_body
|
| 266 |
)
|
| 267 |
raw_response = await _api_call_task
|
| 268 |
full_content_from_api = ""
|
|
|
|
| 271 |
vertex_completion_tokens = 0
|
| 272 |
if raw_response.usage and raw_response.usage.completion_tokens is not None:
|
| 273 |
vertex_completion_tokens = raw_response.usage.completion_tokens
|
| 274 |
+
# --- Start Inserted Block (Tag-based reasoning extraction) ---
|
| 275 |
reasoning_text = ""
|
| 276 |
+
# Ensure actual_content_text is a string even if API returns None
|
| 277 |
+
actual_content_text = full_content_from_api if isinstance(full_content_from_api, str) else ""
|
| 278 |
+
|
| 279 |
+
fixed_tag = "vertex_think_tag" # Use the fixed tag name
|
| 280 |
+
if actual_content_text: # Check if content exists
|
| 281 |
+
print(f"INFO: OpenAI Direct Fake-Streaming - Applying tag extraction with fixed marker: '{fixed_tag}'")
|
| 282 |
+
# Unconditionally attempt extraction with the fixed tag
|
| 283 |
+
reasoning_text, actual_content_text = extract_reasoning_by_tags(actual_content_text, fixed_tag)
|
|
|
|
| 284 |
if reasoning_text:
|
| 285 |
+
print(f"DEBUG: Tag extraction success (fixed tag). Reasoning len: {len(reasoning_text)}, Content len: {len(actual_content_text)}")
|
| 286 |
+
else:
|
| 287 |
+
print(f"DEBUG: No content found within fixed tag '{fixed_tag}'.")
|
| 288 |
+
else:
|
| 289 |
+
print(f"WARNING: OpenAI Direct Fake-Streaming - No initial content found in message.")
|
| 290 |
+
actual_content_text = "" # Ensure empty string
|
| 291 |
+
|
| 292 |
+
# --- End Revised Block ---
|
| 293 |
+
|
| 294 |
+
# The return uses the potentially modified variables:
|
| 295 |
return raw_response, reasoning_text, actual_content_text
|
| 296 |
|
| 297 |
temp_task_for_keepalive_check = asyncio.create_task(_openai_api_call_and_split_task_creator_wrapper())
|
app/message_processing.py
CHANGED
|
@@ -11,6 +11,26 @@ from google import genai as google_genai_client
|
|
| 11 |
from models import OpenAIMessage, ContentPartText, ContentPartImage
|
| 12 |
|
| 13 |
SUPPORTED_ROLES = ["user", "model"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
def create_gemini_prompt(messages: List[OpenAIMessage]) -> Union[types.Content, List[types.Content]]:
|
| 16 |
# This function remains unchanged
|
|
@@ -203,11 +223,8 @@ def parse_gemini_response_for_reasoning_and_content(gemini_response_candidate: A
|
|
| 203 |
# Check if gemini_response_candidate itself resembles a part_item with 'thought'
|
| 204 |
# This might be relevant for direct part processing in stream chunks if candidate structure is shallow
|
| 205 |
candidate_part_text = ""
|
| 206 |
-
is_candidate_itself_thought = False
|
| 207 |
if hasattr(gemini_response_candidate, 'text') and gemini_response_candidate.text is not None:
|
| 208 |
candidate_part_text = str(gemini_response_candidate.text)
|
| 209 |
-
if hasattr(gemini_response_candidate, 'thought') and gemini_response_candidate.thought is True:
|
| 210 |
-
is_candidate_itself_thought = True
|
| 211 |
|
| 212 |
# Primary logic: Iterate through parts of the candidate's content object
|
| 213 |
gemini_candidate_content = None
|
|
@@ -224,9 +241,7 @@ def parse_gemini_response_for_reasoning_and_content(gemini_response_candidate: A
|
|
| 224 |
reasoning_text_parts.append(part_text)
|
| 225 |
else:
|
| 226 |
normal_text_parts.append(part_text)
|
| 227 |
-
|
| 228 |
-
reasoning_text_parts.append(candidate_part_text)
|
| 229 |
-
elif candidate_part_text: # Candidate had text but no parts and was not a thought itself
|
| 230 |
normal_text_parts.append(candidate_part_text)
|
| 231 |
# If no parts and no direct text on candidate, both lists remain empty.
|
| 232 |
|
|
@@ -235,7 +250,7 @@ def parse_gemini_response_for_reasoning_and_content(gemini_response_candidate: A
|
|
| 235 |
normal_text_parts.append(str(gemini_candidate_content.text))
|
| 236 |
# Fallback if no .content but direct .text on candidate
|
| 237 |
elif hasattr(gemini_response_candidate, 'text') and gemini_response_candidate.text is not None and not gemini_candidate_content:
|
| 238 |
-
|
| 239 |
|
| 240 |
return "".join(reasoning_text_parts), "".join(normal_text_parts)
|
| 241 |
|
|
|
|
| 11 |
from models import OpenAIMessage, ContentPartText, ContentPartImage
|
| 12 |
|
| 13 |
SUPPORTED_ROLES = ["user", "model"]
|
| 14 |
+
# New function to extract reasoning based on specified tags
|
| 15 |
+
# Removed duplicate import
|
| 16 |
+
|
| 17 |
+
def extract_reasoning_by_tags(full_text: str, tag_name: str) -> Tuple[str, str]:
|
| 18 |
+
"""Extracts reasoning content enclosed in specific tags."""
|
| 19 |
+
if not tag_name or not isinstance(full_text, str): # Handle empty tag or non-string input
|
| 20 |
+
return "", full_text if isinstance(full_text, str) else ""
|
| 21 |
+
|
| 22 |
+
open_tag = f"<{tag_name}>"
|
| 23 |
+
close_tag = f"</{tag_name}>"
|
| 24 |
+
# Make pattern non-greedy and handle potential multiple occurrences
|
| 25 |
+
pattern = re.compile(f"{re.escape(open_tag)}(.*?){re.escape(close_tag)}", re.DOTALL)
|
| 26 |
+
|
| 27 |
+
reasoning_parts = pattern.findall(full_text)
|
| 28 |
+
# Remove tags and the extracted reasoning content to get normal content
|
| 29 |
+
normal_text = pattern.sub('', full_text)
|
| 30 |
+
|
| 31 |
+
reasoning_content = "".join(reasoning_parts)
|
| 32 |
+
# Consider trimming whitespace that might be left after tag removal
|
| 33 |
+
return reasoning_content.strip(), normal_text.strip()
|
| 34 |
|
| 35 |
def create_gemini_prompt(messages: List[OpenAIMessage]) -> Union[types.Content, List[types.Content]]:
|
| 36 |
# This function remains unchanged
|
|
|
|
| 223 |
# Check if gemini_response_candidate itself resembles a part_item with 'thought'
|
| 224 |
# This might be relevant for direct part processing in stream chunks if candidate structure is shallow
|
| 225 |
candidate_part_text = ""
|
|
|
|
| 226 |
if hasattr(gemini_response_candidate, 'text') and gemini_response_candidate.text is not None:
|
| 227 |
candidate_part_text = str(gemini_response_candidate.text)
|
|
|
|
|
|
|
| 228 |
|
| 229 |
# Primary logic: Iterate through parts of the candidate's content object
|
| 230 |
gemini_candidate_content = None
|
|
|
|
| 241 |
reasoning_text_parts.append(part_text)
|
| 242 |
else:
|
| 243 |
normal_text_parts.append(part_text)
|
| 244 |
+
if candidate_part_text: # Candidate had text but no parts and was not a thought itself
|
|
|
|
|
|
|
| 245 |
normal_text_parts.append(candidate_part_text)
|
| 246 |
# If no parts and no direct text on candidate, both lists remain empty.
|
| 247 |
|
|
|
|
| 250 |
normal_text_parts.append(str(gemini_candidate_content.text))
|
| 251 |
# Fallback if no .content but direct .text on candidate
|
| 252 |
elif hasattr(gemini_response_candidate, 'text') and gemini_response_candidate.text is not None and not gemini_candidate_content:
|
| 253 |
+
normal_text_parts.append(str(gemini_response_candidate.text))
|
| 254 |
|
| 255 |
return "".join(reasoning_text_parts), "".join(normal_text_parts)
|
| 256 |
|
app/routes/chat_api.py
CHANGED
|
@@ -23,7 +23,8 @@ from message_processing import (
|
|
| 23 |
create_gemini_prompt,
|
| 24 |
create_encrypted_gemini_prompt,
|
| 25 |
create_encrypted_full_gemini_prompt,
|
| 26 |
-
split_text_by_completion_tokens # Added
|
|
|
|
| 27 |
)
|
| 28 |
from api_helpers import (
|
| 29 |
create_generation_config,
|
|
@@ -219,29 +220,34 @@ STRICT OPERATING PROTOCOL:
|
|
| 219 |
openai_params = {k: v for k, v in openai_params.items() if v is not None}
|
| 220 |
|
| 221 |
openai_extra_body = {
|
| 222 |
-
|
| 223 |
-
'
|
|
|
|
|
|
|
|
|
|
| 224 |
}
|
| 225 |
}
|
| 226 |
|
|
|
|
| 227 |
if request.stream:
|
| 228 |
if app_config.FAKE_STREAMING_ENABLED:
|
| 229 |
print(f"INFO: OpenAI Fake Streaming (SSE Simulation) ENABLED for model '{request.model}'.")
|
| 230 |
# openai_params already has "stream": True from initial setup,
|
| 231 |
# but openai_fake_stream_generator will make a stream=False call internally.
|
| 232 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
| 233 |
return StreamingResponse(
|
| 234 |
-
openai_fake_stream_generator(
|
| 235 |
openai_client=openai_client,
|
| 236 |
openai_params=openai_params,
|
| 237 |
-
openai_extra_body=openai_extra_body,
|
| 238 |
request_obj=request,
|
| 239 |
-
is_auto_attempt=False
|
| 240 |
-
#
|
| 241 |
-
gcp_credentials
|
| 242 |
-
gcp_project_id=PROJECT_ID, # This is rotated_project_id
|
| 243 |
-
gcp_location=LOCATION, # This is "global"
|
| 244 |
-
base_model_id_for_tokenizer=base_model_name # Stripped model ID for tokenizer
|
| 245 |
),
|
| 246 |
media_type="text/event-stream"
|
| 247 |
)
|
|
@@ -297,70 +303,71 @@ STRICT OPERATING PROTOCOL:
|
|
| 297 |
yield "data: [DONE]\n\n"
|
| 298 |
return StreamingResponse(openai_true_stream_generator(), media_type="text/event-stream")
|
| 299 |
else: # Not streaming (is_openai_direct_model and not request.stream)
|
| 300 |
-
|
| 301 |
-
#
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 313 |
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 327 |
full_content = message_dict.get('content')
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
vertex_completion_tokens
|
| 339 |
-
)
|
| 340 |
-
|
| 341 |
-
message_dict['content'] = actual_content
|
| 342 |
-
if reasoning_text: # Only add reasoning_content if it's not empty
|
| 343 |
message_dict['reasoning_content'] = reasoning_text
|
| 344 |
-
print(f"
|
| 345 |
-
|
| 346 |
-
|
| 347 |
-
print(f"INFO: Content reconstructed from tokens. Original len: {len(full_content)}, Reconstructed len: {len(actual_content)}")
|
| 348 |
-
# else: No reasoning, and content is original full_content because num_completion_tokens was invalid or zero.
|
| 349 |
-
|
| 350 |
else:
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
|
| 358 |
-
|
| 359 |
-
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
|
| 363 |
-
|
| 364 |
elif is_auto_model:
|
| 365 |
print(f"Processing auto model: {request.model}")
|
| 366 |
attempts = [
|
|
|
|
| 23 |
create_gemini_prompt,
|
| 24 |
create_encrypted_gemini_prompt,
|
| 25 |
create_encrypted_full_gemini_prompt,
|
| 26 |
+
split_text_by_completion_tokens, # Added
|
| 27 |
+
extract_reasoning_by_tags # Added for new reasoning logic
|
| 28 |
)
|
| 29 |
from api_helpers import (
|
| 30 |
create_generation_config,
|
|
|
|
| 220 |
openai_params = {k: v for k, v in openai_params.items() if v is not None}
|
| 221 |
|
| 222 |
openai_extra_body = {
|
| 223 |
+
"extra_body": {
|
| 224 |
+
'google': {
|
| 225 |
+
'safety_settings': openai_safety_settings
|
| 226 |
+
# REMOVED 'thought_tag_marker' - will be added conditionally below
|
| 227 |
+
}
|
| 228 |
}
|
| 229 |
}
|
| 230 |
|
| 231 |
+
|
| 232 |
if request.stream:
|
| 233 |
if app_config.FAKE_STREAMING_ENABLED:
|
| 234 |
print(f"INFO: OpenAI Fake Streaming (SSE Simulation) ENABLED for model '{request.model}'.")
|
| 235 |
# openai_params already has "stream": True from initial setup,
|
| 236 |
# but openai_fake_stream_generator will make a stream=False call internally.
|
| 237 |
+
# Retrieve the marker before the call
|
| 238 |
+
openai_extra_body_from_req = getattr(request, 'openai_extra_body', None)
|
| 239 |
+
thought_tag_marker = openai_extra_body_from_req.get("google", {}).get("thought_tag_marker") if openai_extra_body_from_req else None
|
| 240 |
+
|
| 241 |
+
# Call the generator with updated signature
|
| 242 |
return StreamingResponse(
|
| 243 |
+
openai_fake_stream_generator(
|
| 244 |
openai_client=openai_client,
|
| 245 |
openai_params=openai_params,
|
| 246 |
+
openai_extra_body=openai_extra_body, # Keep passing the full extra_body as it might be used elsewhere
|
| 247 |
request_obj=request,
|
| 248 |
+
is_auto_attempt=False # Assuming this remains false for direct calls
|
| 249 |
+
# Removed thought_tag_marker argument
|
| 250 |
+
# Removed gcp_credentials, gcp_project_id, gcp_location, base_model_id_for_tokenizer previously
|
|
|
|
|
|
|
|
|
|
| 251 |
),
|
| 252 |
media_type="text/event-stream"
|
| 253 |
)
|
|
|
|
| 303 |
yield "data: [DONE]\n\n"
|
| 304 |
return StreamingResponse(openai_true_stream_generator(), media_type="text/event-stream")
|
| 305 |
else: # Not streaming (is_openai_direct_model and not request.stream)
|
| 306 |
+
# Conditionally add the tag marker ONLY for non-streaming
|
| 307 |
+
extra_body_for_call = openai_extra_body.copy() # Avoid modifying the original dict used elsewhere
|
| 308 |
+
if 'google' not in extra_body_for_call.get('extra_body', {}):
|
| 309 |
+
if 'extra_body' not in extra_body_for_call: extra_body_for_call['extra_body'] = {}
|
| 310 |
+
extra_body_for_call['extra_body']['google'] = {}
|
| 311 |
+
extra_body_for_call['extra_body']['google']['thought_tag_marker'] = 'vertex_think_tag'
|
| 312 |
+
print("DEBUG: Adding 'thought_tag_marker' for non-streaming call.")
|
| 313 |
+
|
| 314 |
+
try: # Corrected indentation for entire block
|
| 315 |
+
# Ensure stream=False is explicitly passed for non-streaming
|
| 316 |
+
openai_params_for_non_stream = {**openai_params, "stream": False}
|
| 317 |
+
response = await openai_client.chat.completions.create(
|
| 318 |
+
**openai_params_for_non_stream,
|
| 319 |
+
# Removed redundant **openai_params spread
|
| 320 |
+
extra_body=extra_body_for_call # Use the modified extra_body for non-streaming call
|
| 321 |
+
)
|
| 322 |
+
response_dict = response.model_dump(exclude_unset=True, exclude_none=True)
|
| 323 |
|
| 324 |
+
try:
|
| 325 |
+
usage = response_dict.get('usage')
|
| 326 |
+
vertex_completion_tokens = 0 # Keep this for potential future use, but not used for split
|
| 327 |
+
|
| 328 |
+
if usage and isinstance(usage, dict):
|
| 329 |
+
vertex_completion_tokens = usage.get('completion_tokens')
|
| 330 |
+
|
| 331 |
+
choices = response_dict.get('choices')
|
| 332 |
+
if choices and isinstance(choices, list) and len(choices) > 0:
|
| 333 |
+
message_dict = choices[0].get('message')
|
| 334 |
+
if message_dict and isinstance(message_dict, dict):
|
| 335 |
+
# Always remove extra_content from the message if it exists
|
| 336 |
+
if 'extra_content' in message_dict:
|
| 337 |
+
del message_dict['extra_content']
|
| 338 |
+
# print("DEBUG: Removed 'extra_content' from response message.") # Optional debug log
|
| 339 |
+
|
| 340 |
+
# --- Start Revised Block (Fixed tag reasoning extraction) ---
|
| 341 |
+
# No longer need to get marker from request
|
| 342 |
full_content = message_dict.get('content')
|
| 343 |
+
reasoning_text = ""
|
| 344 |
+
actual_content = full_content if isinstance(full_content, str) else "" # Ensure string
|
| 345 |
+
|
| 346 |
+
fixed_tag = "vertex_think_tag" # Use the fixed tag name
|
| 347 |
+
if actual_content: # Check if content exists
|
| 348 |
+
print(f"INFO: OpenAI Direct Non-Streaming - Applying tag extraction with fixed marker: '{fixed_tag}'")
|
| 349 |
+
# Unconditionally attempt extraction with the fixed tag
|
| 350 |
+
reasoning_text, actual_content = extract_reasoning_by_tags(actual_content, fixed_tag)
|
| 351 |
+
message_dict['content'] = actual_content # Update the dictionary
|
| 352 |
+
if reasoning_text:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 353 |
message_dict['reasoning_content'] = reasoning_text
|
| 354 |
+
print(f"DEBUG: Tag extraction success (fixed tag). Reasoning len: {len(reasoning_text)}, Content len: {len(actual_content)}")
|
| 355 |
+
else:
|
| 356 |
+
print(f"DEBUG: No content found within fixed tag '{fixed_tag}'.")
|
|
|
|
|
|
|
|
|
|
| 357 |
else:
|
| 358 |
+
print(f"WARNING: OpenAI Direct Non-Streaming - No initial content found in message. Content: {message_dict.get('content')}")
|
| 359 |
+
message_dict['content'] = "" # Ensure content key exists and is empty string
|
| 360 |
+
|
| 361 |
+
# --- End Revised Block ---
|
| 362 |
+
except Exception as e_reasoning_processing:
|
| 363 |
+
print(f"WARNING: Error during non-streaming reasoning token processing for model {request.model} due to: {e_reasoning_processing}.")
|
| 364 |
+
|
| 365 |
+
return JSONResponse(content=response_dict)
|
| 366 |
+
except Exception as generate_error: # Corrected indentation for except block
|
| 367 |
+
error_msg_generate = f"Error calling OpenAI client for {request.model}: {str(generate_error)}"
|
| 368 |
+
print(f"ERROR: {error_msg_generate}")
|
| 369 |
+
error_response = create_openai_error_response(500, error_msg_generate, "server_error")
|
| 370 |
+
return JSONResponse(status_code=500, content=error_response)
|
| 371 |
elif is_auto_model:
|
| 372 |
print(f"Processing auto model: {request.model}")
|
| 373 |
attempts = [
|