Spaces:

bluewinliang
/

zai2api

Paused

App Files Files Community

zai2api / proxy_handler.py

bluewinliang

Upload 8 files

6480add verified 6 months ago

raw

history blame

12.2 kB

	"""
	Proxy handler for Z.AI API requests
	"""
	import json
	import logging
	import re
	import time
	from typing import AsyncGenerator, Dict, Any, Optional
	import httpx
	from fastapi import HTTPException
	from fastapi.responses import StreamingResponse

	from config import settings
	from cookie_manager import cookie_manager
	from models import ChatCompletionRequest, ChatCompletionResponse, ChatCompletionStreamResponse

	# Module-level logger, named after this module via __name__.
	logger = logging.getLogger(__name__)

	class ProxyHandler:
	    """Proxy chat-completion requests to the Z.AI upstream API.

	    Owns one ``httpx.AsyncClient``. The handler can be used as an async
	    context manager, in which case the client is closed on exit.
	    """

	    # Thinking content arrives wrapped in <details> blocks. DOTALL lets a
	    # block span several lines.
	    _DETAILS_BLOCK_RE = re.compile(r'<details[^>]*>.*?</details>', re.DOTALL)
	    # Unclosed <details>: consume lazily up to the first position that is
	    # followed by optional whitespace and a capital letter, a digit, or the
	    # end of the text. This is a heuristic for "where the answer starts".
	    _DETAILS_OPEN_RE = re.compile(r'<details[^>]*>.*?(?=\s*[A-Z]|\s*\d|\s*$)', re.DOTALL)
	    _DETAILS_TAG_RE = re.compile(r'<details[^>]*>')
	    _SUMMARY_RE = re.compile(r'<summary>.*?</summary>', re.DOTALL)
	    # Heuristic start of the answer: a new line beginning with a capital
	    # letter or a digit.
	    _ANSWER_START_RE = re.compile(r'\n\s*[A-Z0-9]')

	    # Sentinel returned by _parse_sse_line for the "[DONE]" terminator.
	    _STREAM_DONE = object()

	    def __init__(self):
	        self.client = httpx.AsyncClient(timeout=60.0)

	    async def __aenter__(self):
	        return self

	    async def __aexit__(self, exc_type, exc_val, exc_tb):
	        await self.client.aclose()

	    @staticmethod
	    def _is_streaming(request) -> bool:
	        """Return the request's ``stream`` flag, or the configured default when unset."""
	        return request.stream if request.stream is not None else settings.DEFAULT_STREAM

	    @staticmethod
	    def _chunk_data(chunk) -> Dict[str, Any]:
	        """Return ``chunk["data"]`` when it is a dict, otherwise an empty dict."""
	        data = chunk.get("data") if isinstance(chunk, dict) else None
	        return data if isinstance(data, dict) else {}

	    @staticmethod
	    def _error_detail(error) -> Optional[str]:
	        """Extract a message from an upstream ``error`` value (dict or scalar)."""
	        if isinstance(error, dict):
	            return error.get("detail")
	        return str(error) if error else None

	    @classmethod
	    def _strip_thinking(cls, content: str) -> str:
	        """Remove ``<details>`` thinking blocks, closed or unclosed, and strip whitespace."""
	        # Complete <details>...</details> blocks first.
	        content = cls._DETAILS_BLOCK_RE.sub('', content)
	        # Then any opening tag left without a closing tag, up to the guessed
	        # start of the answer.
	        content = cls._DETAILS_OPEN_RE.sub('', content)
	        return content.strip()

	    @classmethod
	    def _convert_to_think_tags(cls, content: str) -> str:
	        """Rewrite ``<details>`` markup as ``<think>...</think>`` and drop ``<summary>`` elements."""
	        content = cls._DETAILS_TAG_RE.sub('<think>', content)
	        content = content.replace('</details>', '</think>')
	        content = cls._SUMMARY_RE.sub('', content)

	        # An unclosed <think> gets a closing tag just before the guessed start
	        # of the answer, or at the very end when no answer start is found.
	        if '<think>' in content and '</think>' not in content:
	            think_start = content.find('<think>')
	            answer_match = cls._ANSWER_START_RE.search(content[think_start:])
	            if answer_match:
	                insert_pos = think_start + answer_match.start()
	                content = content[:insert_pos] + '</think>\n' + content[insert_pos:]
	            else:
	                content += '</think>'
	        return content

	    def transform_content(self, content: str) -> str:
	        """Transform upstream content according to ``settings.SHOW_THINK_TAGS``.

	        When the setting is false, ``<details>`` thinking blocks are removed.
	        When it is true, they are converted to ``<think>`` tags and
	        ``<summary>`` elements are dropped. Empty content is returned
	        unchanged; otherwise the result is whitespace-stripped.
	        """
	        if not content:
	            return content

	        logger.debug(f"SHOW_THINK_TAGS setting: {settings.SHOW_THINK_TAGS}")

	        if not settings.SHOW_THINK_TAGS:
	            logger.debug("Removing thinking content from response")
	            original_length = len(content)
	            content = self._strip_thinking(content)
	            logger.debug(f"Content length after removing thinking content: {original_length} -> {len(content)}")
	        else:
	            logger.debug("Keeping thinking content, converting to <think> tags")
	            content = self._convert_to_think_tags(content)

	        return content.strip()

	    async def proxy_request(self, request: ChatCompletionRequest) -> Dict[str, Any]:
	        """Send the chat request to the Z.AI upstream.

	        Returns:
	            ``{"response": <httpx.Response>, "cookie": <cookie used>}``.

	        Raises:
	            HTTPException: 503 when no cookie is available or the upstream
	                cannot be reached, 401 when the upstream rejects the cookie,
	                or the upstream status code for any other non-200 reply.
	        """
	        cookie = await cookie_manager.get_next_cookie()
	        if not cookie:
	            raise HTTPException(status_code=503, detail="No available cookies")

	        # Map the public model name onto the upstream one.
	        target_model = settings.UPSTREAM_MODEL if request.model == settings.MODEL_NAME else request.model

	        # Validate parameter compatibility
	        if self._is_streaming(request) and not settings.SHOW_THINK_TAGS:
	            logger.warning("SHOW_THINK_TAGS=false is ignored for streaming responses")

	        # Build request data based on actual Z.AI format from zai-messages.md
	        import uuid

	        # Only the messages are forwarded from the incoming request.
	        messages = request.model_dump(exclude_none=True)["messages"]

	        request_data = {
	            "stream": True,  # Always request streaming from Z.AI for processing
	            "model": target_model,
	            "messages": messages,
	            "background_tasks": {
	                "title_generation": True,
	                "tags_generation": True,
	            },
	            "chat_id": str(uuid.uuid4()),
	            "features": {
	                "image_generation": False,
	                "code_interpreter": False,
	                "web_search": False,
	                "auto_web_search": False,
	            },
	            "id": str(uuid.uuid4()),
	            "mcp_servers": ["deep-web-search"],
	            "model_item": {
	                "id": target_model,
	                "name": "GLM-4.5",
	                "owned_by": "openai",
	            },
	            "params": {},
	            "tool_servers": [],
	            "variables": {
	                "{{USER_NAME}}": "User",
	                "{{USER_LOCATION}}": "Unknown",
	                # Current local time instead of a frozen timestamp.
	                "{{CURRENT_DATETIME}}": time.strftime("%Y-%m-%d %H:%M:%S"),
	            },
	        }

	        logger.debug(f"Sending request data: {request_data}")

	        headers = {
	            "Content-Type": "application/json",
	            "Authorization": f"Bearer {cookie}",
	            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36",
	            "Accept": "application/json, text/event-stream",
	            "Accept-Language": "zh-CN",
	            "sec-ch-ua": '"Not)A;Brand";v="8", "Chromium";v="138", "Google Chrome";v="138"',
	            "sec-ch-ua-mobile": "?0",
	            "sec-ch-ua-platform": '"macOS"',
	            "x-fe-version": "prod-fe-1.0.53",
	            "Origin": "https://chat.z.ai",
	            "Referer": "https://chat.z.ai/c/069723d5-060b-404f-992c-4705f1554c4c",
	        }

	        try:
	            # NOTE(review): post() reads the whole upstream body before
	            # returning, so nothing is relayed downstream until the upstream
	            # has finished. True streaming would need send(..., stream=True)
	            # plus explicit closing of the response by its consumers.
	            response = await self.client.post(
	                settings.UPSTREAM_URL,
	                json=request_data,
	                headers=headers,
	            )

	            if response.status_code == 401:
	                await cookie_manager.mark_cookie_failed(cookie)
	                raise HTTPException(status_code=401, detail="Invalid authentication")

	            if response.status_code != 200:
	                raise HTTPException(status_code=response.status_code, detail=f"Upstream error: {response.text}")

	            await cookie_manager.mark_cookie_success(cookie)
	            return {"response": response, "cookie": cookie}

	        except httpx.RequestError as e:
	            logger.error(f"Request error: {e}")
	            await cookie_manager.mark_cookie_failed(cookie)
	            raise HTTPException(status_code=503, detail="Upstream service unavailable") from e

	    def _parse_sse_line(self, line: str):
	        """Parse one server-sent-event line.

	        Returns the decoded JSON object, ``_STREAM_DONE`` for the ``[DONE]``
	        terminator, or ``None`` for lines that carry no usable payload
	        (non-``data`` lines, invalid JSON, JSON that is not an object).

	        Raises:
	            HTTPException: 400 when the payload reports an upstream error.
	        """
	        line = line.strip()
	        # Accept "data:" with or without the optional space after the colon.
	        if not line.startswith("data:"):
	            return None

	        payload = line[5:].strip()
	        if payload == "[DONE]":
	            return self._STREAM_DONE

	        try:
	            parsed = json.loads(payload)
	        except json.JSONDecodeError:
	            return None  # Skip non-JSON lines
	        if not isinstance(parsed, dict):
	            return None

	        data = self._chunk_data(parsed)

	        # Check for errors first, at the top level or nested under "data".
	        top_error = parsed.get("error")
	        nested_error = data.get("error")
	        if top_error or nested_error:
	            error_detail = (
	                self._error_detail(top_error)
	                or self._error_detail(nested_error)
	                or "Unknown error from upstream"
	            )
	            logger.error(f"Upstream error: {error_detail}")
	            raise HTTPException(status_code=400, detail=f"Upstream error: {error_detail}")

	        # Remove unwanted fields. delta_content is deliberately left
	        # untouched here because tags may span several chunks; the
	        # aggregated text is transformed later.
	        data.pop("edit_index", None)
	        data.pop("edit_content", None)
	        return parsed

	    async def process_streaming_response(self, response: httpx.Response) -> AsyncGenerator[Dict[str, Any], None]:
	        """Yield the parsed ``data:`` events of the upstream response.

	        Iteration stops at the ``[DONE]`` marker or when the body ends. A
	        final line without a trailing newline is processed as well.
	        """
	        buffer = ""

	        async for chunk in response.aiter_text():
	            buffer += chunk
	            # Everything before the last newline is complete; the remainder
	            # stays buffered until more text arrives.
	            *lines, buffer = buffer.split('\n')

	            for line in lines:
	                event = self._parse_sse_line(line)
	                if event is self._STREAM_DONE:
	                    return
	                if event is not None:
	                    yield event

	        # Flush a last line that was not newline-terminated.
	        if buffer.strip():
	            event = self._parse_sse_line(buffer)
	            if event is not None and event is not self._STREAM_DONE:
	                yield event

	    async def handle_chat_completion(self, request: ChatCompletionRequest):
	        """Proxy the request and return a streaming or aggregated response."""
	        proxy_result = await self.proxy_request(request)
	        response = proxy_result["response"]

	        if self._is_streaming(request):
	            # For streaming responses, SHOW_THINK_TAGS setting is ignored
	            return StreamingResponse(
	                self.stream_response(response, request.model),
	                media_type="text/event-stream",
	                headers={
	                    "Cache-Control": "no-cache",
	                    "Connection": "keep-alive",
	                },
	            )

	        # For non-streaming responses, SHOW_THINK_TAGS setting applies
	        return await self.non_stream_response(response, request.model)

	    async def stream_response(self, response: httpx.Response, model: str) -> AsyncGenerator[str, None]:
	        """Re-emit each parsed upstream event as an SSE ``data:`` line, then ``[DONE]``.

	        ``model`` is accepted but not currently used.
	        """
	        async for parsed in self.process_streaming_response(response):
	            yield f"data: {json.dumps(parsed)}\n\n"
	        yield "data: [DONE]\n\n"

	    async def non_stream_response(self, response: httpx.Response, model: str) -> ChatCompletionResponse:
	        """Aggregate all upstream events into a single chat completion.

	        Raises:
	            HTTPException: 500 when the upstream produced no events.
	        """
	        chunks = []
	        async for parsed in self.process_streaming_response(response):
	            chunks.append(parsed)
	            logger.debug(f"Received chunk: {parsed}")  # Debug log

	        if not chunks:
	            raise HTTPException(status_code=500, detail="No response from upstream")

	        logger.info(f"Total chunks received: {len(chunks)}")
	        logger.debug(f"First chunk structure: {chunks[0]}")

	        # With SHOW_THINK_TAGS every chunk counts; otherwise only chunks from
	        # the "answer" phase do.
	        payloads = [self._chunk_data(chunk) for chunk in chunks]
	        if not settings.SHOW_THINK_TAGS:
	            payloads = [data for data in payloads if data.get("phase") == "answer"]

	        # A missing or null delta contributes nothing.
	        full_content = "".join(data.get("delta_content") or "" for data in payloads)

	        logger.info(f"Aggregated content length: {len(full_content)}")
	        logger.debug(f"Full aggregated content: {full_content}")  # Show full content for debugging

	        # Apply content transformation (including think tag filtering)
	        transformed_content = self.transform_content(full_content)

	        logger.info(f"Transformed content length: {len(transformed_content)}")
	        logger.debug(f"Transformed content: {transformed_content[:200]}...")

	        # Create OpenAI-compatible response
	        return ChatCompletionResponse(
	            id=self._chunk_data(chunks[0]).get("id", "chatcmpl-unknown"),
	            created=int(time.time()),
	            model=model,
	            choices=[{
	                "index": 0,
	                "message": {
	                    "role": "assistant",
	                    "content": transformed_content,
	                },
	                "finish_reason": "stop",
	            }],
	        )