Spaces:
Running
Running
Dmitry Beresnev committed on
Commit ·
e9b8569
1
Parent(s): 7d65cc9
fix logger
Browse files
app.py
CHANGED
|
@@ -420,6 +420,7 @@ def _format_body_for_log(content_type: str, body: bytes) -> str:
|
|
| 420 |
async def log_received_request(request: Request, call_next):
|
| 421 |
"""Log incoming requests and responses with basic metadata."""
|
| 422 |
request_id = uuid.uuid4().hex[:12]
|
|
|
|
| 423 |
start = time.perf_counter()
|
| 424 |
|
| 425 |
body_text = ""
|
|
@@ -450,9 +451,9 @@ async def log_received_request(request: Request, call_next):
|
|
| 450 |
elapsed_ms = (time.perf_counter() - start) * 1000
|
| 451 |
logger.info(f"⬅️ {request_id} {response.status_code} {elapsed_ms:.1f}ms")
|
| 452 |
return response
|
| 453 |
-
except Exception
|
| 454 |
elapsed_ms = (time.perf_counter() - start) * 1000
|
| 455 |
-
logger.
|
| 456 |
raise
|
| 457 |
|
| 458 |
|
|
@@ -873,7 +874,7 @@ async def switch_model(request: ModelSwitchRequest):
|
|
| 873 |
summary="Chat Completions",
|
| 874 |
description="High-performance OpenAI-compatible chat completions with connection pooling."
|
| 875 |
)
|
| 876 |
-
async def chat_completions(request: ChatCompletionRequest):
|
| 877 |
"""
|
| 878 |
OpenAI-compatible chat completions with performance optimizations.
|
| 879 |
|
|
@@ -882,9 +883,13 @@ async def chat_completions(request: ChatCompletionRequest):
|
|
| 882 |
- HTTP connection pooling
|
| 883 |
- Request metrics tracking
|
| 884 |
"""
|
|
|
|
| 885 |
try:
|
| 886 |
request_start = time.time()
|
| 887 |
|
|
|
|
|
|
|
|
|
|
| 888 |
# Get current model from cache
|
| 889 |
cached_model = model_cache.get(current_model)
|
| 890 |
if not cached_model:
|
|
@@ -910,7 +915,11 @@ async def chat_completions(request: ChatCompletionRequest):
|
|
| 910 |
|
| 911 |
return result
|
| 912 |
except aiohttp.ClientError as e:
|
|
|
|
| 913 |
raise HTTPException(status_code=500, detail=f"llama-server error: {str(e)}")
|
|
|
|
|
|
|
|
|
|
| 914 |
|
| 915 |
|
| 916 |
async def search_web_async(query: str, max_results: int = 5) -> list[dict]:
|
|
@@ -975,7 +984,7 @@ def format_search_context(query: str, search_results: list[dict]) -> str:
|
|
| 975 |
summary="Web-Augmented Chat Completions",
|
| 976 |
description="Chat completions with real-time web search and result caching."
|
| 977 |
)
|
| 978 |
-
async def web_chat_completions(request: WebChatRequest):
|
| 979 |
"""
|
| 980 |
Chat completions with web search augmentation.
|
| 981 |
|
|
@@ -984,6 +993,7 @@ async def web_chat_completions(request: WebChatRequest):
|
|
| 984 |
- LRU cache for search results (1 hour TTL)
|
| 985 |
- Parallel execution where possible
|
| 986 |
"""
|
|
|
|
| 987 |
try:
|
| 988 |
# Get the last user message as search query
|
| 989 |
user_messages = [msg for msg in request.messages if msg.get("role") == "user"]
|
|
@@ -1015,6 +1025,9 @@ Always cite sources when using information from the search results."""
|
|
| 1015 |
|
| 1016 |
augmented_messages.insert(-1, system_prompt)
|
| 1017 |
|
|
|
|
|
|
|
|
|
|
| 1018 |
# Get current model from cache
|
| 1019 |
cached_model = model_cache.get(current_model)
|
| 1020 |
if not cached_model:
|
|
@@ -1043,8 +1056,12 @@ Always cite sources when using information from the search results."""
|
|
| 1043 |
return result
|
| 1044 |
|
| 1045 |
except aiohttp.ClientError as e:
|
|
|
|
| 1046 |
raise HTTPException(status_code=500, detail=f"llama-server error: {str(e)}")
|
|
|
|
|
|
|
| 1047 |
except Exception as e:
|
|
|
|
| 1048 |
raise HTTPException(status_code=500, detail=f"Error: {str(e)}")
|
| 1049 |
|
| 1050 |
|
|
|
|
| 420 |
async def log_received_request(request: Request, call_next):
|
| 421 |
"""Log incoming requests and responses with basic metadata."""
|
| 422 |
request_id = uuid.uuid4().hex[:12]
|
| 423 |
+
request.state.request_id = request_id
|
| 424 |
start = time.perf_counter()
|
| 425 |
|
| 426 |
body_text = ""
|
|
|
|
| 451 |
elapsed_ms = (time.perf_counter() - start) * 1000
|
| 452 |
logger.info(f"⬅️ {request_id} {response.status_code} {elapsed_ms:.1f}ms")
|
| 453 |
return response
|
| 454 |
+
except Exception:
|
| 455 |
elapsed_ms = (time.perf_counter() - start) * 1000
|
| 456 |
+
logger.exception(f"⬅️ {request_id} 500 {elapsed_ms:.1f}ms unhandled error")
|
| 457 |
raise
|
| 458 |
|
| 459 |
|
|
|
|
| 874 |
summary="Chat Completions",
|
| 875 |
description="High-performance OpenAI-compatible chat completions with connection pooling."
|
| 876 |
)
|
| 877 |
+
async def chat_completions(request: ChatCompletionRequest, raw_request: Request):
|
| 878 |
"""
|
| 879 |
OpenAI-compatible chat completions with performance optimizations.
|
| 880 |
|
|
|
|
| 883 |
- HTTP connection pooling
|
| 884 |
- Request metrics tracking
|
| 885 |
"""
|
| 886 |
+
request_id = getattr(raw_request.state, "request_id", "-")
|
| 887 |
try:
|
| 888 |
request_start = time.time()
|
| 889 |
|
| 890 |
+
if not http_session or http_session.closed:
|
| 891 |
+
raise HTTPException(status_code=500, detail="HTTP session not initialized")
|
| 892 |
+
|
| 893 |
# Get current model from cache
|
| 894 |
cached_model = model_cache.get(current_model)
|
| 895 |
if not cached_model:
|
|
|
|
| 915 |
|
| 916 |
return result
|
| 917 |
except aiohttp.ClientError as e:
|
| 918 |
+
logger.exception(f"request_id={request_id} llama-server error")
|
| 919 |
raise HTTPException(status_code=500, detail=f"llama-server error: {str(e)}")
|
| 920 |
+
except Exception:
|
| 921 |
+
logger.exception(f"request_id={request_id} chat_completions error")
|
| 922 |
+
raise
|
| 923 |
|
| 924 |
|
| 925 |
async def search_web_async(query: str, max_results: int = 5) -> list[dict]:
|
|
|
|
| 984 |
summary="Web-Augmented Chat Completions",
|
| 985 |
description="Chat completions with real-time web search and result caching."
|
| 986 |
)
|
| 987 |
+
async def web_chat_completions(request: WebChatRequest, raw_request: Request):
|
| 988 |
"""
|
| 989 |
Chat completions with web search augmentation.
|
| 990 |
|
|
|
|
| 993 |
- LRU cache for search results (1 hour TTL)
|
| 994 |
- Parallel execution where possible
|
| 995 |
"""
|
| 996 |
+
request_id = getattr(raw_request.state, "request_id", "-")
|
| 997 |
try:
|
| 998 |
# Get the last user message as search query
|
| 999 |
user_messages = [msg for msg in request.messages if msg.get("role") == "user"]
|
|
|
|
| 1025 |
|
| 1026 |
augmented_messages.insert(-1, system_prompt)
|
| 1027 |
|
| 1028 |
+
if not http_session or http_session.closed:
|
| 1029 |
+
raise HTTPException(status_code=500, detail="HTTP session not initialized")
|
| 1030 |
+
|
| 1031 |
# Get current model from cache
|
| 1032 |
cached_model = model_cache.get(current_model)
|
| 1033 |
if not cached_model:
|
|
|
|
| 1056 |
return result
|
| 1057 |
|
| 1058 |
except aiohttp.ClientError as e:
|
| 1059 |
+
logger.exception(f"request_id={request_id} llama-server error")
|
| 1060 |
raise HTTPException(status_code=500, detail=f"llama-server error: {str(e)}")
|
| 1061 |
+
except HTTPException:
|
| 1062 |
+
raise
|
| 1063 |
except Exception as e:
|
| 1064 |
+
logger.exception(f"request_id={request_id} web_chat_completions error")
|
| 1065 |
raise HTTPException(status_code=500, detail=f"Error: {str(e)}")
|
| 1066 |
|
| 1067 |
|
logger.py
CHANGED
|
@@ -80,12 +80,10 @@ class Logger:
|
|
| 80 |
Returns:
|
| 81 |
Configured logger instance
|
| 82 |
"""
|
| 83 |
-
#
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
if not cls._initialized and name == "AGI":
|
| 88 |
-
logger.setLevel(level)
|
| 89 |
|
| 90 |
# Console handler with colors
|
| 91 |
console_handler = logging.StreamHandler(sys.stdout)
|
|
@@ -95,7 +93,7 @@ class Logger:
|
|
| 95 |
datefmt='%Y-%m-%d %H:%M:%S'
|
| 96 |
)
|
| 97 |
console_handler.setFormatter(console_formatter)
|
| 98 |
-
|
| 99 |
|
| 100 |
# File handler with rotation (if enabled)
|
| 101 |
if log_file:
|
|
@@ -113,13 +111,11 @@ class Logger:
|
|
| 113 |
datefmt='%Y-%m-%d %H:%M:%S'
|
| 114 |
)
|
| 115 |
file_handler.setFormatter(file_formatter)
|
| 116 |
-
|
| 117 |
|
| 118 |
-
# Prevent propagation to avoid duplicate logs
|
| 119 |
-
logger.propagate = False
|
| 120 |
cls._initialized = True
|
| 121 |
|
| 122 |
-
return
|
| 123 |
|
| 124 |
|
| 125 |
# Convenience function for easy import
|
|
@@ -161,4 +157,4 @@ if __name__ == "__main__":
|
|
| 161 |
api_logger.info("API logger initialized")
|
| 162 |
|
| 163 |
client_logger = get_logger("client")
|
| 164 |
-
client_logger.info("Client logger initialized")
|
|
|
|
| 80 |
Returns:
|
| 81 |
Configured logger instance
|
| 82 |
"""
|
| 83 |
+
# Always return a named logger, but configure root handlers once
|
| 84 |
+
if not cls._initialized:
|
| 85 |
+
root_logger = logging.getLogger()
|
| 86 |
+
root_logger.setLevel(level)
|
|
|
|
|
|
|
| 87 |
|
| 88 |
# Console handler with colors
|
| 89 |
console_handler = logging.StreamHandler(sys.stdout)
|
|
|
|
| 93 |
datefmt='%Y-%m-%d %H:%M:%S'
|
| 94 |
)
|
| 95 |
console_handler.setFormatter(console_formatter)
|
| 96 |
+
root_logger.addHandler(console_handler)
|
| 97 |
|
| 98 |
# File handler with rotation (if enabled)
|
| 99 |
if log_file:
|
|
|
|
| 111 |
datefmt='%Y-%m-%d %H:%M:%S'
|
| 112 |
)
|
| 113 |
file_handler.setFormatter(file_formatter)
|
| 114 |
+
root_logger.addHandler(file_handler)
|
| 115 |
|
|
|
|
|
|
|
| 116 |
cls._initialized = True
|
| 117 |
|
| 118 |
+
return logging.getLogger(name)
|
| 119 |
|
| 120 |
|
| 121 |
# Convenience function for easy import
|
|
|
|
| 157 |
api_logger.info("API logger initialized")
|
| 158 |
|
| 159 |
client_logger = get_logger("client")
|
| 160 |
+
client_logger.info("Client logger initialized")
|