Hugging Face Spaces (Space status: Sleeping)
Commit: "add sse"
Browse files
main.py
CHANGED
|
@@ -19,6 +19,8 @@ from fastapi.responses import StreamingResponse, JSONResponse
|
|
| 19 |
from pydantic import BaseModel, Field
|
| 20 |
from openai import OpenAI
|
| 21 |
from prompts import *
|
|
|
|
|
|
|
| 22 |
# ============================================================================
|
| 23 |
# Configuration and Setup
|
| 24 |
# ============================================================================
|
|
@@ -277,6 +279,7 @@ async def llm_agent(query: LLMAgentQueryModel, background_tasks: BackgroundTasks
|
|
| 277 |
logger.info(f"Completed LLM agent response for query: {query.prompt}")
|
| 278 |
|
| 279 |
return StreamingResponse(process_response(), media_type="text/event-stream")
|
|
|
|
| 280 |
|
| 281 |
@app.post("/v2/llm-agent")
|
| 282 |
async def llm_agent_v2(query: LLMAgentQueryModel, background_tasks: BackgroundTasks, api_key: str = Depends(verify_api_key)):
|
|
@@ -323,6 +326,54 @@ async def llm_agent_v2(query: LLMAgentQueryModel, background_tasks: BackgroundTa
|
|
| 323 |
return StreamingResponse(process_response(), media_type="text/event-stream")
|
| 324 |
|
| 325 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 326 |
import edge_tts
|
| 327 |
import io
|
| 328 |
|
|
|
|
| 19 |
from pydantic import BaseModel, Field
|
| 20 |
from openai import OpenAI
|
| 21 |
from prompts import *
|
| 22 |
+
import json
|
| 23 |
+
from sse_starlette.sse import EventSourceResponse
|
| 24 |
# ============================================================================
|
| 25 |
# Configuration and Setup
|
| 26 |
# ============================================================================
|
|
|
|
| 279 |
logger.info(f"Completed LLM agent response for query: {query.prompt}")
|
| 280 |
|
| 281 |
return StreamingResponse(process_response(), media_type="text/event-stream")
|
| 282 |
+
|
| 283 |
|
| 284 |
@app.post("/v2/llm-agent")
|
| 285 |
async def llm_agent_v2(query: LLMAgentQueryModel, background_tasks: BackgroundTasks, api_key: str = Depends(verify_api_key)):
|
|
|
|
| 326 |
return StreamingResponse(process_response(), media_type="text/event-stream")
|
| 327 |
|
| 328 |
|
| 329 |
+
@app.post("/v3/llm-agent")
async def llm_agent_v3(query: LLMAgentQueryModel, background_tasks: BackgroundTasks, api_key: str = Depends(verify_api_key)):
    """
    LLM agent endpoint (v3) that streams responses over Server-Sent Events.

    Maintains per-conversation history, accepts a custom system message, and
    allows selection of different models. Requires API Key authentication via
    the X-API-Key header.

    Args:
        query: Request body carrying prompt, model_id, user_id, and optional
            conversation_id / system_message.
        background_tasks: FastAPI task runner used to persist the exchange
            after the stream finishes.
        api_key: Injected by the verify_api_key dependency; not used directly.

    Returns:
        EventSourceResponse streaming JSON-encoded chunks of the form
        {"type": "response", "content": <text>}.
    """
    # Renamed from llm_agent_v2: the original copy-paste name shadowed the
    # /v2 handler at module scope. Route paths are unchanged, so API callers
    # are unaffected.
    logger.info(f"Received LLM agent query: {query.prompt}")

    # Generate a new conversation ID if not provided
    if not query.conversation_id:
        query.conversation_id = str(uuid4())

    # Initialize or retrieve conversation history
    if query.conversation_id not in conversations:
        system_message = query.system_message or "You are a helpful assistant."
        conversations[query.conversation_id] = [
            {"role": "system", "content": system_message}
        ]
    elif query.system_message:
        # Update system message if provided
        conversations[query.conversation_id][0] = {"role": "system", "content": query.system_message}

    # Add user's prompt to conversation history
    conversations[query.conversation_id].append({"role": "user", "content": query.prompt})
    last_activity[query.conversation_id] = time.time()

    # Limit tokens in the conversation history
    limited_conversation = limit_conversation_history(conversations[query.conversation_id])

    def process_response():
        """Yield model output chunks as JSON, then persist the exchange."""
        full_response = ""
        for content in chat_with_llama_stream(limited_conversation, model=query.model_id):
            full_response += content
            # NOTE(review): sse_starlette already frames each yielded string
            # as an SSE "data:" line; the trailing "\n" inside the payload
            # looks like a leftover from the NDJSON StreamingResponse variant
            # — confirm clients depend on it before removing.
            yield json.dumps({"type": "response","content": content}) + "\n"

        # Add the assistant's response to the conversation history
        conversations[query.conversation_id].append({"role": "assistant", "content": full_response})

        # Persist only after the full response has been streamed
        background_tasks.add_task(update_db, query.user_id, query.conversation_id, query.prompt, full_response)
        logger.info(f"Completed LLM agent response for query: {query.prompt}")

    return EventSourceResponse(
        process_response(),
        media_type="text/event-stream"
    )
|
| 375 |
+
|
| 376 |
+
|
| 377 |
import edge_tts
|
| 378 |
import io
|
| 379 |
|