"""
Advanced Talk API Routes
This module provides enhanced chat capabilities with features like:
- Multi-turn conversations
- System prompt management
- Conversation history tracking
- Streaming response
- Advanced mode with multi-phase processing
"""
import json
import logging
from datetime import datetime
from typing import Iterator, Any
from flask import Blueprint, request, Response, jsonify
from flask_pydantic import validate
from lpm_kernel.api.common.responses import APIResponse
from lpm_kernel.api.services.local_llm_service import local_llm_service
from lpm_kernel.api.domains.kernel2.dto.chat_dto import ChatRequest
from lpm_kernel.api.domains.kernel2.dto.advanced_chat_dto import AdvancedChatRequest
from lpm_kernel.api.domains.kernel2.services.message_builder import MultiTurnMessageBuilder
from lpm_kernel.api.domains.kernel2.services.prompt_builder import (
BasePromptStrategy,
RoleBasedStrategy,
KnowledgeEnhancedStrategy,
)
from lpm_kernel.api.domains.kernel2.services.chat_service import chat_service
from lpm_kernel.api.domains.kernel2.services.advanced_chat_service import advanced_chat_service
logger = logging.getLogger(__name__)
talk_bp = Blueprint("talk", __name__)
@talk_bp.route("/api/talk", methods=["POST"])
@validate()
def chat(body: ChatRequest):
"""
Chat endpoint - streaming response
Request: ChatRequest JSON object containing:
- message: str, current user message
- system_prompt: str, optional system prompt, default is "You are a helpful assistant."
- role_id: str, optional role UUID, if provided will use the role's system_prompt
- history: List[ChatMessage], message history
- enable_l0_retrieval: bool, whether to enable L0 knowledge retrieval, default true
- enable_l1_retrieval: bool, whether to enable L1 knowledge retrieval, default true
- temperature: float, temperature parameter for randomness, default 0.01
- max_tokens: int, maximum tokens to generate, default 2000
"""
try:
# 1. Check server status
status = local_llm_service.get_server_status()
if not status.is_running:
error_response = APIResponse.error("LLama server is not running")
return local_llm_service.handle_stream_response(iter([{"error": error_response}]))
try:
# 2. Use chat service to handle request
response = chat_service.chat(
request=body,
stream=True,
json_response=False,
)
return local_llm_service.handle_stream_response(response)
except Exception as e:
logger.error(f"API call failed: {str(e)}", exc_info=True)
error_response = APIResponse.error(f"API call failed: {str(e)}")
return local_llm_service.handle_stream_response(iter([{"error": error_response}]))
except Exception as e:
logger.error(f"Request processing failed: {str(e)}", exc_info=True)
error_response = APIResponse.error(f"Request processing failed: {str(e)}")
return local_llm_service.handle_stream_response(iter([{"error": error_response}]))
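
# Illustrative request body for the /api/talk streaming endpoint above. This is a sketch
# built only from the documented ChatRequest fields; the concrete message, the
# role/content shape of the history entries, and all values are assumptions, not fixtures:
#
#   POST /api/talk
#   {
#       "message": "What did I write about last week?",
#       "system_prompt": "You are a helpful assistant.",
#       "history": [
#           {"role": "user", "content": "Hi"},
#           {"role": "assistant", "content": "Hello! How can I help?"}
#       ],
#       "enable_l0_retrieval": true,
#       "enable_l1_retrieval": true,
#       "temperature": 0.01,
#       "max_tokens": 2000
#   }
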
@talk_bp.route("/chat_json", methods=["POST"])
@validate()
def chat_json(body: ChatRequest):
"""
Chat endpoint - JSON response (non-streaming)
Used for testing if the model supports JSON structure responses.
Request: ChatRequest JSON object, same as chat endpoint
Response:
JSON object containing:
- id: str, unique response identifier
- object: str, object type, usually "chat.completion"
- created: int, creation timestamp
- model: str, model name used
- system_fingerprint: str, system fingerprint
- choices: List[Dict], containing generated content, each choice contains:
- index: int, choice index
- message: Dict, containing generated content
- role: str, role (usually "assistant")
- content: str, generated text content
- function_call: Optional[Dict], if there's a function call
"""
try:
# 1. Check server status
status = local_llm_service.get_server_status()
if not status.is_running:
return jsonify(APIResponse.error("LLama server is not running"))
try:
# 2. Use chat service to handle request
response = chat_service.chat(
request=body,
stream=False,
json_response=True,
)
return jsonify(APIResponse.success(response))
except Exception as e:
logger.error(f"API call failed: {str(e)}", exc_info=True)
return jsonify(APIResponse.error(f"API call failed: {str(e)}"))
except Exception as e:
logger.error(f"Request processing failed: {str(e)}", exc_info=True)
return jsonify(APIResponse.error(f"Request processing failed: {str(e)}"))
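
# Illustrative shape of a successful /chat_json response body, reconstructed from the
# docstring above. This is a sketch: the APIResponse wrapper keys ("code", "message",
# "data") and every concrete value are assumptions, not captured output.
#
#   {
#       "code": 0,
#       "message": "success",
#       "data": {
#           "id": "chatcmpl-...",
#           "object": "chat.completion",
#           "created": 1700000000,
#           "model": "local-model",
#           "system_fingerprint": "fp_...",
#           "choices": [
#               {
#                   "index": 0,
#                   "message": {
#                       "role": "assistant",
#                       "content": "...generated text...",
#                       "function_call": null
#                   }
#               }
#           ]
#       }
#   }
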
@talk_bp.route("/advanced_chat", methods=["POST"])
@validate()
def advanced_chat(body: AdvancedChatRequest):
"""
Advanced chat endpoint - multi-phase processing
This endpoint implements a sophisticated chat process with multiple phases:
1. Requirement Enhancement - Enhances user's rough requirement with context
2. Expert Solution - Generates solution based on enhanced requirement
3. Validation and Refinement - Validates and improves solution iteratively
Request: AdvancedChatRequest JSON object containing:
- requirement: str, user's rough requirement
- max_iterations: int, maximum number of refinement iterations (default: 3)
- temperature: float, temperature for model generation (default: 0.01)
- enable_l0_retrieval: bool, whether to enable L0 knowledge retrieval (default: true)
- enable_l1_retrieval: bool, whether to enable L1 knowledge retrieval (default: true)
Response:
JSON object containing:
- enhanced_requirement: str, enhanced requirement with context
- solution: str, generated solution
- validation_history: List[ValidationResult], history of validation results
- final_format: Optional[str], final formatted solution if valid
"""
try:
# 1. Check server status
status = local_llm_service.get_server_status()
if not status.is_running:
return jsonify(APIResponse.error("LLama server is not running"))
try:
# 2. Process advanced chat request
response = advanced_chat_service.process_advanced_chat(body)
# process streaming responses
if isinstance(response.final_response, Iterator):
return local_llm_service.handle_stream_response(response.final_response)
# if not streaming, return the final response
return jsonify({
"enhanced_requirement": response.enhanced_requirement,
"solution": response.solution,
"validation_history": [v.dict() for v in response.validation_history],
"final_format": response.final_format,
"final_response": response.final_response
})
except Exception as e:
logger.error(f"Advanced chat processing failed: {str(e)}", exc_info=True)
return jsonify(APIResponse.error(f"Advanced chat processing failed: {str(e)}"))
except Exception as e:
logger.error(f"Request processing failed: {str(e)}", exc_info=True)
return jsonify(APIResponse.error(f"Request processing failed: {str(e)}"))
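
# The guarded block below is an illustrative local smoke test for the advanced chat
# endpoint. It is a sketch, not part of the API surface, and it assumes: the blueprint
# is registered without a URL prefix on a Flask app listening at http://127.0.0.1:5000,
# the local LLM server is running, and the `requests` package is installed. The
# requirement text and timeout are placeholder values.
if __name__ == "__main__":
    import requests

    payload = {
        "requirement": "Draft a weekly planning template for my projects",
        "max_iterations": 3,
        "temperature": 0.01,
        "enable_l0_retrieval": True,
        "enable_l1_retrieval": True,
    }
    resp = requests.post("http://127.0.0.1:5000/advanced_chat", json=payload, timeout=600)
    print(resp.status_code)
    print(resp.text)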