Gemini
fix: Correct /api/talk routing

- Removed url_prefix from talk_bp Blueprint definition.
- Changed route decorator to @talk_bp.route("/api/talk", methods=["POST"]) to correctly map the chat endpoint.
3cfc0a2
"""
Advanced Talk API Routes
This module provides enhanced chat capabilities with features like:
- Multi-turn conversations
- System prompt management
- Conversation history tracking
- Streaming responses
- Advanced mode with multi-phase processing
"""
import json
import logging
from datetime import datetime
from typing import Iterator, Any

from flask import Blueprint, request, Response, jsonify
from flask_pydantic import validate

from lpm_kernel.api.common.responses import APIResponse
from lpm_kernel.api.services.local_llm_service import local_llm_service
from lpm_kernel.api.domains.kernel2.dto.chat_dto import ChatRequest
from lpm_kernel.api.domains.kernel2.dto.advanced_chat_dto import AdvancedChatRequest
from lpm_kernel.api.domains.kernel2.services.message_builder import MultiTurnMessageBuilder
from lpm_kernel.api.domains.kernel2.services.prompt_builder import (
    BasePromptStrategy,
    RoleBasedStrategy,
    KnowledgeEnhancedStrategy,
)
from lpm_kernel.api.domains.kernel2.services.chat_service import chat_service
from lpm_kernel.api.domains.kernel2.services.advanced_chat_service import advanced_chat_service

logger = logging.getLogger(__name__)

talk_bp = Blueprint("talk", __name__)
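

# NOTE: talk_bp is created without a url_prefix (see commit 3cfc0a2 above), so
# each route below declares its full path, e.g. "/api/talk". The blueprint is
# presumably registered elsewhere with a plain app.register_blueprint(talk_bp)
# (assumption: the registration site is not shown in this module).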
@talk_bp.route("/api/talk", methods=["POST"])
@validate()
def chat(body: ChatRequest):
"""
Chat endpoint - streaming response
Request: ChatRequest JSON object containing:
- message: str, current user message
- system_prompt: str, optional system prompt, default is "You are a helpful assistant."
- role_id: str, optional role UUID, if provided will use the role's system_prompt
- history: List[ChatMessage], message history
- enable_l0_retrieval: bool, whether to enable L0 knowledge retrieval, default true
- enable_l1_retrieval: bool, whether to enable L1 knowledge retrieval, default true
- temperature: float, temperature parameter for randomness, default 0.01
- max_tokens: int, maximum tokens to generate, default 2000
"""
    try:
        # 1. Check server status
        status = local_llm_service.get_server_status()
        if not status.is_running:
            # Wrap the error in a single-item iterator so the client still
            # receives a well-formed stream.
            error_response = APIResponse.error("LLaMA server is not running")
            return local_llm_service.handle_stream_response(iter([{"error": error_response}]))

        try:
            # 2. Use the chat service to handle the request
            response = chat_service.chat(
                request=body,
                stream=True,
                json_response=False,
            )
            return local_llm_service.handle_stream_response(response)
        except Exception as e:
            logger.error(f"API call failed: {str(e)}", exc_info=True)
            error_response = APIResponse.error(f"API call failed: {str(e)}")
            return local_llm_service.handle_stream_response(iter([{"error": error_response}]))
    except Exception as e:
        logger.error(f"Request processing failed: {str(e)}", exc_info=True)
        error_response = APIResponse.error(f"Request processing failed: {str(e)}")
        return local_llm_service.handle_stream_response(iter([{"error": error_response}]))


@talk_bp.route("/chat_json", methods=["POST"])
@validate()
def chat_json(body: ChatRequest):
"""
Chat endpoint - JSON response (non-streaming)
Used for testing if the model supports JSON structure responses.
Request: ChatRequest JSON object, same as chat endpoint
Response:
JSON object containing:
- id: str, unique response identifier
- object: str, object type, usually "chat.completion"
- created: int, creation timestamp
- model: str, model name used
- system_fingerprint: str, system fingerprint
- choices: List[Dict], containing generated content, each choice contains:
- index: int, choice index
- message: Dict, containing generated content
- role: str, role (usually "assistant")
- content: str, generated text content
- function_call: Optional[Dict], if there's a function call
"""
    try:
        # 1. Check server status
        status = local_llm_service.get_server_status()
        if not status.is_running:
            return jsonify(APIResponse.error("LLaMA server is not running"))

        try:
            # 2. Use the chat service to handle the request
            response = chat_service.chat(
                request=body,
                stream=False,
                json_response=True,
            )
            return jsonify(APIResponse.success(response))
        except Exception as e:
            logger.error(f"API call failed: {str(e)}", exc_info=True)
            return jsonify(APIResponse.error(f"API call failed: {str(e)}"))
    except Exception as e:
        logger.error(f"Request processing failed: {str(e)}", exc_info=True)
        return jsonify(APIResponse.error(f"Request processing failed: {str(e)}"))


@talk_bp.route("/advanced_chat", methods=["POST"])
@validate()
def advanced_chat(body: AdvancedChatRequest):
"""
Advanced chat endpoint - multi-phase processing
This endpoint implements a sophisticated chat process with multiple phases:
1. Requirement Enhancement - Enhances user's rough requirement with context
2. Expert Solution - Generates solution based on enhanced requirement
3. Validation and Refinement - Validates and improves solution iteratively
Request: AdvancedChatRequest JSON object containing:
- requirement: str, user's rough requirement
- max_iterations: int, maximum number of refinement iterations (default: 3)
- temperature: float, temperature for model generation (default: 0.01)
- enable_l0_retrieval: bool, whether to enable L0 knowledge retrieval (default: true)
- enable_l1_retrieval: bool, whether to enable L1 knowledge retrieval (default: true)
Response:
JSON object containing:
- enhanced_requirement: str, enhanced requirement with context
- solution: str, generated solution
- validation_history: List[ValidationResult], history of validation results
- final_format: Optional[str], final formatted solution if valid
"""
    try:
        # 1. Check server status
        status = local_llm_service.get_server_status()
        if not status.is_running:
            return jsonify(APIResponse.error("LLaMA server is not running"))

        try:
            # 2. Process the advanced chat request
            response = advanced_chat_service.process_advanced_chat(body)

            # Stream the final phase if the service returned an iterator
            if isinstance(response.final_response, Iterator):
                return local_llm_service.handle_stream_response(response.final_response)

            # Otherwise return the complete result as JSON
            return jsonify({
                "enhanced_requirement": response.enhanced_requirement,
                "solution": response.solution,
                "validation_history": [v.dict() for v in response.validation_history],
                "final_format": response.final_format,
                "final_response": response.final_response,
            })
        except Exception as e:
            logger.error(f"Advanced chat processing failed: {str(e)}", exc_info=True)
            return jsonify(APIResponse.error(f"Advanced chat processing failed: {str(e)}"))
    except Exception as e:
        logger.error(f"Request processing failed: {str(e)}", exc_info=True)
        return jsonify(APIResponse.error(f"Request processing failed: {str(e)}"))
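

# Minimal manual smoke test for the streaming endpoint. A sketch only: it
# assumes the Flask app that registers talk_bp is already running, and the
# host, port, and payload below are assumptions rather than part of this
# module.
if __name__ == "__main__":
    import requests

    with requests.post(
        "http://localhost:8002/api/talk",
        json={"message": "Hello"},
        stream=True,
        timeout=60,
    ) as resp:
        resp.raise_for_status()
        # Relay the stream chunk by chunk as the server emits it.
        for chunk in resp.iter_content(chunk_size=None, decode_unicode=True):
            print(chunk, end="", flush=True)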