| """ | |
| Advanced Talk API Routes | |
| This module provides enhanced chat capabilities with features like: | |
| - Multi-turn conversations | |
| - System prompt management | |
| - Conversation history tracking | |
| - Streaming response | |
| - Advanced mode with multi-phase processing | |
| """ | |
import json
import logging
from datetime import datetime
from typing import Iterator, Any

from flask import Blueprint, request, Response, jsonify
from flask_pydantic import validate

from lpm_kernel.api.common.responses import APIResponse
from lpm_kernel.api.services.local_llm_service import local_llm_service
from lpm_kernel.api.domains.kernel2.dto.chat_dto import ChatRequest
from lpm_kernel.api.domains.kernel2.dto.advanced_chat_dto import AdvancedChatRequest
from lpm_kernel.api.domains.kernel2.services.message_builder import MultiTurnMessageBuilder
from lpm_kernel.api.domains.kernel2.services.prompt_builder import (
    BasePromptStrategy,
    RoleBasedStrategy,
    KnowledgeEnhancedStrategy,
)
from lpm_kernel.api.domains.kernel2.services.chat_service import chat_service
from lpm_kernel.api.domains.kernel2.services.advanced_chat_service import advanced_chat_service

logger = logging.getLogger(__name__)

talk_bp = Blueprint("talk", __name__)
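
# The blueprint above is expected to be mounted on the Flask application elsewhere in
# the project. A minimal sketch of that wiring is given below; the app construction
# shown here is an illustration only and is not how the real application factory is
# defined in this project.
def _example_register_blueprint():  # illustrative only, not used by the application
    from flask import Flask

    app = Flask(__name__)
    app.register_blueprint(talk_bp)
    return app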


@talk_bp.route("/api/talk", methods=["POST"])
@validate()
def chat(body: ChatRequest):
    """
    Chat endpoint - streaming response

    Request: ChatRequest JSON object containing:
        - message: str, current user message
        - system_prompt: str, optional system prompt, defaults to "You are a helpful assistant."
        - role_id: str, optional role UUID; if provided, the role's system_prompt is used
        - history: List[ChatMessage], message history
        - enable_l0_retrieval: bool, whether to enable L0 knowledge retrieval, default true
        - enable_l1_retrieval: bool, whether to enable L1 knowledge retrieval, default true
        - temperature: float, sampling temperature controlling randomness, default 0.01
        - max_tokens: int, maximum number of tokens to generate, default 2000
    """
    try:
        # 1. Check server status
        status = local_llm_service.get_server_status()
        if not status.is_running:
            error_response = APIResponse.error("LLama server is not running")
            return local_llm_service.handle_stream_response(iter([{"error": error_response}]))

        try:
            # 2. Use chat service to handle request
            response = chat_service.chat(
                request=body,
                stream=True,
                json_response=False,
            )
            return local_llm_service.handle_stream_response(response)
        except Exception as e:
            logger.error(f"API call failed: {str(e)}", exc_info=True)
            error_response = APIResponse.error(f"API call failed: {str(e)}")
            return local_llm_service.handle_stream_response(iter([{"error": error_response}]))
    except Exception as e:
        logger.error(f"Request processing failed: {str(e)}", exc_info=True)
        error_response = APIResponse.error(f"Request processing failed: {str(e)}")
        return local_llm_service.handle_stream_response(iter([{"error": error_response}]))
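
# Illustrative client sketch for the streaming endpoint above. The host and port
# (localhost:8002) and the shape of the streamed chunks are assumptions made for the
# example; only the request fields documented in chat() come from this module.
def _example_stream_talk():  # illustrative only, not used by the application
    import requests

    payload = {
        "message": "Summarize my notes from last week.",
        "system_prompt": "You are a helpful assistant.",
        "history": [],
        "enable_l0_retrieval": True,
        "enable_l1_retrieval": True,
        "temperature": 0.01,
        "max_tokens": 2000,
    }
    # Stream the response line by line as it is produced by the server
    with requests.post("http://localhost:8002/api/talk", json=payload, stream=True) as resp:
        for line in resp.iter_lines(decode_unicode=True):
            if line:
                print(line)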


def chat_json(body: ChatRequest):
    """
    Chat endpoint - JSON response (non-streaming)

    Used for testing whether the model supports JSON-structured responses.

    Request: ChatRequest JSON object, same as the chat endpoint

    Response:
        JSON object containing:
        - id: str, unique response identifier
        - object: str, object type, usually "chat.completion"
        - created: int, creation timestamp
        - model: str, model name used
        - system_fingerprint: str, system fingerprint
        - choices: List[Dict], containing the generated content; each choice contains:
            - index: int, choice index
            - message: Dict, containing the generated content
                - role: str, role (usually "assistant")
                - content: str, generated text content
                - function_call: Optional[Dict], present if there is a function call
    """
    try:
        # 1. Check server status
        status = local_llm_service.get_server_status()
        if not status.is_running:
            return jsonify(APIResponse.error("LLama server is not running"))

        try:
            # 2. Use chat service to handle request
            response = chat_service.chat(
                request=body,
                stream=False,
                json_response=True,
            )
            return jsonify(APIResponse.success(response))
        except Exception as e:
            logger.error(f"API call failed: {str(e)}", exc_info=True)
            return jsonify(APIResponse.error(f"API call failed: {str(e)}"))
    except Exception as e:
        logger.error(f"Request processing failed: {str(e)}", exc_info=True)
        return jsonify(APIResponse.error(f"Request processing failed: {str(e)}"))
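
# Helper sketch showing how a client might read the non-streaming payload documented
# in chat_json() above (an OpenAI-style "chat.completion" object wrapped by
# APIResponse.success). The wrapper key name ("data") is an assumption; the inner
# fields follow the docstring.
def _example_extract_reply(api_response: dict) -> str:  # illustrative only
    completion = api_response.get("data", api_response)  # unwrap if wrapped by APIResponse
    choices = completion.get("choices", [])
    if not choices:
        return ""
    return choices[0].get("message", {}).get("content", "")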


def advanced_chat(body: AdvancedChatRequest):
    """
    Advanced chat endpoint - multi-phase processing

    This endpoint implements a sophisticated chat process with multiple phases:
    1. Requirement Enhancement - enhances the user's rough requirement with context
    2. Expert Solution - generates a solution based on the enhanced requirement
    3. Validation and Refinement - validates and improves the solution iteratively

    Request: AdvancedChatRequest JSON object containing:
        - requirement: str, user's rough requirement
        - max_iterations: int, maximum number of refinement iterations (default: 3)
        - temperature: float, temperature for model generation (default: 0.01)
        - enable_l0_retrieval: bool, whether to enable L0 knowledge retrieval (default: true)
        - enable_l1_retrieval: bool, whether to enable L1 knowledge retrieval (default: true)

    Response:
        JSON object containing:
        - enhanced_requirement: str, enhanced requirement with context
        - solution: str, generated solution
        - validation_history: List[ValidationResult], history of validation results
        - final_format: Optional[str], final formatted solution if valid
        - final_response: the final model response (streamed back instead when the
          service returns an iterator)
    """
    try:
        # 1. Check server status
        status = local_llm_service.get_server_status()
        if not status.is_running:
            return jsonify(APIResponse.error("LLama server is not running"))

        try:
            # 2. Process advanced chat request
            response = advanced_chat_service.process_advanced_chat(body)

            # Process streaming responses
            if isinstance(response.final_response, Iterator):
                return local_llm_service.handle_stream_response(response.final_response)

            # If not streaming, return the final response
            return jsonify({
                "enhanced_requirement": response.enhanced_requirement,
                "solution": response.solution,
                "validation_history": [v.dict() for v in response.validation_history],
                "final_format": response.final_format,
                "final_response": response.final_response,
            })
        except Exception as e:
            logger.error(f"Advanced chat processing failed: {str(e)}", exc_info=True)
            return jsonify(APIResponse.error(f"Advanced chat processing failed: {str(e)}"))
    except Exception as e:
        logger.error(f"Request processing failed: {str(e)}", exc_info=True)
        return jsonify(APIResponse.error(f"Request processing failed: {str(e)}"))
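
# Illustrative request body for the advanced chat endpoint above, built only from the
# fields documented in advanced_chat(). The values are placeholders; the URL is not
# shown because the route registration for this handler does not appear in this module.
def _example_advanced_chat_body() -> dict:  # illustrative only
    return {
        "requirement": "Draft a weekly status report from my meeting notes.",
        "max_iterations": 3,
        "temperature": 0.01,
        "enable_l0_retrieval": True,
        "enable_l1_retrieval": True,
    }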