# Pydantic request/response models for the API server (chat completion,
# generation, embeddings, query expansion, chunk reranking, system status).
| import json | |
| from pathlib import Path | |
| from pydantic import BaseModel, Field, create_model, ConfigDict | |
| from typing import List, Optional, Dict, Union | |
| from time import time | |
class QueryExpansionRequest(BaseModel):
    """Input payload for the query-expansion endpoint."""

    query: str  # raw user query to expand
    system_message: Optional[str] = None  # optional system-prompt override
# Load the query-expansion prompt template; its example_response seeds the
# JSON-schema example attached to the generated response model below.
template_path = Path(__file__).parent / "prompt_templates" / "query_expansion.json"
# Explicit encoding: the default is platform-dependent and can mis-decode
# non-ASCII template content on some systems.
with open(template_path, encoding="utf-8") as f:
    template = json.load(f)

# Model configuration exposing the template's example in the JSON schema.
model_config = ConfigDict(
    json_schema_extra={
        'example': template['example_response']
    }
)
# Create the response model based on the template's schema.
# NOTE: the configuration must be passed through the reserved `__config__`
# argument of create_model. Passing it as a plain `model_config=` kwarg makes
# pydantic v2 interpret it as a field definition named "model_config" — a
# reserved attribute — which fails at import time.
QueryExpansionResponse = create_model(
    'QueryExpansionResponse',
    original_query=(str, ...),
    expanded_query=(str, ...),
    search_terms=(List[str], ...),
    call_rag=(bool, ...),
    __config__=model_config,
)
class ChatMessage(BaseModel):
    """A single message in an OpenAI-style chat transcript."""

    role: str  # e.g. "system" / "user" / "assistant"; not validated here
    content: str  # message text
class ChatCompletionRequest(BaseModel):
    """OpenAI-compatible chat-completion request body."""

    model: str  # model identifier requested by the client
    messages: List[ChatMessage]  # full conversation history
    stream: bool = False  # non-streaming unless explicitly requested
class ChatCompletionMessage(BaseModel):
    """Assistant message carried inside a completion choice."""

    role: str = "assistant"  # responses are always attributed to the assistant
    content: str
class ChatCompletionChoice(BaseModel):
    """One generated completion choice (OpenAI response shape)."""

    index: int = 0  # position of this choice in the choices list
    message: ChatCompletionMessage
    # NOTE(review): Optional[None] admits only None, so logprobs is always
    # null — presumably intentional since log-probs are never produced here.
    logprobs: Optional[None] = None
    finish_reason: str = "stop"  # fixed: generation always reports "stop"
class CompletionTokenDetails(BaseModel):
    """Breakdown of completion tokens; every counter defaults to zero."""

    reasoning_tokens: int = 0
    accepted_prediction_tokens: int = 0
    rejected_prediction_tokens: int = 0
class CompletionUsage(BaseModel):
    """Token accounting for a completion.

    Defaults are placeholder values, not measured counts (per the original
    inline note).
    """

    prompt_tokens: int = 9  # placeholder
    completion_tokens: int = 12  # placeholder
    total_tokens: int = 21  # placeholder
    completion_tokens_details: CompletionTokenDetails = Field(default_factory=CompletionTokenDetails)
class ChatCompletionResponse(BaseModel):
    """OpenAI-compatible chat-completion response envelope."""

    id: str = Field(default="chatcmpl-123")  # placeholder id
    object: str = "chat.completion"
    created: int = Field(default_factory=lambda: int(time()))  # unix seconds
    model: str = "gpt-4o-mini"
    system_fingerprint: str = "fp_44709d6fcb"  # placeholder fingerprint
    choices: List[ChatCompletionChoice]
    usage: CompletionUsage = Field(default_factory=CompletionUsage)

    # Fixed: the decorator was missing. Without @classmethod,
    # ChatCompletionResponse.from_response(content) binds `content` to `cls`
    # and the alternate constructor fails.
    @classmethod
    def from_response(cls, content: str, model: str = "gpt-4o-mini") -> "ChatCompletionResponse":
        """Create a ChatCompletionResponse from a simple response string."""
        return cls(
            model=model,
            choices=[
                ChatCompletionChoice(
                    message=ChatCompletionMessage(content=content)
                )
            ]
        )
class GenerateRequest(BaseModel):
    """Plain text-generation request."""

    prompt: str  # text to complete
    system_message: Optional[str] = None  # optional system-prompt override
    max_new_tokens: Optional[int] = None  # generation cap; None leaves it unset
class EmbeddingRequest(BaseModel):
    """Request body for embedding a single piece of text."""

    text: str
class EmbeddingResponse(BaseModel):
    """Embedding vector plus its dimensionality."""

    embedding: List[float]
    dimension: int  # length of `embedding`
class ModelStatus(BaseModel):
    """Load state of the generation model."""

    is_loaded: bool
    current_model: Optional[str]  # identifier of the loaded model, if any
    has_chat_template: Optional[bool] = None  # None when not yet determined
class EmbeddingModelStatus(BaseModel):
    """Load state of the embedding model."""

    is_loaded: bool
    current_model: Optional[str]  # identifier of the loaded model, if any
class ModelStatusInfo(BaseModel):
    """Combined status of the generation and embedding models."""

    generation_model: ModelStatus
    embedding_model: EmbeddingModelStatus
class SystemStatusResponse(BaseModel):
    """Pydantic model for system status response.

    Hardware sections are optional so partial probes still validate; only
    the model status is mandatory.
    """

    cpu: Optional[Dict[str, Union[float, str]]] = None
    memory: Optional[Dict[str, Union[float, str]]] = None
    gpu: Optional[Dict[str, Union[bool, str, float]]] = None
    storage: Optional[Dict[str, str]] = None
    model: ModelStatusInfo
class ValidationResponse(BaseModel):
    """Result of a configuration/model/folder validation pass."""

    config_validation: Dict[str, bool]  # per-check pass/fail
    model_validation: Dict[str, bool]
    folder_validation: Dict[str, bool]
    overall_status: str  # summary verdict
    issues: List[str]  # human-readable problem descriptions
class ChunkRerankRequest(BaseModel):
    """Input payload for the chunk-reranking endpoint."""

    query: str  # query to rank chunks against
    chunks: List[str]  # candidate text chunks
    system_message: Optional[str] = None  # optional system-prompt override
# Load the chunk-rerank template; its example_response is attached to
# ChunkRerankResponse's JSON schema below. (This intentionally rebinds the
# module-level template_path/template names used earlier.)
template_path = Path(__file__).parent / "prompt_templates" / "chunk_rerank.json"
# Explicit encoding: the default is platform-dependent.
with open(template_path, encoding="utf-8") as f:
    template = json.load(f)
example = template['example_response']
class ChunkRerankResponse(BaseModel):
    """Response model for chunk reranking, based on template schema."""

    # Pydantic v2 configuration. Replaces the deprecated nested `class Config`
    # to match the ConfigDict usage elsewhere in this module.
    model_config = ConfigDict(json_schema_extra={"example": example})

    original_query: str = Field(..., description="The exact query being processed")
    # `max_length` is the pydantic v2 constraint for list length; `max_items`
    # is the deprecated v1 spelling.
    ranked_chunks: List[str] = Field(
        ...,
        description="Top 5 most relevant chunks in order of importance",
        max_length=5,
    )
    got_chunks: bool = Field(..., description="Whether any relevant chunks were found")