File size: 1,705 Bytes
e885bfa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
"""Data models for document processing"""

from pydantic import BaseModel, Field
from typing import Optional, List


class DocumentChunk(BaseModel):
    """Represents a single document chunk with metadata"""

    # NOTE: the class docstring and every ``description`` string are surfaced
    # by pydantic in the generated schema, so treat them as runtime text.

    # --- Identity and payload (all required) -------------------------------
    chunk_id: str = Field(
        default=...,
        description="Unique identifier for the chunk",
    )
    content: str = Field(
        default=...,
        description="The actual text content",
    )
    source_doc: str = Field(
        default=...,
        description="Original document source/filename",
    )

    # --- Position inside the source document (all required) ----------------
    chunk_index: int = Field(
        default=...,
        description="Index of this chunk in the document",
    )
    start_char: int = Field(
        default=...,
        description="Starting character position in original document",
    )
    end_char: int = Field(
        default=...,
        description="Ending character position in original document",
    )
    token_count: int = Field(
        default=...,
        description="Number of tokens in this chunk",
    )

    # --- Optional extras ----------------------------------------------------
    # default_factory gives each instance its own dict instead of a shared one.
    metadata: dict = Field(
        default_factory=dict,
        description="Additional metadata",
    )
    # Left as None until an embedding is attached.
    embedding: Optional[List[float]] = Field(
        default=None,
        description="Vector embedding (optional)",
    )


class Document(BaseModel):
    """Represents a source document"""

    # NOTE: the class docstring and every ``description`` string are surfaced
    # by pydantic in the generated schema, so treat them as runtime text.

    # --- Required fields ----------------------------------------------------
    doc_id: str = Field(
        default=...,
        description="Unique document identifier",
    )
    filename: str = Field(
        default=...,
        description="Source filename",
    )
    content: str = Field(
        default=...,
        description="Full document content",
    )

    # --- Fields with defaults -----------------------------------------------
    # Falls back to "product_manual" when the caller does not supply a type.
    doc_type: str = Field(
        default="product_manual",
        description="Type of document",
    )
    # default_factory gives each instance its own dict instead of a shared one.
    metadata: dict = Field(
        default_factory=dict,
        description="Document-level metadata",
    )


class RetrievalResult(BaseModel):
    """Result from retrieval"""

    # NOTE: the class docstring and every ``description`` string are surfaced
    # by pydantic in the generated schema, so treat them as runtime text.

    # Required, declared without Field metadata. Presumably these mirror the
    # same-named DocumentChunk fields -- confirm against the retriever.
    chunk_id: str
    content: str
    source_doc: str

    # Required. The 0-1 range is stated in the description only; no bounds
    # are enforced by this model.
    score: float = Field(
        default=...,
        description="Relevance score (0-1)",
    )
    # Required free-form string; the set of valid values is not defined here.
    search_type: str = Field(
        default=...,
        description="Type of search that returned this result",
    )