File size: 877 Bytes
a34068e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
from datetime import datetime

from pydantic import BaseModel, Field

from app.utils.helpers import generate_id


class DocumentMetadata(BaseModel):
    # Metadata record used as the type of the `metadata` field on both
    # Chunk and Document. Every field carries a default, so the model can
    # be built with no arguments.

    # Plain string fields default to the empty string (not None).
    source: str = Field(default="")
    doc_type: str = Field(default="")

    # Nullable fields default to None.
    title: str | None = Field(default=None)
    created_date: datetime | None = Field(default=None)

    # default_factory builds a new list for each instance.
    tags: list[str] = Field(default_factory=list)

    page_count: int | None = Field(default=None)


class Chunk(BaseModel):
    # Model for a single chunk record: an id, a document id, the chunk
    # text, a DocumentMetadata instance and three integer position fields.
    # Every field carries a default.

    # Populated by calling generate_id() (imported from app.utils.helpers)
    # whenever no chunk_id is supplied.
    chunk_id: str = Field(default_factory=generate_id)

    # NOTE(review): presumably the document_id of the owning Document;
    # nothing in this model enforces that link -- confirm against callers.
    document_id: str = Field(default="")

    text: str = Field(default="")

    # A fresh, all-defaults DocumentMetadata is created when none is given.
    metadata: DocumentMetadata = Field(default_factory=DocumentMetadata)

    # NOTE(review): chunk_index looks like the chunk's position within its
    # document, and start_char/end_char like character offsets into the
    # source text; whether end_char is inclusive is not stated -- confirm.
    chunk_index: int = Field(default=0)
    start_char: int = Field(default=0)
    end_char: int = Field(default=0)


class Document(BaseModel):
    # Model for a document record: an id, a filename, a DocumentMetadata
    # instance, a list of Chunk objects and the raw text. Every field
    # carries a default, so the model can be built with no arguments.

    # Populated by calling generate_id() (imported from app.utils.helpers)
    # whenever no document_id is supplied.
    document_id: str = Field(default_factory=generate_id)

    filename: str = Field(default="")

    # A fresh, all-defaults DocumentMetadata is created when none is given.
    metadata: DocumentMetadata = Field(default_factory=DocumentMetadata)

    # default_factory builds a new, empty list for each instance.
    chunks: list[Chunk] = Field(default_factory=list)

    raw_text: str = Field(default="")