Ragcore / app /models /document.py
NinjainPJs's picture
Initial deploy: RagCore RAG system with hybrid search and Gradio UI
a34068e
raw
history blame contribute delete
877 Bytes
from datetime import datetime
from pydantic import BaseModel, Field
from app.utils.helpers import generate_id
class DocumentMetadata(BaseModel):
    """Descriptive metadata carried by the ``Document`` and ``Chunk`` models.

    Every field has a default, so ``DocumentMetadata()`` can be built with
    no arguments.
    """

    # NOTE(review): presumably where the document came from (path/URL) — confirm
    # against the code that populates it.
    source: str = Field(default="")
    # NOTE(review): presumably a format label for the document — confirm.
    doc_type: str = Field(default="")
    title: str | None = Field(default=None)
    created_date: datetime | None = Field(default=None)
    # default_factory gives each instance its own list.
    tags: list[str] = Field(default_factory=list)
    page_count: int | None = Field(default=None)
class Chunk(BaseModel):
    """A piece of text belonging to a document, with its position fields.

    All fields are defaulted; ``chunk_id`` is filled in by ``generate_id``
    when the caller does not supply one.
    """

    # Identifier produced by generate_id (app.utils.helpers); its format is
    # not visible in this file.
    chunk_id: str = Field(default_factory=generate_id)
    # NOTE(review): presumably the document_id of the owning Document — confirm.
    document_id: str = Field(default="")
    text: str = Field(default="")
    # A new DocumentMetadata instance is created per chunk when omitted.
    metadata: DocumentMetadata = Field(default_factory=DocumentMetadata)
    chunk_index: int = Field(default=0)
    # NOTE(review): start_char/end_char look like character offsets into the
    # source text; whether end_char is inclusive is not shown here — confirm.
    start_char: int = Field(default=0)
    end_char: int = Field(default=0)
class Document(BaseModel):
    """A document record: identifier, filename, metadata, chunks and raw text.

    All fields are defaulted; ``document_id`` is filled in by ``generate_id``
    when the caller does not supply one.
    """

    # Identifier produced by generate_id (app.utils.helpers); its format is
    # not visible in this file.
    document_id: str = Field(default_factory=generate_id)
    filename: str = Field(default="")
    # A new DocumentMetadata instance is created per document when omitted.
    metadata: DocumentMetadata = Field(default_factory=DocumentMetadata)
    # Starts as a fresh empty list for each instance.
    chunks: list[Chunk] = Field(default_factory=list)
    # NOTE(review): presumably the full extracted text the chunks were cut
    # from — confirm against the ingestion code.
    raw_text: str = Field(default="")