Spaces:

vivek0211
/

demo-app

Sleeping

demo-app / src /rag /document_processing /models.py

Vivek kumar

RAG based QNA

e885bfa about 1 month ago

1.71 kB

	"""Data models for document processing"""

	from pydantic import BaseModel, Field
	from typing import Optional, List


	class DocumentChunk(BaseModel):
	"""Represents a single document chunk with metadata"""

	chunk_id: str = Field(..., description="Unique identifier for the chunk")
	content: str = Field(..., description="The actual text content")
	source_doc: str = Field(..., description="Original document source/filename")
	chunk_index: int = Field(..., description="Index of this chunk in the document")
	start_char: int = Field(..., description="Starting character position in original document")
	end_char: int = Field(..., description="Ending character position in original document")
	token_count: int = Field(..., description="Number of tokens in this chunk")
	metadata: dict = Field(default_factory=dict, description="Additional metadata")
	embedding: Optional[List[float]] = Field(None, description="Vector embedding (optional)")


	class Document(BaseModel):
	"""Represents a source document"""

	doc_id: str = Field(..., description="Unique document identifier")
	filename: str = Field(..., description="Source filename")
	content: str = Field(..., description="Full document content")
	doc_type: str = Field(default="product_manual", description="Type of document")
	metadata: dict = Field(default_factory=dict, description="Document-level metadata")


	class RetrievalResult(BaseModel):
	"""Result from retrieval"""

	chunk_id: str
	content: str
	source_doc: str
	score: float = Field(..., description="Relevance score (0-1)")
	search_type: str = Field(..., description="Type of search that returned this result")