Spaces:
Sleeping
Sleeping
File size: 3,521 Bytes
409c17a e12568c 409c17a e12568c 409c17a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
"""
Infrastructure - Database Models (SQLAlchemy)
"""
import uuid
from datetime import datetime, timezone

from sqlalchemy import Column, DateTime, Integer, String, Text, BigInteger, Index
from sqlalchemy.dialects.postgresql import JSONB, UUID
from sqlalchemy.ext.declarative import declarative_base
# Shared declarative base: every ORM model class in this module subclasses it.
Base = declarative_base()
class DocumentModel(Base):
    """ORM mapping for the ``documents`` table.

    One row per stored document: its title, file details, storage path,
    owning department, a free-form status string, and JSONB metadata.

    Python-side defaults are callables so that every INSERT gets a fresh
    value: ``dict`` instead of a shared ``{}`` literal, and a timezone-aware
    UTC "now" instead of the naive, deprecated ``datetime.utcnow``.
    """

    __tablename__ = "documents"

    # Primary key, generated client-side as a random UUID.
    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)

    # Descriptive fields and file details.
    title = Column(String(500), nullable=False, index=True)
    filename = Column(String(255), nullable=False)
    file_type = Column(String(50), nullable=False, index=True)
    file_size = Column(BigInteger, nullable=False)
    storage_path = Column(String(1000), nullable=False)
    department = Column(String(100), nullable=False, index=True)

    # Free-form status string; new rows start as "pending".
    status = Column(String(50), nullable=False, default="pending", index=True)
    upload_session_id = Column(String(100), nullable=True)

    # Timestamps. The columns are timezone-aware, so the defaults must produce
    # aware datetimes; a naive value would be interpreted by PostgreSQL in the
    # session time zone rather than as UTC.
    uploaded_at = Column(
        DateTime(timezone=True),
        default=lambda: datetime.now(timezone.utc),
        nullable=False,
    )
    # NOTE(review): presumably set once indexing finishes — confirm with caller.
    indexed_at = Column(DateTime(timezone=True), nullable=True)

    # A callable default gives each row its own dict; a literal ``{}`` would be
    # one object shared by every insert.
    doc_metadata = Column(JSONB, default=dict, nullable=False)

    created_at = Column(
        DateTime(timezone=True),
        default=lambda: datetime.now(timezone.utc),
        nullable=False,
    )
    # Refreshed by the ORM on every UPDATE via ``onupdate``.
    updated_at = Column(
        DateTime(timezone=True),
        default=lambda: datetime.now(timezone.utc),
        onupdate=lambda: datetime.now(timezone.utc),
        nullable=False,
    )

    __table_args__ = (
        Index("ix_documents_department_status", "department", "status"),
        Index("ix_documents_created_at", "created_at"),
    )
class DocumentChunkModel(Base):
    """ORM mapping for the ``document_chunks`` table.

    One row per chunk of text: the chunk's position (``chunk_index``), its
    content, a token count, an optional ``vector_id`` and JSONB metadata.

    Python-side defaults are callables so that every INSERT gets a fresh
    value: ``dict`` instead of a shared ``{}`` literal, and a timezone-aware
    UTC "now" instead of the naive, deprecated ``datetime.utcnow``.
    """

    __tablename__ = "document_chunks"

    # Primary key, generated client-side as a random UUID.
    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)

    # NOTE(review): no ForeignKey constraint is declared here; presumably this
    # refers to documents.id — confirm whether the constraint is intentionally
    # omitted. The composite index below also leads with document_id.
    document_id = Column(UUID(as_uuid=True), nullable=False, index=True)
    chunk_index = Column(Integer, nullable=False)
    content = Column(Text, nullable=False)
    token_count = Column(Integer, nullable=False)

    # NOTE(review): presumably the identifier of this chunk in an external
    # vector store — confirm.
    vector_id = Column(String(100), nullable=True, index=True)

    # A callable default gives each row its own dict; a literal ``{}`` would be
    # one object shared by every insert.
    chunk_metadata = Column(JSONB, default=dict, nullable=False)

    # Timezone-aware column, so the default must be an aware datetime.
    created_at = Column(
        DateTime(timezone=True),
        default=lambda: datetime.now(timezone.utc),
        nullable=False,
    )

    __table_args__ = (
        Index("ix_chunks_document_id_index", "document_id", "chunk_index"),
    )
class QueryModel(Base):
    """ORM mapping for the ``queries`` table.

    One row per query: the query text, the department and optional
    user/session identifiers, a free-form status string, the answer with its
    sources, and usage figures (confidence, duration, tokens, model name).

    Python-side defaults are callables so that every INSERT gets a fresh
    value: ``list`` instead of a shared ``[]`` literal, and a timezone-aware
    UTC "now" instead of the naive, deprecated ``datetime.utcnow``.
    """

    __tablename__ = "queries"

    # Primary key, generated client-side as a random UUID.
    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)

    query_text = Column(Text, nullable=False)
    department = Column(String(100), nullable=False, index=True)
    user_id = Column(String(100), nullable=True, index=True)
    session_id = Column(String(100), nullable=True, index=True)

    # Free-form status string; new rows start as "pending".
    status = Column(String(50), nullable=False, default="pending", index=True)

    answer = Column(Text, nullable=True)
    # A callable default gives each row its own list; a literal ``[]`` would be
    # one object shared by every insert.
    sources = Column(JSONB, default=list, nullable=False)

    confidence = Column(Integer, default=0, nullable=False)  # Store as int (0-100)
    duration_ms = Column(Integer, default=0, nullable=False)
    tokens_used = Column(Integer, default=0, nullable=False)
    model = Column(String(100), nullable=True)

    # Timezone-aware column, so the default must be an aware datetime.
    created_at = Column(
        DateTime(timezone=True),
        default=lambda: datetime.now(timezone.utc),
        nullable=False,
        index=True,
    )
    # NOTE(review): presumably set when the query finishes — confirm with caller.
    completed_at = Column(DateTime(timezone=True), nullable=True)

    __table_args__ = (
        Index("ix_queries_department_created", "department", "created_at"),
        Index("ix_queries_user_created", "user_id", "created_at"),
    )
|