File size: 2,183 Bytes
01d5a5d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
from sqlalchemy import (
    BigInteger,
    Column,
    DateTime,
    ForeignKey,
    JSON,
    String,
    Text,
    Integer,
    Boolean,
    Enum as SQLAlchemyEnum,
)
from sqlalchemy.orm import relationship
from sqlalchemy.ext.declarative import declarative_base
from datetime import datetime
from .process_status import ProcessStatus
from .dto.chunk_dto import ChunkDTO


# Shared declarative base class; every ORM model in this module subclasses it
# so that all mapped tables are registered on the same metadata.
Base = declarative_base()


class ChunkModel(Base):
    """ORM mapping for the ``chunk`` table.

    Each row holds one piece of text (``content``) that belongs to a single
    row of the ``document`` table, linked through ``document_id``.
    """

    __tablename__ = "chunk"

    id = Column(BigInteger, primary_key=True)
    # NOTE(review): this foreign key is BigInteger while DocumentModel.id is
    # declared as Integer -- confirm the width mismatch is intentional.
    document_id = Column(BigInteger, ForeignKey("document.id"), nullable=False)
    content = Column(Text, nullable=False)
    # Nullable column; the default is only applied on INSERT, so the attribute
    # can still be None on an unflushed instance or on a row storing NULL.
    has_embedding = Column(Boolean, default=False)
    tags = Column(JSON)
    topic = Column(String(255))
    # Naive UTC timestamp assigned when the row is inserted.
    create_time = Column(DateTime, default=datetime.utcnow)

    # Many-to-one side of DocumentModel.chunks.
    document = relationship("DocumentModel", back_populates="chunks")

    def to_dto(self) -> ChunkDTO:
        """Build a ``ChunkDTO`` from this row.

        Returns:
            A ``ChunkDTO`` carrying ``id``, ``document_id``, ``content``,
            ``tags`` and ``topic`` unchanged, ``has_embedding`` as a strict
            ``bool`` and ``length`` set to ``len(content)`` (``0`` when the
            content is empty or ``None``).
        """
        return ChunkDTO(
            id=self.id,
            document_id=self.document_id,
            content=self.content,
            # Coerce so that a missing/NULL flag is reported as False rather
            # than leaking None into the DTO.
            has_embedding=bool(self.has_embedding),
            tags=self.tags,
            topic=self.topic,
            length=len(self.content) if self.content else 0,
        )


class DocumentModel(Base):
    """ORM mapping for the ``document`` table.

    A document stores its metadata, raw text, JSON-valued analysis results
    (``insight``, ``summary``, ``keywords``) and two processing-status columns.
    It owns a collection of ``ChunkModel`` rows through ``chunks``.
    """

    __tablename__ = "document"

    id = Column(Integer, primary_key=True, autoincrement=True)
    name = Column(String(255), nullable=False)
    title = Column(String(255))
    mime_type = Column(String(100))
    user_description = Column(Text)
    url = Column(String(1024))
    document_size = Column(Integer, default=0)
    raw_content = Column(Text)
    insight = Column(JSON)
    summary = Column(JSON)
    keywords = Column(JSON)
    # Both status columns start out as ProcessStatus.INITIALIZED.
    extract_status = Column(
        SQLAlchemyEnum(ProcessStatus), default=ProcessStatus.INITIALIZED
    )
    embedding_status = Column(
        SQLAlchemyEnum(ProcessStatus), default=ProcessStatus.INITIALIZED
    )
    # Use UTC for creation as well: the previous local-time default
    # (datetime.now) disagreed with update_time and with ChunkModel.create_time,
    # so on a non-UTC host a fresh row had timestamps offset from each other.
    create_time = Column(DateTime, default=datetime.utcnow)
    # Set on insert and refreshed on every UPDATE issued through the ORM.
    update_time = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    # One-to-many side of ChunkModel.document; the cascade setting removes a
    # document's chunks when the document is deleted or when a chunk is
    # detached from the collection.
    chunks = relationship(
        "ChunkModel", back_populates="document", cascade="all, delete-orphan"
    )