| """ |
| ============================================ |
| Database Models (Tables) |
| - Novel: Stores novel metadata & scraping status |
| - Chapter: Stores individual chapter content (English + Hindi) |
| ============================================ |
| """ |
|
|
| import enum |
| from datetime import datetime, timezone |
| from sqlalchemy import ( |
| Column, |
| Integer, |
| String, |
| Text, |
| DateTime, |
| ForeignKey, |
| Enum as SQLEnum, |
| Boolean, |
| Float, |
| Index, |
| UniqueConstraint, |
| ) |
| from sqlalchemy.orm import relationship |
| from app.database.connection import Base |
|
|
|
|
class NovelStatus(str, enum.Enum):
    """Closed set of status values for a novel.

    The ``str`` mixin makes each member compare equal to its lowercase
    string value, e.g. ``NovelStatus.QUEUED == "queued"``.
    Declaration order is kept stable because iteration over the enum
    follows it.
    """

    QUEUED = "queued"
    LOGGING_IN = "logging_in"
    SCRAPING = "scraping"

    # Two distinct paused values, one per cause named in the member.
    PAUSED_CAPTCHA = "paused_captcha"
    PAUSED_ERROR = "paused_error"

    COMPLETED = "completed"
    FAILED = "failed"
|
|
|
|
class Novel(Base):
    """ORM model for the ``novels`` table.

    Holds a novel's metadata and scraping status, plus the one-to-many
    ``chapters`` relationship to ``Chapter`` rows.
    """

    __tablename__ = "novels"

    # ---- identity / titles -------------------------------------------
    id = Column(Integer, autoincrement=True, primary_key=True)
    title = Column(String(500), default="Unknown Novel", nullable=False)
    title_hindi = Column(String(500), nullable=True)

    # ---- URLs ---------------------------------------------------------
    url = Column(String(2000), nullable=False)
    current_url = Column(String(2000), nullable=True)

    # ---- login credentials --------------------------------------------
    login_email = Column(String(500), nullable=True)
    # NOTE(review): this is an ordinary String column; nothing in this
    # model hashes or encrypts the value before it is stored.
    login_password = Column(String(500), nullable=True)

    # ---- selectors (defaults are comma-separated CSS selector lists) --
    next_button_selector = Column(
        String(500),
        default="a.next_page, a[rel='next'], .next-chap, button.next-chapter",
        nullable=False,
    )
    content_selector = Column(
        String(500),
        default=".chapter-content, .reading-content, #chapter-content, .text-left",
        nullable=False,
    )

    # ---- status / progress --------------------------------------------
    # NOTE(review): without ``values_callable`` SQLAlchemy's Enum type
    # persists member *names* rather than values by default -- confirm
    # this matches what other code reading the table expects.
    status = Column(
        SQLEnum(NovelStatus),
        default=NovelStatus.QUEUED,
        nullable=False,
    )
    chapters_scraped = Column(Integer, default=0)
    last_error = Column(Text, nullable=True)
    screenshot_path = Column(String(1000), nullable=True)
    needs_intervention = Column(Boolean, default=False)

    # ---- timestamps (timezone-aware, generated in UTC on the client) --
    created_at = Column(
        DateTime(timezone=True),
        default=lambda: datetime.now(timezone.utc),
    )
    updated_at = Column(
        DateTime(timezone=True),
        default=lambda: datetime.now(timezone.utc),
        onupdate=lambda: datetime.now(timezone.utc),
    )

    # ---- relationships --------------------------------------------------
    # Chapters are ordered by chapter number and are removed together
    # with their novel (or when detached from it).
    # NOTE(review): ``lazy="selectin"`` loads the chapters (including
    # their Text content) whenever novels are loaded -- confirm that is
    # intended for every query.
    chapters = relationship(
        "Chapter",
        back_populates="novel",
        order_by="Chapter.chapter_number",
        cascade="all, delete-orphan",
        lazy="selectin",
    )

    def __repr__(self) -> str:
        """Return a short debug string showing id, title and status."""
        return (
            f"<Novel(id={self.id}, "
            f"title='{self.title}', "
            f"status={self.status})>"
        )
|
|
|
|
class Chapter(Base):
    """ORM model for the ``chapters`` table.

    Stores one chapter's content (original text plus an optional Hindi
    version) and links it to its parent ``Novel`` through ``novel_id``.
    """

    __tablename__ = "chapters"

    # A chapter number may appear only once per novel.
    # NOTE(review): the unique constraint and the explicit index cover
    # the same column pair, and ``novel_id`` also has ``index=True`` --
    # confirm whether all of them are needed on the target database.
    __table_args__ = (
        UniqueConstraint("novel_id", "chapter_number", name="uq_novel_chapter"),
        Index("ix_chapter_novel_number", "novel_id", "chapter_number"),
    )

    # ---- identity / ownership -------------------------------------------
    id = Column(Integer, autoincrement=True, primary_key=True)
    novel_id = Column(
        Integer,
        ForeignKey("novels.id", ondelete="CASCADE"),
        index=True,
        nullable=False,
    )
    chapter_number = Column(Integer, nullable=False)

    # ---- titles -----------------------------------------------------------
    title = Column(String(1000), default="", nullable=True)
    title_hindi = Column(String(1000), nullable=True)

    # ---- body text --------------------------------------------------------
    content = Column(Text, nullable=False)
    content_hindi = Column(Text, nullable=True)

    # ---- scrape metadata --------------------------------------------------
    url = Column(String(2000), nullable=True)
    word_count = Column(Integer, default=0)
    # Timezone-aware timestamp generated in UTC on the client at insert.
    scraped_at = Column(
        DateTime(timezone=True),
        default=lambda: datetime.now(timezone.utc),
    )

    # ---- relationships ----------------------------------------------------
    novel = relationship("Novel", back_populates="chapters")

    def __repr__(self) -> str:
        """Return a short debug string showing id, novel id and number."""
        return (
            f"<Chapter(id={self.id}, "
            f"novel_id={self.novel_id}, "
            f"ch={self.chapter_number})>"
        )
|
|