Spaces:
Sleeping
Sleeping
"""
============================================
Database Models (Tables)
- Novel: Stores novel metadata & scraping status
- Chapter: Stores individual chapter content (English + Hindi)
============================================
"""
import enum
from datetime import datetime, timezone

from sqlalchemy import (
    Column,
    Integer,
    String,
    Text,
    DateTime,
    ForeignKey,
    Enum as SQLEnum,
    Boolean,
    Float,
    Index,
    UniqueConstraint,
)
from sqlalchemy.orm import relationship

from app.database.connection import Base


@enum.unique
class NovelStatus(str, enum.Enum):
    """Scraping status values stored in ``Novel.status``.

    Mixing in ``str`` makes every member compare equal to its plain string
    value (``NovelStatus.QUEUED == "queued"``). ``enum.unique`` rejects a
    duplicated value at import time instead of silently turning the second
    member into an alias of the first.
    """

    QUEUED = "queued"
    LOGGING_IN = "logging_in"
    SCRAPING = "scraping"
    PAUSED_CAPTCHA = "paused_captcha"
    PAUSED_ERROR = "paused_error"
    COMPLETED = "completed"
    FAILED = "failed"


class Novel(Base):
    """ORM model for the ``novels`` table: novel metadata and scraping status.

    Each row owns a collection of ``Chapter`` rows, exposed through the
    ``chapters`` relationship and ordered by chapter number.
    """

    __tablename__ = "novels"

    id = Column(Integer, primary_key=True, autoincrement=True)

    # --- Metadata ---
    title = Column(String(500), nullable=False, default="Unknown Novel")
    title_hindi = Column(String(500), nullable=True)  # Hindi translated title
    url = Column(String(2000), nullable=False)
    # Presumably the page the scraper last reached (for resuming) --
    # TODO confirm against the scraper code.
    current_url = Column(String(2000), nullable=True)

    # --- Site credentials ---
    # NOTE(security): the password is held in a plain String column, i.e.
    # stored as-is. Consider encrypting it at rest or moving it to a secret
    # store; not changed here because that would alter the schema and callers.
    login_email = Column(String(500), nullable=True)
    login_password = Column(String(500), nullable=True)

    # --- Page selectors ---
    # The defaults are comma-separated selector lists; presumably CSS
    # selectors tried against the chapter page -- verify against the scraper.
    next_button_selector = Column(
        String(500),
        nullable=False,
        default="a.next_page, a[rel='next'], .next-chap, button.next-chapter",
    )
    content_selector = Column(
        String(500),
        nullable=False,
        default=".chapter-content, .reading-content, #chapter-content, .text-left",
    )

    # --- Scraping state ---
    # NOTE(review): without ``values_callable`` SQLAlchemy's Enum persists the
    # member *names* ("QUEUED"), not the lowercase values ("queued"). Keep that
    # in mind for raw SQL; changing it now would invalidate existing rows.
    status = Column(SQLEnum(NovelStatus), nullable=False, default=NovelStatus.QUEUED)
    chapters_scraped = Column(Integer, default=0)
    last_error = Column(Text, nullable=True)
    screenshot_path = Column(String(1000), nullable=True)
    needs_intervention = Column(Boolean, default=False)

    # --- Timestamps (timezone-aware UTC, computed in Python) ---
    created_at = Column(
        DateTime(timezone=True),
        default=lambda: datetime.now(timezone.utc),
    )
    updated_at = Column(
        DateTime(timezone=True),
        default=lambda: datetime.now(timezone.utc),
        onupdate=lambda: datetime.now(timezone.utc),  # refreshed on every UPDATE
    )

    # Chapters are deleted together with the novel and when removed from the
    # collection ("all, delete-orphan").
    # NOTE(review): lazy="selectin" loads the chapters, including their full
    # text columns, with a follow-up SELECT whenever novels are loaded.
    # Consider a lazier strategy if novels are listed without their chapters.
    chapters = relationship(
        "Chapter",
        back_populates="novel",
        cascade="all, delete-orphan",
        order_by="Chapter.chapter_number",
        lazy="selectin",
    )

    def __repr__(self) -> str:
        """Return a short debug representation (id, title, status)."""
        # !r quotes the title unambiguously, including None and titles that
        # themselves contain quote characters.
        return f"<Novel(id={self.id}, title={self.title!r}, status={self.status})>"


class Chapter(Base):
    """ORM model for the ``chapters`` table: one scraped chapter of a novel.

    Holds the English text and, when available, its Hindi counterpart. A
    chapter number is unique within its novel (``uq_novel_chapter``).
    """

    __tablename__ = "chapters"

    id = Column(Integer, primary_key=True, autoincrement=True)
    # ondelete="CASCADE" asks the database itself to remove chapters when the
    # parent novel row is deleted.
    novel_id = Column(
        Integer,
        ForeignKey("novels.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    chapter_number = Column(Integer, nullable=False)

    title = Column(String(1000), nullable=True, default="")
    title_hindi = Column(String(1000), nullable=True)  # Hindi title
    content = Column(Text, nullable=False)  # English content
    content_hindi = Column(Text, nullable=True)  # Hindi content
    url = Column(String(2000), nullable=True)
    word_count = Column(Integer, default=0)
    # Timezone-aware UTC timestamp, computed in Python at insert time.
    scraped_at = Column(
        DateTime(timezone=True),
        default=lambda: datetime.now(timezone.utc),
    )

    novel = relationship("Novel", back_populates="chapters")

    # NOTE(review): a unique constraint is typically backed by an index of its
    # own, so the explicit composite Index below (and the single-column index
    # on novel_id) may be redundant -- confirm on the target database before
    # dropping either; left as-is to keep the schema unchanged.
    __table_args__ = (
        UniqueConstraint("novel_id", "chapter_number", name="uq_novel_chapter"),
        Index("ix_chapter_novel_number", "novel_id", "chapter_number"),
    )

    def __repr__(self) -> str:
        """Return a short debug representation (id, novel id, chapter number)."""
        return f"<Chapter(id={self.id}, novel_id={self.novel_id}, ch={self.chapter_number})>"