# Ruhivig65's picture
# Upload 4 files
# e29427f verified
"""
============================================
Database Models (Tables)
- Novel: Stores novel metadata & scraping status
- Chapter: Stores individual chapter content (English + Hindi)
============================================
"""
import enum
from datetime import datetime, timezone
from sqlalchemy import (
Column,
Integer,
String,
Text,
DateTime,
ForeignKey,
Enum as SQLEnum,
Boolean,
Float,
Index,
UniqueConstraint,
)
from sqlalchemy.orm import relationship
from app.database.connection import Base
class NovelStatus(str, enum.Enum):
    """Status values used for a novel's scraping job.

    The ``str`` mix-in makes every member a real string that compares
    equal to its lowercase value (``NovelStatus.QUEUED == "queued"``),
    and ``NovelStatus("queued")`` looks a member up by that value.
    """

    QUEUED = "queued"
    LOGGING_IN = "logging_in"
    SCRAPING = "scraping"
    PAUSED_CAPTCHA = "paused_captcha"
    PAUSED_ERROR = "paused_error"
    COMPLETED = "completed"
    FAILED = "failed"
class Novel(Base):
    """ORM model for the ``novels`` table: novel metadata and scraping status.

    Each row owns an ordered collection of ``Chapter`` rows through the
    ``chapters`` relationship.
    """

    __tablename__ = "novels"

    # --- Identity and titles -------------------------------------------
    id = Column(Integer, primary_key=True, autoincrement=True)
    title = Column(String(500), nullable=False, default="Unknown Novel")
    title_hindi = Column(String(500), nullable=True)  # Hindi translated title

    # --- Source location -----------------------------------------------
    url = Column(String(2000), nullable=False)
    # presumably the page the scraper last reached — TODO confirm with the scraper code
    current_url = Column(String(2000), nullable=True)

    # --- Optional site credentials -------------------------------------
    login_email = Column(String(500), nullable=True)
    # NOTE(review): this is a plain String column; if raw credentials are
    # written here they are stored unencrypted. Confirm how callers
    # populate it and consider encrypting at rest.
    login_password = Column(String(500), nullable=True)

    # --- CSS selectors (comma-separated fallbacks as defaults) ----------
    next_button_selector = Column(
        String(500),
        nullable=False,
        default="a.next_page, a[rel='next'], .next-chap, button.next-chapter",
    )
    content_selector = Column(
        String(500),
        nullable=False,
        default=".chapter-content, .reading-content, #chapter-content, .text-left",
    )

    # --- Job state -------------------------------------------------------
    status = Column(SQLEnum(NovelStatus), nullable=False, default=NovelStatus.QUEUED)
    chapters_scraped = Column(Integer, default=0)
    last_error = Column(Text, nullable=True)
    screenshot_path = Column(String(1000), nullable=True)
    needs_intervention = Column(Boolean, default=False)

    # --- Timestamps ------------------------------------------------------
    # Defaults are callables so a fresh timezone-aware UTC timestamp is
    # produced for every INSERT/UPDATE instead of once at import time.
    created_at = Column(
        DateTime(timezone=True),
        default=lambda: datetime.now(timezone.utc),
    )
    updated_at = Column(
        DateTime(timezone=True),
        default=lambda: datetime.now(timezone.utc),
        onupdate=lambda: datetime.now(timezone.utc),
    )

    # --- Relationships ---------------------------------------------------
    # Chapters follow the novel's lifecycle ("all, delete-orphan"), come
    # back sorted by chapter number, and are fetched with a follow-up
    # SELECT ... IN query whenever novels are loaded (lazy="selectin").
    chapters = relationship(
        "Chapter",
        back_populates="novel",
        cascade="all, delete-orphan",
        order_by="Chapter.chapter_number",
        lazy="selectin",
    )

    def __repr__(self):
        """Return a short debug string with the id, title and status."""
        return f"<Novel(id={self.id}, title='{self.title}', status={self.status})>"
class Chapter(Base):
    """ORM model for the ``chapters`` table: one chapter's content.

    Holds the English text plus optional Hindi title/content, and points
    back to its parent ``Novel`` through ``novel_id``.
    """

    __tablename__ = "chapters"

    id = Column(Integer, primary_key=True, autoincrement=True)

    # Parent novel; the foreign key is declared with ON DELETE CASCADE.
    novel_id = Column(
        Integer,
        ForeignKey("novels.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    chapter_number = Column(Integer, nullable=False)

    # --- Titles and text -------------------------------------------------
    title = Column(String(1000), nullable=True, default="")
    title_hindi = Column(String(1000), nullable=True)  # Hindi title
    content = Column(Text, nullable=False)  # English content
    content_hindi = Column(Text, nullable=True)  # Hindi content

    # --- Provenance ------------------------------------------------------
    url = Column(String(2000), nullable=True)
    word_count = Column(Integer, default=0)
    # Callable default: evaluated per INSERT, yielding a timezone-aware
    # UTC timestamp.
    scraped_at = Column(
        DateTime(timezone=True),
        default=lambda: datetime.now(timezone.utc),
    )

    novel = relationship("Novel", back_populates="chapters")

    __table_args__ = (
        # A chapter number may appear only once per novel.
        UniqueConstraint("novel_id", "chapter_number", name="uq_novel_chapter"),
        # NOTE(review): covers the same columns as the unique constraint
        # above, which most backends already back with an index — likely
        # redundant; kept so the emitted schema is unchanged.
        Index("ix_chapter_novel_number", "novel_id", "chapter_number"),
    )

    def __repr__(self):
        """Return a short debug string with the id, novel id and chapter number."""
        return f"<Chapter(id={self.id}, novel_id={self.novel_id}, ch={self.chapter_number})>"