| """ |
| Models for storing dark web content and mentions. |
| """ |
import enum
from datetime import datetime, timezone

from sqlalchemy import (
    Boolean,
    Column,
    DateTime,
    Enum,
    Float,
    ForeignKey,
    Integer,
    String,
    Text,
)
from sqlalchemy.orm import relationship

from src.models.base import BaseModel
|
|
class ContentType(enum.Enum):
    """Kinds of dark web content this module can classify.

    Each member's value is its human-readable label.
    """

    # Discussion, commerce and publishing venues
    FORUM_POST = "Forum Post"
    MARKETPLACE_LISTING = "Marketplace Listing"
    BLOG_ARTICLE = "Blog Article"
    CHAT_LOG = "Chat Log"

    # Standalone artifacts
    PASTE = "Paste"
    DOCUMENT = "Document"
    IMAGE = "Image"
    VIDEO = "Video"
    SOURCE_CODE = "Source Code"

    # Catch-all for anything that fits none of the above
    OTHER = "Other"
|
|
|
|
class ContentStatus(enum.Enum):
    """Processing states a piece of dark web content can be in.

    Each member's value is its human-readable label. The enum itself does
    not enforce any ordering or transition rules between states.
    """

    # Not yet analysed / analysis under way / analysis finished
    NEW = "New"
    ANALYZING = "Analyzing"
    ANALYZED = "Analyzed"

    # Relevance verdicts
    RELEVANT = "Relevant"
    IRRELEVANT = "Irrelevant"

    # No longer active
    ARCHIVED = "Archived"
|
|
|
|
def _utcnow_naive():
    """Return the current UTC time as a naive ``datetime``.

    ``datetime.utcnow()`` is deprecated since Python 3.12. This helper yields
    the same naive-UTC value, so timestamps written through the column
    default remain comparable with rows stored earlier.
    """
    return datetime.now(timezone.utc).replace(tzinfo=None)


class DarkWebContent(BaseModel):
    """A piece of content collected from the dark web.

    Stores the raw text with its location, provenance and analysis scores.
    Mentions found in the content are attached through ``mentions`` and are
    deleted together with their content row by the ORM (``delete-orphan``
    cascade).

    The ``id`` attribute used by ``__repr__`` is not declared here; it is
    presumably supplied by ``BaseModel``.
    """

    __tablename__ = "dark_web_contents"

    # Where the content was found. ``url`` is required.
    url = Column(String(1024), nullable=False)
    domain = Column(String(255))

    # The content itself and its classification. ``content`` is required;
    # type and status fall back to OTHER / NEW when not supplied.
    title = Column(String(500))
    content = Column(Text, nullable=False)
    content_type = Column(Enum(ContentType), default=ContentType.OTHER)
    content_status = Column(Enum(ContentStatus), default=ContentStatus.NEW)

    # Provenance of the scrape.
    source_name = Column(String(255))
    source_type = Column(String(100))
    # NOTE(review): presumably a short language code -- confirm with the
    # code that populates it.
    language = Column(String(10))
    # Naive UTC timestamp, filled in at insert time when not supplied.
    scraped_at = Column(DateTime, default=_utcnow_naive)

    # Analysis results; both scores default to 0.0.
    relevance_score = Column(Float, default=0.0)
    sentiment_score = Column(Float, default=0.0)
    # NOTE(review): free-form text; presumably serialized entity data, the
    # format is not defined in this module.
    entity_data = Column(Text)

    # Child mentions are owned by this row: removing one from the collection,
    # or deleting the content, deletes the mention as well.
    mentions = relationship(
        "DarkWebMention",
        back_populates="content",
        cascade="all, delete-orphan",
    )
    # Counterpart of ``SearchResult.content`` (model defined elsewhere).
    search_results = relationship("SearchResult", back_populates="content")

    def __repr__(self):
        """Return a short debugging string with id, URL and content type."""
        return f"<DarkWebContent(id={self.id}, url={self.url}, content_type={self.content_type})>"
|
|
|
|
class DarkWebMention(BaseModel):
    """A mention of a monitored entity found in a piece of dark web content.

    Every mention belongs to exactly one ``DarkWebContent`` row
    (``content_id`` is required). Alerts attached through ``alerts`` are
    deleted together with the mention by the ORM (``delete-orphan`` cascade).
    """

    __tablename__ = "dark_web_mentions"

    # Owning content row and the ORM link back to it.
    content_id = Column(
        Integer,
        ForeignKey("dark_web_contents.id"),
        nullable=False,
    )
    content = relationship("DarkWebContent", back_populates="mentions")

    # The keyword that was found; ``keyword`` is required.
    keyword = Column(String(100), nullable=False)
    keyword_category = Column(String(50))

    # Text stored alongside the mention.
    context = Column(Text)
    snippet = Column(Text)

    # Classification; confidence defaults to 0.0 and mentions start unverified.
    mention_type = Column(String(50))
    confidence = Column(Float, default=0.0)
    is_verified = Column(Boolean, default=False)

    # Counterpart of ``Alert.mention`` (model defined elsewhere).
    alerts = relationship(
        "Alert",
        back_populates="mention",
        cascade="all, delete-orphan",
    )

    def __repr__(self):
        """Return a short debugging string with id, keyword and content id."""
        template = "<DarkWebMention(id={}, keyword={}, content_id={})>"
        return template.format(self.id, self.keyword, self.content_id)