"""Django models: an unmanaged User mirror, indexed pages, and document chunks."""

import uuid

from django.contrib.auth.models import AbstractUser
from django.db import models
from django.utils import timezone


class User(AbstractUser):
    """Minimal User model matching the authentication_api User model.

    Uses a UUID primary key to resolve simplejwt ID type errors. The table
    (``core_user``) is not managed by this project, so no migrations are
    generated for it here.
    """

    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    email = models.EmailField(unique=True, max_length=255)

    # Drop the ``username`` field inherited from AbstractUser: the column is
    # not in the DB. Email is the login identifier instead.
    # NOTE(review): the inherited default manager is left in place; its
    # create_user()/create_superuser() helpers presumably still expect a
    # username -- confirm users are never created through this project.
    username = None

    USERNAME_FIELD = 'email'
    REQUIRED_FIELDS = []

    class Meta:
        db_table = 'core_user'
        managed = False  # This project does not manage the common User table


class Page(models.Model):
    """A page (URL) that has been indexed.

    Stores the page URL, the tenant it belongs to, a hash of its content,
    an active flag, and the time it was last indexed (defaults to the
    current time when the row is created).
    """

    # NOTE(review): unique=True already implies an index, so db_index=True
    # here and the Meta index on ``url`` below look redundant. Left as-is
    # because removing them requires a schema migration.
    url = models.TextField(unique=True, db_index=True)
    tenant_id = models.TextField(db_index=True)
    content_hash = models.TextField()
    is_active = models.BooleanField(default=True, db_index=True)
    last_indexed = models.DateTimeField(default=timezone.now)

    class Meta:
        db_table = 'pages'
        indexes = [
            # Composite index over tenant and active flag.
            models.Index(fields=['tenant_id', 'is_active']),
            models.Index(fields=['url']),
        ]

    def __str__(self) -> str:
        """Return the URL followed by the tenant id in parentheses."""
        return f"{self.url} ({self.tenant_id})"


class Document(models.Model):
    """A document chunk together with its embedding.

    Note: the embedding is intended to live in a PostgreSQL ``vector``
    column (768 dimensions), which requires the pgvector extension to be
    installed. This model, however, declares the field as a plain
    ``TextField`` holding a JSON array; vector operations are expected to
    go through raw SQL.
    NOTE(review): confirm the actual column type of ``documents.embedding``
    matches how this field is read and written.
    """

    content = models.TextField()
    source = models.TextField()
    # NOTE(review): db_index=True plus the Meta index on ``page_url`` below
    # declares two indexes on the same column; one is likely redundant.
    page_url = models.TextField(db_index=True)

    # embedding is stored as a vector(768) in PostgreSQL.
    # We use a TextField to store it as JSON, or raw SQL for vector operations.
    embedding = models.TextField(help_text="Vector embedding stored as JSON array")

    # NOTE(review): unique=True already implies an index; db_index=True and
    # the Meta index on ``hash`` look redundant (removal needs a migration).
    hash = models.TextField(unique=True, db_index=True)

    class Meta:
        db_table = 'documents'
        indexes = [
            models.Index(fields=['page_url']),
            models.Index(fields=['hash']),
        ]

    def __str__(self) -> str:
        """Return a short label with the primary key and the source."""
        return f"Document {self.id} from {self.source}"