# Spaces: Running
# File size: 2,270 Bytes (4847e7d)
import uuid
from django.db import models
from django.utils import timezone
from django.contrib.auth.models import AbstractUser
class User(AbstractUser):
    """
    Minimal user model mirroring the ``authentication_api`` User model.

    The primary key is a UUID, which resolves simplejwt ID type errors.
    The e-mail address is the unique identifier (``USERNAME_FIELD``); the
    ``username`` field inherited from ``AbstractUser`` is removed.
    """

    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    email = models.EmailField(unique=True, max_length=255)

    # Assigning None drops the inherited field: the database table has no
    # username column.
    # NOTE(review): the default manager inherited from AbstractUser still
    # takes a ``username`` argument in create_user()/create_superuser() --
    # confirm that nothing in this project creates users through it.
    username = None

    USERNAME_FIELD = 'email'
    REQUIRED_FIELDS = []

    class Meta:
        # The common User table is not managed by this project.
        managed = False
        db_table = 'core_user'
class Page(models.Model):
    """
    A page (URL) that has been indexed.

    Each row stores the page URL, the tenant it belongs to, a hash of its
    content, an active flag, and the time it was last indexed.
    """

    # unique=True already implies an index, so neither db_index=True nor a
    # separate Meta index on ``url`` is declared (either would be redundant,
    # and the Meta index would create a second physical index on the column).
    # NOTE: dropping the duplicate Meta index requires a schema migration.
    url = models.TextField(unique=True)
    tenant_id = models.TextField(db_index=True)
    content_hash = models.TextField()
    is_active = models.BooleanField(default=True, db_index=True)
    last_indexed = models.DateTimeField(default=timezone.now)

    class Meta:
        db_table = 'pages'
        indexes = [
            # Composite index for filtering on tenant and active flag together.
            models.Index(fields=['tenant_id', 'is_active']),
        ]

    def __str__(self) -> str:
        """Return the URL followed by the tenant id in parentheses."""
        return f"{self.url} ({self.tenant_id})"
class Document(models.Model):
    """
    A document chunk together with its embedding.

    Note: the embedding is described as a PostgreSQL ``vector(768)`` column,
    which requires the pgvector extension. This model nevertheless declares
    the field as a ``TextField`` holding a JSON array, so vector operations
    have to go through raw SQL.
    """

    content = models.TextField()
    source = models.TextField()
    # Indexed once via db_index; the former duplicate Meta index on the same
    # column was removed (requires a schema migration).
    page_url = models.TextField(db_index=True)
    # Declared as text (JSON array); see the class docstring for the
    # vector(768) caveat.
    embedding = models.TextField(help_text="Vector embedding stored as JSON array")
    # unique=True already implies an index, so db_index=True and a separate
    # Meta index on ``hash`` are not needed.
    hash = models.TextField(unique=True)

    class Meta:
        db_table = 'documents'

    def __str__(self) -> str:
        """Return a short label containing the primary key and the source."""
        return f"Document {self.id} from {self.source}"