Spaces:
Running
Running
File size: 7,515 Bytes
168b0da |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 |
#!/usr/bin/env python3
"""
PostgreSQL Setup Script for Modal Vector Service
This script helps set up a PostgreSQL database with pgvector extension
for the Modal vector service.
"""
import os
import sys
import subprocess
import psycopg2
from urllib.parse import urlparse
def test_postgres_connection(postgres_url: str) -> bool:
"""Test PostgreSQL connection and pgvector availability"""
try:
print(f"π Testing connection to PostgreSQL...")
conn = psycopg2.connect(postgres_url)
cursor = conn.cursor()
# Test basic connection
cursor.execute("SELECT version();")
version = cursor.fetchone()[0]
print(f"β
Connected to PostgreSQL: {version}")
# Test pgvector extension
try:
cursor.execute("CREATE EXTENSION IF NOT EXISTS vector;")
cursor.execute(
"SELECT extversion FROM pg_extension WHERE extname = 'vector';"
)
vector_version = cursor.fetchone()
if vector_version:
print(f"β
pgvector extension available: v{vector_version[0]}")
else:
print("β οΈ pgvector extension not found")
return False
except Exception as e:
print(f"β pgvector extension error: {e}")
return False
# Create test table to verify vector operations
cursor.execute(
"""
CREATE TABLE IF NOT EXISTS vector_test (
id SERIAL PRIMARY KEY,
embedding vector(384)
);
"""
)
# Test vector operations
test_vector = [0.1] * 384 # 384-dimensional test vector
cursor.execute(
"INSERT INTO vector_test (embedding) VALUES (%s) RETURNING id;",
(test_vector,),
)
test_id = cursor.fetchone()[0]
print(f"β
Vector operations working (test ID: {test_id})")
# Clean up test
cursor.execute("DELETE FROM vector_test WHERE id = %s;", (test_id,))
conn.commit()
cursor.close()
conn.close()
return True
except Exception as e:
print(f"β PostgreSQL connection failed: {e}")
return False
def setup_modal_secret(postgres_url: str):
"""Set up Modal secret for PostgreSQL"""
try:
print("π Setting up Modal secret for PostgreSQL...")
# Create or update the Modal secret
result = subprocess.run(
[
"modal",
"secret",
"create",
"postgres-secret",
f"MODAL_POSTGRES_URL={postgres_url}",
],
capture_output=True,
text=True,
)
if result.returncode == 0:
print("β
Modal secret created successfully")
print("\nTo use in your Modal functions, add:")
print("@app.function(secrets=[modal.Secret.from_name('postgres-secret')])")
else:
# Try updating if creation failed
result = subprocess.run(
[
"modal",
"secret",
"update",
"postgres-secret",
f"MODAL_POSTGRES_URL={postgres_url}",
],
capture_output=True,
text=True,
)
if result.returncode == 0:
print("β
Modal secret updated successfully")
else:
print(f"β Failed to create/update Modal secret: {result.stderr}")
return False
return True
except Exception as e:
print(f"β Error setting up Modal secret: {e}")
return False
def create_vector_tables(postgres_url: str):
"""Create the vector memory tables"""
try:
print("π Creating vector memory tables...")
conn = psycopg2.connect(postgres_url)
cursor = conn.cursor()
# Create the main vector memories table
cursor.execute(
"""
CREATE TABLE IF NOT EXISTS vector_memories (
id SERIAL PRIMARY KEY,
client_id VARCHAR(255) NOT NULL,
text TEXT NOT NULL,
embedding vector(384), -- all-MiniLM-L6-v2 produces 384-dim vectors
metadata JSONB,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
"""
)
# Create indexes for performance
cursor.execute(
"""
CREATE INDEX IF NOT EXISTS idx_vector_memories_client_id
ON vector_memories(client_id);
"""
)
cursor.execute(
"""
CREATE INDEX IF NOT EXISTS idx_vector_memories_created_at
ON vector_memories(created_at);
"""
)
# Create vector similarity index (HNSW for fast approximate search)
cursor.execute(
"""
CREATE INDEX IF NOT EXISTS idx_vector_memories_embedding
ON vector_memories USING hnsw (embedding vector_cosine_ops);
"""
)
conn.commit()
cursor.close()
conn.close()
print("β
Vector memory tables created successfully")
return True
except Exception as e:
print(f"β Error creating vector tables: {e}")
return False
def main():
print("π PostgreSQL Setup for Modal Vector Service")
print("=" * 50)
# Check if PostgreSQL URL is provided
postgres_url = os.getenv("POSTGRES_URL")
if not postgres_url:
print("\nπ PostgreSQL URL not found in environment.")
print("\nOptions for PostgreSQL with pgvector:")
print("1. Neon (https://neon.tech) - Free tier with pgvector")
print("2. Supabase (https://supabase.com) - Free tier with pgvector")
print("3. Railway (https://railway.app) - PostgreSQL with pgvector")
print("4. Your own PostgreSQL instance")
print("\nTo use this script:")
print("export POSTGRES_URL='postgresql://user:password@host:port/database'")
print("python setup_postgres.py")
# Try to get URL from user input
postgres_url = input(
"\nEnter PostgreSQL URL (or press Enter to skip): "
).strip()
if not postgres_url:
print("βοΈ Skipping PostgreSQL setup")
return
# Test the connection
if not test_postgres_connection(postgres_url):
print("β PostgreSQL setup failed - connection test failed")
return
# Create vector tables
if not create_vector_tables(postgres_url):
print("β PostgreSQL setup failed - table creation failed")
return
# Set up Modal secret
if not setup_modal_secret(postgres_url):
print("β PostgreSQL setup failed - Modal secret setup failed")
return
print("\nπ PostgreSQL setup completed successfully!")
print("\nNext steps:")
print("1. Redeploy your Modal vector service")
print("2. Test vector storage and search")
print("3. Monitor performance in Modal dashboard")
# Parse URL to show connection info (without password)
parsed = urlparse(postgres_url)
print(f"\nπ Database Info:")
print(f" Host: {parsed.hostname}")
print(f" Port: {parsed.port or 5432}")
print(f" Database: {parsed.path[1:] if parsed.path else 'postgres'}")
print(f" User: {parsed.username}")
if __name__ == "__main__":
main()
|