Clause-AI / data /build_index.py
Kan05's picture
Upload 9 files
87553a7 verified
import psycopg2
import os
from dotenv import load_dotenv
# Load env variables (optional, mostly for local dev)
load_dotenv()
# ================= CONFIGURATION (FILL THESE IN) =================
# 1. HOST: Use the "Transaction Pooler" Host (IPv4 compatible)
# Found in: Settings -> Database -> Connection Pooling
# Example: "aws-0-ap-south-1.pooler.supabase.com"
DB_HOST = "aws-1-ap-south-1.pooler.supabase.com"
# 2. USER: Use the "Transaction Pooler" User
# Found in: Settings -> Database -> Connection Pooling
# Example: "postgres.yourprojectid" (e.g., postgres.erecrmjorkafmqwspytb)
DB_USER = "postgres.erecrmjorkafmqwspytb"
# 3. PASSWORD: Your Database Password (same as before)
DB_PASS = "$Kanishka20"
# 4. PORT: MUST be 5432 (Do not change to 6543!)
# We use the pooler URL for connectivity, but Port 5432 to force "Session Mode"
# so we can run the SET commands below.
DB_PORT = 5432
# =================================================================
def build_index():
conn = None
try:
print(f"🔌 Connecting to {DB_HOST} on Port {DB_PORT}...")
conn = psycopg2.connect(
host=DB_HOST,
database="postgres",
user=DB_USER,
password=DB_PASS,
port=DB_PORT
)
conn.autocommit = True
cur = conn.cursor()
print("🚀 Connection successful!")
# 1. Disable Timeout (Prevents the 60-second crash)
print("⚙️ Step 1: Disabling timeouts...")
cur.execute("SET statement_timeout = 0;")
# 2. Boost Memory (Prevents the '65MB required' crash)
# We give it 150MB of RAM just for this session
print("⚙️ Step 2: Boosting memory to 150MB...")
cur.execute("SET maintenance_work_mem = '150MB';")
# 3. Clean up
print("🧹 Step 3: Cleaning up old indexes...")
cur.execute("DROP INDEX IF EXISTS child_vectors_embedding_idx;")
# 4. Build Index
print("🏗️ Step 4: Building IVFFlat Index (lists=100)...")
print(" (This will take 1-3 minutes. Please wait...)")
# 'lists=100' is the sweet spot for ~80,000 vectors
cur.execute("""
CREATE INDEX child_vectors_embedding_idx
ON child_vectors
USING ivfflat (embedding vector_cosine_ops)
WITH (lists = 100);
""")
print("✅ SUCCESS! Index built. Your backend should now be instant.")
except Exception as e:
print(f"\n❌ ERROR: {e}")
print("Tip: Double check you copied the 'Pooler' Host and User correctly from Supabase Settings.")
finally:
if conn:
conn.close()
print("🔌 Connection closed.")
if __name__ == "__main__":
build_index()