# matching / check_embeddings_table.py
# Calcifer0323's picture
# Fix: Update to RoSBERTa model (1024 dims), remove half precision, increase timeout
# 93cd57d
"""
Check whether the database has a table (or column) for storing embeddings.
"""
import os
import traceback

import psycopg2
def load_db_config(env=None):
    """Build the keyword arguments passed to ``psycopg2.connect``.

    Values are read from ``DB_HOST``, ``DB_PORT``, ``DB_NAME``, ``DB_USER``
    and ``DB_PASSWORD``. Non-secret settings fall back to the defaults this
    script has always used; the password deliberately has no in-source
    default.

    Args:
        env: Mapping to read settings from. Defaults to ``os.environ``.

    Returns:
        A dict with the keys ``host``, ``port``, ``database``, ``user`` and
        ``password``. ``password`` is ``None`` when ``DB_PASSWORD`` is unset.

    Raises:
        ValueError: If ``DB_PORT`` is set to something that is not an integer.
    """
    if env is None:
        env = os.environ
    return {
        'host': env.get('DB_HOST', 'dpg-d5ht8vi4d50c739akh2g-a.virginia-postgres.render.com'),
        'port': int(env.get('DB_PORT', 5432)),
        'database': env.get('DB_NAME', 'lead_exchange_bk'),
        'user': env.get('DB_USER', 'lead_exchange_bk_user'),
        # SECURITY: the password used to be committed here in plain text.
        # It must be supplied via DB_PASSWORD, and the previously committed
        # value should be rotated because it remains in version history.
        # NOTE(review): with None, psycopg2/libpq is expected to fall back to
        # PGPASSWORD or ~/.pgpass -- confirm for the deployed driver version.
        'password': env.get('DB_PASSWORD'),
    }


DB_CONFIG = load_db_config()
# Vector width used in the suggested DDL. 1024 matches the RoSBERTa model
# named in this file's commit header; the previous hint of vector(768) would
# not fit 1024-dimensional embeddings.
EMBEDDING_DIM = 1024

SEPARATOR = "=" * 70


def list_public_tables(cursor):
    """Return the table names listed for the ``public`` schema, sorted by name."""
    cursor.execute("""
        SELECT table_name
        FROM information_schema.tables
        WHERE table_schema = 'public'
        ORDER BY table_name
    """)
    return [row[0] for row in cursor.fetchall()]


def pgvector_installed(cursor) -> bool:
    """Return True when ``pg_extension`` contains a row named ``vector``."""
    cursor.execute("SELECT 1 FROM pg_extension WHERE extname = 'vector'")
    return cursor.fetchone() is not None


def fetch_columns(cursor, table_name):
    """Return ``(column_name, data_type)`` rows for a table in ``public``.

    The table name is passed as a bound parameter rather than interpolated
    into the SQL text, so names containing quotes cannot break the query.
    The schema filter keeps same-named tables in other schemas out of the
    result, matching how the table list itself is built.
    """
    cursor.execute(
        """
        SELECT column_name, data_type
        FROM information_schema.columns
        WHERE table_schema = 'public' AND table_name = %s
        ORDER BY ordinal_position
        """,
        (table_name,),
    )
    return cursor.fetchall()


def find_embedding_tables(tables):
    """Return the names that contain 'embedding' or 'vector' (case-insensitive)."""
    return [
        name for name in tables
        if 'embedding' in name.lower() or 'vector' in name.lower()
    ]


def build_recommendation(has_pgvector, has_embedding_column, embedding_tables,
                         embedding_dim=EMBEDDING_DIM):
    """Return the recommendation text as a list of lines to print.

    Args:
        has_pgvector: Whether the pgvector extension is installed.
        has_embedding_column: Whether ``properties`` already has a column
            whose name contains 'embedding'.
        embedding_tables: Names of embedding-related tables that were found.
        embedding_dim: Vector width to use in the suggested DDL.

    Returns:
        A list of strings, one per output line.
    """
    if has_pgvector and has_embedding_column:
        # The column already exists, so no ADD COLUMN hint is given here.
        return ["✅ Ready to store embeddings in properties table"]
    if has_pgvector and embedding_tables:
        return ["✅ Can store embeddings in separate table"]

    lines = [
        "⚠️ Need to create storage for embeddings:",
        "",
        "Option 1: Add column to properties (recommended)",
        f" ALTER TABLE properties ADD COLUMN embedding vector({embedding_dim});",
        " CREATE INDEX ON properties USING ivfflat (embedding vector_cosine_ops);",
        "",
        "Option 2: Create separate table",
        " CREATE TABLE property_embeddings (",
        " property_id UUID PRIMARY KEY REFERENCES properties(property_id),",
        f" embedding vector({embedding_dim}),",
        " created_at TIMESTAMPTZ DEFAULT NOW()",
        " );",
    ]
    if not has_pgvector:
        # Only ask for the extension when it is actually missing.
        lines += ["", "⚠️ First install pgvector: CREATE EXTENSION vector;"]
    return lines


def main():
    """Inspect the database for embedding storage and print a report.

    Connects using ``DB_CONFIG``, lists the public tables, checks for the
    pgvector extension, prints the columns of ``properties`` and of any
    embedding-related tables, then prints a recommendation. Any error is
    reported with a traceback instead of propagating, and the connection is
    always closed.
    """
    conn = None
    try:
        conn = psycopg2.connect(**DB_CONFIG)
        with conn.cursor() as cursor:
            print(SEPARATOR)
            print("CHECKING EMBEDDINGS STORAGE")
            print(SEPARATOR)

            # All tables
            tables = list_public_tables(cursor)
            print("\n📋 All tables in database:")
            for table in tables:
                print(f" - {table}")

            # pgvector extension
            has_pgvector = pgvector_installed(cursor)
            print(f"\n🔌 pgvector extension: {'✅ Installed' if has_pgvector else '❌ Not installed'}")

            # Structure of the properties table
            print("\n🏠 Properties table structure:")
            has_embedding_column = False
            for col_name, data_type in fetch_columns(cursor, 'properties'):
                if 'embedding' in col_name.lower():
                    has_embedding_column = True
                    print(f" ✅ {col_name}: {data_type}")
                else:
                    print(f" - {col_name}: {data_type}")
            if not has_embedding_column:
                print("\n⚠️ No embedding column found in properties table")

            # Separate embedding tables
            embedding_tables = find_embedding_tables(tables)
            if embedding_tables:
                print("\n📊 Found embedding-related tables:")
                for table in embedding_tables:
                    print(f" - {table}")
                    for col_name, data_type in fetch_columns(cursor, table):
                        print(f" - {col_name}: {data_type}")
            else:
                print("\n⚠️ No separate table for embeddings found")

            print("\n" + SEPARATOR)
            print("RECOMMENDATION")
            print(SEPARATOR)
            for line in build_recommendation(
                    has_pgvector, has_embedding_column, embedding_tables):
                print(line)
    except Exception as e:
        # Top-level boundary of a diagnostic script: report and keep going
        # to the cleanup below rather than crashing with a raw traceback.
        print(f"❌ Error: {e}")
        traceback.print_exc()
    finally:
        # Close the connection even when a query above failed.
        if conn is not None:
            conn.close()


if __name__ == "__main__":
    main()