Calcifer0323 commited on
Commit
93cd57d
·
1 Parent(s): d99e8d3

Fix: Update to RoSBERTa model (1024 dims), remove half precision, increase timeout

Browse files
add_admin_user.py ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Скрипт для добавления тестового пользователя-администратора в БД.
3
+
4
+ Подключается к PostgreSQL на Render и создает пользователя с ролью ADMIN.
5
+ """
6
+
7
+ import psycopg2
8
+ import bcrypt
9
+ from uuid import uuid4
10
+
11
import os

# Connection parameters for the PostgreSQL database on Render.
# Every value can be overridden through the environment (DB_HOST, DB_PORT,
# DB_NAME, DB_USER, DB_PASSWORD).
# SECURITY: the fallback password below is committed to version control and
# must be treated as compromised. Rotate it and remove the fallback so the
# secret is supplied only through the environment.
DB_CONFIG = {
    'host': os.environ.get('DB_HOST', 'dpg-d5ht8vi4d50c739akh2g-a.virginia-postgres.render.com'),
    'port': int(os.environ.get('DB_PORT', '5432')),
    'database': os.environ.get('DB_NAME', 'lead_exchange_bk'),
    'user': os.environ.get('DB_USER', 'lead_exchange_bk_user'),
    'password': os.environ.get('DB_PASSWORD', '8m2gtTRBW0iAr7nY2Aadzz0VcZBEVKYM'),
}

# Administrator account to create.
# SECURITY: 'admin123' is a placeholder for local testing only; set
# ADMIN_PASSWORD in the environment before running against a real database.
ADMIN_USER = {
    'user_id': str(uuid4()),
    'email': 'admin@leadexchange.com',
    'password': os.environ.get('ADMIN_PASSWORD', 'admin123'),  # hashed before it is stored
    'first_name': 'Админ',
    'last_name': 'Администраторов',
    'phone': '+79999999999',
    'agency_name': 'Lead Exchange Administration',
    'avatar_url': 'https://cdn.pixabay.com/photo/2015/10/05/22/37/blank-profile-picture-973460_1280.png',
    'role': 'ADMIN',
}
32
+
33
+
34
def hash_password(password: str) -> str:
    """Return a bcrypt hash of *password* as a UTF-8 string.

    A fresh salt is generated on every call, so hashing the same password
    twice produces different strings.
    """
    raw = password.encode('utf-8')
    return bcrypt.hashpw(raw, bcrypt.gensalt()).decode('utf-8')
39
+
40
+
41
def create_admin_user():
    """Create the administrator account described by ``ADMIN_USER``.

    Connects with ``DB_CONFIG``, creates the ``users`` table when it is
    missing and inserts the admin row. If a user with the same email already
    exists, the operator is asked interactively whether the stored password
    hash should be replaced; no new row is inserted in that case.

    The cursor and connection are always closed, including on the early
    return and on errors.

    Raises:
        psycopg2.Error: on any database failure (logged, then re-raised).
        Exception: any other failure is logged and re-raised as well.
    """
    conn = None
    cursor = None
    try:
        # Connect to the database.
        print(f"Подключение к базе данных {DB_CONFIG['database']}...")
        conn = psycopg2.connect(**DB_CONFIG)
        cursor = conn.cursor()

        # Check whether the users table exists.
        cursor.execute("""
            SELECT EXISTS (
                SELECT FROM information_schema.tables
                WHERE table_name = 'users'
            );
        """)
        table_exists = cursor.fetchone()[0]

        if not table_exists:
            print("⚠️ Таблица 'users' не существует. Создаем...")
            cursor.execute("""
                CREATE TABLE IF NOT EXISTS users (
                    user_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
                    email TEXT NOT NULL UNIQUE,
                    password_hash TEXT NOT NULL,
                    first_name TEXT NOT NULL,
                    last_name TEXT NOT NULL,
                    phone TEXT UNIQUE,
                    agency_name TEXT,
                    avatar_url TEXT,
                    role TEXT NOT NULL,
                    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
                );
            """)
            conn.commit()
            print("✅ Таблица 'users' создана")

        # Check whether a user with this email already exists.
        cursor.execute("SELECT email FROM users WHERE email = %s", (ADMIN_USER['email'],))
        existing_user = cursor.fetchone()

        if existing_user:
            print(f"⚠️ Пользователь с email {ADMIN_USER['email']} уже существует")

            # Ask whether the password should be updated.
            update = input("Обновить пароль? (y/n): ").lower().strip()
            if update == 'y':
                password_hash = hash_password(ADMIN_USER['password'])
                cursor.execute(
                    "UPDATE users SET password_hash = %s WHERE email = %s",
                    (password_hash, ADMIN_USER['email'])
                )
                conn.commit()
                print("✅ Пароль обновлен")
            else:
                print("❌ Операция отменена")
            return  # resources are released by the finally block below

        # Hash the password.
        print("Хеширование пароля...")
        password_hash = hash_password(ADMIN_USER['password'])

        # Insert the user.
        print(f"Создание администратора {ADMIN_USER['email']}...")
        cursor.execute("""
            INSERT INTO users (
                user_id, email, password_hash, first_name, last_name,
                phone, agency_name, avatar_url, role
            ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
        """, (
            ADMIN_USER['user_id'],
            ADMIN_USER['email'],
            password_hash,
            ADMIN_USER['first_name'],
            ADMIN_USER['last_name'],
            ADMIN_USER['phone'],
            ADMIN_USER['agency_name'],
            ADMIN_USER['avatar_url'],
            ADMIN_USER['role']
        ))

        conn.commit()

        print("\n" + "="*60)
        print("✅ Администратор успешно создан!")
        print("="*60)
        print(f"ID: {ADMIN_USER['user_id']}")
        print(f"Email: {ADMIN_USER['email']}")
        print(f"Пароль: {ADMIN_USER['password']}")
        print(f"Роль: {ADMIN_USER['role']}")
        print(f"Имя: {ADMIN_USER['first_name']} {ADMIN_USER['last_name']}")
        print(f"Телефон: {ADMIN_USER['phone']}")
        print("="*60)

    except psycopg2.Error as e:
        print(f"❌ Ошибка PostgreSQL: {e}")
        raise
    except Exception as e:
        print(f"❌ Ошибка: {e}")
        raise
    finally:
        # Closing the connection also discards any uncommitted work.
        if cursor is not None:
            cursor.close()
        if conn is not None:
            conn.close()
143
+
144
+
145
def verify_admin_user():
    """Look up the admin by email and print the row stored in the database.

    This is a best-effort check: any error is printed and swallowed so that
    a failed verification does not mask a successful creation. The cursor
    and connection are always closed.
    """
    conn = None
    cursor = None
    try:
        conn = psycopg2.connect(**DB_CONFIG)
        cursor = conn.cursor()

        cursor.execute("""
            SELECT user_id, email, first_name, last_name, role, created_at
            FROM users
            WHERE email = %s
        """, (ADMIN_USER['email'],))

        user = cursor.fetchone()

        if user:
            print("\n📋 Информация о пользователе в БД:")
            print(f" ID: {user[0]}")
            print(f" Email: {user[1]}")
            print(f" Имя: {user[2]} {user[3]}")
            print(f" Роль: {user[4]}")
            print(f" Создан: {user[5]}")
        else:
            print("❌ Пользователь не найден в БД")

    except Exception as e:
        print(f"❌ Ошибка при проверке: {e}")
    finally:
        if cursor is not None:
            cursor.close()
        if conn is not None:
            conn.close()
174
+
175
+
176
if __name__ == "__main__":
    print("🚀 Скрипт создания администратора Lead Exchange")
    print("-" * 60)

    try:
        create_admin_user()
        verify_admin_user()
    except KeyboardInterrupt:
        print("\n\n⚠️ Операция прервана пользователем")
    except Exception as e:
        print(f"\n❌ Критическая ошибка: {e}")
        # The bare `exit` name is injected by the `site` module and is not
        # guaranteed to exist (e.g. `python -S`); SystemExit always is.
        raise SystemExit(1)
188
+
add_properties.py ADDED
@@ -0,0 +1,283 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Скрипт для добавления объектов недвижимости из pars_samolet.sql в базу данных Render
3
+ """
4
+ import psycopg2
5
+ import re
6
+ import sqlite3
7
+ import uuid
8
+ import sys
9
+ from datetime import datetime
10
+
11
import os

# Connection parameters for the PostgreSQL database on Render.
# Every value can be overridden through the environment (DB_HOST, DB_PORT,
# DB_NAME, DB_USER, DB_PASSWORD).
# SECURITY: the fallback password below is committed to version control and
# must be treated as compromised. Rotate it and remove the fallback so the
# secret is supplied only through the environment.
DB_CONFIG = {
    'host': os.environ.get('DB_HOST', 'dpg-d5ht8vi4d50c739akh2g-a.virginia-postgres.render.com'),
    'port': int(os.environ.get('DB_PORT', '5432')),
    'database': os.environ.get('DB_NAME', 'lead_exchange_bk'),
    'user': os.environ.get('DB_USER', 'lead_exchange_bk_user'),
    'password': os.environ.get('DB_PASSWORD', '8m2gtTRBW0iAr7nY2Aadzz0VcZBEVKYM'),
}
19
+
20
+ # ID администратора (который мы создали ранее)
21
+ ADMIN_USER_ID = None # Будет получен из БД
22
+
23
def get_admin_user_id(conn):
    """Return the ``user_id`` of the admin that should own imported rows.

    Queries ``users`` for rows with ``role = 'ADMIN'``, newest first, and
    returns the ID of the most recently created one as a string. When
    several admins exist they are all listed on stdout before the newest one
    is selected.

    Args:
        conn: an open DB-API connection (psycopg2 in this script).

    Returns:
        The selected admin's ``user_id`` converted with ``str()``.

    Raises:
        LookupError: if no admin user exists.
    """
    cursor = conn.cursor()
    try:
        # Fetch all administrators, newest first.
        cursor.execute("""
            SELECT user_id, email, first_name, last_name
            FROM users
            WHERE role = 'ADMIN'
            ORDER BY created_at DESC
        """)
        admins = cursor.fetchall()
    finally:
        cursor.close()

    if not admins:
        raise LookupError("Admin user not found in database. Please run add_admin_user.py first.")

    # The query orders by created_at DESC, so the first row is the newest.
    newest_id, newest_email, newest_first, newest_last = admins[0]

    if len(admins) == 1:
        print(f" Found admin: {newest_email} ({newest_first} {newest_last})")
        return str(newest_id)

    # Several admins: show them all, then use the newest.
    print(f" Found {len(admins)} admin users:")
    for admin_id, email, first_name, last_name in admins:
        print(f" - {email} ({first_name} {last_name}) - ID: {admin_id}")

    print(f" ✅ Using: {newest_email}")
    return str(newest_id)
53
+
54
def parse_sql_file(path='pars_samolet.sql'):
    """Parse a SQL dump of real-estate objects into a list of dicts.

    The first ``CREATE TABLE`` statement and every ``INSERT INTO mytable``
    statement are extracted with regular expressions and replayed into an
    in-memory SQLite database; the resulting rows are then read back.
    INSERT statements SQLite rejects are reported and skipped.

    Args:
        path: location of the dump file (defaults to ``pars_samolet.sql``
            in the current working directory).

    Returns:
        One dict per row of ``mytable``, keyed by column name.

    Raises:
        ValueError: if the file contains no ``CREATE TABLE`` statement.
        OSError: if the file cannot be read.
        sqlite3.Error: if SQLite rejects the ``CREATE TABLE`` statement.
    """
    print(f"Reading {path}...")

    with open(path, 'r', encoding='utf-8') as f:
        sql_content = f.read()

    # Extract the CREATE TABLE statement.
    create_match = re.search(r'CREATE TABLE[^;]+;', sql_content, re.DOTALL)
    if not create_match:
        raise ValueError("CREATE TABLE not found")

    # Extract every INSERT. NOTE: a ';' inside a quoted value ends the match
    # early, and such a statement is then skipped by SQLite below.
    insert_pattern = r'INSERT INTO mytable\([^)]+\) VALUES\s*\([^;]+\);'
    inserts = re.findall(insert_pattern, sql_content, re.DOTALL)

    print(f"Found {len(inserts)} INSERT statements")

    # Replay the statements into a throwaway in-memory database.
    conn = sqlite3.connect(':memory:')
    try:
        cursor = conn.cursor()
        cursor.execute(create_match.group(0))

        successful = 0
        for number, insert in enumerate(inserts, start=1):
            try:
                cursor.execute(insert)
            except sqlite3.Error as e:
                print(f"Warning: Could not parse INSERT #{number}: {e}")
            else:
                successful += 1

        print(f"Successfully parsed {successful}/{len(inserts)} records")

        # Read the rows back; the cursor description carries the column names.
        cursor.execute('SELECT * FROM mytable')
        columns = [col[0] for col in cursor.description]
        return [dict(zip(columns, row)) for row in cursor.fetchall()]
    finally:
        conn.close()
103
+
104
# Source (Russian) property-type label -> target enum value.
_PROPERTY_TYPE_BY_LABEL = {
    'Квартира': 'APARTMENT',
    'Дом': 'HOUSE',
    'Коммерческая': 'COMMERCIAL',
    'Участок': 'LAND',
}


def map_property_type(old_type):
    """Translate a source property-type label into the target enum value.

    Labels that are not recognised fall back to ``'APARTMENT'``.
    """
    return _PROPERTY_TYPE_BY_LABEL.get(old_type, 'APARTMENT')
113
+
114
# Source (Russian) status label -> target enum value.
_STATUS_BY_LABEL = {
    'Доступно': 'PUBLISHED',
    'Продано': 'SOLD',
    'Архив': 'ARCHIVED',
}


def map_status(old_status):
    """Translate a source status label into the target enum value.

    Labels that are not recognised fall back to ``'PUBLISHED'``.
    """
    return _STATUS_BY_LABEL.get(old_status, 'PUBLISHED')
122
+
123
def _build_property_row(obj, owner_user_id):
    """Convert one parsed source record into the INSERT parameter tuple."""
    return (
        # A missing or NULL id gets a fresh UUID rather than a NULL primary key.
        obj.get('property_id') or str(uuid.uuid4()),
        # The title may be NULL in the source; cap its length at 255.
        (obj.get('title') or '')[:255],
        obj.get('description', ''),
        obj.get('address', ''),
        map_property_type(obj.get('property_type', 'Квартира')),
        float(obj['area']) if obj.get('area') else None,
        int(obj['price']) if obj.get('price') else None,
        int(obj['rooms']) if obj.get('rooms') else None,
        map_status(obj.get('status', 'Доступно')),
        owner_user_id,  # owner_user_id
        owner_user_id,  # created_user_id
        obj.get('created_at', datetime.now().isoformat()),
        obj.get('updated_at', datetime.now().isoformat()),
    )


def insert_properties(objects, owner_user_id, auto_confirm=False):
    """Insert parsed real-estate objects into the ``properties`` table.

    If the table already has rows they are deleted when ``auto_confirm`` is
    true, or when the operator answers yes to the interactive prompt. Each
    row is inserted inside its own SAVEPOINT: a failed INSERT puts the whole
    PostgreSQL transaction into an aborted state, and without the savepoint
    every later row and the final commit would be lost.

    Args:
        objects: list of dicts as produced by ``parse_sql_file``.
        owner_user_id: user id stored as both owner and creator of each row.
        auto_confirm: delete existing rows without prompting.

    Raises:
        psycopg2.Error: on connection or transaction-level database errors.
        Exception: any other unexpected failure (logged, then re-raised).
    """
    print(f"\nConnecting to database at {DB_CONFIG['host']}...")

    conn = None
    cursor = None
    try:
        conn = psycopg2.connect(**DB_CONFIG)
        cursor = conn.cursor()

        print("Connected successfully!")

        # Check whether the table already has data.
        cursor.execute("SELECT COUNT(*) FROM properties")
        existing_count = cursor.fetchone()[0]
        print(f"Current properties in database: {existing_count}")

        if existing_count > 0:
            if auto_confirm:
                print(f"\nAuto-confirm enabled: Deleting {existing_count} existing properties...")
                cursor.execute("DELETE FROM properties")
                conn.commit()
                print("Deleted existing properties")
            else:
                try:
                    response = input(f"\nDatabase already has {existing_count} properties. Delete them? (yes/y/no/n): ")
                    if response.lower() in ['yes', 'y']:
                        cursor.execute("DELETE FROM properties")
                        conn.commit()
                        print("Deleted existing properties")
                except EOFError:
                    print("\n⚠️ Input interrupted. Keeping existing properties.")
                    print("Run with --yes flag to auto-confirm or run interactively.")

        # Insert the objects.
        inserted = 0
        failed = 0

        insert_query = """
            INSERT INTO properties (
                property_id, title, description, address, property_type,
                area, price, rooms, status, owner_user_id, created_user_id,
                created_at, updated_at
            ) VALUES (
                %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s
            )
        """

        print(f"\nInserting {len(objects)} properties...")

        for i, obj in enumerate(objects):
            try:
                row = _build_property_row(obj, owner_user_id)

                # Isolate this row so a failure does not poison the transaction.
                cursor.execute("SAVEPOINT property_row")
                try:
                    cursor.execute(insert_query, row)
                except psycopg2.Error:
                    cursor.execute("ROLLBACK TO SAVEPOINT property_row")
                    raise
                cursor.execute("RELEASE SAVEPOINT property_row")

                inserted += 1

                if (i + 1) % 50 == 0:
                    print(f" Inserted {i + 1}/{len(objects)}...")

            except Exception as e:
                failed += 1
                print(f" Failed to insert property {obj.get('property_id', 'unknown')}: {e}")
                continue

        # Commit the successfully inserted rows.
        conn.commit()

        print(f"\n✅ Successfully inserted {inserted} properties")
        if failed > 0:
            print(f"⚠️ Failed to insert {failed} properties")

        # Check the final row count.
        cursor.execute("SELECT COUNT(*) FROM properties")
        final_count = cursor.fetchone()[0]
        print(f"\nTotal properties in database: {final_count}")

    except psycopg2.Error as e:
        print(f"❌ Database error: {e}")
        raise
    except Exception as e:
        print(f"❌ Error: {e}")
        raise
    finally:
        if cursor is not None:
            cursor.close()
        if conn is not None:
            conn.close()
225
+
226
def main():
    """Parse ``pars_samolet.sql`` and load its objects into the database.

    Steps: parse the dump, look up the admin user that will own the rows,
    ask for confirmation (skipped with ``--yes`` / ``-y``), then insert.
    Returns early, without raising, when there is nothing to insert, the
    admin lookup fails or the operator declines.
    """
    print("=" * 60)
    print("Adding Properties to Database")
    print("=" * 60)

    # Check the command-line flags.
    auto_confirm = '--yes' in sys.argv or '-y' in sys.argv

    if auto_confirm:
        print("🤖 Auto-confirm mode enabled")

    # Parse the SQL file.
    objects = parse_sql_file()

    if not objects:
        print("No objects found in pars_samolet.sql")
        return

    print(f"\nParsed {len(objects)} properties from file")
    print(f"Sample property: {objects[0].get('title', 'N/A')}")

    # Connect to the database and fetch the admin id.
    print("\nGetting admin user ID...")
    try:
        conn = psycopg2.connect(**DB_CONFIG)
        try:
            admin_id = get_admin_user_id(conn)
        finally:
            # Close the connection even when the lookup raises.
            conn.close()
        print(f"Admin user ID: {admin_id}")
    except Exception as e:
        print(f"❌ Error: {e}")
        return

    # Confirmation.
    if not auto_confirm:
        print(f"\nReady to insert {len(objects)} properties into database")
        print(f"Database: {DB_CONFIG['host']}/{DB_CONFIG['database']}")
        try:
            response = input("\nProceed? (yes/y/no/n): ")
            if response.lower() not in ['yes', 'y']:
                print("Cancelled by user")
                return
        except EOFError:
            print("\n❌ Error: EOF when reading input")
            print("Run with --yes flag to auto-confirm: python add_properties.py --yes")
            return
    else:
        print(f"\n✅ Auto-confirming insertion of {len(objects)} properties")

    # Insert the objects.
    insert_properties(objects, admin_id, auto_confirm)

    print("\n" + "=" * 60)
    print("✅ Done!")
    print("=" * 60)
280
+
281
+ if __name__ == '__main__':
282
+ main()
283
+
check_embeddings_table.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Проверка наличия таблицы для эмбеддингов в БД
3
+ """
4
+ import psycopg2
5
+
6
import os

# Connection parameters for the PostgreSQL database on Render.
# Every value can be overridden through the environment (DB_HOST, DB_PORT,
# DB_NAME, DB_USER, DB_PASSWORD).
# SECURITY: the fallback password below is committed to version control and
# must be treated as compromised. Rotate it and remove the fallback.
DB_CONFIG = {
    'host': os.environ.get('DB_HOST', 'dpg-d5ht8vi4d50c739akh2g-a.virginia-postgres.render.com'),
    'port': int(os.environ.get('DB_PORT', '5432')),
    'database': os.environ.get('DB_NAME', 'lead_exchange_bk'),
    'user': os.environ.get('DB_USER', 'lead_exchange_bk_user'),
    'password': os.environ.get('DB_PASSWORD', '8m2gtTRBW0iAr7nY2Aadzz0VcZBEVKYM'),
}

# Must match the embedding service's output size (1024 for
# ai-forever/ru-en-RoSBERTa), otherwise inserts into the vector column fail.
EMBEDDING_DIMENSIONS = 1024

conn = None
cursor = None
try:
    conn = psycopg2.connect(**DB_CONFIG)
    cursor = conn.cursor()

    print("=" * 70)
    print("CHECKING EMBEDDINGS STORAGE")
    print("=" * 70)

    # List all tables in the public schema.
    cursor.execute("""
        SELECT table_name
        FROM information_schema.tables
        WHERE table_schema = 'public'
        ORDER BY table_name
    """)

    tables = [row[0] for row in cursor.fetchall()]
    print("\n📋 All tables in database:")
    for table in tables:
        print(f" - {table}")

    # Check whether the pgvector extension is installed.
    cursor.execute("""
        SELECT * FROM pg_extension WHERE extname = 'vector'
    """)
    has_pgvector = cursor.fetchone() is not None
    print(f"\n🔌 pgvector extension: {'✅ Installed' if has_pgvector else '❌ Not installed'}")

    # Inspect the structure of the properties table.
    cursor.execute("""
        SELECT column_name, data_type, character_maximum_length
        FROM information_schema.columns
        WHERE table_name = 'properties'
        ORDER BY ordinal_position
    """)

    print("\n🏠 Properties table structure:")
    has_embedding_column = False
    for col_name, data_type, max_length in cursor.fetchall():
        if 'embedding' in col_name.lower():
            has_embedding_column = True
            print(f" ✅ {col_name}: {data_type}")
        else:
            print(f" - {col_name}: {data_type}")

    if not has_embedding_column:
        print("\n⚠️ No embedding column found in properties table")

    # Look for a separate table dedicated to embeddings.
    embedding_tables = [t for t in tables if 'embedding' in t.lower() or 'vector' in t.lower()]

    if embedding_tables:
        print("\n📊 Found embedding-related tables:")
        for table in embedding_tables:
            print(f" - {table}")
            # Pass the table name as a bound parameter, not via string formatting.
            cursor.execute("""
                SELECT column_name, data_type
                FROM information_schema.columns
                WHERE table_name = %s
                ORDER BY ordinal_position
            """, (table,))
            for col_name, data_type in cursor.fetchall():
                print(f" - {col_name}: {data_type}")
    else:
        print("\n⚠️ No separate table for embeddings found")

    print("\n" + "=" * 70)
    print("RECOMMENDATION")
    print("=" * 70)

    if has_pgvector and has_embedding_column:
        print("✅ Ready to store embeddings in properties table")
        print(f" Use: ALTER TABLE properties ADD COLUMN embedding vector({EMBEDDING_DIMENSIONS})")
    elif has_pgvector and embedding_tables:
        print("✅ Can store embeddings in separate table")
    else:
        print("⚠️ Need to create storage for embeddings:")
        print("")
        print("Option 1: Add column to properties (recommended)")
        print(f" ALTER TABLE properties ADD COLUMN embedding vector({EMBEDDING_DIMENSIONS});")
        print(" CREATE INDEX ON properties USING ivfflat (embedding vector_cosine_ops);")
        print("")
        print("Option 2: Create separate table")
        print(" CREATE TABLE property_embeddings (")
        print(" property_id UUID PRIMARY KEY REFERENCES properties(property_id),")
        print(f" embedding vector({EMBEDDING_DIMENSIONS}),")
        print(" created_at TIMESTAMPTZ DEFAULT NOW()")
        print(" );")
        if not has_pgvector:
            # Only relevant when the extension is actually missing.
            print("")
            print("⚠️ First install pgvector: CREATE EXTENSION vector;")

except Exception as e:
    print(f"❌ Error: {e}")
    import traceback
    traceback.print_exc()
finally:
    if cursor is not None:
        cursor.close()
    if conn is not None:
        conn.close()
112
+
check_service_status.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Проверка статуса сервиса на HuggingFace Spaces
3
+ """
4
+ import requests
5
+ import time
6
+
7
# Base URL of the embedding service on HuggingFace Spaces.
HF_SERVICE_URL = "https://calcifer0323-matching.hf.space"

_RULE = "=" * 70

print(_RULE)
print("CHECKING HUGGINGFACE SPACES SERVICE STATUS")
print(_RULE)

endpoints = ["/health", "/docs", "/"]

# Probe each read-only endpoint; a failure on one does not stop the others.
for endpoint in endpoints:
    try:
        print(f"\n🔍 Checking {endpoint}...")
        response = requests.get(HF_SERVICE_URL + endpoint, timeout=10)
        code = response.status_code
        print(f" Status: {code}")

        if code != 200:
            print(f" ⚠️ Endpoint returned {code}")
            print(f" Response: {response.text[:200]}")
            continue

        print(" ✅ Endpoint working")
        if endpoint == "/health":
            # The health payload reports the service status and loaded model.
            data = response.json()
            for label, key in (("Status", "status"), ("Model", "model"), ("Dimensions", "dimensions")):
                print(f" {label}: {data.get(key, 'unknown')}")

    except requests.exceptions.Timeout:
        print(" ❌ Timeout")
    except requests.exceptions.ConnectionError:
        print(" ❌ Connection error")
    except Exception as e:
        print(f" ❌ Error: {e}")

print("\n" + _RULE)
print("TESTING /embed ENDPOINT")
print(_RULE)

test_text = "3-комнатная квартира в центре Москвы"
payload = {"text": test_text}

# Send one sample text to /embed and report what comes back.
try:
    print(f"\n📝 Test text: {test_text}")
    print("📤 Sending to /embed...")

    response = requests.post(HF_SERVICE_URL + "/embed", json=payload, timeout=30)

    print(f" Status: {response.status_code}")

    if response.status_code != 200:
        print(" ❌ Error")
        print(f" Response: {response.text[:500]}")
    else:
        data = response.json()
        print(" ✅ Success!")
        print(f" Embedding dimensions: {data.get('dimensions', 'unknown')}")
        print(f" Model: {data.get('model', 'unknown')}")

except Exception as e:
    print(f" ❌ Failed: {e}")

print("\n" + _RULE)
79
+
cleanup_and_verify.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Скрипт для полной очистки и повторной вставки объектов с правильным admin user
3
+ """
4
+ import psycopg2
5
+
6
import os

# Connection parameters for the PostgreSQL database on Render.
# Every value can be overridden through the environment (DB_HOST, DB_PORT,
# DB_NAME, DB_USER, DB_PASSWORD).
# SECURITY: the fallback password below is committed to version control and
# must be treated as compromised. Rotate it and remove the fallback.
DB_CONFIG = {
    'host': os.environ.get('DB_HOST', 'dpg-d5ht8vi4d50c739akh2g-a.virginia-postgres.render.com'),
    'port': int(os.environ.get('DB_PORT', '5432')),
    'database': os.environ.get('DB_NAME', 'lead_exchange_bk'),
    'user': os.environ.get('DB_USER', 'lead_exchange_bk_user'),
    'password': os.environ.get('DB_PASSWORD', '8m2gtTRBW0iAr7nY2Aadzz0VcZBEVKYM'),
}

print("=" * 70)
print("CLEANUP AND VERIFICATION")
print("=" * 70)

conn = None
cursor = None
try:
    conn = psycopg2.connect(**DB_CONFIG)
    cursor = conn.cursor()

    # Show the current state.
    cursor.execute("SELECT COUNT(*) FROM properties")
    current_count = cursor.fetchone()[0]
    print(f"\n📊 Current properties count: {current_count}")

    # Show every admin user.
    cursor.execute("""
        SELECT user_id, email, first_name, last_name, created_at
        FROM users
        WHERE role = 'ADMIN'
        ORDER BY created_at DESC
    """)
    admins = cursor.fetchall()
    print("\n👥 Admin users in database:")
    for user_id, email, first_name, last_name, created in admins:
        print(f" - {email}")
        print(f" Name: {first_name} {last_name}")
        print(f" ID: {user_id}")
        print(f" Created: {created}")

        # Count the properties owned by this admin.
        cursor.execute("SELECT COUNT(*) FROM properties WHERE owner_user_id = %s", (user_id,))
        prop_count = cursor.fetchone()[0]
        print(f" Properties: {prop_count}")
        print()

    # WARNING: destructive. Removes every row from properties without
    # asking for confirmation.
    print("🗑️ Deleting all properties...")
    cursor.execute("DELETE FROM properties")
    conn.commit()
    print("✅ All properties deleted")

    # Verify.
    cursor.execute("SELECT COUNT(*) FROM properties")
    final_count = cursor.fetchone()[0]
    print(f"📊 Properties after cleanup: {final_count}")

    print("\n" + "=" * 70)
    print("✅ Ready for fresh insert!")
    print("Run: python add_properties.py --yes")
    print("=" * 70)

except Exception as e:
    # Make the abandonment of any uncommitted DELETE explicit.
    if conn is not None:
        conn.rollback()
    print(f"❌ Error: {e}")
    import traceback
    traceback.print_exc()
finally:
    if cursor is not None:
        cursor.close()
    if conn is not None:
        conn.close()
71
+
debug_properties.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Скрипт для детальной проверки состояния properties в БД
3
+ """
4
+ import psycopg2
5
+
6
import os

# Connection parameters for the PostgreSQL database on Render.
# Every value can be overridden through the environment (DB_HOST, DB_PORT,
# DB_NAME, DB_USER, DB_PASSWORD).
# SECURITY: the fallback password below is committed to version control and
# must be treated as compromised. Rotate it and remove the fallback.
DB_CONFIG = {
    'host': os.environ.get('DB_HOST', 'dpg-d5ht8vi4d50c739akh2g-a.virginia-postgres.render.com'),
    'port': int(os.environ.get('DB_PORT', '5432')),
    'database': os.environ.get('DB_NAME', 'lead_exchange_bk'),
    'user': os.environ.get('DB_USER', 'lead_exchange_bk_user'),
    'password': os.environ.get('DB_PASSWORD', '8m2gtTRBW0iAr7nY2Aadzz0VcZBEVKYM'),
}

# The admin account this script checks for explicitly.
SUSPECT_ADMIN_ID = 'f4e8f58b-94f4-4e0f-bd85-1b06b8a3f242'

conn = None
cursor = None
try:
    conn = psycopg2.connect(**DB_CONFIG)
    cursor = conn.cursor()

    print("=" * 70)
    print("DETAILED PROPERTIES CHECK")
    print("=" * 70)

    # Total number of properties.
    cursor.execute("SELECT COUNT(*) FROM properties")
    total = cursor.fetchone()[0]
    print(f"\n📊 Total properties: {total}")

    # Breakdown by owner_user_id.
    cursor.execute("""
        SELECT owner_user_id, COUNT(*)
        FROM properties
        GROUP BY owner_user_id
        ORDER BY COUNT(*) DESC
    """)
    print("\n👥 Properties by owner:")
    for owner_id, count in cursor.fetchall():
        print(f" {owner_id}: {count} properties")

    # Check whether those owners exist in the users table.
    print("\n🔍 Checking if owners exist in users table:")
    cursor.execute("""
        SELECT p.owner_user_id, u.email, u.role, COUNT(p.property_id) as prop_count
        FROM properties p
        LEFT JOIN users u ON p.owner_user_id = u.user_id
        GROUP BY p.owner_user_id, u.email, u.role
    """)
    for owner_id, email, role, count in cursor.fetchall():
        # str() keeps the slice safe when owner_user_id is NULL or a UUID object.
        short_id = str(owner_id)[:8]
        if email:
            print(f" ✅ {short_id}... → {email} ({role}) - {count} props")
        else:
            print(f" ❌ {short_id}... → USER NOT FOUND! - {count} props (will be deleted on cascade)")

    # Foreign-key constraints on properties.
    print("\n🔗 Foreign key constraints on properties:")
    cursor.execute("""
        SELECT
            tc.constraint_name,
            tc.constraint_type,
            kcu.column_name,
            ccu.table_name AS foreign_table_name,
            ccu.column_name AS foreign_column_name,
            rc.delete_rule
        FROM information_schema.table_constraints AS tc
        JOIN information_schema.key_column_usage AS kcu
            ON tc.constraint_name = kcu.constraint_name
        JOIN information_schema.constraint_column_usage AS ccu
            ON ccu.constraint_name = tc.constraint_name
        LEFT JOIN information_schema.referential_constraints AS rc
            ON tc.constraint_name = rc.constraint_name
        WHERE tc.table_name = 'properties' AND tc.constraint_type = 'FOREIGN KEY'
    """)

    for constraint_name, constraint_type, column, foreign_table, foreign_column, delete_rule in cursor.fetchall():
        print(f" {column} → {foreign_table}.{foreign_column}")
        print(f" Delete rule: {delete_rule}")

    # Sample rows.
    print("\n📄 Sample properties (first 5):")
    cursor.execute("""
        SELECT property_id, title, owner_user_id, created_at
        FROM properties
        ORDER BY created_at DESC
        LIMIT 5
    """)
    for prop_id, title, owner_id, created in cursor.fetchall():
        # The title may be NULL; fall back to an empty string before slicing.
        print(f" - {(title or '')[:50]}...")
        print(f" ID: {prop_id}")
        print(f" Owner: {owner_id}")
        print(f" Created: {created}")

    # Check the specific admin user.
    print(f"\n🔍 Checking specific admin user ({SUSPECT_ADMIN_ID}):")
    cursor.execute("""
        SELECT user_id, email, role, first_name, last_name
        FROM users
        WHERE user_id = %s
    """, (SUSPECT_ADMIN_ID,))
    result = cursor.fetchone()
    if result:
        print(f" ✅ User exists: {result[1]} ({result[2]}) - {result[3]} {result[4]}")
    else:
        print(" ❌ User NOT FOUND! This is why properties are being deleted!")
        print(" ℹ️ Properties reference a user that doesn't exist")

    print("\n" + "=" * 70)

except Exception as e:
    print(f"❌ Error: {e}")
    import traceback
    traceback.print_exc()
finally:
    if cursor is not None:
        cursor.close()
    if conn is not None:
        conn.close()
113
+
deploy_to_hf.bat ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
@echo off
REM Deploy the updated service to HuggingFace Spaces.
REM Must be run from the repository root. The script aborts if the
REM huggingface directory is missing, because otherwise the git commands
REM below would stage and push the wrong repository. It also aborts if the
REM push fails.

echo.
echo ======================================================================
echo DEPLOYING UPDATED SERVICE TO HUGGINGFACE SPACES
echo ======================================================================
echo.

cd huggingface
if errorlevel 1 (
    echo ERROR: huggingface directory not found. Run this script from the repository root.
    pause
    exit /b 1
)

echo Current changes:
git status

echo.
echo Committing changes...
git add -A
REM git commit returns non-zero when there is nothing to commit; that is not fatal.
git commit -m "Fix: Update to RoSBERTa model (1024 dims), remove half precision, increase timeout"

echo.
echo Pushing to HuggingFace Spaces...
git push
if errorlevel 1 (
    echo ERROR: git push failed. Deployment was NOT completed.
    pause
    exit /b 1
)

echo.
echo ======================================================================
echo DEPLOYMENT COMPLETE
echo ======================================================================
echo.
echo Wait ~2-3 minutes for HuggingFace Spaces to rebuild and restart
echo Then run: python check_service_status.py
echo.

pause
34
+
huggingface/Dockerfile CHANGED
@@ -18,8 +18,9 @@ ENV PYTHONUNBUFFERED=1
18
  ENV TRANSFORMERS_CACHE=/home/user/.cache/transformers
19
  ENV SENTENCE_TRANSFORMERS_HOME=/home/user/.cache/sentence_transformers
20
  ENV HF_HOME=/home/user/.cache/huggingface
21
- ENV EMBEDDING_MODEL=sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
22
- ENV EMBEDDING_DIMENSIONS=384
 
23
 
24
  # Copy requirements and install dependencies
25
  COPY --chown=user requirements.txt .
 
18
  ENV TRANSFORMERS_CACHE=/home/user/.cache/transformers
19
  ENV SENTENCE_TRANSFORMERS_HOME=/home/user/.cache/sentence_transformers
20
  ENV HF_HOME=/home/user/.cache/huggingface
21
+ # Model settings
22
+ ENV EMBEDDING_MODEL=ai-forever/ru-en-RoSBERTa
23
+ ENV EMBEDDING_DIMENSIONS=1024
24
 
25
  # Copy requirements and install dependencies
26
  COPY --chown=user requirements.txt .
huggingface/main.py CHANGED
@@ -28,8 +28,8 @@ from dotenv import load_dotenv
28
  load_dotenv()
29
 
30
  # Конфигурация
31
- MODEL_NAME = os.getenv("EMBEDDING_MODEL", "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
32
- EMBEDDING_DIMENSIONS = 384
33
 
34
  # Глобальная модель
35
  model: Optional[SentenceTransformer] = None
@@ -41,12 +41,9 @@ async def lifespan(app: FastAPI):
41
  global model
42
  print(f"Loading embedding model: {MODEL_NAME}")
43
  model = SentenceTransformer(MODEL_NAME, device='cpu')
44
- try:
45
- model.half()
46
- print("Model converted to half precision (float16)")
47
- except Exception as e:
48
- print(f"Could not convert to half precision: {e}")
49
- print(f"Model loaded. Dimensions: {model.get_sentence_embedding_dimension()}")
50
  yield
51
  model = None
52
 
 
28
  load_dotenv()
29
 
30
  # Конфигурация
31
+ MODEL_NAME = os.getenv("EMBEDDING_MODEL", "ai-forever/ru-en-RoSBERTa")
32
+ EMBEDDING_DIMENSIONS = 1024 # RoSBERTa actual dimensions (verified)
33
 
34
  # Глобальная модель
35
  model: Optional[SentenceTransformer] = None
 
41
  global model
42
  print(f"Loading embedding model: {MODEL_NAME}")
43
  model = SentenceTransformer(MODEL_NAME, device='cpu')
44
+ # NOTE: half precision (float16) может вызывать ошибки на CPU
45
+ # Используем float32 для лучшей совместимости
46
+ print(f"Model loaded in full precision (float32). Dimensions: {model.get_sentence_embedding_dimension()}")
 
 
 
47
  yield
48
  model = None
49
 
index_all_properties.py ADDED
@@ -0,0 +1,256 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Скрипт для индексации всех объектов недвижимости через HuggingFace Spaces сервис
3
+
4
+ Usage:
5
+ python index_all_properties.py # Интерактивный режим
6
+ python index_all_properties.py --yes # Автоподтверждение
7
+ """
8
+ import psycopg2
9
+ import requests
10
+ import time
11
+ import sys
12
+ from typing import List, Dict, Any
13
+
14
+ # Конфигурация БД
15
# Database connection settings, passed as keyword arguments to psycopg2.connect().
# Secrets must not live in the repository: every value can be overridden through
# an environment variable, and the password has no in-code default at all.
import os

DB_CONFIG = {
    'host': os.getenv('DB_HOST', 'dpg-d5ht8vi4d50c739akh2g-a.virginia-postgres.render.com'),
    'port': int(os.getenv('DB_PORT', '5432')),
    'database': os.getenv('DB_NAME', 'lead_exchange_bk'),
    'user': os.getenv('DB_USER', 'lead_exchange_bk_user'),
    # Export DB_PASSWORD before running the script. When it is unset the value
    # is None. NOTE(review): psycopg2 is expected to drop None-valued keywords
    # so libpq can fall back to PGPASSWORD / ~/.pgpass -- confirm.
    'password': os.getenv('DB_PASSWORD'),
}
22
+
23
+ # URL сервиса на HuggingFace Spaces
24
+ HF_SERVICE_URL = "https://calcifer0323-matching.hf.space"
25
+
26
def get_properties_from_db() -> List[Dict[str, Any]]:
    """Fetch every row of the ``properties`` table, newest first.

    Connects with the module-level ``DB_CONFIG``, selects a fixed set of
    columns ordered by ``created_at DESC`` and converts each row to a dict
    keyed by column name.

    Returns:
        One dict per row with the keys ``property_id``, ``title``,
        ``description``, ``address``, ``property_type``, ``area``, ``price``,
        ``rooms`` and ``status``.

    Raises:
        psycopg2.Error: propagated unchanged if connecting or querying fails.
    """
    print("📥 Fetching properties from database...")

    # Must stay in the same order as the SELECT list below.
    columns = ['property_id', 'title', 'description', 'address', 'property_type',
               'area', 'price', 'rooms', 'status']

    conn = psycopg2.connect(**DB_CONFIG)
    try:
        cursor = conn.cursor()
        try:
            cursor.execute("""
                SELECT property_id, title, description, address, property_type,
                       area, price, rooms, status
                FROM properties
                ORDER BY created_at DESC
            """)
            properties = [dict(zip(columns, row)) for row in cursor.fetchall()]
        finally:
            # Release the cursor even when the query fails.
            cursor.close()
    finally:
        # Always give the connection back; the original leaked it on errors.
        conn.close()

    print(f"✅ Fetched {len(properties)} properties")
    return properties
53
+
54
def prepare_text_for_property(prop: Dict[str, Any]) -> str:
    """Build the text that describes a property for embedding generation.

    Labelled sentences are emitted for ``title``, ``description`` and
    ``address``, followed by one "Характеристики" sentence listing
    ``property_type``, ``rooms``, ``area`` and ``price``. Any field that is
    missing or falsy (``None``, ``""``, ``0``) is skipped.

    Args:
        prop: Property record keyed by column name.

    Returns:
        The sentences joined with ``". "``; an empty string if nothing is set.
    """
    # (key, label) pairs for the free-text fields, in output order.
    labelled_fields = (
        ('title', "Название"),
        ('description', "Описание"),
        ('address', "Адрес"),
    )
    sentences = [
        f"{label}: {prop[key]}"
        for key, label in labelled_fields
        if prop.get(key)
    ]

    # (key, template) pairs for the structured fields; the price is rendered
    # with a thousands separator.
    detail_templates = (
        ('property_type', "тип: {}"),
        ('rooms', "комнат: {}"),
        ('area', "площадь: {} м²"),
        ('price', "цена: {:,} ₽"),
    )
    characteristics = [
        template.format(prop[key])
        for key, template in detail_templates
        if prop.get(key)
    ]
    if characteristics:
        sentences.append("Характеристики: " + ", ".join(characteristics))

    return ". ".join(sentences)
82
+
83
def index_batch(properties: List[Dict[str, Any]], batch_size: int = 20) -> Dict[str, Any]:
    """Send one batch of properties to the ``/batch`` endpoint of the embedding service.

    Args:
        properties: Records to index. Each one is read for the keys
            ``property_id``, ``title``, ``description``, ``price``, ``rooms``,
            ``area`` and ``address``.
        batch_size: Not used by this function; kept so existing callers keep
            working. The request always contains exactly ``properties``.

    Returns:
        The decoded JSON body when the service answers HTTP 200. ``None`` on
        any other status, on a body that is not valid JSON, or on a request
        error (timeout, connection failure, ...). Failures are printed rather
        than raised.
    """
    import json  # only needed to measure the serialized payload

    # 2 minutes per batch (was 5 minutes; the original author notes that the
    # server-side timeout is 30 s).
    timeout_seconds = 120
    endpoint = f"{HF_SERVICE_URL}/batch"

    # Shape every record the way the /batch endpoint expects it. NULL columns
    # arrive as None under an existing key, so ``or ''`` (rather than a
    # ``.get`` default, which never triggers) is what yields an empty string.
    items = [
        {
            "entity_id": str(prop['property_id']),
            "title": prop.get('title') or '',
            "description": prop.get('description') or '',
            "price": float(prop['price']) if prop.get('price') else None,
            "rooms": int(prop['rooms']) if prop.get('rooms') else None,
            "area": float(prop['area']) if prop.get('area') else None,
            "address": prop.get('address') or '',
            "district": "",  # could be extracted from the address if needed
        }
        for prop in properties
    ]
    payload = {"items": items}

    try:
        print(f" 📤 Sending batch of {len(items)} items to {endpoint}")
        # Measure the JSON encoding, not the Python repr, so "bytes" is accurate.
        payload_bytes = len(json.dumps(payload, default=str).encode('utf-8'))
        print(f" Payload size: {payload_bytes} bytes")

        response = requests.post(endpoint, json=payload, timeout=timeout_seconds)

        print(f" Response status: {response.status_code}")

        if response.status_code == 200:
            return response.json()

        print(f" ❌ Error: {response.status_code}")
        print(f" Response: {response.text[:500]}")

        # Try to show the structured error detail as well.
        try:
            print(f" Detail: {response.json()}")
        except ValueError:
            # Body is not JSON; the raw text printed above is all there is.
            pass

        return None

    except requests.exceptions.Timeout:
        print(f" ❌ Request timeout ({timeout_seconds} seconds)")
        return None
    except requests.exceptions.ConnectionError as e:
        print(f" ❌ Connection error: {e}")
        return None
    except requests.exceptions.RequestException as e:
        print(f" ❌ Request failed: {e}")
        return None
    except ValueError as e:
        # HTTP 200 with a body that is not valid JSON (older requests versions
        # raise a plain ValueError here).
        print(f" ❌ Invalid JSON in response: {e}")
        return None
140
+
141
def save_embeddings_to_file(results: List[Dict], filename: str = "generated_embeddings.json"):
    """Save the indexing results to a JSON file (for manual inspection).

    Args:
        results: JSON-serializable list of result dicts.
        filename: Destination path; the file is overwritten.
    """
    import json

    # UTF-8 output with non-ASCII characters kept readable and a 2-space indent.
    with open(filename, mode='w', encoding='utf-8') as out_file:
        json.dump(results, fp=out_file, ensure_ascii=False, indent=2)

    print(f"💾 Saved embeddings to {filename}")
149
+
150
def main():
    """Index every property from the database through the HuggingFace Spaces service.

    Steps: load the properties, show a sample, ask for confirmation (skipped
    when ``--yes`` or ``-y`` is on the command line), send the properties in
    batches of 20 with a 10-second pause between batches, write the collected
    responses to ``indexing_results.json`` and print a summary.
    """
    banner = "=" * 70
    print(banner)
    print("INDEXING PROPERTIES THROUGH HUGGINGFACE SPACES")
    print(banner)

    # Command-line flags
    auto_confirm = any(flag in sys.argv for flag in ('--yes', '-y'))
    if auto_confirm:
        print("🤖 Auto-confirm mode enabled")

    # 1. Load the properties from the database
    properties = get_properties_from_db()
    if not properties:
        print("⚠️ No properties found in database")
        return

    total = len(properties)
    print(f"\n📊 Total properties to index: {total}")

    # Show one example so the operator can sanity-check the data
    first = properties[0]
    print(f"\n📄 Sample property:")
    print(f" ID: {first['property_id']}")
    print(f" Title: {first.get('title', 'N/A')}")
    print(f" Text preview: {prepare_text_for_property(first)[:150]}...")

    # Confirmation
    if auto_confirm:
        print(f"\n✅ Auto-confirming indexing of {total} properties")
    else:
        print(f"\n🚀 Ready to index {total} properties")
    print(f" Service: {HF_SERVICE_URL}")
    print(f" Endpoint: /batch")

    if not auto_confirm:
        try:
            answer = input("\nProceed? (yes/y/no/n): ")
        except EOFError:
            # No interactive stdin (e.g. run from a pipe or CI job).
            print("\n❌ Error: EOF when reading input")
            print("Run with --yes flag to auto-confirm: python index_all_properties.py --yes")
            return
        if answer.lower() not in ('yes', 'y'):
            print("Cancelled by user")
            return

    # 2. Index in batches
    batch_size = 20  # reduced from 50 to 20 (processing takes ~30 s on the server)
    total_batches = (total + batch_size - 1) // batch_size  # ceiling division

    print(f"\n📦 Processing {total_batches} batches (batch size: {batch_size})")
    print(f" ⏱️ Each batch will take ~30-40 seconds to process")
    print(f" 📊 Total time estimate: ~{(total_batches * 35) // 60} minutes")

    all_results = []
    successful = 0
    failed = 0

    for batch_num, start in enumerate(range(0, total, batch_size), start=1):
        batch = properties[start:start + batch_size]
        print(f"\n🔄 Batch {batch_num}/{total_batches} ({len(batch)} items)")

        result = index_batch(batch, batch_size)

        if not result:
            # No usable response: count the whole batch as failed.
            print(f" ❌ Batch failed completely")
            failed += len(batch)
        else:
            all_results.append(result)
            batch_successful = result.get('successful', 0)
            batch_failed = result.get('failed', 0)
            successful += batch_successful
            failed += batch_failed

            print(f" ✅ Success: {batch_successful}/{len(batch)}")
            if batch_failed > 0:
                print(f" ⚠️ Failed: {batch_failed}")

        # Pause between batches, but not after the last one
        if start + batch_size < total:
            print(f" ⏳ Waiting 10 seconds before next batch...")
            time.sleep(10)

    # 3. Save the results
    if all_results:
        save_embeddings_to_file(all_results, "indexing_results.json")

    # 4. Summary
    print("\n" + banner)
    print("INDEXING COMPLETE")
    print(banner)
    print(f"✅ Successfully indexed: {successful}/{total}")
    print(f"❌ Failed: {failed}/{total}")

    if successful > 0:
        print(f"\n💡 Note: Embeddings were generated on HuggingFace Spaces")
        print(f" Results saved to: indexing_results.json")
        print(f" Backend should fetch these embeddings and store in DB")

    print("\n" + banner)
253
+
254
+ if __name__ == '__main__':
255
+ main()
256
+
index_objects.py ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import json
3
+ import re
4
+ import sqlite3
5
+ import time
6
+
7
+ # Читаем SQL файл
8
+ with open('pars_samolet.sql', 'r', encoding='utf-8') as f:
9
+ sql_content = f.read()
10
+
11
+ # Извлекаем CREATE TABLE
12
+ create_match = re.search(r'CREATE TABLE[^;]+;', sql_content, re.DOTALL)
13
+ if not create_match:
14
+ raise ValueError("CREATE TABLE not found")
15
+ create_stmt = create_match.group(0)
16
+
17
+ # Извлекаем все INSERT
18
+ insert_pattern = r'INSERT INTO mytable\([^)]+\) VALUES\s*\([^;]+\);'
19
+ inserts = re.findall(insert_pattern, sql_content, re.DOTALL)
20
+
21
+ # Создаем временную базу для парсинга
22
+ conn = sqlite3.connect(':memory:')
23
+ cursor = conn.cursor()
24
+ cursor.execute(create_stmt)
25
+
26
+ # Функция для экранирования SQL-строк
27
def escape_sql_value(value):
    """Return ``value`` as text that is safe inside a single-quoted SQL literal.

    ``None`` becomes the string ``'NULL'``; anything else is converted with
    ``str()`` and every single quote is doubled.
    """
    return 'NULL' if value is None else str(value).replace("'", "''")
32
+
33
+ # Выполняем INSERT с экранированием
34
+ for insert in inserts:
35
+ try:
36
+ cursor.execute(insert)
37
+ except sqlite3.OperationalError as e:
38
+ # Если есть ошибка, пытаемся очистить строку от проблемных символов
39
+ # Находим VALUES часть
40
+ match = re.search(r'VALUES\s*\((.+)\);', insert, re.DOTALL)
41
+ if match:
42
+ values_str = match.group(1)
43
+ # Разбиваем значения по запятым, учитывая строки в кавычках
44
+ values = []
45
+ current = ''
46
+ in_string = False
47
+ for char in values_str:
48
+ if char == "'" and (len(current) == 0 or current[-1] != '\\'):
49
+ in_string = not in_string
50
+ current += char
51
+ if not in_string and char == ',':
52
+ values.append(current[:-1].strip())
53
+ current = ''
54
+ if current:
55
+ values.append(current.strip())
56
+
57
+ # Экранируем каждое строковое значение
58
+ cleaned_values = []
59
+ for val in values:
60
+ if val.upper() == 'NULL':
61
+ cleaned_values.append('NULL')
62
+ elif val.startswith("'") and val.endswith("'"):
63
+ # Это строковое значение
64
+ inner = val[1:-1]
65
+ # Удаляем лишние переводы строк и табуляции
66
+ inner = inner.replace('\n', ' ').replace('\r', ' ')
67
+ inner = ' '.join(inner.split()) # Удаляем лишние пробелы
68
+ inner = escape_sql_value(inner)
69
+ cleaned_values.append(f"'{inner}'")
70
+ else:
71
+ # Числовое или другое значение
72
+ cleaned_values.append(val)
73
+
74
+ # Собираем новый INSERT
75
+ insert_start = insert[:match.start(1)]
76
+ insert_end = insert[match.end(1):]
77
+ new_insert = insert_start + ', '.join(cleaned_values) + insert_end
78
+
79
+ try:
80
+ cursor.execute(new_insert)
81
+ print(f"Fixed problematic INSERT")
82
+ except Exception as e2:
83
+ print(f"Still failed to execute INSERT: {e2}")
84
+ # Пропускаем проблемную запись
85
+ continue
86
+ else:
87
+ print(f"Could not parse INSERT: {insert[:100]}...")
88
+ continue
89
+
90
+ # Получаем данные
91
+ cursor.execute('SELECT * FROM mytable')
92
+ rows = cursor.fetchall()
93
+
94
+ # Получаем имена колонок
95
+ cursor.execute("PRAGMA table_info(mytable)")
96
+ columns = [col[1] for col in cursor.fetchall()]
97
+
98
+ # Создаем список словарей
99
+ objects = [dict(zip(columns, row)) for row in rows]
100
+ conn.close()
101
+
102
+ print(f"Total objects parsed: {len(objects)}")
103
+
104
+ # Split into batches of batch_size (50) objects and keep only the first 4 batches
105
+ batch_size = 50
106
+ batches = [objects[i:i + batch_size] for i in range(0, len(objects), batch_size)][:4]
107
+
108
+ # URL для API
109
+ url = 'https://calcifer0323-matching.hf.space/batch'
110
+
111
+ responses = []
112
+ for i, batch in enumerate(batches):
113
+ print(f"Sending batch {i+1} with {len(batch)} objects")
114
+
115
+ # Преобразуем объекты в items для /batch
116
+ items = []
117
+ for obj in batch:
118
+ item = {
119
+ "entity_id": str(obj["property_id"]),
120
+ "title": str(obj.get("title", "")),
121
+ "description": str(obj.get("description", "")),
122
+ "price": float(obj.get("price", 0)) if obj.get("price") else None,
123
+ "rooms": float(obj.get("rooms", 0)) if obj.get("rooms") else None,
124
+ "area": float(obj.get("area", 0)) if obj.get("area") else None,
125
+ "address": str(obj.get("address", "")),
126
+ "district": str(obj.get("city", ""))
127
+ }
128
+ items.append(item)
129
+
130
+ payload = {"items": items}
131
+
132
+ try:
133
+ response = requests.post(url, json=payload, timeout=300)
134
+ if response.status_code == 200:
135
+ data = response.json()
136
+ responses.append(data)
137
+ successful = data.get('successful', 0)
138
+ total = data.get('total', 0)
139
+ print(f"Batch {i+1} successful, embedded {successful}/{total}")
140
+ else:
141
+ print(f"Batch {i+1} failed: {response.status_code} - {response.text}")
142
+ except Exception as e:
143
+ print(f"Error sending batch {i+1}: {e}")
144
+
145
+ # Задержка между батчами
146
+ if i < len(batches) - 1:
147
+ print("Waiting 10 seconds before next batch...")
148
+ time.sleep(10)
149
+
150
+ # Сохраняем результаты в SQL файл
151
+ with open('indexed_objects.sql', 'w', encoding='utf-8') as f:
152
+ f.write("CREATE TABLE IF NOT EXISTS indexed_objects (\n")
153
+ f.write(" property_id VARCHAR(36) PRIMARY KEY,\n")
154
+ f.write(" embedding JSON\n")
155
+ f.write(");\n\n")
156
+
157
+ f.write("DELETE FROM indexed_objects;\n\n")
158
+
159
+ for resp in responses:
160
+ for result in resp.get("results", []):
161
+ if result.get("success"):
162
+ property_id = result["entity_id"]
163
+ embedding = json.dumps(result["embedding"])
164
+ # Экранируем для SQL
165
+ embedding_escaped = embedding.replace("'", "''")
166
+ f.write(f"INSERT INTO indexed_objects (property_id, embedding) VALUES ('{property_id}', '{embedding_escaped}');\n")
167
+
168
+ print(f"Indexing complete. Results saved to indexed_objects.sql")
169
+ print(f"Total batches processed: {len(responses)}")
indexed_objects.sql ADDED
The diff for this file is too large to render. See raw diff
 
pars_samolet.sql ADDED
The diff for this file is too large to render. See raw diff
 
requirements-scripts.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ # Зависимости для вспомогательных скриптов
2
+ # Установка: pip install -r requirements-scripts.txt
3
+
4
+ psycopg2-binary>=2.9.9 # PostgreSQL адаптер
5
+ bcrypt>=4.1.2 # Хеширование паролей
6
+
test_db_connection.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Тестовый скрипт для проверки подключения к БД
3
+ """
4
+ try:
5
+ import psycopg2
6
+ print("✅ psycopg2 installed")
7
+ except ImportError:
8
+ print("❌ psycopg2 not installed. Run: pip install psycopg2-binary")
9
+ exit(1)
10
+
11
+ try:
12
+ import bcrypt
13
+ print("✅ bcrypt installed")
14
+ except ImportError:
15
+ print("❌ bcrypt not installed. Run: pip install bcrypt")
16
+ exit(1)
17
+
18
+ # Данные подключения
19
# Connection settings, passed as keyword arguments to psycopg2.connect().
# Values come from the environment so that the database password is never
# committed; only the non-secret fields keep an in-code default.
import os

DB_CONFIG = {
    'host': os.getenv('DB_HOST', 'dpg-d5ht8vi4d50c739akh2g-a.virginia-postgres.render.com'),
    'port': int(os.getenv('DB_PORT', '5432')),
    'database': os.getenv('DB_NAME', 'lead_exchange_bk'),
    'user': os.getenv('DB_USER', 'lead_exchange_bk_user'),
    # Export DB_PASSWORD before running this check. When it is unset the value
    # is None. NOTE(review): psycopg2 is expected to drop None-valued keywords
    # so libpq can fall back to PGPASSWORD / ~/.pgpass -- confirm.
    'password': os.getenv('DB_PASSWORD'),
}
26
+
27
+ print(f"\nConnecting to {DB_CONFIG['host']}...")
28
+
29
+ try:
30
+ conn = psycopg2.connect(**DB_CONFIG)
31
+ print("✅ Connected successfully!")
32
+
33
+ cursor = conn.cursor()
34
+
35
+ # Проверяем таблицы
36
+ cursor.execute("""
37
+ SELECT table_name
38
+ FROM information_schema.tables
39
+ WHERE table_schema = 'public'
40
+ ORDER BY table_name
41
+ """)
42
+
43
+ tables = cursor.fetchall()
44
+ print(f"\n📋 Tables in database:")
45
+ for table in tables:
46
+ print(f" - {table[0]}")
47
+
48
+ # Проверяем пользователей
49
+ cursor.execute("SELECT COUNT(*) FROM users")
50
+ user_count = cursor.fetchone()[0]
51
+ print(f"\n👤 Users count: {user_count}")
52
+
53
+ if user_count > 0:
54
+ cursor.execute("SELECT email, role FROM users LIMIT 5")
55
+ users = cursor.fetchall()
56
+ print(" Sample users:")
57
+ for email, role in users:
58
+ print(f" - {email} ({role})")
59
+
60
+ # Проверяем properties
61
+ cursor.execute("SELECT COUNT(*) FROM properties")
62
+ prop_count = cursor.fetchone()[0]
63
+ print(f"\n🏠 Properties count: {prop_count}")
64
+
65
+ cursor.close()
66
+ conn.close()
67
+
68
+ print("\n✅ All checks passed!")
69
+
70
+ except psycopg2.Error as e:
71
+ print(f"\n❌ Database error: {e}")
72
+ except Exception as e:
73
+ print(f"\n❌ Error: {e}")
74
+