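# Spaces: Sleeping, Sleeping -- stray page-status text that preceded the script; kept as a comment so it is not parsed as code.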
import logging
import pickle
import sqlite3
from contextlib import closing
from pathlib import Path

from tqdm import tqdm
# Module-level logger used by the migration routine for progress and error messages.
logger = logging.getLogger(__name__)
# Configure the root logger at INFO level (a no-op if the root logger already
# has handlers, per logging.basicConfig semantics).
logging.basicConfig(level=logging.INFO)
def _insert_in_batches(cursor, sql, rows, batch_size):
    """Feed *rows* to ``cursor.executemany(sql, ...)`` in lists of at most *batch_size* rows."""
    batch = []
    for row in rows:
        batch.append(row)
        if len(batch) >= batch_size:
            cursor.executemany(sql, batch)
            batch = []
    if batch:
        # Flush the final, partially filled batch.
        cursor.executemany(sql, batch)


def migrate_itemcf(pkl_path, db_path, batch_size=100000):
    """Copy the contents of an ItemCF pickle into a SQLite database.

    The pickle is read with ``.get('sim_matrix')`` and ``.get('user_hist')``,
    so it must unpickle to a mapping; a missing key is treated as empty.
    ``sim_matrix`` is iterated as ``{item1: {item2: score}}`` and
    ``user_hist`` as ``{user_id: iterable_of_isbn}``.

    Two tables are dropped (if present) and recreated:

    * ``item_similarity(item1 TEXT, item2 TEXT, score REAL)``, indexed on
      ``item1`` (``idx_item1``)
    * ``user_history(user_id TEXT, isbn TEXT)``, indexed on ``user_id``
      (``idx_user``)

    Args:
        pkl_path: Path (str or path-like) of the pickle file to read.
        db_path: Path (str or path-like) of the SQLite database to write.
        batch_size: Maximum number of rows handed to each ``executemany``
            call. Defaults to 100000, the value previously hard-coded.

    Returns:
        None. If ``pkl_path`` does not exist an error is logged and the
        function returns without touching the database.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    pkl_path = Path(pkl_path)
    db_path = Path(db_path)

    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    if not pkl_path.exists():
        logger.error(f"Pickle not found at {pkl_path}")
        return

    logger.info(f"Loading massive pickle {pkl_path} (1.4GB)... This may take a minute.")
    # SECURITY: pickle.load can execute arbitrary code embedded in the file.
    # Only run this migration on pickles produced by a trusted pipeline.
    with open(pkl_path, 'rb') as f:
        data = pickle.load(f)

    sim_matrix = data.get('sim_matrix', {})
    user_hist = data.get('user_hist', {})

    logger.info(f"Connecting to SQLite {db_path}...")
    # closing() guarantees the connection is released even when an insert or
    # index creation raises; the commit below is reached only on success.
    with closing(sqlite3.connect(db_path)) as conn:
        cursor = conn.cursor()

        # Create tables (dropping any previous copy so the migration can be re-run).
        cursor.execute("DROP TABLE IF EXISTS item_similarity")
        cursor.execute("""
            CREATE TABLE item_similarity (
                item1 TEXT,
                item2 TEXT,
                score REAL
            )
        """)
        cursor.execute("DROP TABLE IF EXISTS user_history")
        cursor.execute("""
            CREATE TABLE user_history (
                user_id TEXT,
                isbn TEXT
            )
        """)

        # Insert item similarity: one row per (item1, item2) pair.
        logger.info("Inserting item similarity data...")
        similarity_rows = (
            (item1, item2, score)
            # tqdm wraps the outer dict view so the progress bar knows its total.
            for item1, related in tqdm(sim_matrix.items(), desc="ItemCF Similarity")
            for item2, score in related.items()
        )
        _insert_in_batches(
            cursor,
            "INSERT INTO item_similarity VALUES (?, ?, ?)",
            similarity_rows,
            batch_size,
        )

        # Insert user history: one row per (user_id, isbn) pair.
        logger.info("Inserting user history data...")
        history_rows = (
            (user_id, isbn)
            for user_id, isbns in tqdm(user_hist.items(), desc="User History")
            for isbn in isbns
        )
        _insert_in_batches(
            cursor,
            "INSERT INTO user_history VALUES (?, ?)",
            history_rows,
            batch_size,
        )

        # Build the indices after the bulk load rather than maintaining them
        # row by row during the inserts.
        logger.info("Creating indices...")
        cursor.execute("CREATE INDEX idx_item1 ON item_similarity(item1)")
        cursor.execute("CREATE INDEX idx_user ON user_history(user_id)")

        conn.commit()

    logger.info("Migration complete.")
# Script entry point: migrate the default ItemCF pickle into the recall database.
if __name__ == "__main__":
    migrate_itemcf(
        pkl_path="data/model/recall/itemcf.pkl",
        db_path="data/recall_models.db",
    )