| import argparse | |
| import os | |
| import sqlite3 | |
| import lmdb | |
def get_map_size(lmdb_path):
    """Pick a map size for opening the LMDB environment at *lmdb_path*.

    The result is twice the on-disk size of ``data.mdb`` inside the
    directory, but never less than 2 MiB. If the file size cannot be
    read (``OSError``), the 2 MiB minimum is returned.
    """
    floor = 2 * 1048576  # 2 MiB lower bound
    try:
        doubled = 2 * os.path.getsize(os.path.join(lmdb_path, "data.mdb"))
    except OSError:
        return floor
    return doubled if doubled > floor else floor
def open_sqlite(path):
    """Connect to the SQLite database at *path* and ensure the cache table exists.

    Creates ``pixif_cache`` (``post_id TEXT PRIMARY KEY``, ``url TEXT``)
    when it is missing, commits, and returns the open connection. The
    caller is responsible for closing it.
    """
    schema = """
        CREATE TABLE IF NOT EXISTS pixif_cache (
            post_id TEXT PRIMARY KEY,
            url TEXT
        )
        """
    db = sqlite3.connect(path)
    db.execute(schema)
    db.commit()
    return db
def upsert_rows(conn, rows):
    """Insert ``(post_id, url)`` pairs into ``pixif_cache``, overwriting existing URLs.

    Does nothing when *rows* is empty. No commit is issued here; the
    caller controls the transaction.
    """
    if rows:
        statement = """
            INSERT INTO pixif_cache (post_id, url)
            VALUES (?, ?)
            ON CONFLICT(post_id) DO UPDATE SET url = excluded.url
            """
        conn.executemany(statement, rows)
def migrate(lmdb_path, sqlite_path, batch_size):
    """Copy every key/value pair of an LMDB cache into the SQLite ``pixif_cache`` table.

    The LMDB environment is opened read-only and without locking. Keys
    are decoded as UTF-8 post ids and values as UTF-8 URLs; an empty
    value is stored as ``""``. Rows are upserted in batches, each batch
    in its own SQLite transaction.

    Args:
        lmdb_path: Directory containing the LMDB environment.
        sqlite_path: Path of the SQLite database file to write to.
        batch_size: Number of rows to accumulate before each commit.
            Values below 1 behave like 1 (every row is committed
            individually).

    Raises:
        UnicodeDecodeError: If a key or value is not valid UTF-8.

    Errors raised by ``lmdb`` or ``sqlite3`` propagate unchanged. Both
    the LMDB environment and the SQLite connection are closed on every
    exit path, including failures part-way through the copy.
    """
    lmdb_env = lmdb.open(
        lmdb_path,
        subdir=True,
        readonly=True,
        lock=False,
        map_size=get_map_size(lmdb_path),
    )
    try:
        conn = open_sqlite(sqlite_path)
        try:
            pending = []
            with lmdb_env.begin() as txn:
                for key, value in txn.cursor():
                    post_id = key.decode("utf-8")
                    url = value.decode("utf-8") if value else ""
                    pending.append((post_id, url))
                    if len(pending) >= batch_size:
                        # "with conn" commits the batch, or rolls it back on error.
                        with conn:
                            upsert_rows(conn, pending)
                        pending.clear()
            # Flush whatever is left over from the final partial batch.
            if pending:
                with conn:
                    upsert_rows(conn, pending)
        finally:
            conn.close()
    finally:
        lmdb_env.close()
def main():
    """Command-line entry point: parse options and run the migration.

    The working directory is switched to the directory containing this
    script before migrating, so relative ``--lmdb`` / ``--sqlite`` paths
    (including the defaults) are resolved against the script location.
    """
    cli = argparse.ArgumentParser(description="Migrate LMDB cache to SQLite.")
    option_specs = (
        ("--lmdb", {"default": "db", "help": "Path to LMDB directory."}),
        ("--sqlite", {"default": "db.sqlite", "help": "Path to SQLite file."}),
        ("--batch-size", {"type": int, "default": 1000, "help": "Rows per batch insert."}),
    )
    for flag, spec in option_specs:
        cli.add_argument(flag, **spec)
    opts = cli.parse_args()

    script_dir = os.path.dirname(os.path.abspath(__file__))
    os.chdir(script_dir)
    migrate(opts.lmdb, opts.sqlite, opts.batch_size)


if __name__ == "__main__":
    main()