"""Migrate a pixif LMDB URL cache into a SQLite database.

Reads every key/value pair from the LMDB environment and upserts it into
the ``pixif_cache`` table, committing in batches.
"""

import argparse
import os
import sqlite3

import lmdb


def get_map_size(lmdb_path):
    """Return an LMDB map size: twice the current data file, 2 MiB minimum.

    A generous map size avoids MDB_MAP_FULL-style errors when opening the
    environment; if ``data.mdb`` is missing or unreadable, fall back to the
    2 MiB floor.
    """
    data_path = os.path.join(lmdb_path, "data.mdb")
    try:
        size = os.path.getsize(data_path)
    except OSError:
        return 1048576 * 2
    return max(size * 2, 1048576 * 2)


def open_sqlite(path):
    """Open (creating if needed) the SQLite cache and ensure the table exists.

    Returns an open ``sqlite3.Connection``; the caller owns closing it.
    """
    conn = sqlite3.connect(path)
    conn.execute(
        """
        CREATE TABLE IF NOT EXISTS pixif_cache (
            post_id TEXT PRIMARY KEY,
            url TEXT
        )
        """
    )
    conn.commit()
    return conn


def upsert_rows(conn, rows):
    """Insert ``(post_id, url)`` pairs, updating ``url`` on duplicate ``post_id``.

    No-op for an empty batch. Does not commit; the caller controls the
    transaction (e.g. via ``with conn:``).
    """
    if not rows:
        return
    conn.executemany(
        """
        INSERT INTO pixif_cache (post_id, url)
        VALUES (?, ?)
        ON CONFLICT(post_id) DO UPDATE SET url = excluded.url
        """,
        rows,
    )


def migrate(lmdb_path, sqlite_path, batch_size):
    """Copy every key/value pair from the LMDB env into the SQLite cache.

    Rows are flushed every ``batch_size`` entries; each flush runs in its
    own transaction (the connection context manager commits on success),
    so an interruption loses at most one partial batch.

    Both the LMDB environment and the SQLite connection are closed even if
    the copy raises (the original leaked them on error).
    """
    map_size = get_map_size(lmdb_path)
    lmdb_env = lmdb.open(
        lmdb_path,
        subdir=True,
        readonly=True,
        lock=False,
        map_size=map_size,
    )
    try:
        conn = open_sqlite(sqlite_path)
        try:
            rows = []
            with lmdb_env.begin() as txn:
                for key, value in txn.cursor():
                    post_id = key.decode("utf-8")
                    # Empty/NULL LMDB values become an empty URL string.
                    url = value.decode("utf-8") if value else ""
                    rows.append((post_id, url))
                    if len(rows) >= batch_size:
                        with conn:  # one transaction per batch
                            upsert_rows(conn, rows)
                        rows.clear()
            # Flush the final partial batch, if any.
            if rows:
                with conn:
                    upsert_rows(conn, rows)
        finally:
            conn.close()
    finally:
        lmdb_env.close()


def main():
    """Parse CLI arguments and run the migration from the script's directory."""
    parser = argparse.ArgumentParser(description="Migrate LMDB cache to SQLite.")
    parser.add_argument("--lmdb", default="db", help="Path to LMDB directory.")
    parser.add_argument("--sqlite", default="db.sqlite", help="Path to SQLite file.")
    parser.add_argument("--batch-size", type=int, default=1000, help="Rows per batch insert.")
    args = parser.parse_args()
    # Resolve the relative default paths against the script's own directory,
    # regardless of the caller's cwd. NOTE(review): this also reinterprets
    # user-supplied relative paths — confirm that is intended.
    os.chdir(os.path.dirname(os.path.abspath(__file__)))
    migrate(args.lmdb, args.sqlite, args.batch_size)


if __name__ == "__main__":
    main()