| import argparse |
| import os |
| import sqlite3 |
|
|
| import lmdb |
|
|
|
|
def get_map_size(lmdb_path):
    """Return an LMDB map size in bytes: twice the data file size, floored at 2 MiB."""
    floor = 1048576 * 2
    try:
        data_bytes = os.path.getsize(os.path.join(lmdb_path, "data.mdb"))
    except OSError:
        # data.mdb missing or unreadable: fall back to the 2 MiB floor.
        return floor
    return max(data_bytes * 2, floor)
|
|
|
|
def open_sqlite(path):
    """Open the SQLite cache at *path*, creating the pixif_cache table if absent.

    Returns the open connection; the committed schema is (post_id PRIMARY KEY, url).
    """
    connection = sqlite3.connect(path)
    ddl = """
    CREATE TABLE IF NOT EXISTS pixif_cache (
        post_id TEXT PRIMARY KEY,
        url TEXT
    )
    """
    connection.execute(ddl)
    connection.commit()
    return connection
|
|
|
|
def upsert_rows(conn, rows):
    """Bulk-insert (post_id, url) pairs into pixif_cache, replacing url on conflict.

    A falsy *rows* sequence is a no-op. Does not commit; the caller owns the
    transaction boundary.
    """
    if not rows:
        return
    sql = """
        INSERT INTO pixif_cache (post_id, url)
        VALUES (?, ?)
        ON CONFLICT(post_id) DO UPDATE SET url = excluded.url
        """
    conn.executemany(sql, rows)
|
|
|
|
def migrate(lmdb_path, sqlite_path, batch_size):
    """Copy every key/value pair from the LMDB cache at *lmdb_path* into SQLite.

    Keys become post_id and values become url (both UTF-8 decoded; empty/None
    values are stored as ""). Rows are flushed in batches of *batch_size*, each
    batch committed in its own transaction via ``with conn``.

    Fix over the original: the SQLite connection and the LMDB environment are
    now closed in ``finally`` blocks, so a decode error or insert failure
    mid-migration no longer leaks either handle.
    """
    lmdb_env = lmdb.open(
        lmdb_path,
        subdir=True,
        readonly=True,
        lock=False,  # read-only snapshot; no writer lock needed
        map_size=get_map_size(lmdb_path),
    )
    try:
        conn = open_sqlite(sqlite_path)
        try:
            batch = []
            with lmdb_env.begin() as txn:
                for key, value in txn.cursor():
                    post_id = key.decode("utf-8")
                    url = value.decode("utf-8") if value else ""
                    batch.append((post_id, url))
                    if len(batch) >= batch_size:
                        # `with conn` commits the batch, or rolls back on error.
                        with conn:
                            upsert_rows(conn, batch)
                        batch.clear()
            if batch:
                with conn:
                    upsert_rows(conn, batch)
        finally:
            conn.close()
    finally:
        lmdb_env.close()
|
|
|
|
def main():
    """Parse CLI options and run the LMDB -> SQLite migration.

    The working directory is switched to the script's own directory first, so
    the relative default paths ("db", "db.sqlite") resolve next to this file
    regardless of where the script is invoked from.
    """
    parser = argparse.ArgumentParser(description="Migrate LMDB cache to SQLite.")
    parser.add_argument("--lmdb", default="db", help="Path to LMDB directory.")
    parser.add_argument("--sqlite", default="db.sqlite", help="Path to SQLite file.")
    parser.add_argument(
        "--batch-size", type=int, default=1000, help="Rows per batch insert."
    )
    options = parser.parse_args()

    script_dir = os.path.dirname(os.path.abspath(__file__))
    os.chdir(script_dir)
    migrate(options.lmdb, options.sqlite, options.batch_size)
|
|
|
|
| if __name__ == "__main__": |
| main() |
|
|