p / Client /migrate_to_sqlite.py
q6's picture
change client to sqlite
a1a9773
raw
history blame
2.1 kB
import argparse
import os
import sqlite3
import lmdb
def get_map_size(lmdb_path):
    """Estimate an LMDB map size from the on-disk size of data.mdb.

    Returns double the current database file size, never less than 2 MiB,
    so the environment opens with headroom. Falls back to the 2 MiB floor
    when the data file cannot be stat'ed (e.g. it does not exist yet).
    """
    floor = 2 * 1048576
    data_file = os.path.join(lmdb_path, "data.mdb")
    try:
        current_size = os.path.getsize(data_file)
    except OSError:
        return floor
    return max(2 * current_size, floor)
def open_sqlite(path):
    """Open (creating if needed) the SQLite database at *path*.

    Ensures the ``pixif_cache`` table exists (post_id primary key, url).
    Returns the open connection; closing it is the caller's responsibility.
    """
    connection = sqlite3.connect(path)
    ddl = """
        CREATE TABLE IF NOT EXISTS pixif_cache (
            post_id TEXT PRIMARY KEY,
            url TEXT
        )
    """
    connection.execute(ddl)
    connection.commit()
    return connection
def upsert_rows(conn, rows):
    """Insert ``(post_id, url)`` pairs, replacing url on duplicate post_id.

    A no-op when *rows* is empty. Does not commit — transaction control
    stays with the caller.
    """
    if rows:
        statement = """
            INSERT INTO pixif_cache (post_id, url)
            VALUES (?, ?)
            ON CONFLICT(post_id) DO UPDATE SET url = excluded.url
            """
        conn.executemany(statement, rows)
def migrate(lmdb_path, sqlite_path, batch_size):
    """Copy every key/value pair from the LMDB cache into SQLite.

    Keys decode (UTF-8) to ``post_id``; values decode to ``url``, with an
    empty value stored as "". Rows are upserted in batches of *batch_size*,
    each batch in its own transaction (``with conn``), so a crash mid-run
    leaves previously committed batches intact.

    Fix: the original leaked both handles if decoding or a write raised —
    ``conn.close()`` / ``lmdb_env.close()`` were only reached on success.
    Cleanup now runs in try/finally.
    """
    map_size = get_map_size(lmdb_path)
    lmdb_env = lmdb.open(
        lmdb_path,
        subdir=True,
        readonly=True,
        lock=False,
        map_size=map_size,
    )
    try:
        conn = open_sqlite(sqlite_path)
        try:
            rows = []
            with lmdb_env.begin() as txn:
                for key, value in txn.cursor():
                    post_id = key.decode("utf-8")
                    url = value.decode("utf-8") if value else ""
                    rows.append((post_id, url))
                    if len(rows) >= batch_size:
                        # `with conn` commits the batch (or rolls back on error).
                        with conn:
                            upsert_rows(conn, rows)
                        rows.clear()
            # Flush the final partial batch.
            if rows:
                with conn:
                    upsert_rows(conn, rows)
        finally:
            conn.close()
    finally:
        lmdb_env.close()
def main():
    """Parse command-line options and run the LMDB -> SQLite migration."""
    parser = argparse.ArgumentParser(description="Migrate LMDB cache to SQLite.")
    parser.add_argument("--lmdb", default="db", help="Path to LMDB directory.")
    parser.add_argument("--sqlite", default="db.sqlite", help="Path to SQLite file.")
    parser.add_argument("--batch-size", type=int, default=1000, help="Rows per batch insert.")
    options = parser.parse_args()
    # Run from the script's own directory so the relative default paths
    # ("db", "db.sqlite") resolve next to this file, not the caller's cwd.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    os.chdir(script_dir)
    migrate(options.lmdb, options.sqlite, options.batch_size)
# Entry-point guard: run the migration only when executed as a script,
# not when imported as a module.
if __name__ == "__main__":
    main()