ThesisProject / migrate_to_postgres.py
JeyBii's picture
Upload folder using huggingface_hub
2b9b5b5 verified
Raw
History Blame Contribute Delete
2.17 kB
"""
One-time migration: SQLite → PostgreSQL
=========================================
Copies all articles from data/news.db into the PostgreSQL factcheck database,
then deletes the old SQLite file.
Usage:
python migrate_to_postgres.py
"""
import sqlite3
import os
from db.database import get_connection, init_db
SQLITE_PATH = os.path.join(
os.path.dirname(os.path.abspath(__file__)), "data", "news.db"
)
def migrate():
# 1. Ensure PostgreSQL table exists
init_db()
# 2. Check if SQLite DB exists
if not os.path.exists(SQLITE_PATH):
print(f"No SQLite database found at {SQLITE_PATH}. Nothing to migrate.")
return
# 3. Read all articles from SQLite
sqlite_conn = sqlite3.connect(SQLITE_PATH)
sqlite_conn.row_factory = sqlite3.Row
rows = sqlite_conn.execute("SELECT * FROM articles").fetchall()
articles = [dict(row) for row in rows]
sqlite_conn.close()
print(f"Found {len(articles)} articles in SQLite database.")
if not articles:
print("No articles to migrate.")
return
# 4. Insert into PostgreSQL
pg_conn = get_connection()
cursor = pg_conn.cursor()
inserted = 0
for article in articles:
cursor.execute(
"INSERT INTO articles (title, url, source, published, scraped_at) "
"VALUES (%s, %s, %s, %s, %s) "
"ON CONFLICT (url) DO NOTHING",
(
article["title"],
article["url"],
article["source"],
article.get("published", "N/A"),
article.get("scraped_at", ""),
),
)
if cursor.rowcount > 0:
inserted += 1
pg_conn.commit()
pg_conn.close()
print(f"Migrated {inserted} articles to PostgreSQL.")
if inserted < len(articles):
print(f" ({len(articles) - inserted} duplicates skipped)")
# 5. Delete old SQLite database
os.remove(SQLITE_PATH)
print(f"Deleted old SQLite database: {SQLITE_PATH}")
print("\nMigration complete!")
if __name__ == "__main__":
migrate()