File size: 2,625 Bytes
653865f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import sqlite3
import pandas as pd
import logging
from pathlib import Path

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def init_sqlite_db(data_dir: str = "data"):
    data_path = Path(data_dir)
    db_path = data_path / "books.db"
    processed_csv = data_path / "books_processed.csv"
    basic_info_csv = data_path / "books_basic_info.csv"

    if not processed_csv.exists():
        logger.error(f"Source file {processed_csv} not found.")
        return

    logger.info("Loading CSV data into memory for migration...")
    df_p = pd.read_csv(processed_csv)
    
    # Normalize ISBN13 in processed df
    df_p['isbn13'] = df_p['isbn13'].astype(str).str.strip().str.replace(".0", "", regex=False)

    if basic_info_csv.exists():
        logger.info(f"Merging with {basic_info_csv}...")
        df_b = pd.read_csv(basic_info_csv)
        df_b['isbn13'] = df_b['isbn13'].astype(str).str.strip().str.replace(".0", "", regex=False)
        # Keep only unique info from basic_info
        df_b = df_b[['isbn13', 'isbn10', 'image', 'publisher', 'publishedDate']].drop_duplicates(subset=['isbn13'])
        df = pd.merge(df_p, df_b, on='isbn13', how='left')
    else:
        df = df_p

    # Fill NaN
    df = df.fillna("")

    logger.info(f"Connecting to {db_path}...")
    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()

    # 1. Create main books table
    logger.info("Creating 'books' table...")
    df.to_sql('books', conn, if_exists='replace', index=False)
    cursor.execute("CREATE UNIQUE INDEX IF NOT EXISTS idx_isbn13 ON books (isbn13)")
    cursor.execute("CREATE INDEX IF NOT EXISTS idx_isbn10 ON books (isbn10)")

    # 2. Create FTS5 virtual table for keyword search
    logger.info("Creating FTS5 virtual table 'books_fts'...")
    cursor.execute("DROP TABLE IF EXISTS books_fts")
    cursor.execute("""
        CREATE VIRTUAL TABLE books_fts USING fts5(
            isbn13 UNINDEXED,
            title,
            description,
            authors,
            simple_categories,
            content='books',
            tokenize='porter unicode61'
        )
    """)
    
    # Populate FTS index
    cursor.execute("""
        INSERT INTO books_fts(isbn13, title, description, authors, simple_categories)
        SELECT isbn13, title, description, authors, simple_categories FROM books
    """)

    conn.commit()
    
    # Verify
    cursor.execute("SELECT COUNT(*) FROM books")
    count = cursor.fetchone()[0]
    logger.info(f"Migration complete. {count} books imported into SQLite.")
    
    conn.close()

if __name__ == "__main__":
    init_sqlite_db()