Spaces:
Running
Running
| #!/usr/bin/env python3 | |
| # -*- coding: utf-8 -*- | |
| """ | |
| 数据库管理器 - 支持新的 data 数据结构 | |
| 提供数据库操作的封装接口 | |
| """ | |
| import sqlite3 | |
| import json | |
| from pathlib import Path | |
| from typing import Dict, List, Optional, Tuple | |
| import logging | |
| logger = logging.getLogger(__name__) | |
| class DatabaseManager: | |
| """数据库管理器类""" | |
| def __init__(self, db_path: str = 'books.db'): | |
| """ | |
| 初始化数据库管理器 | |
| Args: | |
| db_path: 数据库文件路径 | |
| """ | |
| self.db_path = db_path | |
| self.conn = None | |
| self._connect() | |
| def _connect(self): | |
| """连接数据库""" | |
| try: | |
| self.conn = sqlite3.connect(self.db_path, check_same_thread=False) | |
| self.conn.row_factory = sqlite3.Row # 使用字典形式返回结果 | |
| logger.info(f"数据库连接成功: {self.db_path}") | |
| except Exception as e: | |
| logger.error(f"数据库连接失败: {e}") | |
| raise | |
| def close(self): | |
| """关闭数据库连接""" | |
| if self.conn: | |
| self.conn.close() | |
| logger.info("数据库连接已关闭") | |
| def _dict_from_row(self, row) -> Dict: | |
| """将SQLite Row对象转换为字典""" | |
| if row is None: | |
| return None | |
| return dict(zip(row.keys(), row)) | |
| # ==================== 书籍相关操作 ==================== | |
| def get_all_books(self) -> List[Dict]: | |
| """ | |
| 获取所有书籍列表 | |
| Returns: | |
| 书籍列表 | |
| """ | |
| cursor = self.conn.cursor() | |
| cursor.execute(''' | |
| SELECT | |
| market_book_id, market_book_name, market_book_cover, | |
| max_page, market_classify_name, grade_id, reel_id, | |
| create_date, source_dir | |
| FROM books | |
| ORDER BY grade_id, reel_id, market_book_id | |
| ''') | |
| books = [] | |
| for row in cursor.fetchall(): | |
| books.append(self._dict_from_row(row)) | |
| return books | |
| def get_book_by_id(self, book_id: int) -> Optional[Dict]: | |
| """ | |
| 根据ID获取书籍详情 | |
| Args: | |
| book_id: 书籍ID | |
| Returns: | |
| 书籍信息字典,如果不存在返回None | |
| """ | |
| cursor = self.conn.cursor() | |
| cursor.execute(''' | |
| SELECT * FROM books WHERE market_book_id = ? | |
| ''', (book_id,)) | |
| row = cursor.fetchone() | |
| return self._dict_from_row(row) | |
| def get_books_by_grade(self, grade_id: int) -> List[Dict]: | |
| """ | |
| 根据年级获取书籍列表 | |
| Args: | |
| grade_id: 年级ID | |
| Returns: | |
| 书籍列表 | |
| """ | |
| cursor = self.conn.cursor() | |
| cursor.execute(''' | |
| SELECT | |
| market_book_id, market_book_name, market_book_cover, | |
| max_page, market_classify_name, reel_id | |
| FROM books | |
| WHERE grade_id = ? | |
| ORDER BY reel_id, market_book_id | |
| ''', (grade_id,)) | |
| books = [] | |
| for row in cursor.fetchall(): | |
| books.append(self._dict_from_row(row)) | |
| return books | |
| # ==================== 页面相关操作 ==================== | |
| def get_book_pages(self, book_id: int) -> List[Dict]: | |
| """ | |
| 获取书籍的所有页面列表(目录) | |
| Args: | |
| book_id: 书籍ID | |
| Returns: | |
| 页面列表 | |
| """ | |
| cursor = self.conn.cursor() | |
| cursor.execute(''' | |
| SELECT page_id, page_number, origin_img_url, encrypt_img_url | |
| FROM pages | |
| WHERE book_id = ? | |
| ORDER BY page_number | |
| ''', (book_id,)) | |
| pages = [] | |
| for row in cursor.fetchall(): | |
| pages.append(self._dict_from_row(row)) | |
| return pages | |
| def get_page_by_number(self, book_id: int, page_number: int) -> Optional[Dict]: | |
| """ | |
| 根据页码获取页面信息(不含片段) | |
| Args: | |
| book_id: 书籍ID | |
| page_number: 页码 | |
| Returns: | |
| 页面信息字典 | |
| """ | |
| cursor = self.conn.cursor() | |
| cursor.execute(''' | |
| SELECT * FROM pages | |
| WHERE book_id = ? AND page_number = ? | |
| ''', (book_id, page_number)) | |
| row = cursor.fetchone() | |
| return self._dict_from_row(row) | |
| def get_page_content(self, book_id: int, page_number: int) -> Optional[Dict]: | |
| """ | |
| 获取页面的完整内容(含片段) | |
| Args: | |
| book_id: 书籍ID | |
| page_number: 页码 | |
| Returns: | |
| 包含pieces的完整页面数据 | |
| """ | |
| # 获取页面基本信息 | |
| page = self.get_page_by_number(book_id, page_number) | |
| if not page: | |
| return None | |
| # 获取页面片段 | |
| cursor = self.conn.cursor() | |
| cursor.execute(''' | |
| SELECT | |
| piece_id, original, translation, | |
| origin_sound_url, encrypt_sound_url, duration, | |
| coordinate_x, coordinate_y, coordinate_width, coordinate_height, | |
| is_evaluated, show_translation, rich_original, rich_translation | |
| FROM pieces | |
| WHERE page_id = ? | |
| ORDER BY piece_order | |
| ''', (page['page_id'],)) | |
| pieces = [] | |
| for row in cursor.fetchall(): | |
| piece = self._dict_from_row(row) | |
| # 构建coordinate对象 | |
| piece['coordinate'] = { | |
| 'x': piece.pop('coordinate_x'), | |
| 'y': piece.pop('coordinate_y'), | |
| 'width': piece.pop('coordinate_width'), | |
| 'height': piece.pop('coordinate_height') | |
| } | |
| pieces.append(piece) | |
| # 将pieces添加到page中 | |
| page['pieces'] = pieces | |
| page['piece_count'] = len(pieces) | |
| return page | |
| # ==================== 目录相关操作 ==================== | |
| def get_book_catalog(self, book_id: int) -> List[Dict]: | |
| """ | |
| 获取书籍目录结构 | |
| Args: | |
| book_id: 书籍ID | |
| Returns: | |
| 目录列表(树形结构) | |
| """ | |
| cursor = self.conn.cursor() | |
| cursor.execute(''' | |
| SELECT | |
| catalog_id, catalog_name, catalog_name_cn, | |
| start_page, end_page, thumbnail, parent_id, sort | |
| FROM catalogs | |
| WHERE book_id = ? | |
| ORDER BY sort | |
| ''', (book_id,)) | |
| catalogs = [] | |
| for row in cursor.fetchall(): | |
| catalogs.append(self._dict_from_row(row)) | |
| # 构建树形结构 | |
| catalog_dict = {cat['catalog_id']: dict(cat, children=[]) for cat in catalogs} | |
| tree = [] | |
| for catalog in catalogs: | |
| cat_id = catalog['catalog_id'] | |
| parent_id = catalog['parent_id'] | |
| if parent_id == 0: | |
| tree.append(catalog_dict[cat_id]) | |
| else: | |
| if parent_id in catalog_dict: | |
| catalog_dict[parent_id]['children'].append(catalog_dict[cat_id]) | |
| return tree | |
| # ==================== 搜索相关操作 ==================== | |
| def search_content(self, book_id: int, keyword: str, limit: int = 20) -> List[Dict]: | |
| """ | |
| 在书籍中搜索内容(使用FTS全文搜索) | |
| Args: | |
| book_id: 书籍ID | |
| keyword: 搜索关键词 | |
| limit: 返回结果数量限制 | |
| Returns: | |
| 搜索结果列表 | |
| """ | |
| cursor = self.conn.cursor() | |
| # 使用FTS5全文搜索 | |
| sql = ''' | |
| SELECT | |
| p.page_number, | |
| pc.piece_id, | |
| pc.original, | |
| pc.translation, | |
| pc.origin_sound_url | |
| FROM pieces_fts | |
| JOIN pieces pc ON pieces_fts.rowid = pc.piece_id | |
| JOIN pages p ON pc.page_id = p.page_id | |
| WHERE p.book_id = ? | |
| AND pieces_fts MATCH ? | |
| ORDER BY rank | |
| LIMIT ? | |
| ''' | |
| cursor.execute(sql, (book_id, keyword, limit)) | |
| results = [] | |
| for row in cursor.fetchall(): | |
| results.append(self._dict_from_row(row)) | |
| return results | |
| def search_all_books(self, keyword: str, limit: int = 50) -> List[Dict]: | |
| """ | |
| 在所有书籍中搜索内容 | |
| Args: | |
| keyword: 搜索关键词 | |
| limit: 返回结果数量限制 | |
| Returns: | |
| 搜索结果列表(包含书籍信息) | |
| """ | |
| cursor = self.conn.cursor() | |
| sql = ''' | |
| SELECT | |
| b.market_book_id, | |
| b.market_book_name, | |
| p.page_number, | |
| pc.piece_id, | |
| pc.original, | |
| pc.translation, | |
| pc.origin_sound_url | |
| FROM pieces_fts | |
| JOIN pieces pc ON pieces_fts.rowid = pc.piece_id | |
| JOIN pages p ON pc.page_id = p.page_id | |
| JOIN books b ON p.book_id = b.market_book_id | |
| WHERE pieces_fts MATCH ? | |
| ORDER BY rank | |
| LIMIT ? | |
| ''' | |
| cursor.execute(sql, (keyword, limit)) | |
| results = [] | |
| for row in cursor.fetchall(): | |
| results.append(self._dict_from_row(row)) | |
| return results | |
| # ==================== 统计相关操作 ==================== | |
| def get_book_statistics(self, book_id: int) -> Dict: | |
| """ | |
| 获取书籍统计信息 | |
| Args: | |
| book_id: 书籍ID | |
| Returns: | |
| 统计信息字典 | |
| """ | |
| cursor = self.conn.cursor() | |
| # 获取页面数 | |
| cursor.execute('SELECT COUNT(*) FROM pages WHERE book_id = ?', (book_id,)) | |
| page_count = cursor.fetchone()[0] | |
| # 获取片段数 | |
| cursor.execute(''' | |
| SELECT COUNT(*) FROM pieces | |
| WHERE page_id IN (SELECT page_id FROM pages WHERE book_id = ?) | |
| ''', (book_id,)) | |
| piece_count = cursor.fetchone()[0] | |
| # 获取有音频的片段数 | |
| cursor.execute(''' | |
| SELECT COUNT(*) FROM pieces | |
| WHERE page_id IN (SELECT page_id FROM pages WHERE book_id = ?) | |
| AND (origin_sound_url != '' OR encrypt_sound_url != '') | |
| ''', (book_id,)) | |
| audio_count = cursor.fetchone()[0] | |
| # 获取目录项数 | |
| cursor.execute('SELECT COUNT(*) FROM catalogs WHERE book_id = ?', (book_id,)) | |
| catalog_count = cursor.fetchone()[0] | |
| return { | |
| 'book_id': book_id, | |
| 'total_pages': page_count, | |
| 'total_pieces': piece_count, | |
| 'total_audio': audio_count, | |
| 'total_catalogs': catalog_count | |
| } | |
| def get_overall_statistics(self) -> Dict: | |
| """ | |
| 获取整体统计信息 | |
| Returns: | |
| 整体统计信息字典 | |
| """ | |
| cursor = self.conn.cursor() | |
| cursor.execute('SELECT COUNT(*) FROM books') | |
| book_count = cursor.fetchone()[0] | |
| cursor.execute('SELECT COUNT(*) FROM pages') | |
| page_count = cursor.fetchone()[0] | |
| cursor.execute('SELECT COUNT(*) FROM pieces') | |
| piece_count = cursor.fetchone()[0] | |
| cursor.execute('SELECT COUNT(*) FROM catalogs') | |
| catalog_count = cursor.fetchone()[0] | |
| return { | |
| 'total_books': book_count, | |
| 'total_pages': page_count, | |
| 'total_pieces': piece_count, | |
| 'total_catalogs': catalog_count | |
| } | |
| # 全局数据库实例 | |
| _db_instance = None | |
| def get_db_instance(db_path: str = 'books.db') -> DatabaseManager: | |
| """ | |
| 获取数据库管理器单例 | |
| Args: | |
| db_path: 数据库文件路径 | |
| Returns: | |
| DatabaseManager实例 | |
| """ | |
| global _db_instance | |
| if _db_instance is None: | |
| _db_instance = DatabaseManager(db_path) | |
| return _db_instance | |
| def close_db(): | |
| """关闭全局数据库连接""" | |
| global _db_instance | |
| if _db_instance: | |
| _db_instance.close() | |
| _db_instance = None | |