File size: 4,267 Bytes
aa08cd6 6cfe55f e8d1a53 6cfe55f aa08cd6 6cfe55f b98be44 aa08cd6 6cfe55f b98be44 aa08cd6 6cfe55f | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 | import glob
import json
import os
from app.db.models.app_config import AppConfig
from app.db.models.articles import ArticleItem, ArticleSubscription, ArticleSubscriptionItem
from app.db.models.download_job import DownloadJob
from app.db.models.models import Model
from app.db.models.note import Note
from app.db.models.providers import Provider
from app.db.models.trend_subscription import (
NotificationChannel,
TrendSubscription,
TrendSubscriptionMatch,
)
from app.db.models.video_tasks import VideoTask
from app.db.engine import get_engine, Base
from sqlalchemy import inspect, text
def init_db():
engine = get_engine()
Base.metadata.create_all(bind=engine)
_ensure_article_content_text(engine)
_ensure_model_provider_id_text(engine)
_import_legacy_notes(engine)
# 注:原 _ensure_model_columns 为 models.supports_multimodal 做的迁移已删除——
# 该列在「drop multimodal」重构后已不再被 ORM 使用(纯遗留),且它的
# `ALTER ... BOOLEAN NOT NULL DEFAULT 0` 在 Postgres 上会因 boolean 默认值类型不符直接报错。
# 已有 SQLite 库里残留的该列无害,保持不动即可。
def _ensure_model_provider_id_text(engine):
# 早期 Postgres 部署里 models.provider_id 被建成 INTEGER(ORM 旧定义),
# 但实际存的是字符串 provider id(如 "deepseek"),导致查询报
# invalid input syntax for type integer。这里把已有的整型列就地改成 VARCHAR。
# 仅 Postgres 需要;SQLite 动态类型无此问题。幂等(改完后类型名不含 INT 即跳过)。
if engine.dialect.name != "postgresql":
return
inspector = inspect(engine)
if "models" not in inspector.get_table_names():
return
for column in inspector.get_columns("models"):
if column["name"] != "provider_id":
continue
if "INT" in str(column["type"]).upper():
with engine.begin() as conn:
conn.execute(text(
"ALTER TABLE models ALTER COLUMN provider_id TYPE VARCHAR "
"USING provider_id::varchar"
))
break
def _import_legacy_notes(engine):
# 一次性导入:把旧的 note_results/{task_id}.json(+ {task_id}.status.json)迁进 notes 表。
# 仅当 notes 表为空时执行,幂等;HF 临时盘上目录为空时直接跳过。失败不影响启动。
from app.db.note_dao import save_note, set_status
note_dir = os.getenv("NOTE_OUTPUT_DIR", "note_results")
if not os.path.isdir(note_dir):
return
try:
with engine.connect() as conn:
count = conn.execute(text("SELECT COUNT(*) FROM notes")).scalar()
if count and int(count) > 0:
return
except Exception:
return
imported = 0
for path in glob.glob(os.path.join(note_dir, "*.json")):
name = os.path.basename(path)
stem = name[:-5] # 去掉 .json
# 跳过缓存/状态/检查点:转写、音频缓存、{id}.status、{key}.gpt.checkpoint 等。
# 正经笔记的 task_id 是 UUID(不含点),据此排除带点的派生文件。
if stem.endswith("_transcript") or stem.endswith("_audio") or "." in stem:
continue
try:
content = json.loads(open(path, "r", encoding="utf-8").read())
except Exception:
continue
save_note(stem, content)
status_path = os.path.join(note_dir, f"{stem}.status.json")
if os.path.exists(status_path):
try:
set_status(stem, json.loads(open(status_path, "r", encoding="utf-8").read()))
except Exception:
pass
imported += 1
if imported:
print(f"[init_db] 已从本地文件导入 {imported} 篇历史笔记到数据库")
def _ensure_article_content_text(engine):
inspector = inspect(engine)
if "article_items" not in inspector.get_table_names():
return
columns = {column["name"] for column in inspector.get_columns("article_items")}
if "content_text" in columns:
return
with engine.begin() as conn:
conn.execute(text("ALTER TABLE article_items ADD COLUMN content_text TEXT NOT NULL DEFAULT ''"))
|