# XHS/orchestrator/seed_data.py
# Commit c481f8a by Trae Bot: "Upload Spider_XHS project"
import sqlite3
import os
# Absolute path of the directory containing this script; the SQLite database
# file is expected at "<that directory>/data/mvp.db".
base_dir = os.path.split(os.path.abspath(__file__))[0]
db_path = os.path.join(base_dir, "data", "mvp.db")
def seed_data(database_path=None):
    """Replace the seed rows in the ``keyword`` and ``competitor_account`` tables.

    All existing rows in both tables are deleted and a fixed set of 10
    health-care keywords and 10 competitor accounts is inserted, all inside
    one transaction. Both tables must already exist in the database.

    Args:
        database_path: Path of the SQLite database file to seed. When omitted
            (or ``None``), the module-level ``db_path`` is used.

    Returns:
        ``True`` if the data was committed; ``False`` if a database error
        occurred, in which case the error is printed and the transaction is
        rolled back.

    Raises:
        sqlite3.Error: If the connection itself cannot be opened.
    """
    if database_path is None:
        # Resolved at call time so the default follows the module constant.
        database_path = db_path

    # 10 health-care related test keywords (one-element tuples for executemany).
    keywords = [
        ("医疗健康",),
        ("中医养生",),
        ("康复治疗",),
        ("三甲医院",),
        ("体检套餐",),
        ("健康饮食",),
        ("减肥瘦身",),
        ("心理咨询",),
        ("慢性病管理",),
        ("妇科健康",),
    ]

    # 10 competitor accounts as (account_name, platform) pairs.
    competitor_accounts = [
        ("丁香医生", "小红书"),
        ("春雨医生", "小红书"),
        ("平安好医生", "小红书"),
        ("微医", "小红书"),
        ("好大夫在线", "小红书"),
        ("企鹅杏仁", "小红书"),
        ("阿里健康", "小红书"),
        ("京东健康", "小红书"),
        ("医学界", "小红书"),
        ("健康时报", "小红书"),
    ]

    print(f"正在连接数据库: {database_path}")
    conn = sqlite3.connect(database_path)
    try:
        cursor = conn.cursor()

        # Clear previously seeded rows so re-running the script does not keep
        # accumulating duplicates.
        cursor.execute("DELETE FROM keyword")
        cursor.execute("DELETE FROM competitor_account")

        cursor.executemany(
            "INSERT INTO keyword (word) VALUES (?)",
            keywords,
        )
        print(f"成功插入 {len(keywords)} 条关键词数据。")

        cursor.executemany(
            "INSERT INTO competitor_account (account_name, platform) VALUES (?, ?)",
            competitor_accounts,
        )
        print(f"成功插入 {len(competitor_accounts)} 条竞品账号数据。")

        conn.commit()
        print("数据保存成功!")
        return True
    except sqlite3.Error as e:
        # Undo the deletes/inserts so a failed run leaves the data untouched.
        print(f"发生错误: {e}")
        conn.rollback()
        return False
    finally:
        conn.close()
# Seed the database only when this file is executed as a script, not on import.
if __name__ == "__main__":
    seed_data()