XHS / orchestrator /db_init.py
Trae Bot
Upload Spider_XHS project
c481f8a
import sqlite3
import os
import sys
# Add the orchestrator directory to sys.path so we can import config
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from config import DB_PATH
def init_db():
# Ensure directory exists
os.makedirs(os.path.dirname(DB_PATH), exist_ok=True)
conn = sqlite3.connect(DB_PATH)
cursor = conn.cursor()
# 1. keyword
cursor.execute('''
CREATE TABLE IF NOT EXISTS keyword (
id INTEGER PRIMARY KEY AUTOINCREMENT,
word TEXT NOT NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)
''')
# 2. competitor_account
cursor.execute('''
CREATE TABLE IF NOT EXISTS competitor_account (
id INTEGER PRIMARY KEY AUTOINCREMENT,
account_name TEXT NOT NULL,
platform TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)
''')
# 3. own_account
cursor.execute('''
CREATE TABLE IF NOT EXISTS own_account (
id INTEGER PRIMARY KEY AUTOINCREMENT,
account_name TEXT NOT NULL,
platform TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)
''')
# 4. crawl_task
cursor.execute('''
CREATE TABLE IF NOT EXISTS crawl_task (
id INTEGER PRIMARY KEY AUTOINCREMENT,
target_type TEXT,
target_value TEXT,
status TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)
''')
# 5. raw_note
cursor.execute('''
CREATE TABLE IF NOT EXISTS raw_note (
id INTEGER PRIMARY KEY AUTOINCREMENT,
source_platform TEXT,
content TEXT,
author TEXT,
url TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)
''')
# 6. cleaned_note
cursor.execute('''
CREATE TABLE IF NOT EXISTS cleaned_note (
id INTEGER PRIMARY KEY AUTOINCREMENT,
raw_note_id INTEGER,
cleaned_content TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (raw_note_id) REFERENCES raw_note(id)
)
''')
# 7. content_material
cursor.execute('''
CREATE TABLE IF NOT EXISTS content_material (
id INTEGER PRIMARY KEY AUTOINCREMENT,
title TEXT,
body TEXT,
type TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)
''')
# 8. prompt_template
cursor.execute('''
CREATE TABLE IF NOT EXISTS prompt_template (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT,
template_content TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)
''')
# Insert default compliance prompt if table is empty
cursor.execute("SELECT count(*) FROM prompt_template")
if cursor.fetchone()[0] == 0:
system_prompt = """Rewrite the following material to be engaging for Xiaohongshu.
IMPORTANT COMPLIANCE GUIDELINES (小红书笔记十不要原则):
1. 严禁低俗擦边(如性暗示、暴露)和引人不适(如血腥、恐怖)的内容。
2. 严禁模板化洗稿,追求原创真实,不高度雷同。
3. 禁止利用特殊身份(如医生、专家)进行无授权的营销背书。
4. 禁止虚假价格、虚构原价或虚假承诺(如发货时效不符)。
5. 禁止过度修图、滤镜失真、P图夸大商品效果,避免货不对板。
6. 禁止向微信、淘宝、抖音、B站、微博等第三方平台导流,禁止留联系方式。
7. 禁止付费互动,禁止通过送福利等方式诱导用户点赞、关注或抽奖。
8. 严禁炫富内容(如晒账单、鼓吹财富自由等超出常人消费能力的行为)。
9. 禁止网络暴力、挂人或挑起对立。
10. 禁止使用极限用语(如国家级、最高级、第一等)、封建迷信用语及虚假医疗功效用语。
Please ensure the rewritten content strictly follows the above guidelines.
Material to rewrite:"""
cursor.execute("INSERT INTO prompt_template (name, template_content) VALUES (?, ?)", ("Compliance Rewrite", system_prompt))
# 9. generated_post
cursor.execute('''
CREATE TABLE IF NOT EXISTS generated_post (
id INTEGER PRIMARY KEY AUTOINCREMENT,
material_id INTEGER,
prompt_id INTEGER,
content TEXT,
status TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (material_id) REFERENCES content_material(id),
FOREIGN KEY (prompt_id) REFERENCES prompt_template(id)
)
''')
# 10. publish_record
cursor.execute('''
CREATE TABLE IF NOT EXISTS publish_record (
id INTEGER PRIMARY KEY AUTOINCREMENT,
post_id INTEGER,
account_id INTEGER,
platform TEXT,
status TEXT,
publish_url TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (post_id) REFERENCES generated_post(id),
FOREIGN KEY (account_id) REFERENCES own_account(id)
)
''')
# 11. interaction_record
cursor.execute('''
CREATE TABLE IF NOT EXISTS interaction_record (
id INTEGER PRIMARY KEY AUTOINCREMENT,
publish_record_id INTEGER,
user_name TEXT,
interaction_type TEXT,
content TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (publish_record_id) REFERENCES publish_record(id)
)
''')
# 12. lead
cursor.execute('''
CREATE TABLE IF NOT EXISTS lead (
id INTEGER PRIMARY KEY AUTOINCREMENT,
interaction_id INTEGER,
contact_info TEXT,
status TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (interaction_id) REFERENCES interaction_record(id)
)
''')
# 13. bd_followup
cursor.execute('''
CREATE TABLE IF NOT EXISTS bd_followup (
id INTEGER PRIMARY KEY AUTOINCREMENT,
lead_id INTEGER,
followup_content TEXT,
followup_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (lead_id) REFERENCES lead(id)
)
''')
# 14. system_error_log
cursor.execute('''
CREATE TABLE IF NOT EXISTS system_error_log (
id INTEGER PRIMARY KEY AUTOINCREMENT,
module_name TEXT,
error_message TEXT,
stack_trace TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)
''')
conn.commit()
conn.close()
print(f"Database initialized successfully with 14 tables at {DB_PATH}")
if __name__ == "__main__":
init_db()