| import sqlite3 |
| import json |
| import os |
| import sys |
|
|
| |
| sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
| from orchestrator.config import DB_PATH |
|
|
def _parse_note(content_str):
    """Return the stripped ``(title, body)`` strings from a note's JSON payload.

    A payload that cannot be decoded, that is not a JSON object, or whose
    ``title``/``content`` field is missing or not a string yields an empty
    string for the affected value, so one malformed row cannot abort the run.
    """
    try:
        data = json.loads(content_str)
    except (ValueError, TypeError):
        # ValueError covers json.JSONDecodeError (and undecodable bytes);
        # TypeError covers NULL / non-text column values.
        data = None
    if not isinstance(data, dict):
        # Valid JSON that is not an object (list, number, string, null)
        # has no fields to read.
        data = {}

    def text_field(key):
        value = data.get(key)
        return value.strip() if isinstance(value, str) else ""

    return text_field("title"), text_field("content")


def main(db_path=None):
    """Extract title/body materials from every unprocessed cleaned note.

    Rows of ``cleaned_note`` with ``processed = 0`` are read, their
    ``cleaned_content`` JSON is parsed, and up to two ``content_material``
    rows are inserted per note: one of type ``"title"`` and one of type
    ``"body"``. Each visited note is then flagged ``processed = 1``, including
    notes whose payload could not be parsed.

    All inserts and flag updates are committed together at the end; if an
    error interrupts the run, the connection is closed without committing.

    Args:
        db_path: Path of the SQLite database to use. Defaults to the
            module-level ``DB_PATH`` when omitted or ``None``.

    Raises:
        sqlite3.OperationalError: If the expected tables are missing or the
            database cannot be read or written.
    """
    conn = sqlite3.connect(DB_PATH if db_path is None else db_path)
    try:
        cursor = conn.cursor()

        # Add the bookkeeping column only when it is actually missing, rather
        # than attempting the ALTER and swallowing every OperationalError
        # (which would also hide locked-database or missing-table failures).
        cursor.execute("PRAGMA table_info(cleaned_note)")
        existing_columns = {row[1].lower() for row in cursor.fetchall()}
        if "processed" not in existing_columns:
            cursor.execute("ALTER TABLE cleaned_note ADD COLUMN processed INTEGER DEFAULT 0;")
            conn.commit()

        cursor.execute("SELECT id, cleaned_content FROM cleaned_note WHERE processed = 0")
        unprocessed_notes = cursor.fetchall()

        if not unprocessed_notes:
            print("No unprocessed cleaned notes found.")
            return

        material_count = 0
        for note_id, content_str in unprocessed_notes:
            title, body = _parse_note(content_str)

            if title:
                # The title is stored as its own material, with the title
                # text used for both the label and the body.
                cursor.execute("""
                    INSERT INTO content_material (title, body, type)
                    VALUES (?, ?, ?)
                """, (title, title, "title"))
                material_count += 1

            if body:
                # Label the body with a 20-character title prefix; the "..."
                # suffix is appended even when the title is shorter than that.
                label = title[:20] + "..." if title else "Body Material"
                cursor.execute("""
                    INSERT INTO content_material (title, body, type)
                    VALUES (?, ?, ?)
                """, (label, body, "body"))
                material_count += 1

            cursor.execute("UPDATE cleaned_note SET processed = 1 WHERE id = ?", (note_id,))

        conn.commit()
        print(f"Successfully extracted {material_count} materials from {len(unprocessed_notes)} cleaned notes.")
    finally:
        # Always release the connection, including on the early return above
        # and when an exception propagates.
        conn.close()
|
|
# Run the extraction only when this file is executed directly as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
|