"""Turn unprocessed rows of ``cleaned_note`` into ``content_material`` rows.

Each cleaned note stores a JSON document in ``cleaned_content``. Its
``title`` and ``content`` fields are each copied into ``content_material``
as a separate row, after which the note is flagged as processed.
"""
import sqlite3
import json
import os
import sys

# Append the parent of this file's directory to sys.path so that the
# ``orchestrator`` package can be imported when this file is run as a script.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from orchestrator.config import DB_PATH  # noqa: E402

_INSERT_MATERIAL_SQL = (
    "INSERT INTO content_material (title, body, type) VALUES (?, ?, ?)"
)


def _ensure_processed_column(conn, cursor):
    """Add the ``processed`` flag column to ``cleaned_note`` if it is missing."""
    try:
        cursor.execute(
            "ALTER TABLE cleaned_note ADD COLUMN processed INTEGER DEFAULT 0;"
        )
        conn.commit()
    except sqlite3.OperationalError as exc:
        # Only "the column is already there" is expected on repeat runs; any
        # other operational error (missing table, locked database, ...) is a
        # real problem and must not be hidden.
        if "duplicate column name" not in str(exc).lower():
            raise


def _text_field(data, key):
    """Return ``data[key]`` stripped, or ``""`` when it is absent or not a string."""
    value = data.get(key, "")
    return value.strip() if isinstance(value, str) else ""


def _parse_note(content_str):
    """Return the ``(title, body)`` strings stored in a note's JSON payload.

    Unparseable payloads, payloads that are not JSON objects, and fields that
    are missing or not strings all yield empty strings, so that one malformed
    note cannot abort the whole run.
    """
    try:
        data = json.loads(content_str)
    except (ValueError, TypeError):
        # ValueError covers JSONDecodeError and undecodable bytes;
        # TypeError covers a NULL (None) column value.
        return "", ""
    if not isinstance(data, dict):
        return "", ""
    return _text_field(data, "title"), _text_field(data, "content")


def main():
    """Extract materials from every unprocessed cleaned note.

    Inserts one ``content_material`` row of type ``"title"`` for a non-empty
    title and one of type ``"body"`` for non-empty content, then sets
    ``processed = 1`` on the note. All changes are committed together at the
    end; the connection is closed even when an error occurs.

    Raises:
        sqlite3.Error: if any database operation fails (other than the
            ``processed`` column already existing).
    """
    conn = sqlite3.connect(DB_PATH)
    try:
        cursor = conn.cursor()
        _ensure_processed_column(conn, cursor)

        cursor.execute(
            "SELECT id, cleaned_content FROM cleaned_note WHERE processed = 0"
        )
        unprocessed_notes = cursor.fetchall()

        if not unprocessed_notes:
            print("No unprocessed cleaned notes found.")
            return

        material_count = 0
        for note_id, content_str in unprocessed_notes:
            title, body = _parse_note(content_str)

            # The title and the body are stored as two separate materials.
            if title:
                cursor.execute(_INSERT_MATERIAL_SQL, (title, title, "title"))
                material_count += 1

            if body:
                # Label the body with a shortened title when one exists.
                body_label = title[:20] + "..." if title else "Body Material"
                cursor.execute(_INSERT_MATERIAL_SQL, (body_label, body, "body"))
                material_count += 1

            cursor.execute(
                "UPDATE cleaned_note SET processed = 1 WHERE id = ?", (note_id,)
            )

        conn.commit()
        print(
            f"Successfully extracted {material_count} materials "
            f"from {len(unprocessed_notes)} cleaned notes."
        )
    finally:
        # Closing without a commit discards any partially applied batch.
        conn.close()


if __name__ == "__main__":
    main()