File size: 1,989 Bytes
c481f8a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import sqlite3
import json
import os
import sys

# Add the parent of this script's directory to sys.path so that the
# `orchestrator` package (and its `config` module) can be imported.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from orchestrator.config import DB_PATH

def _clean_text(value):
    """Return *value* stripped of surrounding whitespace, or "" if it is not a string."""
    return value.strip() if isinstance(value, str) else ""


def _load_note_fields(content_str):
    """Decode a cleaned note's JSON payload into a ``(title, body)`` tuple.

    Returns ``None`` when the payload is missing, is not valid JSON, or does
    not decode to a JSON object. Non-string ``title`` / ``content`` values are
    treated as empty strings.
    """
    try:
        data = json.loads(content_str)
    except (ValueError, TypeError):
        # ValueError covers json.JSONDecodeError and the UnicodeDecodeError
        # raised for undecodable bytes; TypeError covers None payloads.
        return None
    if not isinstance(data, dict):
        return None
    return _clean_text(data.get("title")), _clean_text(data.get("content"))


def main(db_path=None):
    """Extract content materials from every unprocessed row of ``cleaned_note``.

    For each row with ``processed = 0`` the ``cleaned_content`` JSON is decoded;
    a non-empty ``title`` is inserted into ``content_material`` with type
    ``"title"`` and a non-empty ``content`` with type ``"body"``. Every visited
    row is then flagged ``processed = 1`` (including rows whose payload could
    not be decoded, so they are not retried on every run). All inserts and
    flag updates are committed together at the end; if anything fails, nothing
    from this run is committed.

    Args:
        db_path: Path of the SQLite database to use. Defaults to the
            module-level ``DB_PATH`` when ``None``.

    Raises:
        sqlite3.Error: If the database cannot be read or written (for example
            when the ``cleaned_note`` or ``content_material`` table is missing).
    """
    conn = sqlite3.connect(DB_PATH if db_path is None else db_path)
    try:
        cursor = conn.cursor()

        # Add the `processed` flag only when it is really missing, instead of
        # blindly running ALTER TABLE and swallowing every OperationalError
        # (which would also hide "no such table" or "database is locked").
        cursor.execute("PRAGMA table_info(cleaned_note)")
        existing_columns = {row[1].lower() for row in cursor.fetchall()}
        if "processed" not in existing_columns:
            cursor.execute("ALTER TABLE cleaned_note ADD COLUMN processed INTEGER DEFAULT 0;")
            conn.commit()

        cursor.execute("SELECT id, cleaned_content FROM cleaned_note WHERE processed = 0")
        unprocessed_notes = cursor.fetchall()

        if not unprocessed_notes:
            print("No unprocessed cleaned notes found.")
            return

        material_count = 0
        for note_id, content_str in unprocessed_notes:
            fields = _load_note_fields(content_str)
            if fields is None:
                print(
                    f"Warning: cleaned_note {note_id} does not contain a valid JSON object; "
                    "marking it processed without extracting materials.",
                    file=sys.stderr,
                )
                title, body = "", ""
            else:
                title, body = fields

            # The title is stored as its own material (title and body columns
            # both hold the title text).
            if title:
                cursor.execute("""
                    INSERT INTO content_material (title, body, type)
                    VALUES (?, ?, ?)
                """, (title, title, "title"))
                material_count += 1

            # The body material is labelled with the first 20 characters of the
            # title, or a generic label when there is no title.
            # NOTE(review): "..." is appended even when the title is 20
            # characters or shorter; kept as-is to preserve existing labels.
            if body:
                label = title[:20] + "..." if title else "Body Material"
                cursor.execute("""
                    INSERT INTO content_material (title, body, type)
                    VALUES (?, ?, ?)
                """, (label, body, "body"))
                material_count += 1

            cursor.execute("UPDATE cleaned_note SET processed = 1 WHERE id = ?", (note_id,))

        conn.commit()
        print(f"Successfully extracted {material_count} materials from {len(unprocessed_notes)} cleaned notes.")
    finally:
        # Always release the connection; closing without a commit discards
        # any partially applied work from a failed run.
        conn.close()

# Run the extraction only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()