import json
import sqlite3
import hashlib
import time
from datetime import datetime, timezone

from src.knowledge.repository import KnowledgeRepository


class KnowledgeBuilder:
    """Load a consensus JSON file into the knowledge database.

    Each call to :meth:`build` writes entities, canonical traits,
    entity/trait mappings and provenance rows, then records a row in
    ``knowledge_builds`` summarising the run.
    """

    def __init__(self, db_path: str = "data/knowledge/build/knowledge.db"):
        """Create the builder around a ``KnowledgeRepository`` for *db_path*."""
        self.repo = KnowledgeRepository(db_path)

    @staticmethod
    def _utc_now_iso() -> str:
        """Return the current UTC time as a naive ISO-8601 string.

        ``datetime.utcnow()`` is deprecated since Python 3.12.  Dropping the
        tzinfo from an aware UTC datetime yields the same string format
        (no ``+00:00`` suffix), so timestamps stay comparable with rows
        written by earlier builds.
        """
        return datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

    def build(self, consensus_file: str):
        """Import *consensus_file* and register a build manifest.

        The file must contain a JSON object mapping each entity id to a list
        of trait objects.  Every trait object is read for the keys
        ``trait``, ``confidence``, ``frequency``, ``source_count`` and
        ``canonical_score``.

        Args:
            consensus_file: Path to the UTF-8 encoded consensus JSON file.

        Returns:
            A ``(build_id, entity_count, trait_count)`` tuple, where
            ``trait_count`` is the number of trait rows processed.

        Raises:
            OSError: If the consensus file cannot be opened.
            json.JSONDecodeError: If the file is not valid JSON.
            KeyError: If a trait object lacks one of the required keys.
            sqlite3.Error: If a statement fails (assuming the repository
                wraps a ``sqlite3`` connection, as the SQL dialect suggests).

        On any failure the uncommitted writes are rolled back, and the
        repository connection is always closed before the method returns
        or propagates the error.
        """
        self.repo.connect()
        conn = None
        try:
            self.repo.init_schema()

            with open(consensus_file, "r", encoding="utf-8") as f:
                consensus_data = json.load(f)

            # NOTE(review): this reaches into the repository's private
            # connection; a public accessor would be preferable if one exists.
            conn = self.repo._conn
            cursor = conn.cursor()

            # NOTE(review): second-resolution id; two builds started within
            # the same second would share a build_id -- confirm this is OK.
            build_id = f"build_{int(time.time())}"
            entity_count = len(consensus_data)
            trait_count = 0

            for entity_id, traits in consensus_data.items():
                # Add entity (in a real scenario, names come from AniList mapping)
                self.repo.add_entity(
                    entity_id,
                    canonical_name=entity_id.replace("_", " ").title(),
                )

                for trait in traits:
                    trait_id = trait["trait"]

                    # Add canonical trait
                    cursor.execute(
                        "INSERT OR IGNORE INTO canonical_traits (trait_id, trait_type) VALUES (?, ?)",
                        (trait_id, "visual"),
                    )

                    # Add visual trait mapping
                    cursor.execute(
                        """
                        INSERT OR REPLACE INTO entity_visual_traits
                        (entity_id, trait_id, confidence, frequency, source_count)
                        VALUES (?, ?, ?, ?, ?)
                        """,
                        (
                            entity_id,
                            trait_id,
                            trait["confidence"],
                            trait["frequency"],
                            trait["source_count"],
                        ),
                    )

                    # Provenance tracking: the id is derived from the build id,
                    # so each build writes its own provenance row per trait.
                    provenance_id = hashlib.sha256(
                        f"{entity_id}_{trait_id}_{build_id}".encode()
                    ).hexdigest()
                    # Hash of the trait's Python repr (kept as-is so hashes
                    # remain comparable with previously stored ones).
                    payload_hash = hashlib.sha256(str(trait).encode()).hexdigest()

                    cursor.execute(
                        """
                        INSERT OR REPLACE INTO trait_provenance
                        (provenance_id, entity_id, trait_id, source_uri,
                         extraction_timestamp, extractor_version,
                         raw_payload_hash, trust_score)
                        VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                        """,
                        (
                            provenance_id,
                            entity_id,
                            trait_id,
                            "consensus_engine",
                            self._utc_now_iso(),
                            "v3.1",
                            payload_hash,
                            trait["canonical_score"],
                        ),
                    )

                    trait_count += 1

            # Register build manifest
            cursor.execute(
                """
                INSERT INTO knowledge_builds
                (build_id, build_timestamp, entity_count, trait_count)
                VALUES (?, ?, ?, ?)
                """,
                (build_id, self._utc_now_iso(), entity_count, trait_count),
            )

            conn.commit()
        except Exception:
            # Discard whatever this build wrote but had not yet committed so
            # a failed run does not leave a half-imported state pending.
            if conn is not None:
                conn.rollback()
            raise
        finally:
            # Always release the connection, on success and on failure.
            self.repo.close()

        return build_id, entity_count, trait_count