Spaces:
Sleeping
Sleeping
| import json | |
| import sqlite3 | |
| import hashlib | |
| import time | |
| from datetime import datetime | |
| from src.knowledge.repository import KnowledgeRepository | |
class KnowledgeBuilder:
    """Populate the knowledge database from a consensus JSON file.

    The builder writes through a ``KnowledgeRepository`` opened on
    ``db_path`` and records, for every build, the entities, their visual
    traits, one provenance row per trait, and a build manifest row.
    """

    def __init__(self, db_path: str = "data/knowledge/build/knowledge.db"):
        self.repo = KnowledgeRepository(db_path)

    @staticmethod
    def _utc_now_iso() -> str:
        """Return the current UTC time as a naive ISO-8601 string.

        Produces the same format as the deprecated ``datetime.utcnow()``
        (no UTC offset suffix), so previously stored timestamps stay
        comparable.
        """
        # Local import: the file only imports ``datetime`` from the module.
        from datetime import timezone

        return datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

    def _record_trait(self, cursor, entity_id, trait, build_id):
        """Insert one trait of one entity, plus its provenance row."""
        trait_id = trait["trait"]

        # Register the canonical trait; existing rows are left untouched.
        cursor.execute(
            "INSERT OR IGNORE INTO canonical_traits (trait_id, trait_type) VALUES (?, ?)",
            (trait_id, "visual"),
        )

        # Link the trait to the entity, replacing any earlier mapping.
        cursor.execute(
            """
            INSERT OR REPLACE INTO entity_visual_traits
            (entity_id, trait_id, confidence, frequency, source_count)
            VALUES (?, ?, ?, ?, ?)
            """,
            (
                entity_id,
                trait_id,
                trait["confidence"],
                trait["frequency"],
                trait["source_count"],
            ),
        )

        # Provenance tracking: the id is unique per (entity, trait, build).
        provenance_id = hashlib.sha256(
            f"{entity_id}_{trait_id}_{build_id}".encode()
        ).hexdigest()
        # NOTE(review): hashing ``str(trait)`` depends on the key order of the
        # loaded dict; kept as-is so hashes match those of earlier builds.
        payload_hash = hashlib.sha256(str(trait).encode()).hexdigest()
        cursor.execute(
            """
            INSERT OR REPLACE INTO trait_provenance
            (provenance_id, entity_id, trait_id, source_uri, extraction_timestamp, extractor_version, raw_payload_hash, trust_score)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?)
            """,
            (
                provenance_id,
                entity_id,
                trait_id,
                "consensus_engine",
                self._utc_now_iso(),
                "v3.1",
                payload_hash,
                trait["canonical_score"],
            ),
        )

    def build(self, consensus_file: str):
        """Load ``consensus_file`` and write its contents to the database.

        The file must contain a JSON object mapping each entity id to a list
        of trait objects with the keys ``trait``, ``confidence``,
        ``frequency``, ``source_count`` and ``canonical_score``.

        Returns:
            A ``(build_id, entity_count, trait_count)`` tuple.

        Raises:
            OSError: if the consensus file cannot be opened.
            json.JSONDecodeError: if the file is not valid JSON.
            KeyError: if a trait object lacks one of the required keys.
            sqlite3.Error: if a database statement fails.

        On any failure the pending transaction is rolled back and the
        repository is closed before the exception propagates.
        """
        self.repo.connect()
        try:
            self.repo.init_schema()

            with open(consensus_file, "r", encoding="utf-8") as f:
                consensus_data = json.load(f)

            conn = self.repo._conn
            cursor = conn.cursor()

            # NOTE(review): second resolution; two builds started within the
            # same second get the same id.
            build_id = f"build_{int(time.time())}"
            entity_count = len(consensus_data)
            trait_count = 0

            for entity_id, traits in consensus_data.items():
                # Add entity (in a real scenario, names come from AniList mapping)
                self.repo.add_entity(
                    entity_id,
                    canonical_name=entity_id.replace("_", " ").title(),
                )
                for trait in traits:
                    self._record_trait(cursor, entity_id, trait, build_id)
                    trait_count += 1

            # Register build manifest
            cursor.execute(
                """
                INSERT INTO knowledge_builds (build_id, build_timestamp, entity_count, trait_count)
                VALUES (?, ?, ?, ?)
                """,
                (build_id, self._utc_now_iso(), entity_count, trait_count),
            )
            conn.commit()
        except Exception:
            # Discard the partial build so a later commit cannot persist it.
            pending = getattr(self.repo, "_conn", None)
            if pending is not None:
                pending.rollback()
            raise
        finally:
            # Always release the repository, including on failure.
            self.repo.close()

        return build_id, entity_count, trait_count