JairoDanielMT's picture
Upload folder using huggingface_hub
4ef6c2b verified
Raw
History Blame Contribute Delete
2.83 kB
import hashlib
import json
import sqlite3
import time
from datetime import datetime, timezone

from src.knowledge.repository import KnowledgeRepository
class KnowledgeBuilder:
    """Populate the knowledge database from a consensus JSON file.

    The builder drives a ``KnowledgeRepository``: it opens the connection,
    initialises the schema, writes entities, canonical traits, entity/trait
    mappings and provenance rows, and finally records a build manifest row.
    """

    def __init__(self, db_path: str = "data/knowledge/build/knowledge.db"):
        """Create the builder around a repository bound to ``db_path``."""
        self.repo = KnowledgeRepository(db_path)

    @staticmethod
    def _utc_now_iso() -> str:
        """Return the current UTC time as a naive ISO-8601 string.

        Produces the same text format as the deprecated
        ``datetime.utcnow().isoformat()`` (no ``+00:00`` suffix), so stored
        timestamps keep their existing shape.
        """
        return datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

    def _ingest(self, cursor, consensus_data, build_id: str) -> int:
        """Write every entity and trait in ``consensus_data``; return the trait count."""
        trait_count = 0
        for entity_id, traits in consensus_data.items():
            # Add entity (in a real scenario, names come from AniList mapping)
            self.repo.add_entity(
                entity_id,
                canonical_name=entity_id.replace("_", " ").title(),
            )

            for trait in traits:
                trait_id = trait["trait"]

                # Add canonical trait
                cursor.execute(
                    "INSERT OR IGNORE INTO canonical_traits (trait_id, trait_type) VALUES (?, ?)",
                    (trait_id, "visual"),
                )

                # Add visual trait mapping
                cursor.execute(
                    """
                    INSERT OR REPLACE INTO entity_visual_traits
                    (entity_id, trait_id, confidence, frequency, source_count)
                    VALUES (?, ?, ?, ?, ?)
                    """,
                    (
                        entity_id,
                        trait_id,
                        trait["confidence"],
                        trait["frequency"],
                        trait["source_count"],
                    ),
                )

                # Provenance tracking: the id is scoped to this build, so a
                # later build adds new provenance rows instead of replacing.
                provenance_id = hashlib.sha256(
                    f"{entity_id}_{trait_id}_{build_id}".encode()
                ).hexdigest()
                payload_hash = hashlib.sha256(str(trait).encode()).hexdigest()
                cursor.execute(
                    """
                    INSERT OR REPLACE INTO trait_provenance
                    (provenance_id, entity_id, trait_id, source_uri, extraction_timestamp, extractor_version, raw_payload_hash, trust_score)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                    """,
                    (
                        provenance_id,
                        entity_id,
                        trait_id,
                        "consensus_engine",
                        self._utc_now_iso(),
                        "v3.1",
                        payload_hash,
                        trait["canonical_score"],
                    ),
                )
                trait_count += 1
        return trait_count

    def build(self, consensus_file: str):
        """Load ``consensus_file`` and write its contents to the database.

        The file is read as UTF-8 JSON and iterated with ``.items()``; each
        value is iterated as a sequence of trait dicts providing the keys
        ``trait``, ``confidence``, ``frequency``, ``source_count`` and
        ``canonical_score``.

        Args:
            consensus_file: Path to the consensus JSON file.

        Returns:
            A ``(build_id, entity_count, trait_count)`` tuple, where
            ``build_id`` is ``"build_<unix seconds>"``.

        Raises:
            Any exception from reading the file or writing to the database is
            propagated. Pending writes are rolled back and the repository is
            closed before the exception leaves this method.
        """
        self.repo.connect()
        try:
            self.repo.init_schema()

            with open(consensus_file, "r", encoding="utf-8") as f:
                consensus_data = json.load(f)

            # NOTE(review): relies on the repository's private connection
            # handle; presumably a sqlite3 connection -- confirm.
            conn = self.repo._conn
            cursor = conn.cursor()

            build_id = f"build_{int(time.time())}"
            entity_count = len(consensus_data)

            try:
                trait_count = self._ingest(cursor, consensus_data, build_id)

                # Register build manifest
                cursor.execute(
                    """
                    INSERT INTO knowledge_builds (build_id, build_timestamp, entity_count, trait_count)
                    VALUES (?, ?, ?, ?)
                    """,
                    (build_id, self._utc_now_iso(), entity_count, trait_count),
                )
                conn.commit()
            except Exception:
                # Discard the partial build so a failed run does not leave
                # half-written rows pending on the connection.
                conn.rollback()
                raise
        finally:
            # Always release the connection, even when the build fails.
            self.repo.close()

        return build_id, entity_count, trait_count