"""Smoke script for the NZFC-Gram runtime with the DiffusionGemma adapter.

By default the script only builds the chat runtime (without loading the
base model), attaches the large-document memory and answer-quality
governor helpers, prints a PASS line, and exits with status 0.

When the environment variable ``LOAD_MODEL`` is exactly ``'1'`` it goes
on to attach the block-diffusion adapter for ``MODEL_ID`` and prints one
generated answer.
"""

import os
import sys
from pathlib import Path

# Repository root is taken to be the directory one level above this
# file's own directory; it is put first on sys.path so that the
# nzfc_gram_runtime imports below can resolve.
ROOT = Path(__file__).resolve().parent.parent
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))

# These imports must stay below the sys.path bootstrap above.
from nzfc_gram_runtime import NZFCGramLongMemoryChat  # noqa: E402
from nzfc_gram_runtime.diffusiongemma_adapter import (  # noqa: E402
    attach_diffusiongemma_block_diffusion,
)
from nzfc_gram_runtime.large_document import (  # noqa: E402
    attach_large_document_memory,
)
from nzfc_gram_runtime.quality import (  # noqa: E402
    attach_answer_quality_governor,
)

MODEL_ID = 'google/diffusiongemma-26B-A4B-it'

# Opt-in switch: only the literal value '1' enables the model-loading path.
LOAD_MODEL = os.getenv('LOAD_MODEL', '0') == '1'

# The runtime itself is always constructed with model loading disabled;
# the model is attached separately further down when LOAD_MODEL is set.
bot = NZFCGramLongMemoryChat(
    repo_dir=str(ROOT),
    model_id=MODEL_ID,
    memory_db_path='./user_memory_diffusiongemma_generation.sqlite3',
    load_model=False,
    require_model=False,
    preload_static_memory=False,
)
attach_large_document_memory(bot)
attach_answer_quality_governor(bot)

if not LOAD_MODEL:
    # Lightweight path: report success and stop before touching the model.
    print('Set LOAD_MODEL=1 to load google/diffusiongemma-26B-A4B-it on suitable hardware.')
    print('[PASS] runtime initialized without loading base model')
    sys.exit(0)

# Heavy path: attach the adapter, show whatever metadata it returns,
# then run a single short generation and print its answer.
meta = attach_diffusiongemma_block_diffusion(
    bot,
    model_id=MODEL_ID,
    device_map='auto',
    dtype='auto',
)
print(meta)

out = bot.generate_answer(
    system_prompt='You are a concise assistant. Answer in one sentence.',
    user_prompt='Explain memory as evidence, not instruction.',
    max_new_tokens=80,
)
print(out['answer'])