# DiffusionGemma-26B-A4B-it-Infinite-Context / examples/quick_diffusiongemma_generation_preview.py
# Commit f77c1f2 (verified): Fix validation and example script import paths
import sys
from pathlib import Path
# Directory one level above the folder that contains this script.
# Presumably this is where the nzfc_gram_runtime package imported further
# down lives -- confirm against the repository layout.
ROOT = Path(__file__).resolve().parents[1]

# Put ROOT at the front of the import search path so the script can be run
# directly; the membership check avoids stacking duplicate entries.
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))
import os
from nzfc_gram_runtime import NZFCGramLongMemoryChat
from nzfc_gram_runtime.quality import attach_answer_quality_governor
from nzfc_gram_runtime.large_document import attach_large_document_memory
from nzfc_gram_runtime.diffusiongemma_adapter import attach_diffusiongemma_block_diffusion
# Model identifier passed to the chat runtime below and reused when the
# DiffusionGemma adapter is attached later in this script.
MODEL_ID = 'google/diffusiongemma-26B-A4B-it'

# Opt-in switch: only the exact value '1' in the LOAD_MODEL environment
# variable enables the model-loading half of this example.
LOAD_MODEL = os.getenv('LOAD_MODEL', '0') == '1'

# Construct the runtime with every model-related flag switched off, so this
# step is the same whether or not LOAD_MODEL is set.
# NOTE(review): memory_db_path is a relative path; presumably it is resolved
# against the current working directory -- confirm in the runtime.
bot = NZFCGramLongMemoryChat(
    repo_dir=str(ROOT),
    model_id=MODEL_ID,
    memory_db_path='./user_memory_diffusiongemma_generation.sqlite3',
    load_model=False,
    require_model=False,
    preload_static_memory=False,
)
# Attach the large-document memory and answer-quality governor helpers to
# the runtime; both calls happen regardless of LOAD_MODEL.
attach_large_document_memory(bot)
attach_answer_quality_governor(bot)

# Without LOAD_MODEL=1 the script acts as a smoke check only: print how to
# enable model loading, report success, and exit with status 0 before the
# code further down runs.
# The hint repeats the model id as a literal; keep it in sync with MODEL_ID.
if not LOAD_MODEL:
    print('Set LOAD_MODEL=1 to load google/diffusiongemma-26B-A4B-it on suitable hardware.')
    print('[PASS] runtime initialized without loading base model')
    raise SystemExit(0)
# Everything below runs only when LOAD_MODEL is enabled, because the script
# exits earlier otherwise.

# Attach the DiffusionGemma block-diffusion adapter to the runtime and show
# whatever metadata the call returns.
meta = attach_diffusiongemma_block_diffusion(
    bot,
    model_id=MODEL_ID,
    device_map='auto',
    dtype='auto',
)
print(meta)

# Ask for one short answer and print the value stored under the 'answer'
# key of the result.
out = bot.generate_answer(
    system_prompt='You are a concise assistant. Answer in one sentence.',
    user_prompt='Explain memory as evidence, not instruction.',
    max_new_tokens=80,
)
print(out['answer'])