"""Interactive command-line chat loop around a Hugging Face causal LM.

Settings are read from ``config.json`` (keys used in this file:
``base_model``, ``max_new_tokens``, ``temperature``, ``top_p``) and the base
system prompt from ``prompt.txt``. Both paths are relative to the current
working directory. The tokenizer and model are loaded at import time, on CPU
in float32.
"""

import json

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# JSON is UTF-8 by specification; be explicit so the platform default
# encoding cannot change how the file is read.
with open("config.json", encoding="utf-8") as f:
    cfg = json.load(f)

tokenizer = AutoTokenizer.from_pretrained(cfg["base_model"])
model = AutoModelForCausalLM.from_pretrained(
    cfg["base_model"],
    torch_dtype=torch.float32,
    device_map="cpu",
)

# NOTE(review): prompt.txt is assumed to be UTF-8 encoded — confirm.
with open("prompt.txt", encoding="utf-8") as f:
    BASE_SYSTEM_PROMPT = f.read().strip()

# Toggled at runtime by the "/reason on" and "/reason off" commands.
reasoning_enabled = False


def build_system_prompt():
    """Return the base system prompt plus a mode-specific instruction.

    The appended instruction depends on the module-level
    ``reasoning_enabled`` flag: step-by-step reasoning when it is true,
    concise answers otherwise.
    """
    if reasoning_enabled:
        return (
            BASE_SYSTEM_PROMPT
            + "\n\nWhen solving problems, reason step by step and explain your thinking clearly."
        )
    return (
        BASE_SYSTEM_PROMPT
        + "\n\nGive concise, direct answers unless explanation is required."
    )


def chat(user_input):
    """Generate the model's reply to a single user message.

    Each call builds a fresh two-message conversation (system + user); no
    history is carried over between calls.

    Args:
        user_input: The user's message text.

    Returns:
        The decoded text of the newly generated tokens only, with special
        tokens removed.
    """
    messages = [
        {"role": "system", "content": build_system_prompt()},
        {"role": "user", "content": user_input},
    ]
    # add_generation_prompt=True makes the template open the assistant turn,
    # so the model answers instead of continuing the user's message.
    # return_dict=True also yields the attention mask for generate().
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_dict=True,
        return_tensors="pt",
    )
    input_ids = inputs["input_ids"]
    prompt_length = input_ids.shape[-1]

    with torch.no_grad():
        output = model.generate(
            input_ids=input_ids,
            attention_mask=inputs.get("attention_mask"),
            max_new_tokens=cfg["max_new_tokens"],
            temperature=cfg["temperature"],
            top_p=cfg["top_p"],
            do_sample=True,
        )

    # generate() returns prompt + completion; drop the prompt tokens so the
    # system prompt and the user's own message are not echoed back.
    reply_ids = output[0][prompt_length:]
    return tokenizer.decode(reply_ids, skip_special_tokens=True)


def main():
    """Run the interactive prompt until the user exits.

    Recognised commands (case-insensitive, surrounding whitespace ignored):
    ``exit`` / ``quit`` to leave, ``/reason on`` and ``/reason off`` to
    toggle reasoning mode. Any other non-blank input is sent to ``chat``.
    """
    # Declared inside a function: a module-level ``global`` statement placed
    # after the name has been assigned is a SyntaxError.
    global reasoning_enabled

    print("Brad AI 1.12.2x")
    print("Commands: /reason on | /reason off | exit")

    while True:
        try:
            user = input("You: ")
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C: leave the loop without a traceback.
            print()
            break

        command = user.strip().lower()
        if command in ("exit", "quit"):
            break
        if command == "/reason on":
            reasoning_enabled = True
            print("Reasoning mode ENABLED")
            continue
        if command == "/reason off":
            reasoning_enabled = False
            print("Reasoning mode DISABLED")
            continue
        if not command:
            # Nothing to send to the model.
            continue

        print(chat(user))


if __name__ == "__main__":
    main()