| import json | |
| import torch | |
| from transformers import AutoTokenizer, AutoModelForCausalLM | |
# Load runtime configuration: base model name plus generation parameters
# ("max_new_tokens", "temperature", "top_p" are read later in chat()).
with open("config.json") as f:
    cfg = json.load(f)

# Tokenizer and model are built from the same base checkpoint named in the config.
tokenizer = AutoTokenizer.from_pretrained(cfg["base_model"])
model = AutoModelForCausalLM.from_pretrained(
    cfg["base_model"],
    torch_dtype=torch.float32,  # full precision, since inference runs on CPU
    device_map="cpu"
)

# Base system prompt text; a mode-specific suffix is appended by build_system_prompt().
with open("prompt.txt") as f:
    BASE_SYSTEM_PROMPT = f.read().strip()

# Module-level toggle flipped by the /reason on|off commands in the REPL below.
reasoning_enabled = False
def build_system_prompt():
    """Return the system prompt with a suffix matching the current reasoning mode.

    Reads the module-level ``reasoning_enabled`` flag and appends either a
    step-by-step instruction or a brevity instruction to ``BASE_SYSTEM_PROMPT``.
    """
    suffix = (
        "\n\nWhen solving problems, reason step by step and explain your thinking clearly."
        if reasoning_enabled
        else "\n\nGive concise, direct answers unless explanation is required."
    )
    return BASE_SYSTEM_PROMPT + suffix
def chat(user_input):
    """Generate a model reply to *user_input* under the current system prompt.

    Returns only the assistant's newly generated text; the prompt tokens are
    stripped before decoding.
    """
    messages = [
        {"role": "system", "content": build_system_prompt()},
        {"role": "user", "content": user_input},
    ]
    # Fix: add_generation_prompt=True appends the assistant header tokens so
    # the model starts a fresh assistant turn instead of continuing the user's.
    input_ids = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt"
    )
    with torch.no_grad():
        output = model.generate(
            input_ids,
            max_new_tokens=cfg["max_new_tokens"],
            temperature=cfg["temperature"],
            top_p=cfg["top_p"],
            do_sample=True
        )
    # Fix: decode only the newly generated tokens. The original decoded the
    # whole sequence, echoing the entire system+user prompt back to the caller.
    new_tokens = output[0][input_ids.shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)
if __name__ == "__main__":
    # NOTE: the original had `global reasoning_enabled` here; `global` is a
    # no-op at module scope — plain assignment below already rebinds the
    # module-level flag, so the statement is simply removed.
    print("Brad AI 1.12.2x")
    print("Commands: /reason on | /reason off | exit")
    while True:
        try:
            user = input("You: ")
        except (EOFError, KeyboardInterrupt):
            # Fix: exit cleanly on Ctrl-D / Ctrl-C instead of a traceback.
            print()
            break
        # Normalize once for command matching; tolerate surrounding whitespace.
        command = user.strip().lower()
        if command in ("exit", "quit"):
            break
        if command == "/reason on":
            reasoning_enabled = True
            print("Reasoning mode ENABLED")
            continue
        if command == "/reason off":
            reasoning_enabled = False
            print("Reasoning mode DISABLED")
            continue
        # Pass the raw (unstripped) input through to the model, as before.
        print(chat(user))