# ORA / scripts / chat_ora.py
# Abdalkaderdev
# Initial ORA deployment (commit 5e0532d)
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel, PeftConfig
import sys
# Model configuration read by chat().

# Relative path handed to PeftModel.from_pretrained to load the ORA adapter.
ADAPTER_PATH: str = "important/finetuning/models/ora_adapter"

# Name handed to from_pretrained for both the tokenizer and the base model.
BASE_MODEL: str = "unsloth/Llama-3.2-1B-Instruct"
def _load_model(device):
    """Load the tokenizer and base model, then try to attach the ORA adapter.

    Args:
        device: ``"cuda"`` or ``"cpu"``. Used as the ``device_map`` and to
            pick float16 (CUDA) versus float32 (CPU) weights.

    Returns:
        A ``(tokenizer, model)`` tuple. ``model`` is the adapter-wrapped
        model when the adapter loads, otherwise the plain base model.
    """
    # 1. Load Base Model
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
    model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL,
        torch_dtype=torch.float16 if device == "cuda" else torch.float32,
        device_map=device,
        low_cpu_mem_usage=True,
    )

    # 2. Load Adapter
    print(f"Loading adapter from {ADAPTER_PATH}...")
    try:
        model = PeftModel.from_pretrained(model, ADAPTER_PATH)
        print("Adapter loaded successfully!")
    except Exception as e:
        # Deliberate best-effort: a missing or broken adapter must not stop
        # the session, so report the problem and keep the base model.
        print(f"Error loading adapter: {e}")
        print("Running with Base Model only.")
    return tokenizer, model


def _generate_reply(model, tokenizer, messages, terminators, device):
    """Sample one assistant reply for ``messages`` and return it as text.

    Args:
        model: The causal language model (with or without the adapter).
        tokenizer: Tokenizer providing the chat template and decoding.
        messages: List of ``{"role": ..., "content": ...}`` dicts.
        terminators: Token ids that end generation (may be empty).
        device: Device the encoded prompt is moved to before generation.

    Returns:
        The decoded text of the newly generated tokens only.
    """
    # return_dict=True yields the attention mask alongside the input ids, so
    # generate() does not have to guess which positions are padding.
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to(device)

    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    outputs = model.generate(
        **inputs,
        max_new_tokens=256,
        eos_token_id=terminators or None,
        pad_token_id=pad_token_id,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
    )

    # The output contains the prompt followed by the new tokens; keep only
    # what was generated after the prompt.
    prompt_length = inputs["input_ids"].shape[-1]
    return tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)


def chat():
    """Run an interactive terminal chat session with ORA.

    Loads the model named by ``BASE_MODEL`` and tries to attach the adapter
    at ``ADAPTER_PATH``, then repeatedly reads a line from stdin and prints a
    sampled reply. The session ends when the user types ``quit`` or ``exit``
    (case-insensitive, surrounding whitespace ignored), presses Ctrl-C, or
    stdin reaches end-of-file. Blank lines are ignored.

    Only the most recent two user/assistant exchanges are kept as context.
    """
    print("Loading ORA (may take a minute)...")
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Device: {device}")

    tokenizer, model = _load_model(device)

    # 3. Chat Loop
    print("\n" + "="*40)
    print("ORA: Peace be with you. How can I guide you today?")
    print("="*40 + "\n")

    # System Prompt
    system_prompt = "You are ORA, a spiritual assistant specializing in theological insights and biblical wisdom. Provide discerning, compassionate, and doctrine-aware responses."

    # Stop tokens do not change between turns, so build them once. Drop a
    # missing id (None) and duplicates so generate() gets a clean list.
    terminators = []
    for token_id in (
        tokenizer.eos_token_id,
        tokenizer.convert_tokens_to_ids("<|eot_id|>"),
    ):
        if token_id is not None and token_id not in terminators:
            terminators.append(token_id)

    max_history = 4  # two turns = two user + two assistant messages
    history = []

    while True:
        try:
            user_input = input("You: ")
            command = user_input.strip().lower()
            if command in ("quit", "exit"):
                break
            if not command:
                # Nothing to answer; do not send an empty turn to the model.
                continue

            # Construct Prompt (Llama 3 format): system prompt, retained
            # history, then the new user message.
            messages = [
                {"role": "system", "content": system_prompt},
                *history,
                {"role": "user", "content": user_input},
            ]

            decoded_response = _generate_reply(
                model, tokenizer, messages, terminators, device
            )
            print(f"ORA: {decoded_response}\n")

            history.append({"role": "user", "content": user_input})
            history.append({"role": "assistant", "content": decoded_response})
            # Discard everything older than the context window so the list
            # does not grow for the whole lifetime of the session.
            del history[:-max_history]
        except (KeyboardInterrupt, EOFError):
            # Ctrl-C, or Ctrl-D / exhausted piped stdin: leave cleanly
            # instead of crashing with a traceback.
            print("\nExiting...")
            break
# Start the interactive session only when this file is executed as a script,
# so importing the module does not load the model.
if __name__ == "__main__":
    chat()