---
license: mit
---

An instruction-tuned model fine-tuned on the WizardLMTeam/WizardLM_evol_instruct_V2_196k dataset.

- Small enough to run on a phone
- 124 million parameters (see the snippet below to verify)
- Comparable performance to TinyLlama-Chat
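
If you want to verify the parameter count yourself, summing `numel()` over the loaded model's parameters takes only a couple of lines:

```python
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("Aclevo/Lazarus-Instruct")

# Total parameter count -- should print roughly 124M
n_params = sum(p.numel() for p in model.parameters())
print(f"{n_params / 1e6:.0f}M parameters")
```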

We ran some zero-shot tests to compare Lazarus Instruct with the much larger TinyLlama-Chat.
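This card doesn't reproduce the exact evaluation harness, but a minimal side-by-side zero-shot comparison looks something like the sketch below. The `TinyLlama/TinyLlama-1.1B-Chat-v1.0` repo id is our assumption for the TinyLlama-Chat checkpoint; substitute whichever revision you want to compare against.

```python
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

prompt = "Explain why the sky is blue.\nAI:"

# NOTE: the TinyLlama repo id is an assumption -- point it at whichever
# TinyLlama-Chat checkpoint you want to compare against.
for name in ["Aclevo/Lazarus-Instruct", "TinyLlama/TinyLlama-1.1B-Chat-v1.0"]:
    tokenizer = AutoTokenizer.from_pretrained(name)
    model = AutoModelForCausalLM.from_pretrained(name)
    model.eval()
    inputs = tokenizer(prompt, return_tensors="pt")
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=100,
            do_sample=False,  # greedy decoding for a repeatable comparison
            pad_token_id=tokenizer.eos_token_id,
        )
    print(f"--- {name} ---")
    print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True))
```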
## Usage

You can interact with Lazarus using the script below:

```python
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

print("CUDA Available:", torch.cuda.is_available())

# Load the tokenizer and model weights from the Hugging Face Hub
model_name = "Aclevo/Lazarus-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
model.eval()

# Run on GPU when available, otherwise fall back to CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

system_prompt = (
    "Your name is Lazarus. You are an intelligent AI assistant. You help users with whatever they need. "
    "You always think before answering, and explain your reasoning out loud step by step.\n"
)

chat_history = []

def chat():
    print("Chatting with Lazarus (type 'exit' to quit)\n")

    while True:
        user_input = input("You: ")
        if user_input.lower() == "exit":
            break

        chat_history.append(f"You: {user_input}")
        # Keep only the last six turns so the prompt fits in the context window
        recent_history = chat_history[-6:]
        full_prompt = system_prompt + "\n".join(recent_history) + "\nAI:"

        inputs = tokenizer(full_prompt, return_tensors="pt", truncation=True).to(device)

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_length=inputs["input_ids"].shape[1] + 150,
                pad_token_id=tokenizer.eos_token_id,
                do_sample=True,
                top_k=100,
                top_p=0.92,
                temperature=0.7,
                eos_token_id=tokenizer.eos_token_id
            )

        # Decode only the newly generated tokens, not the prompt
        response = tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
        response = response.strip()

        # Skip a few known low-quality completions and prompt the user again
        bad_responses = {"I hope that", "I don't know", "", "I'm excited"}
        if response in bad_responses:
            print("AI: [Low-quality response discarded, please try again]")
            continue

        print(f"AI: {response}")
        chat_history.append(f"AI: {response}")

if __name__ == "__main__":
    chat()
```
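
Running the script requires only the `transformers` and `torch` packages. The sampling settings in the call to `generate` (`top_k=100`, `top_p=0.92`, `temperature=0.7`) trade coherence against variety; lowering the temperature makes replies more deterministic.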

Please consider citing us if you find this model useful.

# Aclevo is not responsible for any misuse of this model