## How to use it
```python
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Define the repository ID
REPO_ID = "iko-01/iko-v5e-1"

# Load the model and tokenizer
print("Loading model and tokenizer from", REPO_ID)
tokenizer = AutoTokenizer.from_pretrained(REPO_ID)
model = AutoModelForCausalLM.from_pretrained(REPO_ID)

# Some tokenizers ship without a pad token; fall back to the EOS token
if tokenizer.pad_token_id is None:
    tokenizer.pad_token = tokenizer.eos_token

# Pick the device for generation (GPU if available, otherwise CPU)
device = "cuda" if torch.cuda.is_available() else "cpu"
if device == "cuda":
    print("Using CUDA device for generation")
model.to(device)

def generate_from_user(user_text, max_new_tokens=250, do_sample=False):
    # Build the prompt in the format the model expects,
    # opening a <think> block for the assistant's reasoning
    prompt = f"### User:\n{user_text.strip()}\n\n### Assistant:\n<think>"
    inputs = tokenizer(prompt, return_tensors="pt")
    input_ids = inputs.input_ids.to(device)
    attention_mask = inputs.attention_mask.to(device)
    gen = model.generate(
        input_ids,
        attention_mask=attention_mask,
        max_new_tokens=max_new_tokens,
        do_sample=do_sample,
        top_p=0.95,        # only used when do_sample=True
        temperature=0.8,   # only used when do_sample=True
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=tokenizer.eos_token_id,
        num_return_sequences=1,
        repetition_penalty=1.1,
    )
    out = tokenizer.decode(gen[0], skip_special_tokens=False)
    # Isolate the generated assistant part
    assistant_part = out.split("### Assistant:")[1].strip() if "### Assistant:" in out else out
    # Drop the opening <think> tag if the model never closed it
    if "<think>" in assistant_part and "</think>" not in assistant_part:
        assistant_part = assistant_part.replace("<think>", "", 1)
    return assistant_part

# Define test questions (the first is an empty prompt, kept to probe default behaviour)
test_questions = [
    "",
    "How can I calculate two numbers in Python code?",
    "What do you think about the death penalty in Egypt?",
]

# Generate and print responses
print("\n--- Testing Model with Questions ---")
for question in test_questions:
    print("\nUSER:", question)
    response = generate_from_user(question, max_new_tokens=250, do_sample=False)
    print("MODEL OUTPUT:\n", response)
    print("-" * 60)
print("\nTesting complete.")
```
Training: the loss fell from 2.4200 at step 100 to 2.1900 at step 7400.