How to use it:

from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Hub repository to pull the checkpoint from
REPO_ID = "iko-01/iko-v5e-1"

# Fetch the tokenizer and model weights from the Hub
print("Loading model and tokenizer from", REPO_ID)
tokenizer = AutoTokenizer.from_pretrained(REPO_ID)
model = AutoModelForCausalLM.from_pretrained(REPO_ID)

# Pick the fastest available device for generation
if torch.cuda.is_available():
    device = "cuda"
    print("Using CUDA device for generation")
else:
    device = "cpu"
model.to(device)

def generate_from_user(user_text, max_new_tokens=250, do_sample=False):
    """Generate an assistant reply for *user_text* with the loaded model.

    Args:
        user_text: The user's message; surrounding whitespace is stripped
            before it is placed into the chat prompt template.
        max_new_tokens: Upper bound on the number of generated tokens.
        do_sample: If True, sample with top_p=0.95 / temperature=0.8;
            otherwise decode greedily.

    Returns:
        The assistant portion of the decoded output, with a dangling
        "<think>" tag removed if the model never closed it.
    """
    prompt = f"### User:\n{user_text.strip()}\n\n### Assistant:\n<think>"
    inputs = tokenizer(prompt, return_tensors="pt")
    input_ids = inputs.input_ids.to(device)
    attention_mask = inputs.attention_mask.to(device)

    # Some causal-LM tokenizers define no pad token; fall back to EOS so
    # generate() does not warn (or fail) on a None pad_token_id.
    pad_id = tokenizer.pad_token_id
    if pad_id is None:
        pad_id = tokenizer.eos_token_id

    gen_kwargs = dict(
        attention_mask=attention_mask,
        max_new_tokens=max_new_tokens,
        do_sample=do_sample,
        pad_token_id=pad_id,
        eos_token_id=tokenizer.eos_token_id,
        num_return_sequences=1,
        repetition_penalty=1.1,
    )
    # top_p/temperature only apply when sampling; passing them with
    # do_sample=False is ignored and triggers transformers warnings.
    if do_sample:
        gen_kwargs["top_p"] = 0.95
        gen_kwargs["temperature"] = 0.8

    gen = model.generate(input_ids, **gen_kwargs)
    out = tokenizer.decode(gen[0], skip_special_tokens=False)

    # Keep only what follows the assistant marker (the decode includes the
    # prompt, since generate() returns prompt + completion ids).
    assistant_part = out.split("### Assistant:")[1].strip() if "### Assistant:" in out else out

    # The prompt seeds an opening <think>; drop it if it was never closed.
    if "<think>" in assistant_part and "</think>" not in assistant_part:
        assistant_part = assistant_part.replace("<think>", "", 1)

    return assistant_part

# Three English prompts: empty input, a coding question, and an
# opinion/safety question.
test_questions = [
    "",
    "How can I calculate two numbers in Python code?",
    "What do you think about the death penalty in Egypt?"
]

# Run each prompt through the model greedily and print the reply.
print("\n--- Testing Model with Questions ---")
for q in test_questions:
    print("\nUSER:", q)
    answer = generate_from_user(q, max_new_tokens=250, do_sample=False)
    print("MODEL OUTPUT:\n", answer)
    print("-" * 60)

print("\nTesting complete.")

Training: loss went from 2.420000 at step 100 to 2.190000 at step 7400.

Downloads last month
5
Safetensors
Model size
0.2B params
Tensor type
F32
·
Inference Providers NEW
This model isn't deployed by any Inference Provider. 🙋 Ask for provider support

Dataset used to train iko-01/iko-v5e-1