"""Demo script: load a causal language model and answer a few sample questions.

The model and tokenizer are loaded once at module level, moved to the GPU when
one is available, and then queried through :func:`chat` using a simple
``<|user|>`` / ``<|assistant|>`` prompt format.
"""

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = "Girinath11/recursive-language-model-198m"
ASSISTANT_TAG = "<|assistant|>"

# NOTE: trust_remote_code=True runs Python code shipped with the model
# repository; keep it only for repositories you trust.
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)

device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)
model.eval()

print(f"✅ Model loaded on {device}")
print(f"📊 Parameters: {sum(p.numel() for p in model.parameters()):,}\n")


def chat(question, max_new_tokens=150, temperature=0.7, top_p=0.9):
    """Generate the model's answer to a single question.

    Args:
        question: The user's question, inserted into the chat prompt.
        max_new_tokens: Forwarded to ``model.generate``.
        temperature: Forwarded to ``model.generate``.
        top_p: Forwarded to ``model.generate``.

    Returns:
        The decoded text following the last ``<|assistant|>`` tag, stripped of
        surrounding whitespace. If the tag is absent from the decoded text,
        the decoded text with the echoed question removed.
    """
    prompt = f"<|user|>\n{question}\n{ASSISTANT_TAG}\n"
    # The prompt already carries its own role tags, so the tokenizer is told
    # not to add special tokens of its own.
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        add_special_tokens=False,
    ).to(device)

    with torch.no_grad():
        outputs = model.generate(
            inputs["input_ids"],
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
        )

    full_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    if ASSISTANT_TAG in full_text:
        return full_text.split(ASSISTANT_TAG)[-1].strip()

    # The tag did not survive decoding. Drop only the first occurrence of the
    # question (the echoed prompt) so that any repetition of it inside the
    # answer itself is preserved.
    return full_text.replace(question, "", 1).strip()


questions = [
    "What is machine learning?",
    "What is Python programming?",
    "Explain neural networks simply",
    "What is artificial intelligence?",
]

for q in questions:
    print(f"\n❓ {q}")
    print(f"💬 {chat(q)}")