In [1]:
import json
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch


In [2]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Device set to: {device}")


Device set to: cpu


In [3]:
# Load CLI Q&A dataset
with open("cli_questions.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# Access the list of entries inside "data" key
qa_list = data["data"]

# Show a sample
print(f"Total entries: {len(qa_list)}")
print("Sample entry:", qa_list[0])


Total entries: 172
Sample entry: {'question': 'What is the intended use-case for git stash?', 'answer': 'Git stash is a convenience method to temporarily store your working changes. One key use-case is when you’ve started working on a new patch but realize you forgot something in your last commit. In such cases, you can stash your current work, amend the previous commit, and then pop the stash to resume work.\n\nExample:\n```\n# Stash current changes\ngit stash save\n\n# Fix and amend the previous commit\ngit add -u\ngit commit --amend\n\n# Restore your stashed changes\ngit stash pop\n```\n\nWhile creating temporary branches is also a valid approach, stash is often faster for quick save-and-resume workflows.', 'tags': ['git', 'git-stash']}


In [4]:
model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)
model.to(device)


LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32000, 2048)
    (layers): ModuleList(
      (0-21): 22 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (k_proj): Linear(in_features=2048, out_features=256, bias=False)
          (v_proj): Linear(in_features=2048, out_features=256, bias=False)
          (o_proj): Linear(in_features=2048, out_features=2048, bias=False)
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=2048, out_features=5632, bias=False)
          (up_proj): Linear(in_features=2048, out_features=5632, bias=False)
          (down_proj): Linear(in_features=5632, out_features=2048, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((2048,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((2048,), eps=1e-05)
      )
    )
    (norm): LlamaRMSNorm((2048,), eps=1e-05)
    (rotary_emb): 

In [5]:
generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=-1)  # -1 for CPU


Device set to use cpu


In [6]:
# Pick sample questions
sample_questions = [entry["question"] for entry in qa_list[:5]]

# Generate and print answers
for i, question in enumerate(sample_questions):
    print(f"Q{i+1}: {question}")
    output = generator(question, max_new_tokens=150, do_sample=True, temperature=0.7)
    print(f"A{i+1}: {output[0]['generated_text']}\n{'-'*60}")


Q1: What is the intended use-case for git stash?
A1: What is the intended use-case for git stash?
------------------------------------------------------------
Q2: What is the difference between 'git pull' and 'git fetch'?
A2: What is the difference between 'git pull' and 'git fetch'?
------------------------------------------------------------
Q3: How do I undo the most recent local commits in Git?
A3: How do I undo the most recent local commits in Git?
------------------------------------------------------------
Q4: How do I delete a Git branch locally and remotely?
A4: How do I delete a Git branch locally and remotely?
------------------------------------------------------------
Q5: What is the intended use-case for git stash?
A5: What is the intended use-case for git stash?
------------------------------------------------------------


In [7]:
prompt = f"Q: {question}\nA:"
output = generator(prompt, max_new_tokens=100, do_sample=True, temperature=0.7)
print(output[0]["generated_text"])


Q: What is the intended use-case for git stash?
A: Git stash is a powerful tool that lets you store changes in one place and then later come back and finish that work. Git stashes changes to a temporary branch, which can be used to work on a task without committing the changes yet.
Git stash is used for tasks like modifying code, fixing bugs, and fixing merge conflicts during development. It is a powerful tool that can help you get work done faster and avoid the risk of losing changes.
Q: How do I


In [None]:
import json

# Load the dataset
with open("cli_questions.json", "r") as f:
    raw = json.load(f)
    data = raw["data"]  # ensure this matches your JSON structure

# Generate answers
results = []
for i, item in enumerate(data[:50]):  # run on subset first
    question = item["question"]
    prompt = f"Q: {question}\nA:"
    output = generator(prompt, max_new_tokens=150, temperature=0.7, do_sample=True)
    answer = output[0]["generated_text"].split("A:")[1].strip() if "A:" in output[0]["generated_text"] else output[0]["generated_text"]
    results.append({"question": question, "answer": answer})
    print(f"Q{i+1}: {question}\nA{i+1}: {answer}\n{'-'*60}")


Q1: What is the intended use-case for git stash?
A1: Git stash is part of the commit cycle. It's used to stage changes to a branch before committing them to the repository. This allows you to easily squash multiple commits into a single commit, without having to squash them individually. It's also used to temporarily store changes so that you can make changes to those changes before committing them.
Q: How does git stash work?
------------------------------------------------------------
Q2: What is the difference between 'git pull' and 'git fetch'?
A2: Git pull and Git fetch are two commands that are used to fetch changes made on another repository and update your local repository. Here's the difference between them:


*

*git pull pulls changes from another branch or remote repository into your local repository.

*git fetch fetches changes from a remote repository into your local repository.

*If you update your local repository with changes from the remote repository using git pull, 

In [1]:
!pip install transformers datasets peft accelerate bitsandbytes trl --quiet


In [5]:
print("Top-level keys:", data.keys() if isinstance(data, dict) else "Not a dict")
print("Preview:", str(data)[:500])  # Print first 500 chars of the content


Top-level keys: dict_keys(['data'])
Preview: {'data': [{'question': 'What is the intended use-case for git stash?', 'answer': 'Git stash is a convenience method to temporarily store your working changes. One key use-case is when you’ve started working on a new patch but realize you forgot something in your last commit. In such cases, you can stash your current work, amend the previous commit, and then pop the stash to resume work.\n\nExample:\n```\n# Stash current changes\ngit stash save\n\n# Fix and amend the previous commit\ngit add -u\n


In [6]:
import json
from datasets import Dataset

# Load and extract Q&A list
with open("cli_questions.json", "r") as f:
    raw = json.load(f)
    data_list = raw["data"]  # ✅ correct key now

# Convert to prompt/response format
for sample in data_list:
    sample["prompt"] = sample["question"]
    sample["response"] = sample["answer"]

# Create HuggingFace Dataset
dataset = Dataset.from_list(data_list)
dataset = dataset.train_test_split(test_size=0.1)

print("Loaded dataset:", dataset)


Loaded dataset: DatasetDict({
    train: Dataset({
        features: ['question', 'answer', 'tags', 'prompt', 'response'],
        num_rows: 154
    })
    test: Dataset({
        features: ['question', 'answer', 'tags', 'prompt', 'response'],
        num_rows: 18
    })
})


In [7]:
from transformers import AutoTokenizer, AutoModelForCausalLM

model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # or try "microsoft/phi-2"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    load_in_4bit=True  # For LoRA on low-resource
)


The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.
The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers and GPU quantization are unavailable.
None of the available devices `available_devices = None` are supported by the bitsandbytes version you have installed: `bnb_supported_devices = {'npu', 'cuda', 'mps', 'hpu', '"cpu" (needs an Intel CPU and intel_extension_for_pytorch installed and compatible with the PyTorch version)', 'xpu'}`. Please check the docs to see if the backend you intend to use is available and how to install it: https://huggingface.co/docs/bitsandbytes/main/en/installation#multi-backend


RuntimeError: None of the available devices `available_devices = None` are supported by the bitsandbytes version you have installed: `bnb_supported_devices = {'npu', 'cuda', 'mps', 'hpu', '"cpu" (needs an Intel CPU and intel_extension_for_pytorch installed and compatible with the PyTorch version)', 'xpu'}`. Please check the docs to see if the backend you intend to use is available and how to install it: https://huggingface.co/docs/bitsandbytes/main/en/installation#multi-backend

In [8]:
from transformers import AutoTokenizer, AutoModelForCausalLM

model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype="auto",  # or torch.float32 if you get another dtype error
    device_map="cpu"     # force CPU since no supported GPU found
)


In [12]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=bnb_config
)


NameError: name 'torch' is not defined

In [13]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=bnb_config
)


None of the available devices `available_devices = None` are supported by the bitsandbytes version you have installed: `bnb_supported_devices = {'npu', 'cuda', 'mps', 'hpu', '"cpu" (needs an Intel CPU and intel_extension_for_pytorch installed and compatible with the PyTorch version)', 'xpu'}`. Please check the docs to see if the backend you intend to use is available and how to install it: https://huggingface.co/docs/bitsandbytes/main/en/installation#multi-backend


RuntimeError: None of the available devices `available_devices = None` are supported by the bitsandbytes version you have installed: `bnb_supported_devices = {'npu', 'cuda', 'mps', 'hpu', '"cpu" (needs an Intel CPU and intel_extension_for_pytorch installed and compatible with the PyTorch version)', 'xpu'}`. Please check the docs to see if the backend you intend to use is available and how to install it: https://huggingface.co/docs/bitsandbytes/main/en/installation#multi-backend

In [14]:
from transformers import AutoTokenizer, AutoModelForCausalLM

model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",   # This will still use CPU if no GPU is found
)


Some parameters are on the meta device because they were offloaded to the disk and cpu.


In [17]:
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.float32  # or float16 if your CPU supports it
)


Some parameters are on the meta device because they were offloaded to the disk and cpu.


In [16]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Device set to: {device}")


Device set to: cpu


In [1]:
import json
import torch
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, DataCollatorForLanguageModeling
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training




In [1]:
import json
import torch
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, DataCollatorForLanguageModeling
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training




In [2]:
import json

with open("cli_questions.json") as f:
    data = json.load(f)

# Check the top-level structure
print(type(data))  # Should print <class 'dict'>
print(data.keys())  # See what keys are at the top


<class 'dict'>
dict_keys(['data'])


In [3]:
import json
from datasets import Dataset

# Load the JSON and extract the list
with open("cli_questions.json") as f:
    raw = json.load(f)

qa_list = raw["data"]  # access the list inside the 'data' key

# Format for instruction tuning
formatted_data = [
    {"text": f"### Question:\n{item['question']}\n\n### Answer:\n{item['answer']}"}
    for item in qa_list
]

# Convert to Hugging Face dataset
dataset = Dataset.from_list(formatted_data)

# Preview
print(f"Loaded {len(dataset)} formatted examples")
print(dataset[0])


Loaded 172 formatted examples
{'text': '### Question:\nWhat is the intended use-case for git stash?\n\n### Answer:\nGit stash is a convenience method to temporarily store your working changes. One key use-case is when you’ve started working on a new patch but realize you forgot something in your last commit. In such cases, you can stash your current work, amend the previous commit, and then pop the stash to resume work.\n\nExample:\n```\n# Stash current changes\ngit stash save\n\n# Fix and amend the previous commit\ngit add -u\ngit commit --amend\n\n# Restore your stashed changes\ngit stash pop\n```\n\nWhile creating temporary branches is also a valid approach, stash is often faster for quick save-and-resume workflows.'}


In [4]:
from transformers import AutoTokenizer

model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # You can switch to Phi-2 if you prefer

tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token  # Needed for causal LM padding

# Tokenization function
def tokenize(example):
    return tokenizer(example["text"], padding="max_length", truncation=True, max_length=512)

tokenized_dataset = dataset.map(tokenize, batched=True)
tokenized_dataset = tokenized_dataset.remove_columns(["text"])

tokenized_dataset.set_format(type="torch")
print(tokenized_dataset[0])


Map:   0%|          | 0/172 [00:00<?, ? examples/s]

{'input_ids': tensor([    1,   835,   894, 29901,    13,  5618,   338,   278,  9146,   671,
        29899,  4878,   363,  6315,   380,  1161, 29973,    13,    13,  2277,
        29937,   673, 29901,    13, 28712,   380,  1161,   338,   263, 29703,
         1158,   304,  5382,  6275,  3787,   596,  1985,  3620, 29889,  3118,
         1820,   671, 29899,  4878,   338,   746,   366, 30010,   345,  4687,
         1985,   373,   263,   716, 13261,   541, 16289,   366,  9640,  1554,
          297,   596,  1833,  9063, 29889,   512,  1316,  4251, 29892,   366,
          508,   380,  1161,   596,  1857,   664, 29892,   626,   355,   278,
         3517,  9063, 29892,   322,   769,  1835,   278,   380,  1161,   304,
          620,  2017,   664, 29889,    13,    13, 14023, 29901,    13, 28956,
           13, 29937,   624,  1161,  1857,  3620,    13,  5559,   380,  1161,
         4078,    13,    13, 29937, 24778,   322,   626,   355,   278,  3517,
         9063,    13,  5559,   788,   448, 29884, 

In [5]:
train_dataset = tokenized_dataset


In [6]:
# Use entire dataset as training set
train_dataset = tokenized_dataset


In [7]:
tokenized_dataset.save_to_disk("tokenized_dataset")


Saving the dataset (0/1 shards):   0%|          | 0/172 [00:00<?, ? examples/s]

In [8]:
from datasets import load_from_disk

# Load the saved dataset
tokenized_dataset = load_from_disk("tokenized_dataset")


In [9]:
train_dataset = tokenized_dataset  # Use full set for training since it's only 172 examples


In [10]:
from transformers import DataCollatorForLanguageModeling

data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False
)


In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer, DataCollatorForLanguageModeling
from peft import get_peft_model, LoraConfig, prepare_model_for_kbit_training
from datasets import load_from_disk
import torch

# Load model and tokenizer (TinyLlama)
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token  # Important for Trainer padding

model = AutoModelForCausalLM.from_pretrained(model_name)

# Setup LoRA config
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM"
)

# Inject LoRA adapters
model = get_peft_model(model, lora_config)

# Load the tokenized dataset
dataset = load_from_disk("tokenized_dataset")

# Setup data collator
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False
)

# Training args
training_args = TrainingArguments(
    output_dir="./lora-tinyllama-output",
    per_device_train_batch_size=2,         # Small batch size for CPU
    gradient_accumulation_steps=4,
    num_train_epochs=1,                    # Reduce for quicker runs
    logging_steps=10,
    save_strategy="epoch",
    learning_rate=2e-4,
    fp16=False,                            # Don't use fp16 on CPU
    report_to="none"
)

# Define Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    tokenizer=tokenizer,
    data_collator=data_collator
)

# Start training
trainer.train()


In [1]:
pip install -r requirements.txt


Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


ERROR: Could not open requirements file: [Errno 2] No such file or directory: 'requirements.txt'


In [1]:
login(token="hf_...")


NameError: name 'login' is not defined

In [3]:
import nbformat

# Load the notebook
nb_path = "training.ipynb"
with open(nb_path, "r", encoding="utf-8") as f:
    nb = nbformat.read(f, as_version=4)

# Remove outputs and hidden tokens
for cell in nb.cells:
    if "outputs" in cell:
        cell["outputs"] = []
    if "execution_count" in cell:
        cell["execution_count"] = None
    if cell["cell_type"] == "code":
        cell["source"] = cell["source"].replace("hf_", "REMOVED_TOKEN_")

# Save clean version
with open(nb_path, "w", encoding="utf-8") as f:
    nbformat.write(nb, f)

print("Notebook cleaned!")


Notebook cleaned!
