In [1]:
from huggingface_hub import login
from dotenv import load_dotenv
import os
load_dotenv()

# Login to Hugging Face Hub
login(token=os.getenv("HUGGINGFACE_TOKEN"))

In [2]:
from transformers import AutoTokenizer, AutoModelForCausalLM
model_name = "google/gemma-2-2b-it"
model = AutoModelForCausalLM.from_pretrained(model_name, cache_dir=".cache/")
tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=".cache/")

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [3]:
print(model)

Gemma2ForCausalLM(
  (model): Gemma2Model(
    (embed_tokens): Embedding(256000, 2304, padding_idx=0)
    (layers): ModuleList(
      (0-25): 26 x Gemma2DecoderLayer(
        (self_attn): Gemma2Attention(
          (q_proj): Linear(in_features=2304, out_features=2048, bias=False)
          (k_proj): Linear(in_features=2304, out_features=1024, bias=False)
          (v_proj): Linear(in_features=2304, out_features=1024, bias=False)
          (o_proj): Linear(in_features=2048, out_features=2304, bias=False)
          (rotary_emb): Gemma2RotaryEmbedding()
        )
        (mlp): Gemma2MLP(
          (gate_proj): Linear(in_features=2304, out_features=9216, bias=False)
          (up_proj): Linear(in_features=2304, out_features=9216, bias=False)
          (down_proj): Linear(in_features=9216, out_features=2304, bias=False)
          (act_fn): PytorchGELUTanh()
        )
        (input_layernorm): Gemma2RMSNorm((2304,), eps=1e-06)
        (pre_feedforward_layernorm): Gemma2RMSNorm((2304,), eps

In [4]:
%%time
input_text = "I have so many issues to address. I have a history of sexual abuse, I’m a breast cancer survivor and I am a lifetime insomniac.    I have a long history of depression and I’m beginning to have anxiety. I have low self esteem but I’ve been happily married for almost 35 years.I’ve never had counseling about any of this. Do I have too many issues to address in counseling?"

input_ids = tokenizer(input_text, return_tensors="pt")
outputs = model.generate(**input_ids, max_length=128)
print(tokenizer.decode(outputs[0]))

<bos>I have so many issues to address. I have a history of sexual abuse, I’m a breast cancer survivor and I am a lifetime insomniac.    I have a long history of depression and I’m beginning to have anxiety. I have low self esteem but I’ve been happily married for almost 35 years.I’ve never had counseling about any of this. Do I have too many issues to address in counseling?

It's wonderful that you're recognizing the need for support and seeking help. You absolutely do not have too many issues to address in counseling. In fact, it's
CPU times: total: 31.2 s
Wall time: 16.6 s


Model after fine tuning

In [5]:
import torch
from peft import PeftModel
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the base model and tokenizer
model_name = "google/gemma-2-2b-it"
# Load the fine-tuned model
new_model = "gemma-2-2b-ft/"  # Replace with the path to your fine-tuned model

In [6]:
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    cache_dir=".cache/"
)
model = PeftModel.from_pretrained(base_model, new_model, cache_dir = ".cache/")
model = model.merge_and_unload()

# Reload tokenizer to save it
tokenizer = AutoTokenizer.from_pretrained(
    model_name, trust_remote_code=True, cache_dir=".cache/"
)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [7]:
%%time
input_ids = tokenizer(input_text, return_tensors="pt")
outputs = model.generate(**input_ids, max_length=128)
print(tokenizer.decode(outputs[0]))

KeyboardInterrupt: 

In [None]:
model.save_pretrained("gemma-2-2b-it-therapist")
model.push_to_hub("gemma-2-2b-it-therapist", use_auth_token=True, use_temp_dir=False)
tokenizer.save_pretrained("gemma-2-2b-it-therapist")
tokenizer.push_to_hub("gemma-2-2b-it-therapist", use_auth_token=True, use_temp_dir=False)



model-00001-of-00002.safetensors:   0%|          | 0.00/4.99G [00:00<?, ?B/s]

Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/241M [00:00<?, ?B/s]



README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


tokenizer.json:   0%|          | 0.00/34.4M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/ryefoxlime/gemma-2-2b-it-therapist/commit/7ac88faf3ac432c4617e6e1b54969f12cc941e1e', commit_message='Upload tokenizer', commit_description='', oid='7ac88faf3ac432c4617e6e1b54969f12cc941e1e', pr_url=None, repo_url=RepoUrl('https://huggingface.co/ryefoxlime/gemma-2-2b-it-therapist', endpoint='https://huggingface.co', repo_type='model', repo_id='ryefoxlime/gemma-2-2b-it-therapist'), pr_revision=None, pr_num=None)