umairimran committed (verified) · Commit ff07c51 · Parent(s): 386a244

Update README.md

Files changed (1): README.md (+49 -1)
README.md CHANGED
---
license: llama3.1
language:
- en
base_model: unsloth/llama-3-8b-bnb-4bit
---
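The snippet below loads the 4-bit base model, attaches the LoRA adapter, and generates a streamed response using the Alpaca-style prompt template from fine-tuning. It assumes `transformers`, `peft`, `accelerate`, `bitsandbytes`, and `unsloth` are installed (an assumed environment; the card itself does not pin requirements):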
```python
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
from peft import PeftModel, PeftConfig
from unsloth import FastLanguageModel
import torch

# Alpaca-style prompt template used during fine-tuning
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

# Load the PEFT (LoRA adapter) configuration; not strictly required for
# generation, but useful for inspecting the adapter settings
config = PeftConfig.from_pretrained("umairimran/medical_chatbot_model_trained_on_ruslanmv_ai-medical-chatbot")

# Load the base model from Hugging Face in float16
base_model = AutoModelForCausalLM.from_pretrained(
    "unsloth/Meta-Llama-3.1-8B-bnb-4bit",
    torch_dtype=torch.float16,  # use float16 weights
    device_map="auto",          # automatically map the model to available devices
)

# Attach the LoRA adapter to the base model
model = PeftModel.from_pretrained(base_model, "umairimran/medical_chatbot_model_trained_on_ruslanmv_ai-medical-chatbot")

# Initialize the tokenizer
tokenizer = AutoTokenizer.from_pretrained("unsloth/Meta-Llama-3.1-8B-bnb-4bit")

# Optimize the model for inference (applies Unsloth's fast-inference patches)
FastLanguageModel.for_inference(model)

# Prepare the input; no .to("cuda") needed because device_map="auto"
# handles device placement
inputs = tokenizer(
    alpaca_prompt.format(
        "Hello doctor, can you help me? I want to know about the flu.",  # instruction
        "I don't know anything about the flu.",                          # input
        "",  # output - leave this blank for generation!
    ),
    return_tensors="pt",
)

# Stream tokens to stdout as they are generated
text_streamer = TextStreamer(tokenizer)

# Generate the response
_ = model.generate(**inputs, streamer=text_streamer, max_new_tokens=128)
```
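
If you want the generated text as a string rather than streamed output, a minimal sketch reusing the `model`, `tokenizer`, and `inputs` from above:

```python
# Minimal sketch (assumes the objects defined above): decode instead of streaming
output_ids = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
```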