Update README.md

README.md CHANGED

@@ -48,44 +48,53 @@ import torch
  from unsloth import FastLanguageModel
  from transformers import AutoTokenizer

-
+ # ✅ Define model path (modify if using a different source)
  model_path = "moo100/DeepSeek-R1-telecom-chatbot"
- model, tokenizer = FastLanguageModel.from_pretrained(model_path, max_seq_length=1024, dtype=None)

-
+ # ✅ Load model and tokenizer
+ model, tokenizer = FastLanguageModel.from_pretrained(
+     model_path,
+     max_seq_length=1024,  # Ensures compatibility with training length
+     dtype=None            # Uses default precision
+ )
+
+ # ✅ Optimize model for **fast inference** with Unsloth
  model = FastLanguageModel.for_inference(model)

- # Move model to GPU if available
+ # ✅ Move model to GPU if available, otherwise use CPU
  device = "cuda" if torch.cuda.is_available() else "cpu"
  model.to(device)

- # Define system instruction
+ # ✅ Define system instruction to guide responses
  system_instruction = """You are an AI assistant. Answer user questions concisely and factually.
  Do NOT role-play as a customer service agent. Only answer the user's query."""

- # Define user input
+ # ✅ Define user input (replace with any query)
  user_input = "What are the benefits of 5G?"

- # Construct full prompt
+ # ✅ Construct full prompt with instructions and user query
  full_prompt = f"{system_instruction}\n\nUser: {user_input}\nAssistant:"

- # Tokenize input
+ # ✅ Tokenize input prompt
  inputs = tokenizer(full_prompt, return_tensors="pt").to(device)

- # Generate response
+ # ✅ Generate model response with controlled stopping criteria
  outputs = model.generate(
-     input_ids=inputs.input_ids,
-     attention_mask=inputs.attention_mask,
-     max_new_tokens=100,
-     do_sample=True,
-     temperature=0.5,
-     top_k=50,
-     eos_token_id=tokenizer.eos_token_id,
+     input_ids=inputs.input_ids,            # Encoded input tokens
+     attention_mask=inputs.attention_mask,  # Mask for input length
+     max_new_tokens=100,                    # Limits response length
+     do_sample=True,                        # Enables sampling for variability
+     temperature=0.5,                       # Controls randomness level
+     top_k=50,                              # Samples from the 50 most probable tokens
+     eos_token_id=tokenizer.eos_token_id,   # Stops at the end-of-sequence token
  )

- # Decode and
+ # ✅ Decode and extract only the newly generated response
  response = tokenizer.decode(outputs[0][inputs.input_ids.shape[-1]:], skip_special_tokens=True)
-
+
+ # ✅ Print the AI-generated response
+ print(response.strip())
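
The updated snippet loads the model once but answers a single hard-coded query. To try several questions against the loaded model, the same steps can be folded into a small helper. This is a minimal sketch, not part of the commit; it assumes `model`, `tokenizer`, and `device` were set up exactly as above, and the helper name `ask` is illustrative:

```python
import torch

def ask(model, tokenizer, device, user_input, max_new_tokens=100):
    """Generate one reply, reusing the already loaded model and tokenizer."""
    system_instruction = (
        "You are an AI assistant. Answer user questions concisely and factually.\n"
        "Do NOT role-play as a customer service agent. Only answer the user's query."
    )
    full_prompt = f"{system_instruction}\n\nUser: {user_input}\nAssistant:"
    inputs = tokenizer(full_prompt, return_tensors="pt").to(device)
    with torch.no_grad():  # inference only; skip gradient bookkeeping
        outputs = model.generate(
            input_ids=inputs.input_ids,
            attention_mask=inputs.attention_mask,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=0.5,
            top_k=50,
            eos_token_id=tokenizer.eos_token_id,
        )
    # Keep only the tokens generated after the prompt, as in the README
    new_tokens = outputs[0][inputs.input_ids.shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

# Example usage, assuming model/tokenizer/device from the snippet above
print(ask(model, tokenizer, device, "What are the benefits of 5G?"))
```

Because `do_sample=True`, repeated calls can return different answers to the same question; passing `do_sample=False` switches `generate` to greedy decoding for reproducible output.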

## Training Details