Update README.md

README.md CHANGED

@@ -48,44 +48,53 @@ import torch
  from unsloth import FastLanguageModel
  from transformers import AutoTokenizer

-
+ # ✅ Define model path (modify if using a different source)
  model_path = "moo100/DeepSeek-R1-telecom-chatbot"
- model, tokenizer = FastLanguageModel.from_pretrained(model_path, max_seq_length=1024, dtype=None)

-
+ # ✅ Load model and tokenizer
+ model, tokenizer = FastLanguageModel.from_pretrained(
+     model_path,
+     max_seq_length=1024,  # Ensures compatibility with training length
+     dtype=None            # Uses default precision
+ )
+
+ # ✅ Optimize model for **fast inference** with Unsloth
  model = FastLanguageModel.for_inference(model)

- # Move model to GPU if available
+ # ✅ Move model to GPU if available, otherwise use CPU
  device = "cuda" if torch.cuda.is_available() else "cpu"
  model.to(device)

- # Define system instruction
+ # ✅ Define system instruction to guide responses
  system_instruction = """You are an AI assistant. Answer user questions concisely and factually.
  Do NOT role-play as a customer service agent. Only answer the user's query."""

- # Define user input
+ # ✅ Define user input (replace with any query)
  user_input = "What are the benefits of 5G?"

- # Construct full prompt
+ # ✅ Construct full prompt with instructions and user query
  full_prompt = f"{system_instruction}\n\nUser: {user_input}\nAssistant:"

- # Tokenize input
+ # ✅ Tokenize input prompt
  inputs = tokenizer(full_prompt, return_tensors="pt").to(device)

- # Generate response
+ # ✅ Generate model response with controlled stopping criteria
  outputs = model.generate(
-     input_ids=inputs.input_ids,
-     attention_mask=inputs.attention_mask,
-     max_new_tokens=100,
-     do_sample=True,
-     temperature=0.5,
-     top_k=50,
-     eos_token_id=tokenizer.eos_token_id,
+     input_ids=inputs.input_ids,            # Encoded input tokens
+     attention_mask=inputs.attention_mask,  # Mask for input length
+     max_new_tokens=100,                    # Limits response length
+     do_sample=True,                        # Enables sampling for variability
+     temperature=0.5,                       # Controls randomness level
+     top_k=50,                              # Samples from the 50 most probable tokens
+     eos_token_id=tokenizer.eos_token_id,   # Stops at the end-of-sequence token
  )

- # Decode and
+ # ✅ Decode and extract only the newly generated response
  response = tokenizer.decode(outputs[0][inputs.input_ids.shape[-1]:], skip_special_tokens=True)
-
+
+ # ✅ Print the AI-generated response
+ print(response.strip())
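
The updated snippet loads the model once but answers a single hard-coded query. To try several questions against the loaded model, the same steps can be folded into a small helper. This is a minimal sketch, not part of the commit; it assumes `model`, `tokenizer`, and `device` were set up exactly as above, and the helper name `ask` is illustrative:

```python
import torch

def ask(model, tokenizer, device, user_input, max_new_tokens=100):
    """Generate one reply, reusing the already loaded model and tokenizer."""
    system_instruction = (
        "You are an AI assistant. Answer user questions concisely and factually.\n"
        "Do NOT role-play as a customer service agent. Only answer the user's query."
    )
    full_prompt = f"{system_instruction}\n\nUser: {user_input}\nAssistant:"
    inputs = tokenizer(full_prompt, return_tensors="pt").to(device)
    with torch.no_grad():  # inference only; skip gradient bookkeeping
        outputs = model.generate(
            input_ids=inputs.input_ids,
            attention_mask=inputs.attention_mask,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=0.5,
            top_k=50,
            eos_token_id=tokenizer.eos_token_id,
        )
    # Keep only the tokens generated after the prompt, as in the README
    new_tokens = outputs[0][inputs.input_ids.shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

# Example usage, assuming model/tokenizer/device from the snippet above
print(ask(model, tokenizer, device, "What are the benefits of 5G?"))
```

Because `do_sample=True`, repeated calls can return different answers to the same question; passing `do_sample=False` switches `generate` to greedy decoding for reproducible output.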

## Training Details