moo100 committed on
Commit
84885e2
·
verified ·
1 Parent(s): 9afa8d5

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +27 -18
README.md CHANGED
@@ -48,44 +48,53 @@ import torch
48
  from unsloth import FastLanguageModel
49
  from transformers import AutoTokenizer
50
 
51
- Load model and tokenizer
52
  model_path = "moo100/DeepSeek-R1-telecom-chatbot"
53
- model, tokenizer = FastLanguageModel.from_pretrained(model_path, max_seq_length=1024, dtype=None)
54
 
55
- Optimize for fast inference
 
 
 
 
 
 
 
56
  model = FastLanguageModel.for_inference(model)
57
 
58
- Move model to GPU if available
59
  device = "cuda" if torch.cuda.is_available() else "cpu"
60
  model.to(device)
61
 
62
- Define system instruction for guided response
63
  system_instruction = """You are an AI assistant. Answer user questions concisely and factually.
64
  Do NOT role-play as a customer service agent. Only answer the user's query."""
65
 
66
- Define user input
67
  user_input = "What are the benefits of 5G?"
68
 
69
- Construct full prompt
70
  full_prompt = f"{system_instruction}\n\nUser: {user_input}\nAssistant:"
71
 
72
- Tokenize input
73
  inputs = tokenizer(full_prompt, return_tensors="pt").to(device)
74
 
75
- Generate response
76
  outputs = model.generate(
77
- input_ids=inputs.input_ids,
78
- attention_mask=inputs.attention_mask,
79
- max_new_tokens=100,
80
- do_sample=True,
81
- temperature=0.5,
82
- top_k=50,
83
- eos_token_id=tokenizer.eos_token_id,
84
  )
85
 
86
- Decode and print response
87
  response = tokenizer.decode(outputs[0][inputs.input_ids.shape[-1]:], skip_special_tokens=True)
88
- print(response.strip())
 
 
 
89
 
90
 
91
  ## Training Details
 
48
  from unsloth import FastLanguageModel
49
  from transformers import AutoTokenizer
50
 
51
+ Define model path (modify if using a different source)
52
  model_path = "moo100/DeepSeek-R1-telecom-chatbot"
 
53
 
54
+ Load model and tokenizer
55
+ model, tokenizer = FastLanguageModel.from_pretrained(
56
+ model_path,
57
+ max_seq_length=1024, # Ensures compatibility with training length
58
+ dtype=None # Auto-detects the best precision for the available hardware
59
+ )
60
+
61
+ ✅ Optimize model for **fast inference** with Unsloth
62
  model = FastLanguageModel.for_inference(model)
63
 
64
+ Move model to GPU if available, otherwise use CPU
65
  device = "cuda" if torch.cuda.is_available() else "cpu"
66
  model.to(device)
67
 
68
+ Define system instruction to guide responses
69
  system_instruction = """You are an AI assistant. Answer user questions concisely and factually.
70
  Do NOT role-play as a customer service agent. Only answer the user's query."""
71
 
72
+ Define user input (replace with any query)
73
  user_input = "What are the benefits of 5G?"
74
 
75
+ Construct full prompt with instructions and user query
76
  full_prompt = f"{system_instruction}\n\nUser: {user_input}\nAssistant:"
77
 
78
+ Tokenize input prompt
79
  inputs = tokenizer(full_prompt, return_tensors="pt").to(device)
80
 
81
+ Generate model response with controlled stopping criteria
82
  outputs = model.generate(
83
+ input_ids=inputs.input_ids, # Encoded input tokens
84
+ attention_mask=inputs.attention_mask, # Marks which input tokens the model should attend to
85
+ max_new_tokens=100, # Limits response length
86
+ do_sample=True, # Enables randomness for variability
87
+ temperature=0.5, # Controls randomness level
88
+ top_k=50, # Samples only from the 50 most probable tokens
89
+ eos_token_id=tokenizer.eos_token_id, # Stops at the end-of-sequence token
90
  )
91
 
92
+ Decode and extract only the newly generated response
93
  response = tokenizer.decode(outputs[0][inputs.input_ids.shape[-1]:], skip_special_tokens=True)
94
+
95
+ ✅ Print the AI-generated response
96
+ print(response.strip())
97
+
98
 
99
 
100
  ## Training Details