omdeep22 committed (verified)
Commit 9729d95 · 1 Parent(s): 84ef404

Update README.md

Files changed (1): README.md (+25 -29)
README.md CHANGED
@@ -32,43 +32,39 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 
 model_id = "omdeep22/Gonyai-v1"
 
-# 1. Load Tokenizer and Model
+device = "cuda" if torch.cuda.is_available() else "cpu"
+dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
     trust_remote_code=True,
-    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
-).to("cuda" if torch.cuda.is_available() else "cpu")
-
-# 2. Define your prompt using the Chat Template
-# This automatically handles the <|user|> and <|assistant|> tags
-messages = [
-    {"role": "user", "content": "गोंयच्या पावसाचेर एक कविता बरोव."}
-]
+    torch_dtype=dtype
+).to(device)
 
-tokenized_chat = tokenizer.apply_chat_template(
+messages = [{"role": "user", "content": "गोंयच्या पावसाचेर एक कविता बरोव."}]
+inputs = tokenizer.apply_chat_template(
     messages,
     tokenize=True,
     add_generation_prompt=True,
-    return_tensors="pt"
-).to(model.device)
+    return_tensors="pt",
+    return_dict=True
+).to(device)
 
-# 3. Optimized Inference Settings for 160M Architecture
-outputs = model.generate(
-    tokenized_chat,
-    max_new_tokens=80,        # Prevents rambling/hallucinations
-    min_new_tokens=10,        # Ensures a meaningful response
-    temperature=0.3,          # Keeps the model focused and logical
-    top_k=40,                 # Filters out low-probability noise
-    top_p=0.9,                # Nucleus sampling for coherence
-    repetition_penalty=1.2,   # Prevents looping in small models
-    do_sample=True,
-    eos_token_id=tokenizer.eos_token_id,
-    pad_token_id=tokenizer.eos_token_id
-)
+with torch.inference_mode():
+    with torch.autocast(device_type=device, dtype=dtype):
+        outputs = model.generate(
+            input_ids=inputs["input_ids"],
+            attention_mask=inputs["attention_mask"],
+            max_new_tokens=100,
+            temperature=0.3,
+            repetition_penalty=1.2,
+            do_sample=True,
+            eos_token_id=tokenizer.eos_token_id,
+            pad_token_id=tokenizer.eos_token_id
+        )
 
-# 4. Decode only the NEW tokens generated by the assistant
-generated_tokens = outputs[0][tokenized_chat.shape[-1]:]
-response = tokenizer.decode(generated_tokens, skip_special_tokens=True)
+generated_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
+response = tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()
 
-print(f"Assistant: {response}")
+print(f"\nAssistant: {response}")