AxelDlv00 committed on
Commit
9afa635
·
1 Parent(s): 2bed230

correcting the code for inference

Browse files
Files changed (2) hide show
  1. .gitignore +1 -0
  2. README.md +18 -11
.gitignore CHANGED
@@ -0,0 +1 @@
 
 
1
+ *.ipynb
README.md CHANGED
@@ -78,16 +78,19 @@ To use one of the models, load the base `Qwen3-4B` model and then apply the adap
78
  import torch
79
  from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
80
  from peft import PeftModel
 
81
 
82
- # Configuration
83
  base_model_name = "Qwen/Qwen3-4B"
84
  adapter_repo_id = "AxelDlv00/ToxiFrench"
 
85
 
86
- # Choose the target adapter
87
- # ["Standard-SFT", "SOAP-SFT", "SOAP-Oversampled", "SOAP-DWL", "SOAP-DWL-DPO"]
88
- target_adapter = "SOAP-DWL"
 
 
 
89
 
90
- # Quantization
91
  bnb_config = BitsAndBytesConfig(
92
  load_in_4bit=True,
93
  bnb_4bit_use_double_quant=True,
@@ -95,23 +98,27 @@ bnb_config = BitsAndBytesConfig(
95
  bnb_4bit_compute_dtype=torch.bfloat16
96
  )
97
 
98
- # Load Tokenizer & Model
99
- tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
100
  model = AutoModelForCausalLM.from_pretrained(
101
  base_model_name,
102
  quantization_config=bnb_config,
103
- trust_remote_code=True
 
104
  )
105
 
106
- # Load the Adapter
 
 
 
 
 
 
107
  model = PeftModel.from_pretrained(model, adapter_repo_id, subfolder=target_adapter)
108
  model.eval()
109
 
110
- # Inference
111
  text = "Je ne supporte plus ton comportement, tu es vraiment un idiot !"
112
  prompt = f"Message:\n{text}\n\nAnalyse:\n"
113
-
114
  inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
 
115
  with torch.no_grad():
116
  outputs = model.generate(
117
  **inputs,
 
78
  import torch
79
  from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
80
  from peft import PeftModel
81
+ import os
82
 
 
83
  base_model_name = "Qwen/Qwen3-4B"
84
  adapter_repo_id = "AxelDlv00/ToxiFrench"
85
+ target_adapter = "SOAP-DWL-DPO"
86
 
87
+ tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
88
+ if tokenizer.pad_token is None:
89
+ tokenizer.pad_token = tokenizer.eos_token
90
+
91
+ tokens = ["<think>", "</think>"]
92
+ tokenizer.add_special_tokens({"additional_special_tokens": tokens})
93
 
 
94
  bnb_config = BitsAndBytesConfig(
95
  load_in_4bit=True,
96
  bnb_4bit_use_double_quant=True,
 
98
  bnb_4bit_compute_dtype=torch.bfloat16
99
  )
100
 
 
 
101
  model = AutoModelForCausalLM.from_pretrained(
102
  base_model_name,
103
  quantization_config=bnb_config,
104
+ trust_remote_code=True,
105
+ device_map="auto"
106
  )
107
 
108
+ tokenizer_vocab_size = len(tokenizer)
109
+ model_embedding_size = model.get_input_embeddings().weight.size(0)
110
+
111
+ if model_embedding_size != tokenizer_vocab_size:
112
+ print(f"Syncing vocab: {model_embedding_size} -> {tokenizer_vocab_size}")
113
+ model.resize_token_embeddings(tokenizer_vocab_size)
114
+
115
  model = PeftModel.from_pretrained(model, adapter_repo_id, subfolder=target_adapter)
116
  model.eval()
117
 
 
118
  text = "Je ne supporte plus ton comportement, tu es vraiment un idiot !"
119
  prompt = f"Message:\n{text}\n\nAnalyse:\n"
 
120
  inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
121
+
122
  with torch.no_grad():
123
  outputs = model.generate(
124
  **inputs,