Araik Tamazian committed
Commit e85c478 · 1 Parent(s): 9b5d461

fixed bug: set a real pad token and pass an explicit attention mask during generation

Files changed (1): app.py +29 -9
app.py CHANGED
@@ -38,7 +38,13 @@ class LLMSalesExtractor:
 
         try:
             # Initialize with CPU-only, optimized for 2 cores
-            self.tokenizer = AutoTokenizer.from_pretrained(self.model_name, padding_side='left')
+            self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
+
+            # Set pad token to avoid attention mask issues
+            if self.tokenizer.pad_token is None:
+                self.tokenizer.pad_token = self.tokenizer.unk_token if self.tokenizer.unk_token else "[PAD]"
+                self.tokenizer.pad_token_id = self.tokenizer.convert_tokens_to_ids(self.tokenizer.pad_token)
+
             self.model = AutoModelForCausalLM.from_pretrained(
                 self.model_name,
                 torch_dtype=torch.float32,
@@ -46,9 +52,9 @@
                 low_cpu_mem_usage=True
             )
 
-            # Set pad token if not exists
-            if self.tokenizer.pad_token is None:
-                self.tokenizer.pad_token = self.tokenizer.eos_token
+            # Resize token embeddings if we added a new pad token
+            if self.tokenizer.pad_token != self.tokenizer.eos_token:
+                self.model.resize_token_embeddings(len(self.tokenizer))
 
             self.llm_available = True
             print("LLM model loaded successfully")
@@ -206,21 +212,35 @@ JSON:"""
         try:
             prompt = self.generate_llm_prompt(text)
 
-            # Tokenize and generate
-            inputs = self.tokenizer.encode(prompt, return_tensors='pt', max_length=512, truncation=True)
+            # Tokenize with proper attention mask
+            inputs = self.tokenizer(
+                prompt,
+                return_tensors='pt',
+                max_length=512,
+                truncation=True,
+                padding=True,
+                return_attention_mask=True
+            )
+
+            input_ids = inputs['input_ids']
+            attention_mask = inputs['attention_mask']
 
             with torch.no_grad():
                 outputs = self.model.generate(
-                    inputs,
+                    input_ids,
+                    attention_mask=attention_mask,
                     max_new_tokens=200,
                     temperature=0.1,
                     do_sample=True,
                     pad_token_id=self.tokenizer.eos_token_id,
+                    eos_token_id=self.tokenizer.eos_token_id,
                     num_return_sequences=1
                 )
 
-            # Decode response
-            response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+            # Decode response (skip the input tokens)
+            input_length = input_ids.shape[1]
+            generated_tokens = outputs[0][input_length:]
+            response = self.tokenizer.decode(generated_tokens, skip_special_tokens=True)
 
             # Extract JSON from response
             json_start = response.find('{')
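The generation hunk fixes two related problems. First, tokenizer.encode returned bare input_ids, so generate() ran without an attention mask; calling the tokenizer directly yields both tensors, and passing attention_mask explicitly removes the ambiguity (and the warning) that arises when the pad and eos ids coincide. Second, decode(outputs[0]) previously returned the prompt plus the completion, so the downstream response.find('{') could latch onto a brace inside the prompt itself; slicing off the first input_ids.shape[1] tokens leaves only the newly generated text. A minimal sketch of the fixed flow, again with distilgpt2 standing in for the app's model and an illustrative prompt:

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    # Assumption: distilgpt2 and the prompt below are stand-ins for the app's own
    tokenizer = AutoTokenizer.from_pretrained("distilgpt2")
    model = AutoModelForCausalLM.from_pretrained("distilgpt2")
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token  # simplest choice for a one-off demo

    prompt = 'Extract the fields and answer with JSON:'
    inputs = tokenizer(prompt, return_tensors='pt', max_length=512,
                       truncation=True, return_attention_mask=True)

    with torch.no_grad():
        outputs = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],  # explicit mask, no warning
            max_new_tokens=50,
            temperature=0.1,
            do_sample=True,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )

    # Keep only the newly generated tokens, not the echoed prompt
    new_tokens = outputs[0][inputs['input_ids'].shape[1]:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True)
    print(response)

Note that the hunk still passes pad_token_id=self.tokenizer.eos_token_id to generate(); for a single unpadded prompt that id is only used to pad out a finished sequence, so the dedicated pad token set up earlier mainly matters at tokenization time.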