Araik Tamazian committed
Commit e85c478
Parent(s): 9b5d461
fixed bug
app.py
CHANGED
@@ -38,7 +38,13 @@ class LLMSalesExtractor:
 
         try:
             # Initialize with CPU-only, optimized for 2 cores
-            self.tokenizer = AutoTokenizer.from_pretrained(self.model_name
+            self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
+
+            # Set pad token to avoid attention mask issues
+            if self.tokenizer.pad_token is None:
+                self.tokenizer.pad_token = self.tokenizer.unk_token if self.tokenizer.unk_token else "[PAD]"
+                self.tokenizer.pad_token_id = self.tokenizer.convert_tokens_to_ids(self.tokenizer.pad_token)
+
             self.model = AutoModelForCausalLM.from_pretrained(
                 self.model_name,
                 torch_dtype=torch.float32,
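The pad-token fallback added here is easy to exercise in isolation. A minimal sketch, assuming only the transformers library; "distilgpt2" is a stand-in checkpoint, not the Space's actual model:

from transformers import AutoTokenizer

# Stand-in checkpoint: GPT-2 tokenizers ship without a pad token,
# so the fallback branch actually fires here.
tokenizer = AutoTokenizer.from_pretrained("distilgpt2")

if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.unk_token if tokenizer.unk_token else "[PAD]"
    tokenizer.pad_token_id = tokenizer.convert_tokens_to_ids(tokenizer.pad_token)

print(tokenizer.pad_token, tokenizer.pad_token_id)  # '<|endoftext|>' 50256 for GPT-2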
@@ -46,9 +52,9 @@ class LLMSalesExtractor:
                 low_cpu_mem_usage=True
             )
 
-            #
-            if self.tokenizer.pad_token
-            self.
+            # Resize token embeddings if we added a new pad token
+            if self.tokenizer.pad_token != self.tokenizer.eos_token:
+                self.model.resize_token_embeddings(len(self.tokenizer))
 
             self.llm_available = True
             print("LLM model loaded successfully")
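The resize only matters when the pad token is genuinely new: a token appended to the vocabulary gets an id just past the model's existing embedding table. A sketch of that case, again with a stand-in checkpoint:

from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("distilgpt2")  # stand-in checkpoint
model = AutoModelForCausalLM.from_pretrained("distilgpt2")

# add_special_tokens() reports how many tokens were actually new;
# only then does the embedding table need to grow.
num_added = tokenizer.add_special_tokens({"pad_token": "[PAD]"})
if num_added > 0:
    model.resize_token_embeddings(len(tokenizer))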
@@ -206,21 +212,35 @@ JSON:"""
         try:
             prompt = self.generate_llm_prompt(text)
 
-            # Tokenize
-            inputs = self.tokenizer
+            # Tokenize with proper attention mask
+            inputs = self.tokenizer(
+                prompt,
+                return_tensors='pt',
+                max_length=512,
+                truncation=True,
+                padding=True,
+                return_attention_mask=True
+            )
+
+            input_ids = inputs['input_ids']
+            attention_mask = inputs['attention_mask']
 
             with torch.no_grad():
                 outputs = self.model.generate(
-
+                    input_ids,
+                    attention_mask=attention_mask,
                     max_new_tokens=200,
                     temperature=0.1,
                     do_sample=True,
                     pad_token_id=self.tokenizer.eos_token_id,
+                    eos_token_id=self.tokenizer.eos_token_id,
                     num_return_sequences=1
                 )
 
-            # Decode response
-
+            # Decode response (skip the input tokens)
+            input_length = input_ids.shape[1]
+            generated_tokens = outputs[0][input_length:]
+            response = self.tokenizer.decode(generated_tokens, skip_special_tokens=True)
 
             # Extract JSON from response
             json_start = response.find('{')
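Put together, the fixed path tokenizes with an explicit attention mask, passes that mask to generate(), and decodes only the tokens after the prompt. An end-to-end sketch under the same stand-in assumptions ("distilgpt2" as the checkpoint, a toy prompt in place of the Space's real template):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("distilgpt2")  # stand-in checkpoint
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained("distilgpt2")

inputs = tokenizer(
    "Extract the sale as JSON:",  # toy prompt, not the Space's template
    return_tensors="pt",
    max_length=512,
    truncation=True,
    padding=True,
    return_attention_mask=True,
)

with torch.no_grad():
    outputs = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=200,
        temperature=0.1,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
        eos_token_id=tokenizer.eos_token_id,
    )

# Slice off the prompt so only newly generated text is decoded.
new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
print(tokenizer.decode(new_tokens, skip_special_tokens=True))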