Vladislav Krasnov committed on
Commit
56da25c
·
1 Parent(s): cd00e73

Update space 12

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -3,8 +3,8 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
3
  import torch
4
 
5
  # Use lighter model for CPU
6
- # model_name = "microsoft/phi-2" # 2.7B - TOO HEAVY
7
- model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0" # 1.1B - much lighter
8
 
9
  try:
10
  print(f"Loading {model_name}...")
@@ -43,7 +43,7 @@ def generate_response(message):
43
  outputs = model.generate(
44
  inputs.input_ids,
45
  attention_mask=inputs.attention_mask, # FIX: Add attention mask
46
- max_new_tokens=150, # Reduced for CPU
47
  temperature=0.7,
48
  do_sample=True,
49
  top_p=0.9,
 
3
  import torch
4
 
5
  # Use lighter model for CPU
6
+ model_name = "microsoft/phi-2" # 2.7B - TOO HEAVY
7
+ #model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0" # 1.1B - much lighter
8
 
9
  try:
10
  print(f"Loading {model_name}...")
 
43
  outputs = model.generate(
44
  inputs.input_ids,
45
  attention_mask=inputs.attention_mask, # FIX: Add attention mask
46
+ max_new_tokens=400, # Reduced for CPU
47
  temperature=0.7,
48
  do_sample=True,
49
  top_p=0.9,