Vladislav Krasnov committed
Commit · 56da25c
Parent(s): cd00e73
Update space 12
app.py
CHANGED
@@ -3,8 +3,8 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 
 # Use lighter model for CPU
-
-model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # 1.1B - much lighter
+model_name = "microsoft/phi-2"  # 2.7B - TOO HEAVY
+#model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # 1.1B - much lighter
 
 try:
     print(f"Loading {model_name}...")
@@ -43,7 +43,7 @@ def generate_response(message):
     outputs = model.generate(
         inputs.input_ids,
         attention_mask=inputs.attention_mask,  # FIX: Add attention mask
-        max_new_tokens=
+        max_new_tokens=400,  # Reduced for CPU
         temperature=0.7,
         do_sample=True,
         top_p=0.9,
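For context, a minimal sketch of how the changed lines might fit together in app.py on a CPU Space. Only the model names, the attention_mask and max_new_tokens arguments, and the sampling parameters come from the diff above; the tokenizer setup, the body of generate_response(), and the decoding step are assumptions following the standard transformers load-and-generate pattern, not the Space's actual code.

# Hedged sketch, assuming a standard transformers flow around the diffed lines.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "microsoft/phi-2"  # value after this commit

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float32)

def generate_response(message):
    # Tokenize the user message; the exact prompt handling is an assumption.
    inputs = tokenizer(message, return_tensors="pt")
    outputs = model.generate(
        inputs.input_ids,
        attention_mask=inputs.attention_mask,  # explicit mask, as in the diff
        max_new_tokens=400,                    # reduced for CPU, as in the diff
        temperature=0.7,
        do_sample=True,
        top_p=0.9,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)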