Spaces:

sarekuwa
/

livecoder

Sleeping

Vladislav Krasnov committed about 1 month ago

Commit

cb115bc

1 Parent(s): 56da25c

I hope it's the final commit...

Files changed (1) hide show

app.py CHANGED Viewed

@@ -3,8 +3,8 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 # Use lighter model for CPU
-model_name = "microsoft/phi-2"  # 2.7B - TOO HEAVY
-#model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # 1.1B - much lighter
 try:
     print(f"Loading {model_name}...")
@@ -43,8 +43,8 @@ def generate_response(message):
             outputs = model.generate(
                 inputs.input_ids,
                 attention_mask=inputs.attention_mask,  # FIX: Add attention mask
-                max_new_tokens=400,  # Reduced for CPU
-                temperature=0.7,
                 do_sample=True,
                 top_p=0.9,
                 pad_token_id=tokenizer.pad_token_id,
@@ -79,6 +79,6 @@ interface.queue(default_concurrency_limit=1)
 interface.launch(
     server_name="0.0.0.0",
     server_port=7860,
-    share=False,
     debug=False
 )

 import torch
 # Use lighter model for CPU
+#model_name = "microsoft/phi-2"  # 2.7B - TOO HEAVY
+model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # 1.1B - much lighter
 try:
     print(f"Loading {model_name}...")
             outputs = model.generate(
                 inputs.input_ids,
                 attention_mask=inputs.attention_mask,  # FIX: Add attention mask
+                max_new_tokens=600,  # Reduced for CPU
+                temperature=0.8,
                 do_sample=True,
                 top_p=0.9,
                 pad_token_id=tokenizer.pad_token_id,
 interface.launch(
     server_name="0.0.0.0",
     server_port=7860,
+    share=True,
     debug=False
 )