Vladislav Krasnov committed on
Commit
cb115bc
·
1 Parent(s): 56da25c

I hope it's the final commit...

Browse files
Files changed (1) hide show
  1. app.py +5 -5
app.py CHANGED
@@ -3,8 +3,8 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
3
  import torch
4
 
5
  # Use lighter model for CPU
6
- model_name = "microsoft/phi-2" # 2.7B - TOO HEAVY
7
- #model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0" # 1.1B - much lighter
8
 
9
  try:
10
  print(f"Loading {model_name}...")
@@ -43,8 +43,8 @@ def generate_response(message):
43
  outputs = model.generate(
44
  inputs.input_ids,
45
  attention_mask=inputs.attention_mask, # FIX: Add attention mask
46
- max_new_tokens=400, # Reduced for CPU
47
- temperature=0.7,
48
  do_sample=True,
49
  top_p=0.9,
50
  pad_token_id=tokenizer.pad_token_id,
@@ -79,6 +79,6 @@ interface.queue(default_concurrency_limit=1)
79
  interface.launch(
80
  server_name="0.0.0.0",
81
  server_port=7860,
82
- share=False,
83
  debug=False
84
  )
 
3
  import torch
4
 
5
  # Use lighter model for CPU
6
+ #model_name = "microsoft/phi-2" # 2.7B - TOO HEAVY
7
+ model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0" # 1.1B - much lighter
8
 
9
  try:
10
  print(f"Loading {model_name}...")
 
43
  outputs = model.generate(
44
  inputs.input_ids,
45
  attention_mask=inputs.attention_mask, # FIX: Add attention mask
46
+ max_new_tokens=600, # Reduced for CPU
47
+ temperature=0.8,
48
  do_sample=True,
49
  top_p=0.9,
50
  pad_token_id=tokenizer.pad_token_id,
 
79
  interface.launch(
80
  server_name="0.0.0.0",
81
  server_port=7860,
82
+ share=True,
83
  debug=False
84
  )