AxionLab-official committed on
Commit
5e2ce1e
·
verified ·
1 Parent(s): 545b229

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -8
app.py CHANGED
@@ -26,7 +26,7 @@ print("🔄 Loading model...")
26
  model = AutoModelForCausalLM.from_pretrained(
27
  MODEL_ID,
28
  token=HF_TOKEN,
29
- torch_dtype=torch.float32,
30
  device_map="auto"
31
  )
32
  model.eval()
@@ -51,7 +51,7 @@ def chat(user_input, system_prompt, temperature, top_p, max_tokens):
51
  return_tensors="pt"
52
  ).to(model.device)
53
 
54
- with torch.no_grad():
55
  output = model.generate(
56
  **inputs,
57
  max_new_tokens=int(max_tokens),
@@ -63,14 +63,14 @@ def chat(user_input, system_prompt, temperature, top_p, max_tokens):
63
  pad_token_id=tokenizer.eos_token_id
64
  )
65
 
66
- generated_tokens = output[0][inputs["input_ids"].shape[-1]:]
67
 
68
- decoded = tokenizer.decode(
69
- generated_tokens,
70
- skip_special_tokens=True
71
- )
72
 
73
- return decoded.strip()
74
 
75
  # =========================
76
  # GRADIO UI
 
26
  model = AutoModelForCausalLM.from_pretrained(
27
  MODEL_ID,
28
  token=HF_TOKEN,
29
+ torch_dtype=torch.float32, # pode trocar pra bfloat16 se tiver GPU
30
  device_map="auto"
31
  )
32
  model.eval()
 
51
  return_tensors="pt"
52
  ).to(model.device)
53
 
54
+ with torch.no_grad():
55
  output = model.generate(
56
  **inputs,
57
  max_new_tokens=int(max_tokens),
 
63
  pad_token_id=tokenizer.eos_token_id
64
  )
65
 
66
+ generated_tokens = output[0][inputs["input_ids"].shape[-1]:]
67
 
68
+ decoded = tokenizer.decode(
69
+ generated_tokens,
70
+ skip_special_tokens=True
71
+ )
72
 
73
+ return decoded.strip()
74
 
75
  # =========================
76
  # GRADIO UI