ranggafermata committed on
Commit
fe03a00
Β·
verified Β·
1 Parent(s): dfb30cc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -2
app.py CHANGED
@@ -10,9 +10,17 @@ login(token=os.getenv("HF_TOKEN"))
10
 
11
  # Load the model and tokenizer
12
  model_name = "ranggafermata/Fermata-v1.2-lightcoder"
 
 
 
 
13
  tokenizer = AutoTokenizer.from_pretrained(model_name)
14
- tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
15
- model.eval()
 
 
 
 
16
 
17
  def generate_code(prompt, max_tokens, temperature, top_p):
18
  inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
 
10
 
11
  # Load the model and tokenizer
12
  model_name = "ranggafermata/Fermata-v1.2-lightcoder"
13
+ device = "cuda" if torch.cuda.is_available() else "cpu"
14
+ dtype = torch.float16 if device == "cuda" else torch.float32
15
+
16
+ # Load tokenizer and model with eager attention
17
  tokenizer = AutoTokenizer.from_pretrained(model_name)
18
+ model = AutoModelForCausalLM.from_pretrained(
19
+ model_id,
20
+ torch_dtype=dtype,
21
+ attn_implementation="eager",
22
+ trust_remote_code=True
23
+ ).to(device)
24
 
25
  def generate_code(prompt, max_tokens, temperature, top_p):
26
  inputs = tokenizer(prompt, return_tensors="pt").to(model.device)