mkfallah committed on
Commit
8e34c90
·
verified ·
1 Parent(s): 4b3481c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -3
app.py CHANGED
@@ -15,11 +15,12 @@ asr = pipeline(
15
  # --------------------------
16
  # 2. Language Model (LLM)
17
  # --------------------------
18
- llm_model_id = "tiiuae/falcon-7b-instruct"
19
  tokenizer = AutoTokenizer.from_pretrained(llm_model_id)
20
  llm_model = AutoModelForCausalLM.from_pretrained(
21
- llm_model_id, torch_dtype=torch.bfloat16, device_map="auto"
22
- )
 
23
 
24
  def ask_llm(prompt, max_new_tokens=200):
25
  inputs = tokenizer(prompt, return_tensors="pt").to(llm_model.device)
 
15
  # --------------------------
16
  # 2. Language Model (LLM)
17
  # --------------------------
18
+ llm_model_id = "tiiuae/falcon-rw-1b" # lighter model for CPU
19
  tokenizer = AutoTokenizer.from_pretrained(llm_model_id)
20
  llm_model = AutoModelForCausalLM.from_pretrained(
21
+ llm_model_id,
22
+ dtype=torch.float32 # use dtype instead of deprecated torch_dtype
23
+ ).to("cpu") # remove device_map to avoid accelerate requirement
24
 
25
  def ask_llm(prompt, max_new_tokens=200):
26
  inputs = tokenizer(prompt, return_tensors="pt").to(llm_model.device)