Update app.py
Browse files
app.py
CHANGED
|
@@ -15,11 +15,12 @@ asr = pipeline(
|
|
| 15 |
# --------------------------
|
| 16 |
# 2. Language Model (LLM)
|
| 17 |
# --------------------------
|
| 18 |
-
llm_model_id = "tiiuae/falcon-…"  [removed-line string truncated in this capture; exact prior model id not recoverable — the commit replaces it with the lighter "tiiuae/falcon-rw-1b"]
|
| 19 |
tokenizer = AutoTokenizer.from_pretrained(llm_model_id)
|
| 20 |
llm_model = AutoModelForCausalLM.from_pretrained(
|
| 21 |
-
llm_model_id,
|
| 22 |
-
[removed line 22 content missing from this capture — per the replacement lines' own comments it held the deprecated `torch_dtype` argument and a `device_map` setting, along with the closing `)` of the `from_pretrained(` call]
|
|
|
| 23 |
|
| 24 |
def ask_llm(prompt, max_new_tokens=200):
|
| 25 |
inputs = tokenizer(prompt, return_tensors="pt").to(llm_model.device)
|
|
|
|
| 15 |
# --------------------------
|
| 16 |
# 2. Language Model (LLM)
|
| 17 |
# --------------------------
|
| 18 |
+
llm_model_id = "tiiuae/falcon-rw-1b" # lighter model for CPU
|
| 19 |
tokenizer = AutoTokenizer.from_pretrained(llm_model_id)
|
| 20 |
llm_model = AutoModelForCausalLM.from_pretrained(
|
| 21 |
+
llm_model_id,
|
| 22 |
+
dtype=torch.float32 # use dtype instead of deprecated torch_dtype
|
| 23 |
+
).to("cpu") # remove device_map to avoid accelerate requirement
|
| 24 |
|
| 25 |
def ask_llm(prompt, max_new_tokens=200):
|
| 26 |
inputs = tokenizer(prompt, return_tensors="pt").to(llm_model.device)
|