Update app.py
Browse files
app.py
CHANGED
|
@@ -15,11 +15,12 @@ asr = pipeline(
|
|
| 15 |
# --------------------------
|
| 16 |
# 2. Language Model (LLM)
|
| 17 |
# --------------------------
|
| 18 |
-
llm_model_id = "tiiuae/falcon-…"  [removed-line string truncated in this capture; exact prior model id not recoverable — the commit replaces it with the lighter "tiiuae/falcon-rw-1b"]
|
| 19 |
tokenizer = AutoTokenizer.from_pretrained(llm_model_id)
|
| 20 |
llm_model = AutoModelForCausalLM.from_pretrained(
|
| 21 |
-
llm_model_id,
|
| 22 |
-
[removed line 22 content missing from this capture — per the replacement lines' own comments it held the deprecated `torch_dtype` argument and a `device_map` setting, along with the closing `)` of the `from_pretrained(` call]
|
|
|
| 23 |
|
| 24 |
def ask_llm(prompt, max_new_tokens=200):
|
| 25 |
inputs = tokenizer(prompt, return_tensors="pt").to(llm_model.device)
|
|
|
|
| 15 |
# --------------------------
|
| 16 |
# 2. Language Model (LLM)
|
| 17 |
# --------------------------
|
| 18 |
+
llm_model_id = "tiiuae/falcon-rw-1b" # lighter model for CPU
|
| 19 |
tokenizer = AutoTokenizer.from_pretrained(llm_model_id)
|
| 20 |
llm_model = AutoModelForCausalLM.from_pretrained(
|
| 21 |
+
llm_model_id,
|
| 22 |
+
dtype=torch.float32 # use dtype instead of deprecated torch_dtype
|
| 23 |
+
).to("cpu") # remove device_map to avoid accelerate requirement
|
| 24 |
|
| 25 |
def ask_llm(prompt, max_new_tokens=200):
|
| 26 |
inputs = tokenizer(prompt, return_tensors="pt").to(llm_model.device)
|