Spaces:

ahmedbasemdev
/

FineTunedChatbot

Runtime error

ahmedbasemdev committed on Nov 23, 2024

Commit

3b3c5cf

verified ·

1 Parent(s): 4fd4e48

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -2,9 +2,20 @@ import gradio as gr
 # Load your model and tokenizer
 from transformers import AutoModelForCausalLM, AutoTokenizer
-model_name = "ahmedbasemdev/LLama3.2-fine-tuned"  # Replace with your model name
-model = AutoModelForCausalLM.from_pretrained(model_name)
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 def single_inference(question):

 # Load your model and tokenizer
 from transformers import AutoModelForCausalLM, AutoTokenizer
+import torch
+# Specify the model name
+model_name = "ahmedbasemdev/llama-3.2-3b-ChatBot"
+# Load the model with 8-bit quantization
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    device_map="auto",  # Automatically map the model to the available device (CPU)
+    load_in_8bit=True,  # Enable 8-bit quantization
+    torch_dtype=torch.float16  # Load non-quantized weights in half precision (float16)
+)
+# Load the tokenizer
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 def single_inference(question):