Spaces:

AstroMLab
/

AstroSage-8B

Runtime error

Tijmen2 committed on Nov 18, 2024

Commit

9baf1c4

verified ·

1 Parent(s): 5d84ead

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -4,21 +4,11 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
 import torch
 import random
-# Define model parameters for 8-bit quantized loading
-model_name = "AstroMLab/AstroSage-8B"
-# Load the tokenizer
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-# Load the model with 8-bit quantization using bitsandbytes
-model = AutoModelForCausalLM.from_pretrained(
-    model_name,
-    torch_dtype=torch.float16,
-    load_in_8bit=True,               # Enable 8-bit quantization
-    device_map="auto"                # Automatically assign layers to available GPUs
-)
-streamer = TextStreamer(tokenizer)
 # Placeholder responses for when context is empty
 GREETING_MESSAGES = [
@@ -37,7 +27,24 @@ def user(user_message, history):
 @spaces.GPU(duration=20)
 def bot(history):
     """Generate the chatbot response."""
     if not history:
         history = []

 import torch
 import random
+MODEL_NAME = "AstroMLab/AstroSage-8B"
+model = None
+tokenizer = None
+streamer = None # these will be initialized the first time the bot function runs
 # Placeholder responses for when context is empty
 GREETING_MESSAGES = [
 @spaces.GPU(duration=20)
 def bot(history):
     """Generate the chatbot response."""
+    global model, tokenizer, streamer
+    if not model:
+        # initialize the LLM
+        # Load the tokenizer
+        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+        # Load the model with 8-bit quantization using bitsandbytes
+        model = AutoModelForCausalLM.from_pretrained(
+            MODEL_NAME,
+            torch_dtype=torch.bfloat16,
+            load_in_8bit=True,               # Enable 8-bit quantization
+            device_map="auto"                # Automatically assign layers to available GPUs
+        )
+        streamer = TextStreamer(tokenizer)
     if not history:
         history = []