Spaces:

satishpednekar
/

sbxcerthelper

Runtime error

satishpednekar committed on Jan 20

Commit

6cc0654

verified ·

1 Parent(s): 2bc71d6

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -7,12 +7,13 @@ MODEL_NAME = "satishpednekar/sbxcertqueryhelper"
 def load_model():
     tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
     model = AutoModelForCausalLM.from_pretrained(
         MODEL_NAME,
-        torch_dtype=torch.float16,
         device_map="auto",
         trust_remote_code=True,
-        load_in_8bit=True  # Enable 8-bit quantization for memory efficiency
     )
     return model, tokenizer
@@ -27,7 +28,9 @@ def generate_response(prompt, max_length=512, temperature=0.7, top_p=0.95):
     """
     try:
         # Prepare the input
-        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
         # Generate
         outputs = model.generate(

 def load_model():
     tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
+    # Modified model loading without 8-bit quantization
     model = AutoModelForCausalLM.from_pretrained(
         MODEL_NAME,
+        torch_dtype=torch.float32,  # Use float32 instead of float16 for better compatibility
         device_map="auto",
         trust_remote_code=True,
+        # Removed load_in_8bit parameter
     )
     return model, tokenizer
     """
     try:
         # Prepare the input
+        inputs = tokenizer(prompt, return_tensors="pt")
+        if torch.cuda.is_available():
+            inputs = inputs.to(model.device)
         # Generate
         outputs = model.generate(