anaspro committed on
Commit 6d60e00 · 1 Parent(s): 151da18
Files changed (1)
  1. app.py +51 -28
app.py CHANGED
@@ -3,7 +3,7 @@
 import os
 import torch
 import transformers
-from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
+from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 import gradio as gr
 import spaces
 
@@ -24,36 +24,59 @@ hf_token = os.getenv("HF_TOKEN")
 
 # Initialize model and tokenizer separately for better control
 print("Loading model and tokenizer...")
-tokenizer = AutoTokenizer.from_pretrained(
-    model_path,
-    token=hf_token,
-    trust_remote_code=True
-)
+try:
+    tokenizer = AutoTokenizer.from_pretrained(
+        model_path,
+        token=hf_token,
+        trust_remote_code=True
+    )
 
-model = AutoModelForCausalLM.from_pretrained(
-    model_path,
-    device_map="auto",
-    token=hf_token,
-    trust_remote_code=True,
-    torch_dtype=torch.bfloat16,
-    low_cpu_mem_usage=True,
-    quantization_config={
-        "load_in_4bit": True,
-        "bnb_4bit_use_double_quant": True,
-        "bnb_4bit_quant_type": "nf4",
-        "bnb_4bit_compute_dtype": torch.bfloat16
-    }
-)
+    # Load model with proper quantization config
+    from transformers import BitsAndBytesConfig
+
+    bnb_config = BitsAndBytesConfig(
+        load_in_4bit=True,
+        bnb_4bit_use_double_quant=True,
+        bnb_4bit_quant_type="nf4",
+        bnb_4bit_compute_dtype=torch.bfloat16
+    )
+
+    model = AutoModelForCausalLM.from_pretrained(
+        model_path,
+        quantization_config=bnb_config,
+        device_map="auto",
+        token=hf_token,
+        trust_remote_code=True,
+        torch_dtype=torch.bfloat16,
+        low_cpu_mem_usage=True
+    )
 
-# Create pipeline with the loaded model
-pipeline_model = pipeline(
-    "text-generation",
-    model=model,
-    tokenizer=tokenizer,
-    device_map="auto"
-)
+    # Create pipeline with the loaded model
+    pipeline_model = pipeline(
+        "text-generation",
+        model=model,
+        tokenizer=tokenizer
+    )
 
-print("Model loaded successfully!")
+    print("Model loaded successfully!")
+
+except Exception as e:
+    print(f"Error loading model: {e}")
+    # Fallback to direct pipeline loading
+    print("Trying alternative loading method...")
+    pipeline_model = pipeline(
+        "text-generation",
+        model=model_path,
+        token=hf_token,
+        trust_remote_code=True,
+        model_kwargs={
+            "torch_dtype": torch.bfloat16,
+            "low_cpu_mem_usage": True,
+        }
+    )
+    tokenizer = pipeline_model.tokenizer
+    print("Model loaded with fallback method!")
+
 
 def generate_with_pipeline(messages, max_new_tokens=256, temperature=0.7, top_p=0.9, top_k=50, repetition_penalty=1.0):
     """Generate response using the pipeline with messages format"""