Spaces:

nsultan5
/

customer_support_ai_agent

Sleeping

nsultan5 committed on Aug 7, 2025

Commit

8844959

verified ·

1 Parent(s): 481bbf7

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -26,17 +26,29 @@ if hf_token is None:
     raise ValueError("HF_TOKEN environment variable not found. Please add it in your secrets or environment.")
 # Load tokenizer
-tokenizer = AutoTokenizer.from_pretrained(llama_model_id, use_auth_token=hf_token)
 # Load quantized model (AutoGPTQ)
 model = AutoGPTQForCausalLM.from_quantized(
-    llama_model_id,
     use_safetensors=True,
     device="cuda" if torch.cuda.is_available() else "cpu",
-    use_triton=True,        # Use Triton for faster inference if available
-    use_auth_token=hf_token,
 )
 # Text generation pipeline
 text_pipe = pipeline(
     "text-generation",

     raise ValueError("HF_TOKEN environment variable not found. Please add it in your secrets or environment.")
 # Load tokenizer
 # Load quantized model (AutoGPTQ)
+from auto_gptq import AutoGPTQForCausalLM
+from transformers import AutoTokenizer
+model_name = "TheBloke/Llama-2-7B-Chat-GPTQ"  # change to your GPTQ model ID
+tokenizer = AutoTokenizer.from_pretrained(
+    model_name,
+    token=hf_token,
+    trust_remote_code=True
+)
 model = AutoGPTQForCausalLM.from_quantized(
+    model_name_or_path=model_name,
     use_safetensors=True,
+    trust_remote_code=True,
     device="cuda" if torch.cuda.is_available() else "cpu",
+    token=hf_token
 )
 # Text generation pipeline
 text_pipe = pipeline(
     "text-generation",