Deva1211 committed
Commit bc6fc3d · verified · 1 Parent(s): 4e432fc

Now using TheBloke/Falcon-180B-Chat-GPTQ

Files changed (1)
app.py +3 -3
app.py CHANGED
@@ -10,7 +10,7 @@ print("Loading optimized Mistral model...")
 try:
     # First try: AWQ quantized model (best performance)
     print("🔄 Attempting to load AWQ model...")
-    tokenizer = AutoTokenizer.from_pretrained("TheBloke/alpaca-lora-65B-GPTQ")
+    tokenizer = AutoTokenizer.from_pretrained("TheBloke/Falcon-180B-Chat-GPTQ")
     model = AutoModelForCausalLM.from_pretrained(
         "TheBloke/Mistral-7B-Instruct-v0.2-AWQ",
         device_map="auto",
@@ -25,9 +25,9 @@ except Exception as e:
     try:
         # Second try: Use a smaller, more compatible model
         print("🔄 Falling back to Mistral-7B-Instruct-v0.1 (more compatible)...")
-        tokenizer = AutoTokenizer.from_pretrained("TheBloke/alpaca-lora-65B-GPTQ")
+        tokenizer = AutoTokenizer.from_pretrained("TheBloke/Falcon-180B-Chat-GPTQ")
         model = AutoModelForCausalLM.from_pretrained(
-            "TheBloke/alpaca-lora-65B-GPTQ",
+            "TheBloke/Falcon-180B-Chat-GPTQ",
             device_map="auto",
             torch_dtype=torch.float16,
             low_cpu_mem_usage=True,
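
For context, here is a minimal self-contained sketch of the try/fallback loading pattern this file uses, with the model IDs as of this commit. The imports and the except/fallback structure are assumptions reconstructed from the hunk headers, not the full app.py; note that the committed fallback message still says "Mistral-7B-Instruct-v0.1" even though the repo ID it now loads is TheBloke/Falcon-180B-Chat-GPTQ.

# Sketch of the fallback pattern in app.py as of commit bc6fc3d.
# Imports and the except structure are assumed, not taken from the diff.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

print("Loading optimized Mistral model...")
try:
    # First try: AWQ quantized model (best performance)
    print("🔄 Attempting to load AWQ model...")
    tokenizer = AutoTokenizer.from_pretrained("TheBloke/Falcon-180B-Chat-GPTQ")
    model = AutoModelForCausalLM.from_pretrained(
        "TheBloke/Mistral-7B-Instruct-v0.2-AWQ",
        device_map="auto",
    )
except Exception as e:
    # Fallback branch (hypothetical handler; the diff only shows its body)
    print(f"AWQ load failed: {e}")
    try:
        # Second try: this commit points both the tokenizer and the model
        # at TheBloke/Falcon-180B-Chat-GPTQ
        tokenizer = AutoTokenizer.from_pretrained("TheBloke/Falcon-180B-Chat-GPTQ")
        model = AutoModelForCausalLM.from_pretrained(
            "TheBloke/Falcon-180B-Chat-GPTQ",
            device_map="auto",
            torch_dtype=torch.float16,
            low_cpu_mem_usage=True,
        )
    except Exception as e2:
        raise RuntimeError(f"All model loads failed: {e2}")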