Switched to resolving issues
- alternative_requirements.txt +28 -0
- app.py +22 -7
- install_local.bat +28 -0
- install_local.sh +28 -0
- requirements.txt +2 -1
- test_model.py +21 -8
alternative_requirements.txt
ADDED
@@ -0,0 +1,28 @@
+# ===== OPTION 1: For Hugging Face Spaces (Recommended) =====
+# Use this in your main requirements.txt file
+torch>=2.0.0,<2.2.0
+transformers>=4.35.0,<4.40.0
+accelerate>=0.20.0
+gradio>=3.50.0,<4.0.0
+bitsandbytes>=0.41.0
+
+# ===== OPTION 2: For Local Development with AutoAWQ =====
+# If you want to try AutoAWQ locally, use this setup:
+# Step 1: Install core dependencies first
+# pip install "torch>=2.0.0,<2.2.0" "transformers>=4.35.0,<4.40.0" "accelerate>=0.20.0"
+# Step 2: Install AutoAWQ
+# pip install "autoawq>=0.1.8"
+# Step 3: Install Gradio
+# pip install "gradio>=3.50.0,<4.0.0"
+
+# ===== OPTION 3: For CPU-only deployment =====
+# torch>=2.0.0,<2.2.0 --index-url https://download.pytorch.org/whl/cpu
+# transformers>=4.35.0,<4.40.0
+# gradio>=3.50.0,<4.0.0
+
+# ===== OPTION 4: Alternative with different quantization =====
+# torch>=2.0.0,<2.2.0
+# transformers>=4.35.0,<4.40.0
+# optimum>=1.16.0
+# auto-gptq>=0.6.0
+# gradio>=3.50.0,<4.0.0
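Option 4 swaps the AWQ stack for GPTQ via optimum and auto-gptq. As a rough sketch of what the matching loader would look like (the -GPTQ model ID below is an assumption, the usual counterpart of the AWQ repo; this commit itself contains no GPTQ code):

# Sketch only, not part of this commit: loading a GPTQ build with the Option 4 stack.
# Assumes optimum and auto-gptq are installed, and that the -GPTQ repo id exists.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "TheBloke/Mistral-7B-Instruct-v0.2-GPTQ"  # assumed GPTQ counterpart
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",          # transformers dispatches GPTQ weights through optimum/auto-gptq
    torch_dtype=torch.float16,
)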
app.py
CHANGED
@@ -5,13 +5,28 @@ import re
 
 # Load model and tokenizer
 print("Loading Mistral-7B-Instruct AWQ...")
-[7 lines removed: the old direct model-loading block; not recoverable from the page extraction]
+
+# Try AWQ model first, fall back to the regular model if needed
+try:
+    tokenizer = AutoTokenizer.from_pretrained("TheBloke/Mistral-7B-Instruct-v0.2-AWQ")
+    model = AutoModelForCausalLM.from_pretrained(
+        "TheBloke/Mistral-7B-Instruct-v0.2-AWQ",
+        device_map="auto",
+        torch_dtype=torch.float16,
+        low_cpu_mem_usage=True
+    )
+    print("✅ AWQ model loaded successfully!")
+except Exception as e:
+    print(f"⚠️ AWQ model failed to load: {e}")
+    print("📦 Falling back to regular Mistral-7B-Instruct-v0.2...")
+    tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")
+    model = AutoModelForCausalLM.from_pretrained(
+        "mistralai/Mistral-7B-Instruct-v0.2",
+        device_map="auto",
+        torch_dtype=torch.float16,
+        low_cpu_mem_usage=True
+    )
+    print("✅ Regular model loaded successfully!")
 
 # Add pad token if it doesn't exist
 if tokenizer.pad_token is None:
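Note that the fallback path loads the full fp16 model, which needs on the order of 14 GB of GPU memory for 7B parameters, so it will only succeed on larger Space hardware. The generation code sits outside this hunk; a minimal sketch of how the loaded pair is typically driven, using the tokenizer's built-in Mistral chat template (the generation settings here are illustrative, not taken from this commit):

messages = [{"role": "user", "content": "Hello, who are you?"}]
# apply_chat_template wraps the turns in Mistral's [INST] ... [/INST] format
input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt").to(model.device)
output_ids = model.generate(
    input_ids,
    max_new_tokens=256,
    do_sample=True,
    temperature=0.7,
    pad_token_id=tokenizer.pad_token_id,
)
# decode only the newly generated tokens, not the echoed prompt
reply = tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True)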
install_local.bat
ADDED
@@ -0,0 +1,28 @@
+@echo off
+echo ============================================
+echo Installing Mistral AWQ Chatbot Dependencies
+echo ============================================
+
+echo.
+echo Step 1: Installing core dependencies...
+pip install "torch>=2.0.0,<2.2.0" "transformers>=4.35.0,<4.40.0" "accelerate>=0.20.0"
+
+echo.
+echo Step 2: Installing AutoAWQ...
+pip install "autoawq>=0.1.8"
+
+echo.
+echo Step 3: Installing Gradio...
+pip install "gradio>=3.50.0,<4.0.0"
+
+echo.
+echo ============================================
+echo Installation Complete!
+echo ============================================
+echo.
+echo To test the installation, run:
+echo python test_model.py
+echo.
+echo To start the chatbot, run:
+echo python app.py
+pause
install_local.sh
ADDED
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+echo "============================================"
+echo "Installing Mistral AWQ Chatbot Dependencies"
+echo "============================================"
+
+echo ""
+echo "Step 1: Installing core dependencies..."
+pip install "torch>=2.0.0,<2.2.0" "transformers>=4.35.0,<4.40.0" "accelerate>=0.20.0"
+
+echo ""
+echo "Step 2: Installing AutoAWQ..."
+pip install "autoawq>=0.1.8"
+
+echo ""
+echo "Step 3: Installing Gradio..."
+pip install "gradio>=3.50.0,<4.0.0"
+
+echo ""
+echo "============================================"
+echo "Installation Complete!"
+echo "============================================"
+echo ""
+echo "To test the installation, run:"
+echo "python test_model.py"
+echo ""
+echo "To start the chatbot, run:"
+echo "python app.py"
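The batch file can be run directly on Windows; on Linux/macOS the shell script needs the execute bit first, with the usual invocation being `chmod +x install_local.sh && ./install_local.sh` (or simply `bash install_local.sh`). Both scripts install in three separate steps, presumably so that a failure in the AutoAWQ step, the usual trouble spot, does not block the core dependencies or Gradio.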
requirements.txt
CHANGED
@@ -1,5 +1,6 @@
 torch>=2.0.0,<2.2.0
 transformers>=4.35.0,<4.40.0
-autoawq>=0.1.8
 accelerate>=0.20.0
 gradio>=3.50.0,<4.0.0
+# Use bitsandbytes for quantization support - more compatible with HF Spaces
+bitsandbytes>=0.41.0
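The swap from autoawq to bitsandbytes only changes the dependency here; the app.py in this commit still falls back to a plain fp16 load. For reference, actually using bitsandbytes would look something like the following sketch, which is not part of the commit:

# Sketch: 4-bit loading via bitsandbytes, the path the new requirements.txt enables.
# BitsAndBytesConfig is the standard transformers API for this; nothing below is
# wired into app.py yet.
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                     # quantize weights to 4-bit at load time
    bnb_4bit_quant_type="nf4",             # NormalFloat4, the common default
    bnb_4bit_compute_dtype=torch.float16,  # run matmuls in fp16
)
model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-Instruct-v0.2",
    device_map="auto",
    quantization_config=bnb_config,
)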
test_model.py
CHANGED
@@ -9,14 +9,27 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 def test_model():
     print("Loading Mistral-7B-Instruct AWQ for testing...")
 
-[8 lines removed: the old model-loading block inside test_model(); not recoverable from the page extraction]
+    # Try AWQ model first, fall back to the regular model if needed
+    try:
+        tokenizer = AutoTokenizer.from_pretrained("TheBloke/Mistral-7B-Instruct-v0.2-AWQ")
+        model = AutoModelForCausalLM.from_pretrained(
+            "TheBloke/Mistral-7B-Instruct-v0.2-AWQ",
+            device_map="auto",
+            torch_dtype=torch.float16,
+            low_cpu_mem_usage=True
+        )
+        print("✅ AWQ model loaded successfully!")
+    except Exception as e:
+        print(f"⚠️ AWQ model failed to load: {e}")
+        print("📦 Falling back to regular Mistral-7B-Instruct-v0.2...")
+        tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")
+        model = AutoModelForCausalLM.from_pretrained(
+            "mistralai/Mistral-7B-Instruct-v0.2",
+            device_map="auto",
+            torch_dtype=torch.float16,
+            low_cpu_mem_usage=True
+        )
+        print("✅ Regular model loaded successfully!")
 
     if tokenizer.pad_token is None:
         tokenizer.pad_token = tokenizer.eos_token
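The pad-token guard that both app.py and test_model.py keep is needed because Mistral's tokenizer ships without a padding token; reusing eos_token as pad_token is the standard workaround so that generate() can handle padded batches without warnings or errors.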