Update app.py
app.py CHANGED
@@ -6,6 +6,8 @@ import gradio as gr
 import os
 import logging
 from unsloth import FastLanguageModel
+import subprocess
+
 logging.basicConfig(
     level=logging.DEBUG, # Set the logging level to DEBUG to capture all messages
     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
@@ -72,11 +74,22 @@ Category List : ["Dairy & Eggs", "Beverages & Snacks", "Cleaning & Hygiene", "Gr
 '''
 @spaces.GPU()
 def chunk_it(inventory_list, user_input_text):
-
-    tensor = torch.randn(num_elements, dtype=torch.float32)
-    tensor_gpu = tensor.to('cuda')
-    logger.info("Loading model and tokenizer...")
+    # Check for CUDA and NVIDIA-related errors
     try:
+        # Check for GPU devices
+        device_count = torch.cuda.device_count()
+        logger.info(f"Number of GPU devices: {device_count}")
+        if device_count == 0:
+            raise RuntimeError("No GPU devices found.") # Raise an error if no GPUs are detected
+
+        # Check CUDA version using subprocess
+        process = subprocess.run(['nvcc', '--version'], capture_output=True, text=True)
+        cuda_version = process.stdout.strip()
+        logger.info(f"CUDA version: {cuda_version}")
+        if 'not found' in cuda_version.lower():
+            raise RuntimeError("CUDA not found.") # Raise an error if CUDA is not found
+
+        # Load model and tokenizer (your original code)
         model, tokenizer = FastLanguageModel.from_pretrained(
             model_name = "VanguardAI/CoT_multi_llama_LoRA_4bit",
             max_seq_length = 2048,
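
Two notes on the new checks. The removed warm-up lines referenced `num_elements`, which is not defined anywhere in the visible scope, so dropping them also removes a likely `NameError`. And the `nvcc` probe assumes the binary is on `PATH`; if it is missing, `subprocess.run` raises `FileNotFoundError` before the `'not found'` guard can ever fire. A minimal PyTorch-only alternative that avoids shelling out (the `check_cuda` name is hypothetical, not part of this commit):

```python
import logging

import torch

logger = logging.getLogger(__name__)

def check_cuda() -> None:
    """Fail fast if PyTorch cannot see a CUDA runtime or any GPU."""
    # torch.version.cuda reports the CUDA version PyTorch was built
    # against, with no dependency on the nvcc binary being on PATH.
    if not torch.cuda.is_available():
        raise RuntimeError("CUDA is not available to PyTorch.")
    device_count = torch.cuda.device_count()
    if device_count == 0:
        raise RuntimeError("No GPU devices found.")
    logger.info(f"Number of GPU devices: {device_count}")
    logger.info(f"CUDA version: {torch.version.cuda}")
```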
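
The `from_pretrained` call is truncated above. For reference, a typical unsloth loading sequence looks like the sketch below; the `dtype` and `load_in_4bit` keywords are illustrative assumptions, not values read from this file:

```python
from unsloth import FastLanguageModel

# dtype and load_in_4bit below are assumed values, not taken from the commit.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="VanguardAI/CoT_multi_llama_LoRA_4bit",
    max_seq_length=2048,
    dtype=None,         # None lets unsloth auto-select float16/bfloat16
    load_in_4bit=True,  # consistent with the 4-bit LoRA checkpoint name
)
FastLanguageModel.for_inference(model)  # switch to unsloth's fast inference mode
```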