Update app.py

app.py CHANGED
@@ -17,6 +17,8 @@ from threading import Thread
 import numpy as np
 from io import StringIO
 
+HF_TOKEN = None
+
 # Global variables to store model, tokenizer and pipe
 MODEL = None
 TOKENIZER = None
@@ -55,17 +57,27 @@ ANALYZED_DATA = None
 
 # Function to load the model in background
 def load_model_in_background():
-    global MODEL, TOKENIZER, PIPE, MODEL_LOADING, MODEL_LOADED
+    global MODEL, TOKENIZER, PIPE, MODEL_LOADING, MODEL_LOADED, HF_TOKEN
     try:
         MODEL_LOADING = True
         print("Starting model loading process...")
 
+        # Check if token is provided
+        if not HF_TOKEN:
+            MODEL_LOADING = False
+            return "Error: HuggingFace token is required. Please enter your token and try again."
+
         # Model identifier - using quantized 4-bit version for reduced memory
         model_id = "mistralai/Mistral-7B-Instruct-v0.3"
 
         print("Loading tokenizer...")
         # Set tokenizer to use legacy format to avoid issues
-        TOKENIZER = AutoTokenizer.from_pretrained(model_id, legacy_format=True)
+        # Use the token for authentication
+        TOKENIZER = AutoTokenizer.from_pretrained(
+            model_id,
+            legacy_format=True,
+            token=HF_TOKEN  # Add token here
+        )
 
         print("Loading model with optimized settings for limited memory...")
         # Configure model loading with 4-bit quantization for minimum memory usage
@@ -77,7 +89,8 @@ def load_model_in_background():
             load_in_4bit=True,  # Enable 4-bit quantization
             max_memory={0: "8GiB"},  # Limit memory usage per GPU
             offload_folder="offload_folder",  # Use disk offloading if needed
-            offload_state_dict=True  # Offload state dict to CPU when possible
+            offload_state_dict=True,  # Offload state dict to CPU when possible
+            token=HF_TOKEN  # Add token here
         )
 
         print("Creating optimized pipeline...")
@@ -94,19 +107,15 @@ def load_model_in_background():
         MODEL_LOADING = False
         MODEL_LOADED = True
         return "Model loaded successfully! Ready to generate responses."
-    except torch.cuda.OutOfMemoryError as e:
-        MODEL_LOADING = False
-        print(f"CUDA out of memory error: {str(e)}")
-        return f"GPU memory error: {str(e)}. Try restarting or using a machine with more GPU memory."
-    except ImportError as e:
-        MODEL_LOADING = False
-        print(f"Import error - missing dependencies: {str(e)}")
-        return f"Missing dependencies: {str(e)}. Try 'pip install -U bitsandbytes transformers accelerate'"
     except Exception as e:
         MODEL_LOADING = False
-
-
-
+        error_msg = str(e)
+        if "401" in error_msg or "authentication" in error_msg.lower():
+            return f"Authentication error: Please check your HuggingFace token. Error: {error_msg}"
+        elif "access" in error_msg.lower() or "gated" in error_msg.lower():
+            return f"Access denied: You may need to request access to this model on HuggingFace. Error: {error_msg}"
+        else:
+            return f"Error loading model: {error_msg}"
 
 # Function to generate response using the model
 def generate_response(prompt, chat_history, progress=gr.Progress()):
@@ -250,6 +259,15 @@ def create_new_chat(chat_name):
         return f"Created new chat: {chat_name}"
     return "Please enter a unique chat name"
 
+# MODIFICATION 3: Add function to set HuggingFace token
+def set_hf_token(token):
+    global HF_TOKEN
+    if token and token.strip():
+        HF_TOKEN = token.strip()
+        return "HuggingFace token saved successfully!"
+    else:
+        return "Please enter a valid HuggingFace token."
+
 # Function to handle file upload and analysis
 def analyze_uploaded_file(file):
     global FILE_DATA, ANALYZED_DATA, CHATS, CURRENT_CHAT
@@ -526,7 +544,10 @@ def clear_current_chat():
     return f"Cleared chat: {CURRENT_CHAT}"
 
 # Function to load model and return status
 def load_model_button():
+    global HF_TOKEN
+    if not HF_TOKEN:
+        return "Please enter your HuggingFace token first before loading the model."
     if MODEL_LOADED:
         return "Model is already loaded and ready!"
     elif MODEL_LOADING:
@@ -642,7 +663,24 @@ You can customize this template with your specific data. If you need a more comp
             clear_chat_btn = gr.Button("Clear Current Chat", variant="secondary")
 
         with gr.Column(scale=1):
+            # HuggingFace Token Input
+            gr.Markdown("### HuggingFace Authentication")
+            hf_token_input = gr.Textbox(
+                label="HuggingFace Access Token",
+                placeholder="Enter your HF token (hf_xxx...)",
+                type="password",
+                info="Required to download the Mistral-7B model"
+            )
+            set_token_btn = gr.Button("Set Token", variant="secondary")
+            token_status = gr.Textbox(
+                label="Token Status",
+                value="No token set",
+                interactive=False,
+                lines=1
+            )
+
             # Model Loading and Settings
+            gr.Markdown("### Model Loading")
            with gr.Row():
                 load_model_btn = gr.Button("Load Mistral-7B Model", variant="primary")
                 use_fallback_btn = gr.Button("Use Simple JSON Mode", variant="secondary")
@@ -926,6 +964,13 @@ You can customize this template with your specific data. If you need a more comp
         api_name="clear_chat"
     )
 
+    set_token_btn.click(
+        set_hf_token,
+        inputs=hf_token_input,
+        outputs=token_status,
+        api_name="set_token"
+    )
+
     # Initialize empty chatbot
     chatbot.value = []
 
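The change above threads a user-supplied token through both from_pretrained calls. For reference, the same pattern works outside the app; a minimal sketch, assuming transformers is installed, that the token comes from an HF_TOKEN environment variable rather than source code, and that the account behind it has accepted the gated Mistral license:

import os

from transformers import AutoTokenizer

# Minimal sketch: authenticated download of a gated model's tokenizer.
# Real tokens should live in an environment variable or secret store.
hf_token = os.environ.get("HF_TOKEN")  # assumed to hold a valid hf_... token
tokenizer = AutoTokenizer.from_pretrained(
    "mistralai/Mistral-7B-Instruct-v0.3",
    token=hf_token,
)
print(tokenizer("Hello, world!").input_ids)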
| 976 |
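The loader keeps load_in_4bit=True as a bare keyword argument. Recent transformers releases route 4-bit settings through BitsAndBytesConfig instead, so an equivalent, more future-proof setup might look like the sketch below (an assumption on my part, not part of this commit; requires bitsandbytes, accelerate, and a CUDA GPU, with token handling mirroring the app's HF_TOKEN global):

import os

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Sketch of the same 4-bit load expressed via BitsAndBytesConfig.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                     # quantize weights to 4 bits
    bnb_4bit_compute_dtype=torch.float16,  # run matmuls in fp16
)
model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-Instruct-v0.3",
    quantization_config=bnb_config,
    device_map="auto",                     # let accelerate place layers
    token=os.environ.get("HF_TOKEN"),      # same token as the tokenizer
)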
|
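On the UI side, the token box and its Set Token button follow the usual Gradio Blocks pattern: a component, a button, and a .click binding between them. A self-contained sketch of that wiring, trimmed to the token flow only (names here are illustrative, not the app's):

import gradio as gr

TOKEN = None  # module-level state, as with app.py's HF_TOKEN

def set_token(token):
    # Store a stripped copy of the token and report status to the UI.
    global TOKEN
    if token and token.strip():
        TOKEN = token.strip()
        return "Token saved."
    return "Please enter a valid token."

with gr.Blocks() as demo:
    token_box = gr.Textbox(label="HF Access Token", type="password")
    status = gr.Textbox(label="Status", value="No token set", interactive=False)
    set_btn = gr.Button("Set Token")
    set_btn.click(set_token, inputs=token_box, outputs=status)

if __name__ == "__main__":
    demo.launch()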