Update app.py

app.py CHANGED
@@ -17,6 +17,8 @@ from threading import Thread
 import numpy as np
 from io import StringIO
 
+HF_TOKEN = None
+
 # Global variables to store model, tokenizer and pipe
 MODEL = None
 TOKENIZER = None
@@ -55,17 +57,27 @@ ANALYZED_DATA = None
 
 # Function to load the model in background
 def load_model_in_background():
-    global MODEL, TOKENIZER, PIPE, MODEL_LOADING, MODEL_LOADED
+    global MODEL, TOKENIZER, PIPE, MODEL_LOADING, MODEL_LOADED, HF_TOKEN
     try:
         MODEL_LOADING = True
         print("Starting model loading process...")
 
+        # Check if token is provided
+        if not HF_TOKEN:
+            MODEL_LOADING = False
+            return "Error: HuggingFace token is required. Please enter your token and try again."
+
         # Model identifier - using quantized 4-bit version for reduced memory
         model_id = "mistralai/Mistral-7B-Instruct-v0.3"
 
         print("Loading tokenizer...")
         # Set tokenizer to use legacy format to avoid issues
-        TOKENIZER = AutoTokenizer.from_pretrained(model_id, legacy_format=True)
+        # Use the token for authentication
+        TOKENIZER = AutoTokenizer.from_pretrained(
+            model_id,
+            legacy_format=True,
+            token=HF_TOKEN  # Add token here
+        )
 
         print("Loading model with optimized settings for limited memory...")
         # Configure model loading with 4-bit quantization for minimum memory usage
@@ -77,7 +89,8 @@ def load_model_in_background():
             load_in_4bit=True,  # Enable 4-bit quantization
             max_memory={0: "8GiB"},  # Limit memory usage per GPU
             offload_folder="offload_folder",  # Use disk offloading if needed
-            offload_state_dict=True  # Offload state dict to CPU when possible
+            offload_state_dict=True,  # Offload state dict to CPU when possible
+            token=HF_TOKEN  # Add token here
         )
 
         print("Creating optimized pipeline...")
@@ -94,19 +107,15 @@ def load_model_in_background():
         MODEL_LOADING = False
         MODEL_LOADED = True
         return "Model loaded successfully! Ready to generate responses."
-    except torch.cuda.OutOfMemoryError as e:
-        MODEL_LOADING = False
-        print(f"CUDA out of memory error: {str(e)}")
-        return f"GPU memory error: {str(e)}. Try restarting or using a machine with more GPU memory."
-    except ImportError as e:
-        MODEL_LOADING = False
-        print(f"Import error - missing dependencies: {str(e)}")
-        return f"Missing dependencies: {str(e)}. Try 'pip install -U bitsandbytes transformers accelerate'"
     except Exception as e:
         MODEL_LOADING = False
-
-
-
+        error_msg = str(e)
+        if "401" in error_msg or "authentication" in error_msg.lower():
+            return f"Authentication error: Please check your HuggingFace token. Error: {error_msg}"
+        elif "access" in error_msg.lower() or "gated" in error_msg.lower():
+            return f"Access denied: You may need to request access to this model on HuggingFace. Error: {error_msg}"
+        else:
+            return f"Error loading model: {error_msg}"
 
 # Function to generate response using the model
 def generate_response(prompt, chat_history, progress=gr.Progress()):
@@ -250,6 +259,15 @@ def create_new_chat(chat_name):
         return f"Created new chat: {chat_name}"
     return "Please enter a unique chat name"
 
+# MODIFICATION 3: Add function to set HuggingFace token
+def set_hf_token(token):
+    global HF_TOKEN
+    if token and token.strip():
+        HF_TOKEN = token.strip()
+        return "HuggingFace token saved successfully!"
+    else:
+        return "Please enter a valid HuggingFace token."
+
 # Function to handle file upload and analysis
 def analyze_uploaded_file(file):
     global FILE_DATA, ANALYZED_DATA, CHATS, CURRENT_CHAT
@@ -526,7 +544,10 @@ def clear_current_chat():
     return f"Cleared chat: {CURRENT_CHAT}"
 
 # Function to load model and return status
 def load_model_button():
+    global HF_TOKEN
+    if not HF_TOKEN:
+        return "Please enter your HuggingFace token first before loading the model."
     if MODEL_LOADED:
         return "Model is already loaded and ready!"
     elif MODEL_LOADING:
@@ -642,7 +663,24 @@ You can customize this template with your specific data. If you need a more comp
             clear_chat_btn = gr.Button("Clear Current Chat", variant="secondary")
 
         with gr.Column(scale=1):
+            # HuggingFace Token Input
+            gr.Markdown("### HuggingFace Authentication")
+            hf_token_input = gr.Textbox(
+                label="HuggingFace Access Token",
+                placeholder="Enter your HF token (hf_xxx...)",
+                type="password",
+                info="Required to download the Mistral-7B model"
+            )
+            set_token_btn = gr.Button("Set Token", variant="secondary")
+            token_status = gr.Textbox(
+                label="Token Status",
+                value="No token set",
+                interactive=False,
+                lines=1
+            )
+
             # Model Loading and Settings
+            gr.Markdown("### Model Loading")
            with gr.Row():
                 load_model_btn = gr.Button("Load Mistral-7B Model", variant="primary")
                 use_fallback_btn = gr.Button("Use Simple JSON Mode", variant="secondary")
@@ -926,6 +964,13 @@ You can customize this template with your specific data. If you need a more comp
         api_name="clear_chat"
     )
 
+    set_token_btn.click(
+        set_hf_token,
+        inputs=hf_token_input,
+        outputs=token_status,
+        api_name="set_token"
+    )
+
     # Initialize empty chatbot
     chatbot.value = []
 
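The change above threads a user-supplied token through both from_pretrained calls. For reference, the same pattern works outside the app; a minimal sketch, assuming transformers is installed, that the token comes from an HF_TOKEN environment variable rather than source code, and that the account behind it has accepted the gated Mistral license:

import os

from transformers import AutoTokenizer

# Minimal sketch: authenticated download of a gated model's tokenizer.
# Real tokens should live in an environment variable or secret store.
hf_token = os.environ.get("HF_TOKEN")  # assumed to hold a valid hf_... token
tokenizer = AutoTokenizer.from_pretrained(
    "mistralai/Mistral-7B-Instruct-v0.3",
    token=hf_token,
)
print(tokenizer("Hello, world!").input_ids)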
| 976 |
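The loader keeps load_in_4bit=True as a bare keyword argument. Recent transformers releases route 4-bit settings through BitsAndBytesConfig instead, so an equivalent, more future-proof setup might look like the sketch below (an assumption on my part, not part of this commit; requires bitsandbytes, accelerate, and a CUDA GPU, with token handling mirroring the app's HF_TOKEN global):

import os

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Sketch of the same 4-bit load expressed via BitsAndBytesConfig.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                     # quantize weights to 4 bits
    bnb_4bit_compute_dtype=torch.float16,  # run matmuls in fp16
)
model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-Instruct-v0.3",
    quantization_config=bnb_config,
    device_map="auto",                     # let accelerate place layers
    token=os.environ.get("HF_TOKEN"),      # same token as the tokenizer
)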
|
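On the UI side, the token box and its Set Token button follow the usual Gradio Blocks pattern: a component, a button, and a .click binding between them. A self-contained sketch of that wiring, trimmed to the token flow only (names here are illustrative, not the app's):

import gradio as gr

TOKEN = None  # module-level state, as with app.py's HF_TOKEN

def set_token(token):
    # Store a stripped copy of the token and report status to the UI.
    global TOKEN
    if token and token.strip():
        TOKEN = token.strip()
        return "Token saved."
    return "Please enter a valid token."

with gr.Blocks() as demo:
    token_box = gr.Textbox(label="HF Access Token", type="password")
    status = gr.Textbox(label="Status", value="No token set", interactive=False)
    set_btn = gr.Button("Set Token")
    set_btn.click(set_token, inputs=token_box, outputs=status)

if __name__ == "__main__":
    demo.launch()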