shaheerawan3 committed
Commit 5baa77a · verified · 1 parent: 0972634

Update app.py

Files changed (1): app.py (+60 −15)
app.py CHANGED
@@ -17,6 +17,8 @@ from threading import Thread
 import numpy as np
 from io import StringIO
 
+HF_TOKEN = None
+
 # Global variables to store model, tokenizer and pipe
 MODEL = None
 TOKENIZER = None
@@ -55,17 +57,27 @@ ANALYZED_DATA = None
 
 # Function to load the model in background
 def load_model_in_background():
-    global MODEL, TOKENIZER, PIPE, MODEL_LOADING, MODEL_LOADED
+    global MODEL, TOKENIZER, PIPE, MODEL_LOADING, MODEL_LOADED, HF_TOKEN
     try:
         MODEL_LOADING = True
         print("Starting model loading process...")
 
+        # Check if token is provided
+        if not HF_TOKEN:
+            MODEL_LOADING = False
+            return "Error: HuggingFace token is required. Please enter your token and try again."
+
         # Model identifier - using quantized 4-bit version for reduced memory
         model_id = "mistralai/Mistral-7B-Instruct-v0.3"
 
         print("Loading tokenizer...")
         # Set tokenizer to use legacy format to avoid issues
-        TOKENIZER = AutoTokenizer.from_pretrained(model_id, legacy_format=True)
+        # Use the token for authentication
+        TOKENIZER = AutoTokenizer.from_pretrained(
+            model_id,
+            legacy_format=True,
+            token=HF_TOKEN  # Add token here
+        )
 
         print("Loading model with optimized settings for limited memory...")
         # Configure model loading with 4-bit quantization for minimum memory usage
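A note on this hunk: in transformers, `legacy_format` is an argument of `save_pretrained`, and the slow-tokenizer flag on loading is named `legacy`, so the extra kwarg here is most likely inert. The meaningful addition is `token=`, which is the supported way to authenticate downloads of gated models (transformers ≥ 4.34; older releases used `use_auth_token=`). A minimal, self-contained sketch of the two common authentication styles, with a placeholder token value:

```python
# Sketch: authenticating gated-model downloads from the Hugging Face Hub.
# Assumes transformers >= 4.34 and huggingface_hub are installed.
from huggingface_hub import login
from transformers import AutoTokenizer

HF_TOKEN = "hf_xxx"  # placeholder - never hard-code a real token

# Option 1: authenticate the whole process once
login(token=HF_TOKEN)

# Option 2: pass the token per call, as this commit does
tokenizer = AutoTokenizer.from_pretrained(
    "mistralai/Mistral-7B-Instruct-v0.3",
    token=HF_TOKEN,
)
```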
@@ -77,7 +89,8 @@ def load_model_in_background():
             load_in_4bit=True,  # Enable 4-bit quantization
             max_memory={0: "8GiB"},  # Limit memory usage per GPU
             offload_folder="offload_folder",  # Use disk offloading if needed
-            offload_state_dict=True  # Offload state dict to CPU when possible
+            offload_state_dict=True,  # Offload state dict to CPU when possible
+            token=HF_TOKEN  # Add token here
         )
 
         print("Creating optimized pipeline...")
@@ -94,19 +107,15 @@
         MODEL_LOADING = False
         MODEL_LOADED = True
         return "Model loaded successfully! Ready to generate responses."
-    except torch.cuda.OutOfMemoryError as e:
-        MODEL_LOADING = False
-        print(f"CUDA out of memory error: {str(e)}")
-        return f"GPU memory error: {str(e)}. Try restarting or using a machine with more GPU memory."
-    except ImportError as e:
-        MODEL_LOADING = False
-        print(f"Import error - missing dependencies: {str(e)}")
-        return f"Missing dependencies: {str(e)}. Try 'pip install -U bitsandbytes transformers accelerate'"
     except Exception as e:
         MODEL_LOADING = False
-        print(f"Error loading model: {str(e)}")
-        error_type = type(e).__name__
-        return f"Error loading model ({error_type}): {str(e)}"
+        error_msg = str(e)
+        if "401" in error_msg or "authentication" in error_msg.lower():
+            return f"Authentication error: Please check your HuggingFace token. Error: {error_msg}"
+        elif "access" in error_msg.lower() or "gated" in error_msg.lower():
+            return f"Access denied: You may need to request access to this model on HuggingFace. Error: {error_msg}"
+        else:
+            return f"Error loading model: {error_msg}"
 
 # Function to generate response using the model
 def generate_response(prompt, chat_history, progress=gr.Progress()):
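Matching substrings of the exception message works, but it is brittle across library versions. `huggingface_hub` raises typed errors that support the same classification more robustly; a sketch, where the `load` callable is a stand-in for the `from_pretrained` calls above and the import path assumes a recent `huggingface_hub`:

```python
# Sketch: classify load failures with huggingface_hub's typed exceptions.
from huggingface_hub.utils import (
    GatedRepoError,           # gated model, access not yet granted
    RepositoryNotFoundError,  # bad repo id, or token cannot see the repo
    HfHubHTTPError,           # any other HTTP error from the Hub
)

def describe_failure(load):
    """`load` is a stand-in callable wrapping the from_pretrained calls."""
    try:
        load()
        return "Model loaded successfully!"
    except GatedRepoError as e:  # subclass of RepositoryNotFoundError: catch first
        return f"Access denied: request access to the model on HuggingFace. {e}"
    except RepositoryNotFoundError as e:
        return f"Repo not found or token lacks access: {e}"
    except HfHubHTTPError as e:
        return f"Hub HTTP error - check your HuggingFace token: {e}"
```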
@@ -250,6 +259,15 @@ def create_new_chat(chat_name):
         return f"Created new chat: {chat_name}"
     return "Please enter a unique chat name"
 
+# MODIFICATION 3: Add function to set HuggingFace token
+def set_hf_token(token):
+    global HF_TOKEN
+    if token and token.strip():
+        HF_TOKEN = token.strip()
+        return "HuggingFace token saved successfully!"
+    else:
+        return "Please enter a valid HuggingFace token."
+
 # Function to handle file upload and analysis
 def analyze_uploaded_file(file):
     global FILE_DATA, ANALYZED_DATA, CHATS, CURRENT_CHAT
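Since `huggingface_hub` also reads the `HF_TOKEN` environment variable by default, a hypothetical variant of `set_hf_token` (not part of this commit) could fall back to it when the textbox is left empty:

```python
import os

# Hypothetical variant: fall back to the HF_TOKEN environment variable,
# which huggingface_hub honors on its own for Hub requests.
def set_hf_token(token):
    global HF_TOKEN
    token = (token or os.environ.get("HF_TOKEN", "")).strip()
    if token:
        HF_TOKEN = token
        return "HuggingFace token saved successfully!"
    return "Please enter a valid HuggingFace token."
```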
@@ -526,7 +544,10 @@ def clear_current_chat():
     return f"Cleared chat: {CURRENT_CHAT}"
 
 # Function to load model and return status
 def load_model_button():
+    global HF_TOKEN
+    if not HF_TOKEN:
+        return "Please enter your HuggingFace token first before loading the model."
     if MODEL_LOADED:
         return "Model is already loaded and ready!"
     elif MODEL_LOADING:
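The hunk ends before the branch that actually starts loading. Given the `from threading import Thread` visible in the first hunk's context, the launch presumably looks something like the sketch below; this is a plausible completion, not the verbatim code:

```python
from threading import Thread

# Plausible tail of load_model_button(): start the slow load without
# blocking the Gradio event handler that invoked it.
def start_background_load():
    Thread(target=load_model_in_background, daemon=True).start()
    return "Model loading started in the background..."
```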
@@ -642,7 +663,24 @@ You can customize this template with your specific data. If you need a more comp
             clear_chat_btn = gr.Button("Clear Current Chat", variant="secondary")
 
         with gr.Column(scale=1):
+            # HuggingFace Token Input
+            gr.Markdown("### HuggingFace Authentication")
+            hf_token_input = gr.Textbox(
+                label="HuggingFace Access Token",
+                placeholder="Enter your HF token (hf_xxx...)",
+                type="password",
+                info="Required to download the Mistral-7B model"
+            )
+            set_token_btn = gr.Button("Set Token", variant="secondary")
+            token_status = gr.Textbox(
+                label="Token Status",
+                value="No token set",
+                interactive=False,
+                lines=1
+            )
+
             # Model Loading and Settings
+            gr.Markdown("### Model Loading")
             with gr.Row():
                 load_model_btn = gr.Button("Load Mistral-7B Model", variant="primary")
                 use_fallback_btn = gr.Button("Use Simple JSON Mode", variant="secondary")
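The new widgets in isolation, as a runnable sketch (assumes a recent Gradio release; component names mirror the diff, and the handler is inlined for self-containment):

```python
import gradio as gr

def set_hf_token(token):
    # Same validation as the commit's handler, minus the module global.
    if token and token.strip():
        return "HuggingFace token saved successfully!"
    return "Please enter a valid HuggingFace token."

with gr.Blocks() as demo:
    gr.Markdown("### HuggingFace Authentication")
    hf_token_input = gr.Textbox(
        label="HuggingFace Access Token",
        placeholder="Enter your HF token (hf_xxx...)",
        type="password",  # masks the token in the UI
        info="Required to download the Mistral-7B model",
    )
    set_token_btn = gr.Button("Set Token", variant="secondary")
    token_status = gr.Textbox(label="Token Status", value="No token set", interactive=False)
    set_token_btn.click(set_hf_token, inputs=hf_token_input, outputs=token_status)

if __name__ == "__main__":
    demo.launch()
```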
@@ -926,6 +964,13 @@ You can customize this template with your specific data. If you need a more comp
         api_name="clear_chat"
     )
 
+    set_token_btn.click(
+        set_hf_token,
+        inputs=hf_token_input,
+        outputs=token_status,
+        api_name="set_token"
+    )
+
     # Initialize empty chatbot
     chatbot.value = []
 
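Because the handler registers `api_name="set_token"`, the endpoint is also callable programmatically once the app is running; a sketch with `gradio_client` (the Space URL is a placeholder):

```python
from gradio_client import Client

client = Client("https://your-space.hf.space")  # placeholder URL
result = client.predict("hf_xxx", api_name="/set_token")  # placeholder token
print(result)  # expect: "HuggingFace token saved successfully!"
```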
 