Update app.py
app.py CHANGED
@@ -3,7 +3,7 @@ import os
 import threading
 import time
 from pathlib import Path
-from huggingface_hub import hf_hub_download,login
+from huggingface_hub import hf_hub_download, login, list_repo_files
 
 # Try to import llama-cpp-python, fallback to instructions if not available
 try:
@@ -23,9 +23,9 @@ HF_FILENAME = "mmed-llama-alpaca-q4_k_m.gguf"
 
 hf_token = os.environ.get("HF_TOKEN")
 
-
+if hf_token:
+    login(token=hf_token)
 
-login(token=hf_token)
 def find_gguf_file(directory="."):
     """Find GGUF files in the specified directory"""
     gguf_files = []
@@ -35,6 +35,19 @@ def find_gguf_file(directory="."):
                 gguf_files.append(os.path.join(root, file))
     return gguf_files
 
+def get_repo_gguf_files(repo_id=HF_REPO_ID):
+    """Get all GGUF files from the HuggingFace repository"""
+    try:
+        print(f"Fetching file list from {repo_id}...")
+        files = list_repo_files(repo_id=repo_id, token=hf_token)
+        gguf_files = [f for f in files if f.endswith('.gguf')]
+        print(f"Found {len(gguf_files)} GGUF files in repository")
+        return gguf_files, None
+    except Exception as e:
+        error_msg = f"Error fetching repository files: {str(e)}"
+        print(error_msg)
+        return [], error_msg
+
 def download_model_from_hf(repo_id=HF_REPO_ID, filename=HF_FILENAME):
     """Download GGUF model from HuggingFace Hub"""
     try:
@@ -76,7 +89,7 @@ def get_optimal_settings():
 
     return n_threads, n_gpu_layers
 
-def load_model_from_gguf(gguf_path=None, n_ctx=2048, use_hf_download=True):
+def load_model_from_gguf(gguf_path=None, filename=None, n_ctx=2048, use_hf_download=True):
     """Load the model from a GGUF file with automatic optimization"""
     global model, model_loaded
 
@@ -87,8 +100,10 @@ def load_model_from_gguf(gguf_path=None, n_ctx=2048, use_hf_download=True):
     # If no path provided, try different approaches
    if gguf_path is None:
         if use_hf_download:
+            # Use the specified filename or default
+            selected_filename = filename if filename else HF_FILENAME
             # Try to download from HuggingFace first
-            gguf_path, error = download_model_from_hf()
+            gguf_path, error = download_model_from_hf(filename=selected_filename)
             if error:
                 return False, f"❌ Failed to download from HuggingFace: {error}"
         else:
@@ -123,8 +138,9 @@ def load_model_from_gguf(gguf_path=None, n_ctx=2048, use_hf_download=True):
         )
 
         model_loaded = True
+        selected_filename = filename if filename else os.path.basename(gguf_path)
         print("Model loaded successfully!")
-        return True, f"✅ Model loaded successfully
+        return True, f"✅ Model loaded successfully: {selected_filename}\n📊 Context: {n_ctx} tokens\n🖥️ CPU Threads: {n_threads}\n🎮 GPU Layers: {n_gpu_layers}\n📦 Source: {HF_REPO_ID}"
 
     except Exception as e:
         model_loaded = False
@@ -195,11 +211,25 @@ def clear_chat():
     """Clear the chat history"""
     return [], ""
 
-def load_model_interface(context_size, use_hf_download):
+def load_model_interface(context_size, selected_model):
     """Interface function to load model with configurable context size"""
-    success, message = load_model_from_gguf(gguf_path=None, n_ctx=int(context_size), use_hf_download=use_hf_download)
+    success, message = load_model_from_gguf(gguf_path=None, filename=selected_model, n_ctx=int(context_size), use_hf_download=True)
     return message
 
+def refresh_model_list():
+    """Refresh the list of available GGUF models from the repository"""
+    gguf_files, error = get_repo_gguf_files()
+    if error:
+        return gr.Dropdown(choices=["Error loading models"], value="Error loading models")
+
+    if not gguf_files:
+        return gr.Dropdown(choices=["No GGUF files found"], value="No GGUF files found")
+
+    # Set default value to the original default file if it exists
+    default_value = HF_FILENAME if HF_FILENAME in gguf_files else gguf_files[0]
+
+    return gr.Dropdown(choices=gguf_files, value=default_value)
+
 def get_available_gguf_files():
     """Get list of available GGUF files"""
     gguf_files = find_gguf_file()
@@ -220,6 +250,15 @@ def create_interface():
     # Check for available models
     availability_status = check_model_availability()
 
+    # Get initial list of GGUF files from repository
+    gguf_files, error = get_repo_gguf_files()
+    if error or not gguf_files:
+        initial_choices = ["Error loading models" if error else "No GGUF files found"]
+        initial_value = initial_choices[0]
+    else:
+        initial_choices = gguf_files
+        initial_value = HF_FILENAME if HF_FILENAME in gguf_files else gguf_files[0]
+
     with gr.Blocks(title="MMed-Llama-Alpaca GGUF Chatbot", theme=gr.themes.Soft()) as demo:
         gr.HTML("""
         <h1 style="text-align: center; color: #2E86AB; margin-bottom: 30px;">
@@ -256,15 +295,16 @@ def create_interface():
             # Model loading section
             gr.HTML("<h3>🔧 Model Control</h3>")
 
-            #
-            use_hf_download = gr.Checkbox(
-                value=True,
-                label="Download from HuggingFace",
-                info="Uncheck to use local GGUF files"
-            )
+            # gr.HTML(f"<p style='font-size: 0.9em; color: #666;'><strong>Repository:</strong> {HF_REPO_ID}</p>")
 
-
-
+            # Model selection dropdown
+            model_dropdown = gr.Dropdown(
+                choices=initial_choices,
+                value=initial_value,
+                label="Select GGUF Model",
+                info="Choose from available models in the repository",
+                interactive=True
+            )
 
             load_btn = gr.Button("Load Model", variant="primary", size="lg")
             model_status = gr.Textbox(
@@ -346,7 +386,7 @@ def create_interface():
         # Event handlers
         load_btn.click(
             load_model_interface,
-            inputs=[context_size, use_hf_download],
+            inputs=[context_size, model_dropdown],
             outputs=model_status
         )
 
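For reference, the core pattern this commit adopts is: enumerate a Hub repository's GGUF files with huggingface_hub.list_repo_files, feed them into a Gradio dropdown, and rebuild that dropdown at runtime. Below is a minimal, self-contained sketch of that pattern, not the Space's exact code: the repo id is a placeholder, and the refresh button is a hypothetical control (the diff above only wires load_btn).

import os
import gradio as gr
from huggingface_hub import list_repo_files

REPO_ID = "example-org/example-gguf-repo"  # placeholder; substitute the real HF_REPO_ID

def list_gguf(repo_id=REPO_ID):
    """Return the .gguf filenames in a Hub repo (empty list on failure)."""
    try:
        files = list_repo_files(repo_id=repo_id, token=os.environ.get("HF_TOKEN"))
        return [f for f in files if f.endswith(".gguf")]
    except Exception:
        return []

def refresh():
    """Rebuild the dropdown; returning a gr.Dropdown updates the component in place."""
    choices = list_gguf() or ["No GGUF files found"]
    return gr.Dropdown(choices=choices, value=choices[0])

with gr.Blocks() as demo:
    initial = list_gguf() or ["No GGUF files found"]
    dropdown = gr.Dropdown(choices=initial, value=initial[0], label="Select GGUF Model")
    refresh_btn = gr.Button("Refresh model list")  # hypothetical; not present in the diff
    refresh_btn.click(refresh, outputs=dropdown)

if __name__ == "__main__":
    demo.launch()

Returning a fresh gr.Dropdown(choices=..., value=...) from a callback is the Gradio 4 way to swap a component's choices at runtime, which is what the diff's refresh_model_list does.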