Spaces:
Sleeping
Sleeping
lowvoltagenation committed on
Commit ·
fca6913
1
Parent(s): 4e11a03
Fix HF Spaces auth: Use inference API for Maya model to avoid gated base model download
Browse files- src/app.py +1 -1
- src/model_interface.py +31 -7
src/app.py
CHANGED
|
@@ -101,7 +101,7 @@ class MayaGradioApp:
|
|
| 101 |
model_type = model_config.get("type", "local")
|
| 102 |
|
| 103 |
if self.current_model == "blakeurmos/maya-7b-lora-v1":
|
| 104 |
-
# For Maya
|
| 105 |
full_prompt = ""
|
| 106 |
|
| 107 |
# Add only recent conversation history (keep it simple)
|
|
|
|
| 101 |
model_type = model_config.get("type", "local")
|
| 102 |
|
| 103 |
if self.current_model == "blakeurmos/maya-7b-lora-v1":
|
| 104 |
+
# For Maya model, use simple conversational format since it's already fine-tuned
|
| 105 |
full_prompt = ""
|
| 106 |
|
| 107 |
# Add only recent conversation history (keep it simple)
|
src/model_interface.py
CHANGED
|
@@ -46,13 +46,12 @@ class ModelInterface:
|
|
| 46 |
|
| 47 |
# Define available models (optimized for HuggingFace Spaces)
|
| 48 |
self.available_models = {
|
| 49 |
-
# Maya's fine-tuned LoRA model
|
| 50 |
"blakeurmos/maya-7b-lora-v1": {
|
| 51 |
"name": "Maya 7B (Fine-tuned)",
|
| 52 |
"description": "Maya's personality fine-tuned on Mistral-7B",
|
| 53 |
"size": "LoRA (~14MB + base model)",
|
| 54 |
-
"type": "
|
| 55 |
-
"base_model": "mistralai/Mistral-7B-Instruct-v0.3",
|
| 56 |
"requires_auth": True
|
| 57 |
},
|
| 58 |
# Latest Mistral instruction model
|
|
@@ -101,8 +100,19 @@ class ModelInterface:
|
|
| 101 |
# For inference API, just create a pipeline
|
| 102 |
logger.info(f"Setting up inference API pipeline for {model_id}")
|
| 103 |
|
| 104 |
-
# Use auth token if available
|
| 105 |
-
auth_token =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
|
| 107 |
pipe = pipeline(
|
| 108 |
"text-generation",
|
|
@@ -183,8 +193,19 @@ class ModelInterface:
|
|
| 183 |
logger.error(f"No base model specified for LoRA {model_id}")
|
| 184 |
return False
|
| 185 |
|
| 186 |
-
# Use auth token if available
|
| 187 |
-
auth_token =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 188 |
|
| 189 |
# Load base model first
|
| 190 |
logger.info(f"Loading base model {base_model_id}...")
|
|
@@ -297,6 +318,9 @@ class ModelInterface:
|
|
| 297 |
formatted_prompt = f"<s>[INST] {prompt} [/INST]"
|
| 298 |
else:
|
| 299 |
formatted_prompt = prompt
|
|
|
|
|
|
|
|
|
|
| 300 |
else:
|
| 301 |
formatted_prompt = prompt
|
| 302 |
|
|
|
|
| 46 |
|
| 47 |
# Define available models (optimized for HuggingFace Spaces)
|
| 48 |
self.available_models = {
|
| 49 |
+
# Maya's fine-tuned LoRA model (use inference API on HF Spaces)
|
| 50 |
"blakeurmos/maya-7b-lora-v1": {
|
| 51 |
"name": "Maya 7B (Fine-tuned)",
|
| 52 |
"description": "Maya's personality fine-tuned on Mistral-7B",
|
| 53 |
"size": "LoRA (~14MB + base model)",
|
| 54 |
+
"type": "inference_api", # Use inference API to avoid gated model issues
|
|
|
|
| 55 |
"requires_auth": True
|
| 56 |
},
|
| 57 |
# Latest Mistral instruction model
|
|
|
|
| 100 |
# For inference API, just create a pipeline
|
| 101 |
logger.info(f"Setting up inference API pipeline for {model_id}")
|
| 102 |
|
| 103 |
+
# Use auth token if available - check multiple possible env vars
|
| 104 |
+
auth_token = None
|
| 105 |
+
if use_auth_token:
|
| 106 |
+
auth_token = (
|
| 107 |
+
os.getenv("HUGGINGFACE_API_TOKEN") or
|
| 108 |
+
os.getenv("HF_TOKEN") or
|
| 109 |
+
os.getenv("HUGGINGFACE_TOKEN") or
|
| 110 |
+
os.getenv("HF_API_TOKEN")
|
| 111 |
+
)
|
| 112 |
+
if auth_token:
|
| 113 |
+
logger.info("Using HuggingFace authentication token")
|
| 114 |
+
else:
|
| 115 |
+
logger.warning("Auth requested but no HF token found in environment")
|
| 116 |
|
| 117 |
pipe = pipeline(
|
| 118 |
"text-generation",
|
|
|
|
| 193 |
logger.error(f"No base model specified for LoRA {model_id}")
|
| 194 |
return False
|
| 195 |
|
| 196 |
+
# Use auth token if available - check multiple possible env vars
|
| 197 |
+
auth_token = None
|
| 198 |
+
if use_auth_token:
|
| 199 |
+
auth_token = (
|
| 200 |
+
os.getenv("HUGGINGFACE_API_TOKEN") or
|
| 201 |
+
os.getenv("HF_TOKEN") or
|
| 202 |
+
os.getenv("HUGGINGFACE_TOKEN") or
|
| 203 |
+
os.getenv("HF_API_TOKEN")
|
| 204 |
+
)
|
| 205 |
+
if auth_token:
|
| 206 |
+
logger.info("Using HuggingFace authentication token")
|
| 207 |
+
else:
|
| 208 |
+
logger.warning("Auth requested but no HF token found in environment")
|
| 209 |
|
| 210 |
# Load base model first
|
| 211 |
logger.info(f"Loading base model {base_model_id}...")
|
|
|
|
| 318 |
formatted_prompt = f"<s>[INST] {prompt} [/INST]"
|
| 319 |
else:
|
| 320 |
formatted_prompt = prompt
|
| 321 |
+
elif target_model == "blakeurmos/maya-7b-lora-v1":
|
| 322 |
+
# Maya model always needs Mistral format (even via inference API)
|
| 323 |
+
formatted_prompt = f"<s>[INST] {prompt} [/INST]"
|
| 324 |
else:
|
| 325 |
formatted_prompt = prompt
|
| 326 |
|