lowvoltagenation committed on
Commit
fca6913
·
1 Parent(s): 4e11a03

Fix HF Spaces auth: Use inference API for Maya model to avoid gated base model download

Browse files
Files changed (2) hide show
  1. src/app.py +1 -1
  2. src/model_interface.py +31 -7
src/app.py CHANGED
@@ -101,7 +101,7 @@ class MayaGradioApp:
101
  model_type = model_config.get("type", "local")
102
 
103
  if self.current_model == "blakeurmos/maya-7b-lora-v1":
104
- # For Maya LoRA, use simple conversational format since it's already fine-tuned
105
  full_prompt = ""
106
 
107
  # Add only recent conversation history (keep it simple)
 
101
  model_type = model_config.get("type", "local")
102
 
103
  if self.current_model == "blakeurmos/maya-7b-lora-v1":
104
+ # For Maya model, use simple conversational format since it's already fine-tuned
105
  full_prompt = ""
106
 
107
  # Add only recent conversation history (keep it simple)
src/model_interface.py CHANGED
@@ -46,13 +46,12 @@ class ModelInterface:
46
 
47
  # Define available models (optimized for HuggingFace Spaces)
48
  self.available_models = {
49
- # Maya's fine-tuned LoRA model
50
  "blakeurmos/maya-7b-lora-v1": {
51
  "name": "Maya 7B (Fine-tuned)",
52
  "description": "Maya's personality fine-tuned on Mistral-7B",
53
  "size": "LoRA (~14MB + base model)",
54
- "type": "lora",
55
- "base_model": "mistralai/Mistral-7B-Instruct-v0.3",
56
  "requires_auth": True
57
  },
58
  # Latest Mistral instruction model
@@ -101,8 +100,19 @@ class ModelInterface:
101
  # For inference API, just create a pipeline
102
  logger.info(f"Setting up inference API pipeline for {model_id}")
103
 
104
- # Use auth token if available
105
- auth_token = os.getenv("HUGGINGFACE_API_TOKEN") if use_auth_token else None
 
 
 
 
 
 
 
 
 
 
 
106
 
107
  pipe = pipeline(
108
  "text-generation",
@@ -183,8 +193,19 @@ class ModelInterface:
183
  logger.error(f"No base model specified for LoRA {model_id}")
184
  return False
185
 
186
- # Use auth token if available
187
- auth_token = os.getenv("HUGGINGFACE_API_TOKEN") if use_auth_token else None
 
 
 
 
 
 
 
 
 
 
 
188
 
189
  # Load base model first
190
  logger.info(f"Loading base model {base_model_id}...")
@@ -297,6 +318,9 @@ class ModelInterface:
297
  formatted_prompt = f"<s>[INST] {prompt} [/INST]"
298
  else:
299
  formatted_prompt = prompt
 
 
 
300
  else:
301
  formatted_prompt = prompt
302
 
 
46
 
47
  # Define available models (optimized for HuggingFace Spaces)
48
  self.available_models = {
49
+ # Maya's fine-tuned LoRA model (use inference API on HF Spaces)
50
  "blakeurmos/maya-7b-lora-v1": {
51
  "name": "Maya 7B (Fine-tuned)",
52
  "description": "Maya's personality fine-tuned on Mistral-7B",
53
  "size": "LoRA (~14MB + base model)",
54
+ "type": "inference_api", # Use inference API to avoid gated model issues
 
55
  "requires_auth": True
56
  },
57
  # Latest Mistral instruction model
 
100
  # For inference API, just create a pipeline
101
  logger.info(f"Setting up inference API pipeline for {model_id}")
102
 
103
+ # Use auth token if available - check multiple possible env vars
104
+ auth_token = None
105
+ if use_auth_token:
106
+ auth_token = (
107
+ os.getenv("HUGGINGFACE_API_TOKEN") or
108
+ os.getenv("HF_TOKEN") or
109
+ os.getenv("HUGGINGFACE_TOKEN") or
110
+ os.getenv("HF_API_TOKEN")
111
+ )
112
+ if auth_token:
113
+ logger.info("Using HuggingFace authentication token")
114
+ else:
115
+ logger.warning("Auth requested but no HF token found in environment")
116
 
117
  pipe = pipeline(
118
  "text-generation",
 
193
  logger.error(f"No base model specified for LoRA {model_id}")
194
  return False
195
 
196
+ # Use auth token if available - check multiple possible env vars
197
+ auth_token = None
198
+ if use_auth_token:
199
+ auth_token = (
200
+ os.getenv("HUGGINGFACE_API_TOKEN") or
201
+ os.getenv("HF_TOKEN") or
202
+ os.getenv("HUGGINGFACE_TOKEN") or
203
+ os.getenv("HF_API_TOKEN")
204
+ )
205
+ if auth_token:
206
+ logger.info("Using HuggingFace authentication token")
207
+ else:
208
+ logger.warning("Auth requested but no HF token found in environment")
209
 
210
  # Load base model first
211
  logger.info(f"Loading base model {base_model_id}...")
 
318
  formatted_prompt = f"<s>[INST] {prompt} [/INST]"
319
  else:
320
  formatted_prompt = prompt
321
+ elif target_model == "blakeurmos/maya-7b-lora-v1":
322
+ # Maya model always needs Mistral format (even via inference API)
323
+ formatted_prompt = f"<s>[INST] {prompt} [/INST]"
324
  else:
325
  formatted_prompt = prompt
326