Spaces:
Sleeping
Sleeping
lowvoltagenation committed on
Commit ·
fca6913
1
Parent(s): 4e11a03
Fix HF Spaces auth: Use inference API for Maya model to avoid gated base model download
Browse files- src/app.py +1 -1
- src/model_interface.py +31 -7
src/app.py
CHANGED
|
@@ -101,7 +101,7 @@ class MayaGradioApp:
|
|
| 101 |
model_type = model_config.get("type", "local")
|
| 102 |
|
| 103 |
if self.current_model == "blakeurmos/maya-7b-lora-v1":
|
| 104 |
-
# For Maya
|
| 105 |
full_prompt = ""
|
| 106 |
|
| 107 |
# Add only recent conversation history (keep it simple)
|
|
|
|
| 101 |
model_type = model_config.get("type", "local")
|
| 102 |
|
| 103 |
if self.current_model == "blakeurmos/maya-7b-lora-v1":
|
| 104 |
+
# For Maya model, use simple conversational format since it's already fine-tuned
|
| 105 |
full_prompt = ""
|
| 106 |
|
| 107 |
# Add only recent conversation history (keep it simple)
|
src/model_interface.py
CHANGED
|
@@ -46,13 +46,12 @@ class ModelInterface:
|
|
| 46 |
|
| 47 |
# Define available models (optimized for HuggingFace Spaces)
|
| 48 |
self.available_models = {
|
| 49 |
-
# Maya's fine-tuned LoRA model
|
| 50 |
"blakeurmos/maya-7b-lora-v1": {
|
| 51 |
"name": "Maya 7B (Fine-tuned)",
|
| 52 |
"description": "Maya's personality fine-tuned on Mistral-7B",
|
| 53 |
"size": "LoRA (~14MB + base model)",
|
| 54 |
-
"type": "
|
| 55 |
-
"base_model": "mistralai/Mistral-7B-Instruct-v0.3",
|
| 56 |
"requires_auth": True
|
| 57 |
},
|
| 58 |
# Latest Mistral instruction model
|
|
@@ -101,8 +100,19 @@ class ModelInterface:
|
|
| 101 |
# For inference API, just create a pipeline
|
| 102 |
logger.info(f"Setting up inference API pipeline for {model_id}")
|
| 103 |
|
| 104 |
-
# Use auth token if available
|
| 105 |
-
auth_token =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
|
| 107 |
pipe = pipeline(
|
| 108 |
"text-generation",
|
|
@@ -183,8 +193,19 @@ class ModelInterface:
|
|
| 183 |
logger.error(f"No base model specified for LoRA {model_id}")
|
| 184 |
return False
|
| 185 |
|
| 186 |
-
# Use auth token if available
|
| 187 |
-
auth_token =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 188 |
|
| 189 |
# Load base model first
|
| 190 |
logger.info(f"Loading base model {base_model_id}...")
|
|
@@ -297,6 +318,9 @@ class ModelInterface:
|
|
| 297 |
formatted_prompt = f"<s>[INST] {prompt} [/INST]"
|
| 298 |
else:
|
| 299 |
formatted_prompt = prompt
|
|
|
|
|
|
|
|
|
|
| 300 |
else:
|
| 301 |
formatted_prompt = prompt
|
| 302 |
|
|
|
|
| 46 |
|
| 47 |
# Define available models (optimized for HuggingFace Spaces)
|
| 48 |
self.available_models = {
|
| 49 |
+
# Maya's fine-tuned LoRA model (use inference API on HF Spaces)
|
| 50 |
"blakeurmos/maya-7b-lora-v1": {
|
| 51 |
"name": "Maya 7B (Fine-tuned)",
|
| 52 |
"description": "Maya's personality fine-tuned on Mistral-7B",
|
| 53 |
"size": "LoRA (~14MB + base model)",
|
| 54 |
+
"type": "inference_api", # Use inference API to avoid gated model issues
|
|
|
|
| 55 |
"requires_auth": True
|
| 56 |
},
|
| 57 |
# Latest Mistral instruction model
|
|
|
|
| 100 |
# For inference API, just create a pipeline
|
| 101 |
logger.info(f"Setting up inference API pipeline for {model_id}")
|
| 102 |
|
| 103 |
+
# Use auth token if available - check multiple possible env vars
|
| 104 |
+
auth_token = None
|
| 105 |
+
if use_auth_token:
|
| 106 |
+
auth_token = (
|
| 107 |
+
os.getenv("HUGGINGFACE_API_TOKEN") or
|
| 108 |
+
os.getenv("HF_TOKEN") or
|
| 109 |
+
os.getenv("HUGGINGFACE_TOKEN") or
|
| 110 |
+
os.getenv("HF_API_TOKEN")
|
| 111 |
+
)
|
| 112 |
+
if auth_token:
|
| 113 |
+
logger.info("Using HuggingFace authentication token")
|
| 114 |
+
else:
|
| 115 |
+
logger.warning("Auth requested but no HF token found in environment")
|
| 116 |
|
| 117 |
pipe = pipeline(
|
| 118 |
"text-generation",
|
|
|
|
| 193 |
logger.error(f"No base model specified for LoRA {model_id}")
|
| 194 |
return False
|
| 195 |
|
| 196 |
+
# Use auth token if available - check multiple possible env vars
|
| 197 |
+
auth_token = None
|
| 198 |
+
if use_auth_token:
|
| 199 |
+
auth_token = (
|
| 200 |
+
os.getenv("HUGGINGFACE_API_TOKEN") or
|
| 201 |
+
os.getenv("HF_TOKEN") or
|
| 202 |
+
os.getenv("HUGGINGFACE_TOKEN") or
|
| 203 |
+
os.getenv("HF_API_TOKEN")
|
| 204 |
+
)
|
| 205 |
+
if auth_token:
|
| 206 |
+
logger.info("Using HuggingFace authentication token")
|
| 207 |
+
else:
|
| 208 |
+
logger.warning("Auth requested but no HF token found in environment")
|
| 209 |
|
| 210 |
# Load base model first
|
| 211 |
logger.info(f"Loading base model {base_model_id}...")
|
|
|
|
| 318 |
formatted_prompt = f"<s>[INST] {prompt} [/INST]"
|
| 319 |
else:
|
| 320 |
formatted_prompt = prompt
|
| 321 |
+
elif target_model == "blakeurmos/maya-7b-lora-v1":
|
| 322 |
+
# Maya model always needs Mistral format (even via inference API)
|
| 323 |
+
formatted_prompt = f"<s>[INST] {prompt} [/INST]"
|
| 324 |
else:
|
| 325 |
formatted_prompt = prompt
|
| 326 |
|