lowvoltagenation committed
Commit 2043924 · 1 Parent(s): ed31be4

Add support for LoRA model loading in ModelInterface


- Updated requirements.txt to add the 'peft' library.
- Enhanced ModelInterface to load LoRA adapters on top of their base models, including error handling and tokenizer setup (see the usage sketch below).
- Integrated logging into the model-loading path to improve feedback during operations.
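
For context, a hedged sketch of how the new "lora" path might be invoked. The method name load_model and the exact config shape are assumptions inferred from the diff below, not a documented API:

from src.model_interface import ModelInterface  # import path per this repo's layout

interface = ModelInterface()
ok = interface.load_model(                       # hypothetical method name
    model_id="your-org/your-lora-adapter",       # placeholder LoRA adapter repo
    model_type="lora",
    model_config={"base_model": "base-org/base-model"},  # required for LoRA loading
    use_auth_token=True,  # reads HUGGINGFACE_API_TOKEN from the environment
)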

requirements.txt CHANGED
@@ -12,6 +12,7 @@ langchain-community>=0.0.10
 # HuggingFace Integration
 huggingface_hub>=0.18.0
 datasets>=2.14.0
+peft>=0.6.0
 
 # Model Providers (Optional)
 anthropic>=0.5.0
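
A quick way to confirm the new dependency resolves after installing from the updated requirements (peft exposes its version string):

import peft
print(peft.__version__)  # expect >= 0.6.0 per the pin above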
src/__pycache__/model_interface.cpython-313.pyc CHANGED
Binary files a/src/__pycache__/model_interface.cpython-313.pyc and b/src/__pycache__/model_interface.cpython-313.pyc differ
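
As an aside, compiled bytecode under __pycache__/ is usually kept out of version control; a typical .gitignore entry would be (a suggestion, not part of this commit):

__pycache__/
*.pyc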
 
src/model_interface.py CHANGED
@@ -12,6 +12,7 @@ from transformers import (
     pipeline,
     BitsAndBytesConfig
 )
+from peft import PeftModel
 import torch
 from huggingface_hub import HfApi
 import json
@@ -173,6 +174,60 @@ class ModelInterface:
                 "type": "local"
             }
 
+        elif model_type == "lora":
+            # Load LoRA adapter with base model
+            logger.info(f"Loading LoRA model {model_id}...")
+
+            base_model_id = model_config.get("base_model")
+            if not base_model_id:
+                logger.error(f"No base model specified for LoRA {model_id}")
+                return False
+
+            # Use auth token if available
+            auth_token = os.getenv("HUGGINGFACE_API_TOKEN") if use_auth_token else None
+
+            # Load base model first
+            logger.info(f"Loading base model {base_model_id}...")
+            base_model = AutoModelForCausalLM.from_pretrained(
+                base_model_id,
+                token=auth_token,
+                torch_dtype=torch.float16,
+                device_map="auto" if torch.cuda.is_available() else None,
+                low_cpu_mem_usage=True
+            )
+
+            # Load LoRA adapter
+            logger.info(f"Loading LoRA adapter {model_id}...")
+            model = PeftModel.from_pretrained(base_model, model_id, token=auth_token)
+
+            # Load tokenizer (from base model)
+            tokenizer = AutoTokenizer.from_pretrained(
+                base_model_id,
+                token=auth_token,
+                padding_side="left"
+            )
+
+            # Add pad token if missing
+            if tokenizer.pad_token is None:
+                tokenizer.pad_token = tokenizer.eos_token
+
+            # Create pipeline
+            pipe = pipeline(
+                "text-generation",
+                model=model,
+                tokenizer=tokenizer,
+                device=0 if torch.cuda.is_available() else -1,
+                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
+            )
+
+            self.models[model_id] = {
+                "pipeline": pipe,
+                "tokenizer": tokenizer,
+                "model": model,
+                "type": "lora",
+                "base_model": base_model_id
+            }
+
         else:
             logger.error(f"Unknown model type: {model_type}")
             return False
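
One caveat when reusing this pattern: as committed, both device_map="auto" (in from_pretrained) and device=0 (in pipeline) are set when CUDA is available, and recent transformers releases may reject that combination with a ValueError, since an accelerate-dispatched model cannot be moved to a specific device afterwards. Below is a minimal standalone sketch of the same LoRA loading path that sidesteps the conflict; the repo IDs are placeholders, not models referenced by this commit:

import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

BASE_MODEL = "base-org/base-model"        # placeholder base model repo
ADAPTER = "your-org/your-lora-adapter"    # placeholder LoRA adapter repo

use_cuda = torch.cuda.is_available()

# Let accelerate place the weights on GPU when available; keep float32 on CPU,
# where float16 matmuls are slow or unsupported.
base = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float16 if use_cuda else torch.float32,
    device_map="auto" if use_cuda else None,
    low_cpu_mem_usage=True,
)
model = PeftModel.from_pretrained(base, ADAPTER)

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, padding_side="left")
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token  # causal LMs often ship without one

# Key difference from the commit: pass device= only when no device_map was used,
# leaving accelerate-dispatched models where they already are.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    **({} if use_cuda else {"device": -1}),
)
print(pipe("Hello,", max_new_tokens=8)[0]["generated_text"])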