Spaces:

sachiniyer
/

posttraining-practice

Sleeping

App Files Community

sachiniyer committed on Jan 15

Commit

7a10114

verified ·

1 Parent(s): 25cd75a

Upload folder using huggingface_hub

Browse files

Files changed (5) hide show

__pycache__/backend.cpython-312.pyc +0 -0
__pycache__/models.cpython-312.pyc +0 -0
backend.py +64 -31
deploy.py +22 -12
models.py +9 -0

__pycache__/backend.cpython-312.pyc CHANGED Viewed

Binary files a/__pycache__/backend.cpython-312.pyc and b/__pycache__/backend.cpython-312.pyc differ

__pycache__/models.cpython-312.pyc ADDED Viewed

Binary file (471 Bytes). View file

backend.py CHANGED Viewed

@@ -1,63 +1,89 @@
 import os
 import modal
 from fastapi import Header
-MODEL_IDS = [
-    "sachiniyer/SmolLM2-DPO-Schwinn-SmolLM2-Base",
-    "sachiniyer/SmolLM2-DPO-Schwinn-gpt-5-mini-base",
-    "sachiniyer/Qwen2.5-0.5B-DPO-Schwinn",
-    "sachiniyer/SmolLM2-FT-SFT-Learning",
-    "sachiniyer/DeepSeek-R1-LoRA-Finetuned",
-    "sachiniyer/DeepSeek-R1-QLoRA-Finetuned",
-]
 image = (
     modal.Image.debian_slim(python_version="3.12")
-    .pip_install("torch", "transformers", "accelerate", "fastapi")
 )
 app = modal.App("posttraining-chat", image=image)
 @app.cls(
     gpu="T4",
     scaledown_window=60,
     secrets=[modal.Secret.from_dotenv()],
 )
 class Inference:
     @modal.enter()
-    def load_models(self):
         import torch
         from transformers import AutoModelForCausalLM, AutoTokenizer
-        self.models = {}
-        for model_id in MODEL_IDS:
-            print(f"Loading model: {model_id}")
             tokenizer = AutoTokenizer.from_pretrained(model_id)
             model = AutoModelForCausalLM.from_pretrained(
                 model_id,
                 torch_dtype=torch.float16,
                 device_map="auto",
             )
             self.models[model_id] = {"model": model, "tokenizer": tokenizer}
-            print(f"Loaded: {model_id}")
     @modal.fastapi_endpoint(method="POST")
     def generate(self, request: dict, x_api_key: str | None = Header(None)) -> dict:
         import torch
         expected_key = os.environ.get("MODEL_SITE_API_KEY")
         if not expected_key or x_api_key != expected_key:
             return {"error": "Unauthorized - invalid API key"}
         model_id = request.get("model_id", MODEL_IDS[0])
         message = request.get("message", "")
         history = request.get("history", [])
-        if model_id not in self.models:
             return {"error": f"Model {model_id} not found"}
         tokenizer = self.models[model_id]["tokenizer"]
         model = self.models[model_id]["model"]
@@ -66,19 +92,26 @@ class Inference:
             conversation += f"User: {user_msg}\nAssistant: {assistant_msg}\n"
         conversation += f"User: {message}\nAssistant:"
-        inputs = tokenizer(conversation, return_tensors="pt").to("cuda")
-        with torch.no_grad():
-            outputs = model.generate(
-                **inputs,
-                max_new_tokens=256,
-                do_sample=True,
-                temperature=0.7,
-                top_p=0.9,
-                pad_token_id=tokenizer.eos_token_id,
-            )
-        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        response = response.split("Assistant:")[-1].strip()
-        return {"response": response}

+import logging
 import os
 import modal
 from fastapi import Header
+from models import MODEL_IDS
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+CACHE_DIR = "/cache"
 image = (
     modal.Image.debian_slim(python_version="3.12")
+    .pip_install("torch", "transformers", "accelerate", "fastapi", "bitsandbytes")
+    .add_local_dir("site", "/root")
 )
 app = modal.App("posttraining-chat", image=image)
+cache_vol = modal.Volume.from_name("hf-cache", create_if_missing=True)
 @app.cls(
     gpu="T4",
     scaledown_window=60,
     secrets=[modal.Secret.from_dotenv()],
+    volumes={CACHE_DIR: cache_vol},
 )
 class Inference:
     @modal.enter()
+    def setup(self):
+        os.environ["HF_HOME"] = CACHE_DIR
+        self.models = {}
+    def load_model(self, model_id: str):
+        if model_id in self.models:
+            logger.info(f"Model already loaded: {model_id}")
+            return
         import torch
         from transformers import AutoModelForCausalLM, AutoTokenizer
+        logger.info(f"Loading model: {model_id}")
+        try:
             tokenizer = AutoTokenizer.from_pretrained(model_id)
+            logger.info(f"Tokenizer loaded for {model_id}")
             model = AutoModelForCausalLM.from_pretrained(
                 model_id,
                 torch_dtype=torch.float16,
                 device_map="auto",
             )
+            logger.info(f"Model loaded successfully: {model_id}")
             self.models[model_id] = {"model": model, "tokenizer": tokenizer}
+            cache_vol.commit()
+        except Exception as e:
+            logger.error(f"Failed to load model {model_id}: {e}")
+            raise
     @modal.fastapi_endpoint(method="POST")
     def generate(self, request: dict, x_api_key: str | None = Header(None)) -> dict:
         import torch
+        logger.info(
+            f"Received request: model_id={request.get('model_id')}, message_len={len(request.get('message', ''))}, history_len={len(request.get('history', []))}"
+        )
         expected_key = os.environ.get("MODEL_SITE_API_KEY")
         if not expected_key or x_api_key != expected_key:
+            logger.warning("Auth failed: invalid or missing API key")
             return {"error": "Unauthorized - invalid API key"}
         model_id = request.get("model_id", MODEL_IDS[0])
         message = request.get("message", "")
         history = request.get("history", [])
+        if model_id not in MODEL_IDS:
+            logger.warning(f"Model not found: {model_id}")
             return {"error": f"Model {model_id} not found"}
+        try:
+            self.load_model(model_id)
+        except Exception as e:
+            logger.error(f"Model loading failed: {e}")
+            return {"error": f"Failed to load model: {e}"}
         tokenizer = self.models[model_id]["tokenizer"]
         model = self.models[model_id]["model"]
             conversation += f"User: {user_msg}\nAssistant: {assistant_msg}\n"
         conversation += f"User: {message}\nAssistant:"
+        try:
+            inputs = tokenizer(conversation, return_tensors="pt").to("cuda")
+            logger.info(f"Tokenized input shape: {inputs['input_ids'].shape}")
+            with torch.no_grad():
+                outputs = model.generate(
+                    **inputs,
+                    max_new_tokens=256,
+                    do_sample=True,
+                    temperature=0.7,
+                    top_p=0.9,
+                    pad_token_id=tokenizer.eos_token_id,
+                )
+            logger.info(f"Generated output shape: {outputs.shape}")
+            response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+            response = response.split("Assistant:")[-1].strip()
+            logger.info(f"Final response length: {len(response)}")
+            return {"response": response}
+        except Exception as e:
+            logger.error(f"Inference failed: {e}", exc_info=True)
+            return {"error": f"Inference failed: {e}"}

deploy.py CHANGED Viewed

@@ -11,14 +11,19 @@ from huggingface_hub import HfApi
 load_dotenv()
 def main():
-    # Check required env vars
     api_key = os.environ.get("MODEL_SITE_API_KEY")
     site_password = os.environ.get("SITE_PASSWORD")
     if not api_key or not site_password:
         sys.exit("ERROR: MODEL_SITE_API_KEY and SITE_PASSWORD must be set in .env")
     # Deploy Modal backend
     print("Deploying Modal backend...")
     result = subprocess.run(
@@ -33,8 +38,7 @@ def main():
         sys.exit("ERROR: Could not find Modal endpoint URL")
     modal_endpoint = match.group(0)
-    # Generate requirements and deploy to HuggingFace
-    print("Deploying to HuggingFace Spaces (select 'cpu-basic')...")
     result = subprocess.run(
         ["uv", "export", "--group", "site", "--no-hashes", "--no-dev"],
         capture_output=True,
@@ -43,19 +47,25 @@ def main():
     with open("site/requirements.txt", "w") as f:
         f.write(result.stdout)
-    subprocess.run(
-        ["uv", "run", "--group", "site", "gradio", "deploy",
-         "--title", "posttraining-practice", "--app-file", "app.py"],
-        cwd="site",
     )
     os.remove("site/requirements.txt")
     # Set secrets
-    space_id = input("Space ID (e.g., sachiniyer/posttraining-practice): ").strip()
-    if not space_id:
-        sys.exit("ERROR: Space ID required")
-    api = HfApi()
     api.add_space_secret(repo_id=space_id, key="MODAL_ENDPOINT", value=modal_endpoint)
     api.add_space_secret(repo_id=space_id, key="MODEL_SITE_API_KEY", value=api_key)
     api.add_space_secret(repo_id=space_id, key="SITE_PASSWORD", value=site_password)

 load_dotenv()
+SPACE_TITLE = "posttraining-practice"
 def main():
     api_key = os.environ.get("MODEL_SITE_API_KEY")
     site_password = os.environ.get("SITE_PASSWORD")
     if not api_key or not site_password:
         sys.exit("ERROR: MODEL_SITE_API_KEY and SITE_PASSWORD must be set in .env")
+    api = HfApi()
+    user = api.whoami()["name"]
+    space_id = f"{user}/{SPACE_TITLE}"
     # Deploy Modal backend
     print("Deploying Modal backend...")
     result = subprocess.run(
         sys.exit("ERROR: Could not find Modal endpoint URL")
     modal_endpoint = match.group(0)
+    # Generate requirements.txt
     result = subprocess.run(
         ["uv", "export", "--group", "site", "--no-hashes", "--no-dev"],
         capture_output=True,
     with open("site/requirements.txt", "w") as f:
         f.write(result.stdout)
+    # Create/update HuggingFace Space
+    print(f"Deploying to HuggingFace Space {space_id}...")
+    api.create_repo(
+        repo_id=space_id,
+        repo_type="space",
+        space_sdk="gradio",
+        space_hardware="cpu-basic",
+        exist_ok=True,
+    )
+    api.upload_folder(
+        folder_path="site",
+        repo_id=space_id,
+        repo_type="space",
     )
     os.remove("site/requirements.txt")
     # Set secrets
+    print("Setting secrets...")
     api.add_space_secret(repo_id=space_id, key="MODAL_ENDPOINT", value=modal_endpoint)
     api.add_space_secret(repo_id=space_id, key="MODEL_SITE_API_KEY", value=api_key)
     api.add_space_secret(repo_id=space_id, key="SITE_PASSWORD", value=site_password)

models.py ADDED Viewed

	@@ -0,0 +1,9 @@

+MODEL_IDS = [
+    "sachiniyer/Qwen2.5-0.5B-DPO-Schwinn",
+    "sachiniyer/Qwen2.5-0.5B-PPO-Schwinn",
+    "sachiniyer/SmolLM2-DPO-Schwinn-SmolLM2-Base",
+    "sachiniyer/SmolLM2-DPO-Schwinn-gpt-5-mini-base",
+    "sachiniyer/SmolLM2-FT-SFT-Learning",
+    "sachiniyer/DeepSeek-R1-LoRA-Finetuned",
+    "sachiniyer/DeepSeek-R1-QLoRA-Finetuned",
+]