sachiniyer committed on
Commit
7a10114
·
verified ·
1 Parent(s): 25cd75a

Upload folder using huggingface_hub

Browse files
__pycache__/backend.cpython-312.pyc CHANGED
Binary files a/__pycache__/backend.cpython-312.pyc and b/__pycache__/backend.cpython-312.pyc differ
 
__pycache__/models.cpython-312.pyc ADDED
Binary file (471 Bytes). View file
 
backend.py CHANGED
@@ -1,63 +1,89 @@
 
1
  import os
2
 
3
  import modal
4
  from fastapi import Header
5
 
6
- MODEL_IDS = [
7
- "sachiniyer/SmolLM2-DPO-Schwinn-SmolLM2-Base",
8
- "sachiniyer/SmolLM2-DPO-Schwinn-gpt-5-mini-base",
9
- "sachiniyer/Qwen2.5-0.5B-DPO-Schwinn",
10
- "sachiniyer/SmolLM2-FT-SFT-Learning",
11
- "sachiniyer/DeepSeek-R1-LoRA-Finetuned",
12
- "sachiniyer/DeepSeek-R1-QLoRA-Finetuned",
13
- ]
14
 
15
  image = (
16
  modal.Image.debian_slim(python_version="3.12")
17
- .pip_install("torch", "transformers", "accelerate", "fastapi")
 
18
  )
19
 
20
  app = modal.App("posttraining-chat", image=image)
 
21
 
22
 
23
  @app.cls(
24
  gpu="T4",
25
  scaledown_window=60,
26
  secrets=[modal.Secret.from_dotenv()],
 
27
  )
28
  class Inference:
29
  @modal.enter()
30
- def load_models(self):
 
 
 
 
 
 
 
 
31
  import torch
32
  from transformers import AutoModelForCausalLM, AutoTokenizer
33
 
34
- self.models = {}
35
- for model_id in MODEL_IDS:
36
- print(f"Loading model: {model_id}")
37
  tokenizer = AutoTokenizer.from_pretrained(model_id)
 
38
  model = AutoModelForCausalLM.from_pretrained(
39
  model_id,
40
  torch_dtype=torch.float16,
41
  device_map="auto",
42
  )
 
43
  self.models[model_id] = {"model": model, "tokenizer": tokenizer}
44
- print(f"Loaded: {model_id}")
 
 
 
45
 
46
  @modal.fastapi_endpoint(method="POST")
47
  def generate(self, request: dict, x_api_key: str | None = Header(None)) -> dict:
48
  import torch
49
 
 
 
 
 
50
  expected_key = os.environ.get("MODEL_SITE_API_KEY")
51
  if not expected_key or x_api_key != expected_key:
 
52
  return {"error": "Unauthorized - invalid API key"}
53
 
54
  model_id = request.get("model_id", MODEL_IDS[0])
55
  message = request.get("message", "")
56
  history = request.get("history", [])
57
 
58
- if model_id not in self.models:
 
59
  return {"error": f"Model {model_id} not found"}
60
 
 
 
 
 
 
 
61
  tokenizer = self.models[model_id]["tokenizer"]
62
  model = self.models[model_id]["model"]
63
 
@@ -66,19 +92,26 @@ class Inference:
66
  conversation += f"User: {user_msg}\nAssistant: {assistant_msg}\n"
67
  conversation += f"User: {message}\nAssistant:"
68
 
69
- inputs = tokenizer(conversation, return_tensors="pt").to("cuda")
70
-
71
- with torch.no_grad():
72
- outputs = model.generate(
73
- **inputs,
74
- max_new_tokens=256,
75
- do_sample=True,
76
- temperature=0.7,
77
- top_p=0.9,
78
- pad_token_id=tokenizer.eos_token_id,
79
- )
80
-
81
- response = tokenizer.decode(outputs[0], skip_special_tokens=True)
82
- response = response.split("Assistant:")[-1].strip()
83
-
84
- return {"response": response}
 
 
 
 
 
 
 
 
1
+ import logging
2
  import os
3
 
4
  import modal
5
  from fastapi import Header
6
 
7
+ from models import MODEL_IDS
8
+
9
+ logging.basicConfig(level=logging.INFO)
10
+ logger = logging.getLogger(__name__)
11
+
12
+ CACHE_DIR = "/cache"
 
 
13
 
14
  image = (
15
  modal.Image.debian_slim(python_version="3.12")
16
+ .pip_install("torch", "transformers", "accelerate", "fastapi", "bitsandbytes")
17
+ .add_local_dir("site", "/root")
18
  )
19
 
20
  app = modal.App("posttraining-chat", image=image)
21
+ cache_vol = modal.Volume.from_name("hf-cache", create_if_missing=True)
22
 
23
 
24
  @app.cls(
25
  gpu="T4",
26
  scaledown_window=60,
27
  secrets=[modal.Secret.from_dotenv()],
28
+ volumes={CACHE_DIR: cache_vol},
29
  )
30
  class Inference:
31
  @modal.enter()
32
+ def setup(self):
33
+ os.environ["HF_HOME"] = CACHE_DIR
34
+ self.models = {}
35
+
36
+ def load_model(self, model_id: str):
37
+ if model_id in self.models:
38
+ logger.info(f"Model already loaded: {model_id}")
39
+ return
40
+
41
  import torch
42
  from transformers import AutoModelForCausalLM, AutoTokenizer
43
 
44
+ logger.info(f"Loading model: {model_id}")
45
+ try:
 
46
  tokenizer = AutoTokenizer.from_pretrained(model_id)
47
+ logger.info(f"Tokenizer loaded for {model_id}")
48
  model = AutoModelForCausalLM.from_pretrained(
49
  model_id,
50
  torch_dtype=torch.float16,
51
  device_map="auto",
52
  )
53
+ logger.info(f"Model loaded successfully: {model_id}")
54
  self.models[model_id] = {"model": model, "tokenizer": tokenizer}
55
+ cache_vol.commit()
56
+ except Exception as e:
57
+ logger.error(f"Failed to load model {model_id}: {e}")
58
+ raise
59
 
60
  @modal.fastapi_endpoint(method="POST")
61
  def generate(self, request: dict, x_api_key: str | None = Header(None)) -> dict:
62
  import torch
63
 
64
+ logger.info(
65
+ f"Received request: model_id={request.get('model_id')}, message_len={len(request.get('message', ''))}, history_len={len(request.get('history', []))}"
66
+ )
67
+
68
  expected_key = os.environ.get("MODEL_SITE_API_KEY")
69
  if not expected_key or x_api_key != expected_key:
70
+ logger.warning("Auth failed: invalid or missing API key")
71
  return {"error": "Unauthorized - invalid API key"}
72
 
73
  model_id = request.get("model_id", MODEL_IDS[0])
74
  message = request.get("message", "")
75
  history = request.get("history", [])
76
 
77
+ if model_id not in MODEL_IDS:
78
+ logger.warning(f"Model not found: {model_id}")
79
  return {"error": f"Model {model_id} not found"}
80
 
81
+ try:
82
+ self.load_model(model_id)
83
+ except Exception as e:
84
+ logger.error(f"Model loading failed: {e}")
85
+ return {"error": f"Failed to load model: {e}"}
86
+
87
  tokenizer = self.models[model_id]["tokenizer"]
88
  model = self.models[model_id]["model"]
89
 
 
92
  conversation += f"User: {user_msg}\nAssistant: {assistant_msg}\n"
93
  conversation += f"User: {message}\nAssistant:"
94
 
95
+ try:
96
+ inputs = tokenizer(conversation, return_tensors="pt").to("cuda")
97
+ logger.info(f"Tokenized input shape: {inputs['input_ids'].shape}")
98
+
99
+ with torch.no_grad():
100
+ outputs = model.generate(
101
+ **inputs,
102
+ max_new_tokens=256,
103
+ do_sample=True,
104
+ temperature=0.7,
105
+ top_p=0.9,
106
+ pad_token_id=tokenizer.eos_token_id,
107
+ )
108
+ logger.info(f"Generated output shape: {outputs.shape}")
109
+
110
+ response = tokenizer.decode(outputs[0], skip_special_tokens=True)
111
+ response = response.split("Assistant:")[-1].strip()
112
+ logger.info(f"Final response length: {len(response)}")
113
+
114
+ return {"response": response}
115
+ except Exception as e:
116
+ logger.error(f"Inference failed: {e}", exc_info=True)
117
+ return {"error": f"Inference failed: {e}"}
deploy.py CHANGED
@@ -11,14 +11,19 @@ from huggingface_hub import HfApi
11
 
12
  load_dotenv()
13
 
 
 
14
 
15
  def main():
16
- # Check required env vars
17
  api_key = os.environ.get("MODEL_SITE_API_KEY")
18
  site_password = os.environ.get("SITE_PASSWORD")
19
  if not api_key or not site_password:
20
  sys.exit("ERROR: MODEL_SITE_API_KEY and SITE_PASSWORD must be set in .env")
21
 
 
 
 
 
22
  # Deploy Modal backend
23
  print("Deploying Modal backend...")
24
  result = subprocess.run(
@@ -33,8 +38,7 @@ def main():
33
  sys.exit("ERROR: Could not find Modal endpoint URL")
34
  modal_endpoint = match.group(0)
35
 
36
- # Generate requirements and deploy to HuggingFace
37
- print("Deploying to HuggingFace Spaces (select 'cpu-basic')...")
38
  result = subprocess.run(
39
  ["uv", "export", "--group", "site", "--no-hashes", "--no-dev"],
40
  capture_output=True,
@@ -43,19 +47,25 @@ def main():
43
  with open("site/requirements.txt", "w") as f:
44
  f.write(result.stdout)
45
 
46
- subprocess.run(
47
- ["uv", "run", "--group", "site", "gradio", "deploy",
48
- "--title", "posttraining-practice", "--app-file", "app.py"],
49
- cwd="site",
 
 
 
 
 
 
 
 
 
 
50
  )
51
  os.remove("site/requirements.txt")
52
 
53
  # Set secrets
54
- space_id = input("Space ID (e.g., sachiniyer/posttraining-practice): ").strip()
55
- if not space_id:
56
- sys.exit("ERROR: Space ID required")
57
-
58
- api = HfApi()
59
  api.add_space_secret(repo_id=space_id, key="MODAL_ENDPOINT", value=modal_endpoint)
60
  api.add_space_secret(repo_id=space_id, key="MODEL_SITE_API_KEY", value=api_key)
61
  api.add_space_secret(repo_id=space_id, key="SITE_PASSWORD", value=site_password)
 
11
 
12
  load_dotenv()
13
 
14
+ SPACE_TITLE = "posttraining-practice"
15
+
16
 
17
  def main():
 
18
  api_key = os.environ.get("MODEL_SITE_API_KEY")
19
  site_password = os.environ.get("SITE_PASSWORD")
20
  if not api_key or not site_password:
21
  sys.exit("ERROR: MODEL_SITE_API_KEY and SITE_PASSWORD must be set in .env")
22
 
23
+ api = HfApi()
24
+ user = api.whoami()["name"]
25
+ space_id = f"{user}/{SPACE_TITLE}"
26
+
27
  # Deploy Modal backend
28
  print("Deploying Modal backend...")
29
  result = subprocess.run(
 
38
  sys.exit("ERROR: Could not find Modal endpoint URL")
39
  modal_endpoint = match.group(0)
40
 
41
+ # Generate requirements.txt
 
42
  result = subprocess.run(
43
  ["uv", "export", "--group", "site", "--no-hashes", "--no-dev"],
44
  capture_output=True,
 
47
  with open("site/requirements.txt", "w") as f:
48
  f.write(result.stdout)
49
 
50
+ # Create/update HuggingFace Space
51
+ print(f"Deploying to HuggingFace Space {space_id}...")
52
+ api.create_repo(
53
+ repo_id=space_id,
54
+ repo_type="space",
55
+ space_sdk="gradio",
56
+ space_hardware="cpu-basic",
57
+ exist_ok=True,
58
+ )
59
+
60
+ api.upload_folder(
61
+ folder_path="site",
62
+ repo_id=space_id,
63
+ repo_type="space",
64
  )
65
  os.remove("site/requirements.txt")
66
 
67
  # Set secrets
68
+ print("Setting secrets...")
 
 
 
 
69
  api.add_space_secret(repo_id=space_id, key="MODAL_ENDPOINT", value=modal_endpoint)
70
  api.add_space_secret(repo_id=space_id, key="MODEL_SITE_API_KEY", value=api_key)
71
  api.add_space_secret(repo_id=space_id, key="SITE_PASSWORD", value=site_password)
models.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ MODEL_IDS = [
2
+ "sachiniyer/Qwen2.5-0.5B-DPO-Schwinn",
3
+ "sachiniyer/Qwen2.5-0.5B-PPO-Schwinn",
4
+ "sachiniyer/SmolLM2-DPO-Schwinn-SmolLM2-Base",
5
+ "sachiniyer/SmolLM2-DPO-Schwinn-gpt-5-mini-base",
6
+ "sachiniyer/SmolLM2-FT-SFT-Learning",
7
+ "sachiniyer/DeepSeek-R1-LoRA-Finetuned",
8
+ "sachiniyer/DeepSeek-R1-QLoRA-Finetuned",
9
+ ]