sachiniyer committed on
Commit
b9a427c
·
verified ·
1 Parent(s): 33c92b9

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. __pycache__/backend.cpython-312.pyc +0 -0
  2. app.py +45 -41
  3. backend.py +84 -0
  4. deploy.py +67 -0
__pycache__/backend.cpython-312.pyc ADDED
Binary file (4.03 kB). View file
 
app.py CHANGED
@@ -1,6 +1,7 @@
 
 
1
  import gradio as gr
2
- import torch
3
- from transformers import AutoModelForCausalLM, AutoTokenizer
4
 
5
  MODEL_IDS = [
6
  "sachiniyer/SmolLM2-DPO-Schwinn-SmolLM2-Base",
@@ -11,48 +12,42 @@ MODEL_IDS = [
11
  "sachiniyer/DeepSeek-R1-QLoRA-Finetuned",
12
  ]
13
 
14
- # Load all models
15
- models = {}
16
- for model_id in MODEL_IDS:
17
- print(f"Loading model: {model_id}")
18
- tokenizer = AutoTokenizer.from_pretrained(model_id)
19
- model = AutoModelForCausalLM.from_pretrained(
20
- model_id,
21
- torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
22
- device_map="auto" if torch.cuda.is_available() else None,
23
- )
24
- models[model_id] = {"model": model, "tokenizer": tokenizer}
25
- print(f"Loaded: {model_id}")
26
 
27
 
28
  def make_respond_fn(model_id: str):
29
  def respond(message: str, history: list[tuple[str, str]]) -> str:
30
- tokenizer = models[model_id]["tokenizer"]
31
- model = models[model_id]["model"]
32
-
33
- # Build conversation from history
34
- conversation = ""
35
- for user_msg, assistant_msg in history:
36
- conversation += f"User: {user_msg}\nAssistant: {assistant_msg}\n"
37
- conversation += f"User: {message}\nAssistant:"
38
-
39
- inputs = tokenizer(conversation, return_tensors="pt")
40
- if torch.cuda.is_available():
41
- inputs = inputs.to("cuda")
42
-
43
- outputs = model.generate(
44
- **inputs,
45
- max_new_tokens=256,
46
- do_sample=True,
47
- temperature=0.7,
48
- top_p=0.9,
49
- pad_token_id=tokenizer.eos_token_id,
50
- )
51
-
52
- response = tokenizer.decode(outputs[0], skip_special_tokens=True)
53
- # Extract only the new assistant response
54
- response = response.split("Assistant:")[-1].strip()
55
- return response
56
 
57
  return respond
58
 
@@ -62,6 +57,10 @@ with gr.Blocks(title="posttraining-practice") as demo:
62
  gr.Markdown("# posttraining-practice")
63
  gr.Markdown("Chat with different fine-tuned models")
64
 
 
 
 
 
65
  with gr.Tabs():
66
  for model_id in MODEL_IDS:
67
  short_name = model_id.split("/")[-1]
@@ -71,5 +70,10 @@ with gr.Blocks(title="posttraining-practice") as demo:
71
  description=f"Chatting with: {model_id}",
72
  )
73
 
 
 
 
 
74
  if __name__ == "__main__":
75
- demo.launch()
 
 
1
+ import os
2
+
3
  import gradio as gr
4
+ import requests
 
5
 
6
  MODEL_IDS = [
7
  "sachiniyer/SmolLM2-DPO-Schwinn-SmolLM2-Base",
 
12
  "sachiniyer/DeepSeek-R1-QLoRA-Finetuned",
13
  ]
14
 
15
+ # Modal endpoint URL - set this after deploying backend.py
16
+ MODAL_ENDPOINT = os.environ.get("MODAL_ENDPOINT", "")
17
+ # API key for authenticating with Modal backend
18
+ MODEL_SITE_API_KEY = os.environ.get("MODEL_SITE_API_KEY", "")
19
+ # Password for Gradio login (any username accepted)
20
+ SITE_PASSWORD = os.environ.get("SITE_PASSWORD", "")
 
 
 
 
 
 
21
 
22
 
23
  def make_respond_fn(model_id: str):
24
  def respond(message: str, history: list[tuple[str, str]]) -> str:
25
+ if not MODAL_ENDPOINT:
26
+ return "Error: MODAL_ENDPOINT environment variable not set"
27
+
28
+ try:
29
+ response = requests.post(
30
+ MODAL_ENDPOINT,
31
+ headers={"X-API-Key": MODEL_SITE_API_KEY},
32
+ json={
33
+ "model_id": model_id,
34
+ "message": message,
35
+ "history": history,
36
+ },
37
+ timeout=120, # Cold start can take a while
38
+ )
39
+ response.raise_for_status()
40
+ data = response.json()
41
+
42
+ if "error" in data:
43
+ return f"Error: {data['error']}"
44
+
45
+ return data.get("response", "No response received")
46
+
47
+ except requests.exceptions.Timeout:
48
+ return "Error: Request timed out. The model may be starting up, please try again."
49
+ except requests.exceptions.RequestException as e:
50
+ return f"Error: {e}"
51
 
52
  return respond
53
 
 
57
  gr.Markdown("# posttraining-practice")
58
  gr.Markdown("Chat with different fine-tuned models")
59
 
60
+ missing = [v for v in ["MODAL_ENDPOINT", "MODEL_SITE_API_KEY", "SITE_PASSWORD"] if not os.environ.get(v)]
61
+ if missing:
62
+ gr.Markdown(f"⚠️ **Warning:** Missing secrets: {', '.join(missing)}")
63
+
64
  with gr.Tabs():
65
  for model_id in MODEL_IDS:
66
  short_name = model_id.split("/")[-1]
 
70
  description=f"Chatting with: {model_id}",
71
  )
72
 
73
+ def check_password(username: str, password: str) -> bool:
74
+ return password == SITE_PASSWORD
75
+
76
+
77
  if __name__ == "__main__":
78
+ auth = check_password if SITE_PASSWORD else None
79
+ demo.launch(auth=auth)
backend.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ import modal
4
+ from fastapi import Header
5
+
6
+ MODEL_IDS = [
7
+ "sachiniyer/SmolLM2-DPO-Schwinn-SmolLM2-Base",
8
+ "sachiniyer/SmolLM2-DPO-Schwinn-gpt-5-mini-base",
9
+ "sachiniyer/Qwen2.5-0.5B-DPO-Schwinn",
10
+ "sachiniyer/SmolLM2-FT-SFT-Learning",
11
+ "sachiniyer/DeepSeek-R1-LoRA-Finetuned",
12
+ "sachiniyer/DeepSeek-R1-QLoRA-Finetuned",
13
+ ]
14
+
15
+ image = (
16
+ modal.Image.debian_slim(python_version="3.12")
17
+ .pip_install("torch", "transformers", "accelerate", "fastapi")
18
+ )
19
+
20
+ app = modal.App("posttraining-chat", image=image)
21
+
22
+
23
+ @app.cls(
24
+ gpu="T4",
25
+ scaledown_window=60,
26
+ secrets=[modal.Secret.from_dotenv()],
27
+ )
28
+ class Inference:
29
+ @modal.enter()
30
+ def load_models(self):
31
+ import torch
32
+ from transformers import AutoModelForCausalLM, AutoTokenizer
33
+
34
+ self.models = {}
35
+ for model_id in MODEL_IDS:
36
+ print(f"Loading model: {model_id}")
37
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
38
+ model = AutoModelForCausalLM.from_pretrained(
39
+ model_id,
40
+ torch_dtype=torch.float16,
41
+ device_map="auto",
42
+ )
43
+ self.models[model_id] = {"model": model, "tokenizer": tokenizer}
44
+ print(f"Loaded: {model_id}")
45
+
46
+ @modal.fastapi_endpoint(method="POST")
47
+ def generate(self, request: dict, x_api_key: str | None = Header(None)) -> dict:
48
+ import torch
49
+
50
+ expected_key = os.environ.get("MODEL_SITE_API_KEY")
51
+ if not expected_key or x_api_key != expected_key:
52
+ return {"error": "Unauthorized - invalid API key"}
53
+
54
+ model_id = request.get("model_id", MODEL_IDS[0])
55
+ message = request.get("message", "")
56
+ history = request.get("history", [])
57
+
58
+ if model_id not in self.models:
59
+ return {"error": f"Model {model_id} not found"}
60
+
61
+ tokenizer = self.models[model_id]["tokenizer"]
62
+ model = self.models[model_id]["model"]
63
+
64
+ conversation = ""
65
+ for user_msg, assistant_msg in history:
66
+ conversation += f"User: {user_msg}\nAssistant: {assistant_msg}\n"
67
+ conversation += f"User: {message}\nAssistant:"
68
+
69
+ inputs = tokenizer(conversation, return_tensors="pt").to("cuda")
70
+
71
+ with torch.no_grad():
72
+ outputs = model.generate(
73
+ **inputs,
74
+ max_new_tokens=256,
75
+ do_sample=True,
76
+ temperature=0.7,
77
+ top_p=0.9,
78
+ pad_token_id=tokenizer.eos_token_id,
79
+ )
80
+
81
+ response = tokenizer.decode(outputs[0], skip_special_tokens=True)
82
+ response = response.split("Assistant:")[-1].strip()
83
+
84
+ return {"response": response}
deploy.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """Deploy the chat site: Modal backend + HuggingFace Space + secrets."""
3
+
4
+ import os
5
+ import re
6
+ import subprocess
7
+ import sys
8
+
9
+ from dotenv import load_dotenv
10
+ from huggingface_hub import HfApi
11
+
12
+ load_dotenv()
13
+
14
+
15
+ def main():
16
+ # Check required env vars
17
+ api_key = os.environ.get("MODEL_SITE_API_KEY")
18
+ site_password = os.environ.get("SITE_PASSWORD")
19
+ if not api_key or not site_password:
20
+ sys.exit("ERROR: MODEL_SITE_API_KEY and SITE_PASSWORD must be set in .env")
21
+
22
+ # Deploy Modal backend
23
+ print("Deploying Modal backend...")
24
+ result = subprocess.run(
25
+ ["uv", "run", "modal", "deploy", "site/backend.py"],
26
+ capture_output=True,
27
+ text=True,
28
+ )
29
+ print(result.stdout + result.stderr)
30
+
31
+ match = re.search(r"https://[^\s]+\.modal\.run", result.stdout + result.stderr)
32
+ if not match:
33
+ sys.exit("ERROR: Could not find Modal endpoint URL")
34
+ modal_endpoint = match.group(0)
35
+
36
+ # Generate requirements and deploy to HuggingFace
37
+ print("Deploying to HuggingFace Spaces (select 'cpu-basic')...")
38
+ result = subprocess.run(
39
+ ["uv", "export", "--group", "site", "--no-hashes", "--no-dev"],
40
+ capture_output=True,
41
+ text=True,
42
+ )
43
+ with open("site/requirements.txt", "w") as f:
44
+ f.write(result.stdout)
45
+
46
+ subprocess.run(
47
+ ["uv", "run", "--group", "site", "gradio", "deploy",
48
+ "--title", "posttraining-practice", "--app-file", "app.py"],
49
+ cwd="site",
50
+ )
51
+ os.remove("site/requirements.txt")
52
+
53
+ # Set secrets
54
+ space_id = input("Space ID (e.g., sachiniyer/posttraining-practice): ").strip()
55
+ if not space_id:
56
+ sys.exit("ERROR: Space ID required")
57
+
58
+ api = HfApi()
59
+ api.add_space_secret(repo_id=space_id, key="MODAL_ENDPOINT", value=modal_endpoint)
60
+ api.add_space_secret(repo_id=space_id, key="MODEL_SITE_API_KEY", value=api_key)
61
+ api.add_space_secret(repo_id=space_id, key="SITE_PASSWORD", value=site_password)
62
+
63
+ print(f"Done! https://huggingface.co/spaces/{space_id}")
64
+
65
+
66
+ if __name__ == "__main__":
67
+ main()