yuhueng committed on
Commit
6eef698
·
verified ·
1 Parent(s): aff1f83

feat: Added LionGuard v1 as the safety detector

Browse files
Files changed (1) hide show
  1. app.py +31 -3
app.py CHANGED
@@ -2,8 +2,10 @@ import spaces
2
  import gradio as gr
3
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
4
  import torch
5
-
6
- # torch.manual_seed(42)
 
 
7
 
8
  MODEL_ID = "yuhueng/qwen3-4b-singlish-base" # replace with your model
9
 
@@ -13,6 +15,31 @@ model = AutoModelForCausalLM.from_pretrained(
13
  torch_dtype=torch.float16,
14
  )
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  @spaces.GPU(duration=120)
17
  def inference(prompt: str, max_tokens: int = 256) -> str:
18
  model.to("cuda") # Move to GPU inside decorated function
@@ -40,7 +67,8 @@ def inference(prompt: str, max_tokens: int = 256) -> str:
40
  outputs[0][inputs["input_ids"].shape[1]:],
41
  skip_special_tokens=True
42
  )
43
- return response
 
44
 
45
  # # Use TextIteratorStreamer instead of TextStreamer
46
  # streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
 
2
  import gradio as gr
3
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
4
  import torch
5
+ import onnxruntime as ort
6
+ import numpy as np
7
+ from sentence_transformers import SentenceTransformer
8
+ from huggingface_hub import hf_hub_download
9
 
10
  MODEL_ID = "yuhueng/qwen3-4b-singlish-base" # replace with your model
11
 
 
15
  torch_dtype=torch.float16,
16
  )
17
 
18
# --- LionGuard v1 safety-classifier setup -----------------------------------
# NOTE(review): this runs at import time and downloads two models (the BGE
# embedder and the ONNX classifier) — confirm that is acceptable for app startup.

# Hub repo holding the LionGuard v1 binary-safety ONNX model.
REPO_ID = "govtech/lionguard-v1"
# Sentence-embedding backbone whose vectors the classifier was trained on.
EMBEDDING_MODEL = "BAAI/bge-large-en-v1.5"
# Path of the binary-classifier ONNX file inside the repo.
FILENAME = "models/lionguard-binary.onnx"

# Embedder used to turn input text into feature vectors for the classifier.
embedder = SentenceTransformer(EMBEDDING_MODEL)

# Fetch the ONNX model file from the Hub, then open an inference session on it.
model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)
session = ort.InferenceSession(model_path)
29
# --- Safety check ------------------------------------------------------------
def check_safety(text):
    """Classify *text* with LionGuard v1 and return "Safe" or "Unsafe".

    The text is embedded with the module-level BGE ``embedder`` (normalized,
    as the BGE family expects) and scored by the ONNX ``session``; the first
    output's first element is treated as the binary label (1 == unsafe).
    """
    # BGE embeddings must be L2-normalized to match the classifier's training.
    features = embedder.encode([text], normalize_embeddings=True)

    # The session exposes a single input tensor; look its name up dynamically.
    feed_name = session.get_inputs()[0].name
    outputs = session.run(None, {feed_name: features.astype(np.float32)})
    label = outputs[0]

    if label[0] == 1:
        return "Unsafe"
    return "Safe"
41
+
42
+
43
  @spaces.GPU(duration=120)
44
  def inference(prompt: str, max_tokens: int = 256) -> str:
45
  model.to("cuda") # Move to GPU inside decorated function
 
67
  outputs[0][inputs["input_ids"].shape[1]:],
68
  skip_special_tokens=True
69
  )
70
+ safety = check_safety(response)
71
+ return response, safety
72
 
73
  # # Use TextIteratorStreamer instead of TextStreamer
74
  # streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)