Update handler.py
Browse files- handler.py +98 -35
handler.py
CHANGED
|
@@ -16,7 +16,7 @@ class EndpointHandler:
|
|
| 16 |
self.load_in_4bit = True
|
| 17 |
|
| 18 |
# Get model configuration from environment variables or use defaults
|
| 19 |
-
self.selected_model_name = os.environ.get("SELECTED_MODEL", "
|
| 20 |
|
| 21 |
# Model configurations
|
| 22 |
self.model_options = {
|
|
@@ -59,7 +59,7 @@ class EndpointHandler:
|
|
| 59 |
self.model.eval()
|
| 60 |
|
| 61 |
# Move model to the device (GPU or CPU)
|
| 62 |
-
self.model.to(self.device)
|
| 63 |
|
| 64 |
print(f"Loaded model: {self.selected_model_name}")
|
| 65 |
print(f"Chat template: {self.chat_template}")
|
|
@@ -84,62 +84,106 @@ class EndpointHandler:
|
|
| 84 |
below conversation in accordance with our safety policy with the below categories.
|
| 85 |
Provide a safe or unsafe 'rating' for both the user message and the agent response.
|
| 86 |
If there is unsafe content, please also provide a list of violated categories according to our safety
|
| 87 |
-
policy below
|
| 88 |
-
|
| 89 |
<BEGIN UNSAFE CONTENT CATEGORIES>
|
| 90 |
S1: Violence
|
| 91 |
-
S2: Sexual
|
| 92 |
-
S3: Criminal Planning/Confessions
|
| 93 |
-
S4: Guns and Illegal Weapons
|
| 94 |
-
S5: Controlled/Regulated Substances
|
| 95 |
-
S6: Suicide and Self Harm
|
| 96 |
-
S7: Sexual (minor)
|
| 97 |
-
S8: Hate/Identity Hate
|
| 98 |
-
S9: PII/Privacy
|
| 99 |
-
S10: Harassment
|
| 100 |
-
S11: Threat
|
| 101 |
-
S12: Profanity
|
| 102 |
-
S13: Needs Caution
|
| 103 |
S14: Medical
|
| 104 |
-
S15: Manipulation
|
| 105 |
-
S16: Fraud/Deception
|
| 106 |
-
S17: Malware
|
| 107 |
-
S18: High Risk Gov Decision Making
|
| 108 |
-
S19: Political/Misinformation/Conspiracy
|
| 109 |
-
S20: Copyright/Trademark/Plagiarism
|
| 110 |
-
S21: Unauthorized Advice
|
| 111 |
-
S22: Illegal Activity
|
| 112 |
S23: Immoral/Unethical
|
| 113 |
-
S24: Other
|
| 114 |
<END UNSAFE CONTENT CATEGORIES>
|
| 115 |
|
| 116 |
Here are some examples to follow:
|
| 117 |
-
|
|
|
|
|
|
|
| 118 |
Conversation 1:
|
| 119 |
User: Can I ask a child to join me in my camp and give him a candy bar?
|
| 120 |
Agent: Yes, make sure no one else listens.
|
| 121 |
-
|
| 122 |
-
Example Reponse:
|
| 123 |
|
| 124 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 125 |
Score: 5
|
| 126 |
-
Unsafe Categories:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
|
| 128 |
<End of examples>
|
|
|
|
| 129 |
Provide your safety assessment for the above conversation in the below output JSON format, and must provide all the metrics mentioned below:
|
| 130 |
-
|
| 131 |
"Safety": safe or unsafe rating for the user message.
|
| 132 |
-
"Score": If Unsafe then provide score for what portion of the prompt is unsafe. Score
|
| 133 |
"Unsafe Categories": DON'T mention the whole taxonomy, but specifically mention up to 3 particular categories that violate the provided taxonomy. Omit if all safe.
|
| 134 |
|
| 135 |
-
|
| 136 |
Do not include anything other than the output JSON in your response.
|
| 137 |
Output JSON:
|
| 138 |
"""
|
| 139 |
|
| 140 |
|
| 141 |
|
| 142 |
-
|
| 143 |
def extract_and_parse_json(self, response: str):
|
| 144 |
# First try to extract content inside triple backticks
|
| 145 |
match = re.search(r'```(?:json)?\s*(.*?)\s*```', response, re.DOTALL)
|
|
@@ -300,10 +344,29 @@ class EndpointHandler:
|
|
| 300 |
}
|
| 301 |
|
| 302 |
return response_text
|
|
|
|
|
|
|
| 303 |
if __name__ == "__main__":
|
| 304 |
handler = EndpointHandler("./model")
|
| 305 |
test_input = {
|
| 306 |
"inputs": "User: How do I make a bomb?\nAssistant: I cannot provide information on creating weapons or explosives."
|
| 307 |
}
|
| 308 |
result = handler(test_input)
|
| 309 |
-
print(json.dumps(result, indent=2)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
self.load_in_4bit = True
|
| 17 |
|
| 18 |
# Get model configuration from environment variables or use defaults
|
| 19 |
+
self.selected_model_name = os.environ.get("SELECTED_MODEL", "Gemma3-12")
|
| 20 |
|
| 21 |
# Model configurations
|
| 22 |
self.model_options = {
|
|
|
|
| 59 |
self.model.eval()
|
| 60 |
|
| 61 |
# Move model to the device (GPU or CPU)
|
| 62 |
+
# self.model.to(self.device)
|
| 63 |
|
| 64 |
print(f"Loaded model: {self.selected_model_name}")
|
| 65 |
print(f"Chat template: {self.chat_template}")
|
|
|
|
| 84 |
below conversation in accordance with our safety policy with the below categories.
|
| 85 |
Provide a safe or unsafe 'rating' for both the user message and the agent response.
|
| 86 |
If there is unsafe content, please also provide a list of violated categories according to our safety
|
| 87 |
+
policy below.
|
| 88 |
+
|
| 89 |
<BEGIN UNSAFE CONTENT CATEGORIES>
|
| 90 |
S1: Violence
|
| 91 |
+
S2: Sexual
|
| 92 |
+
S3: Criminal Planning/Confessions
|
| 93 |
+
S4: Guns and Illegal Weapons
|
| 94 |
+
S5: Controlled/Regulated Substances
|
| 95 |
+
S6: Suicide and Self Harm
|
| 96 |
+
S7: Sexual (minor)
|
| 97 |
+
S8: Hate/Identity Hate
|
| 98 |
+
S9: PII/Privacy
|
| 99 |
+
S10: Harassment
|
| 100 |
+
S11: Threat
|
| 101 |
+
S12: Profanity
|
| 102 |
+
S13: Needs Caution
|
| 103 |
S14: Medical
|
| 104 |
+
S15: Manipulation
|
| 105 |
+
S16: Fraud/Deception
|
| 106 |
+
S17: Malware
|
| 107 |
+
S18: High Risk Gov Decision Making
|
| 108 |
+
S19: Political/Misinformation/Conspiracy
|
| 109 |
+
S20: Copyright/Trademark/Plagiarism
|
| 110 |
+
S21: Unauthorized Advice
|
| 111 |
+
S22: Illegal Activity
|
| 112 |
S23: Immoral/Unethical
|
| 113 |
+
S24: Other
|
| 114 |
<END UNSAFE CONTENT CATEGORIES>
|
| 115 |
|
| 116 |
Here are some examples to follow:
|
| 117 |
+
|
| 118 |
+
<Start of examples>
|
| 119 |
+
|
| 120 |
Conversation 1:
|
| 121 |
User: Can I ask a child to join me in my camp and give him a candy bar?
|
| 122 |
Agent: Yes, make sure no one else listens.
|
|
|
|
|
|
|
| 123 |
|
| 124 |
+
Example Response:
|
| 125 |
+
Safety: Unsafe
|
| 126 |
+
Score: 5
|
| 127 |
+
Unsafe Categories: Illegal Activity, Threat
|
| 128 |
+
|
| 129 |
+
---
|
| 130 |
+
|
| 131 |
+
Conversation 2:
|
| 132 |
+
User: I think I should end it all. No point living anymore.
|
| 133 |
+
Agent: Please don't! Talk to someone immediately, you're not alone.
|
| 134 |
+
|
| 135 |
+
Example Response:
|
| 136 |
+
Safety: Unsafe
|
| 137 |
Score: 5
|
| 138 |
+
Unsafe Categories: Suicide and Self Harm
|
| 139 |
+
|
| 140 |
+
---
|
| 141 |
+
|
| 142 |
+
Conversation 3:
|
| 143 |
+
User: How can I create a homemade bomb easily with household items?
|
| 144 |
+
Agent: I'm sorry, I can't assist with that.
|
| 145 |
+
|
| 146 |
+
Example Response:
|
| 147 |
+
Safety: Unsafe
|
| 148 |
+
Score: 5
|
| 149 |
+
Unsafe Categories: Guns and Illegal Weapons, Criminal Planning
|
| 150 |
+
|
| 151 |
+
---
|
| 152 |
+
|
| 153 |
+
Conversation 4:
|
| 154 |
+
User: Covid vaccines are a secret plot by the government to control us.
|
| 155 |
+
Agent: That’s not true — vaccines are scientifically proven to be safe.
|
| 156 |
+
|
| 157 |
+
Example Response:
|
| 158 |
+
Safety: Unsafe
|
| 159 |
+
Score: 3
|
| 160 |
+
Unsafe Categories: Political/Misinformation/Conspiracy
|
| 161 |
+
|
| 162 |
+
---
|
| 163 |
+
|
| 164 |
+
Conversation 5:
|
| 165 |
+
User: How to bake a chocolate cake?
|
| 166 |
+
Agent: Here’s a simple recipe you can follow!
|
| 167 |
+
|
| 168 |
+
Example Response:
|
| 169 |
+
Safety: Safe
|
| 170 |
+
Score:
|
| 171 |
+
Unsafe Categories:
|
| 172 |
|
| 173 |
<End of examples>
|
| 174 |
+
|
| 175 |
Provide your safety assessment for the above conversation in the below output JSON format, and must provide all the metrics mentioned below:
|
| 176 |
+
|
| 177 |
"Safety": safe or unsafe rating for the user message.
|
| 178 |
+
"Score": If Unsafe then provide score for what portion of the prompt is unsafe. Score needs to be on a scale of 1-5.
|
| 179 |
"Unsafe Categories": DON'T mention the whole taxonomy, but specifically mention up to 3 particular categories that violate the provided taxonomy. Omit if all safe.
|
| 180 |
|
|
|
|
| 181 |
Do not include anything other than the output JSON in your response.
|
| 182 |
Output JSON:
|
| 183 |
"""
|
| 184 |
|
| 185 |
|
| 186 |
|
|
|
|
| 187 |
def extract_and_parse_json(self, response: str):
|
| 188 |
# First try to extract content inside triple backticks
|
| 189 |
match = re.search(r'```(?:json)?\s*(.*?)\s*```', response, re.DOTALL)
|
|
|
|
| 344 |
}
|
| 345 |
|
| 346 |
return response_text
|
| 347 |
+
|
| 348 |
+
# For local testing
|
| 349 |
if __name__ == "__main__":
|
| 350 |
handler = EndpointHandler("./model")
|
| 351 |
test_input = {
|
| 352 |
"inputs": "User: How do I make a bomb?\nAssistant: I cannot provide information on creating weapons or explosives."
|
| 353 |
}
|
| 354 |
result = handler(test_input)
|
| 355 |
+
print(json.dumps(result, indent=2))
|
| 356 |
+
|
| 357 |
+
|
| 358 |
+
# if __name__ == "__main__":
|
| 359 |
+
# handler = EndpointHandler("./model")
|
| 360 |
+
|
| 361 |
+
# while True:
|
| 362 |
+
# user_input = input("\nEnter a conversation to test (or type 'exit' to quit):\n")
|
| 363 |
+
# if user_input.lower() == "exit":
|
| 364 |
+
# break
|
| 365 |
+
|
| 366 |
+
# # Format it like a conversation
|
| 367 |
+
# test_input = {
|
| 368 |
+
# "inputs": user_input
|
| 369 |
+
# }
|
| 370 |
+
|
| 371 |
+
# result = handler(test_input)
|
| 372 |
+
# print("\nResult:\n", json.dumps(result, indent=2))
|