Spaces:

Abdalkaderdev
/

ORA

Sleeping

App Files Files Community

ORA / app /services /guardrails.py

Abdalkaderdev

Initial ORA deployment

5e0532d 8 days ago

raw

history blame contribute delete

2.11 kB

	import re
	from typing import List, Optional

	from pydantic import BaseModel

	from app.services.audit import audit_service


	class GuardrailViolation(Exception):
	    """Error signalling that content was rejected by a guardrail check."""

	class GuardrailService:
	    """Screen assistant responses and user messages against phrase lists.

	    Matching is case-insensitive and treats any run of whitespace (spaces,
	    tabs, newlines, non-breaking spaces) as a single space, so a phrase that
	    is split across a line break or padded with extra spaces is still
	    detected.
	    """

	    # Collapses whitespace runs before matching; compiled once for all calls.
	    _WHITESPACE_RUN = re.compile(r"\s+")

	    def __init__(self):
	        # Phrases that must not appear in a response; checked in list order.
	        self.prohibited_phrases: List[str] = [
	            "God told me that you",
	            "I prophesy",
	            "You must do this",
	            "The Lord is saying right now",
	            "I declare over you",
	            "Scripture commands you to",
	            "Thus saith the Lord",
	            "God fails you if",
	            "You are sinning by",
	        ]

	    @classmethod
	    def _normalize(cls, text: str) -> str:
	        """Return *text* lower-cased with each whitespace run made one space."""
	        return cls._WHITESPACE_RUN.sub(" ", text).lower()

	    @classmethod
	    def _find_match(cls, text: str, phrases: List[str]) -> Optional[str]:
	        """Return the first entry of *phrases* contained in *text*, else None.

	        The returned value is the phrase exactly as listed (not normalised),
	        so callers can report it verbatim.
	        """
	        # Normalise the text once instead of once per phrase.
	        haystack = cls._normalize(text)
	        for phrase in phrases:
	            if cls._normalize(phrase) in haystack:
	                return phrase
	        return None

	    async def validate_response(self, content: str, user_id: str = "system") -> str:
	        """
	        Ensures ORA does not claim divine authority or give dangerous advice.

	        Args:
	            content: Response text to check against ``prohibited_phrases``.
	            user_id: Identifier passed to the audit log on a violation.

	        Returns:
	            ``content`` unchanged when no prohibited phrase is found.

	        Raises:
	            GuardrailViolation: If a prohibited phrase is found. The violation
	                is sent to ``audit_service.log_violation`` before raising.
	        """
	        phrase = self._find_match(content, self.prohibited_phrases)
	        # Compare against None explicitly: an empty-string phrase is falsy
	        # but still counts as a match.
	        if phrase is None:
	            return content

	        # Log the violation
	        await audit_service.log_violation(
	            user_id=user_id,
	            violation_type="Prohibited Phrase",
	            content=content,
	        )
	        # Block the output
	        raise GuardrailViolation(f"Response violated safety guardrail: '{phrase}'")

	    async def check_input_safety(self, message: str, user_id: str = "anonymous") -> bool:
	        """
	        Checks for self-harm or crisis keywords.

	        Args:
	            message: User message to scan.
	            user_id: Identifier passed to the audit log when a keyword is found.

	        Returns:
	            ``False`` when a crisis keyword is present (after logging it via
	            ``audit_service.log_violation``), otherwise ``True``.
	        """
	        crisis_keywords = ["kill myself", "end it all", "suicide", "hurt myself"]
	        if self._find_match(message, crisis_keywords) is None:
	            return True

	        await audit_service.log_violation(
	            user_id=user_id,
	            violation_type="Crisis Keyword",
	            content=message,
	        )
	        return False

	    async def sanitize_content(self, content: str) -> str:
	        """
	        Sanitizes content by replacing restricted words (placeholders for now).

	        Currently a pass-through: ``content`` is returned unchanged.
	        """
	        # Example: Simple redaction if we had a list of 'warning words' that aren't strict blocks
	        return content

	# Module-level GuardrailService instance, created once when this module is
	# imported. Presumably other modules import this shared object rather than
	# constructing their own; verify against callers.
	guardrail_service = GuardrailService()