File size: 2,742 Bytes
ec97d4c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# Option 1: use the ethics framework from ethical-rag-starter.py.
# Option 2: use the minimal version defined below.

from dataclasses import dataclass
from typing import List, Dict
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

@dataclass
class EthicsCheckResult:
    """Result record produced by an ethics/quality check.

    Plain data holder: it carries the verdict, the score behind it, a
    textual explanation and a list of recommendation strings.
    """

    # Overall verdict of the check.
    passed: bool
    # Numeric score the verdict was derived from.
    score: float
    # Human-readable explanation of the verdict.
    reasoning: str
    # Recommendation strings attached to the result (may be empty).
    recommendations: List[str]

class AIEthicsFramework:
    """Minimal ethics gate for a text-generation pipeline.

    Offers a keyword-based screen for incoming queries and a length-based
    sanity check for generated responses.
    """

    # Domain labels declared as blocked; no method in this class reads them.
    BLOCKED_DOMAINS = ['medical_diagnosis_unsupervised', 'legal_judgment', 'hiring_decisions']

    def __init__(self):
        # Created empty; nothing in this class appends to it.
        self.audit_log = []

    def validate_query(self, query: str) -> Dict:
        """Screen *query* for PII-related and unsafe keywords.

        Matching is a case-insensitive substring test, so a keyword embedded
        in a longer word also counts as a hit.

        Returns:
            A dict with ``is_allowed`` (bool), ``reason`` (str) and
            ``details`` (the two raw check flags; ``True`` means a keyword
            was found).
        """
        lowered = query.lower()

        pii_found = False
        for keyword in ('ssn', 'password', 'credit card'):
            if keyword in lowered:
                pii_found = True
                break

        unsafe_found = False
        for word in ('hack', 'exploit', 'weaponize'):
            if word in lowered:
                unsafe_found = True
                break

        # The PII message takes precedence when both checks fire.
        if pii_found:
            verdict_text = "Query requests PII"
        elif unsafe_found:
            verdict_text = "Query seeks harmful information"
        else:
            verdict_text = 'Query approved'

        outcome = {}
        outcome['is_allowed'] = not pii_found and not unsafe_found
        outcome['reason'] = verdict_text
        outcome['details'] = {'pii_check': pii_found, 'safety_check': unsafe_found}
        return outcome

    def validate_response(self, response: str) -> EthicsCheckResult:
        """Score *response* by word count and report whether it is long enough.

        The score is the whitespace-separated word count divided by 20,
        capped at 1.0; the check passes when the score is strictly above 0.3.
        """
        word_count = len(response.split())
        score = min(word_count / 20, 1.0)  # Simple quality metric
        long_enough = score > 0.3

        if long_enough:
            explanation = "Response quality acceptable"
        else:
            explanation = "Response too brief"

        return EthicsCheckResult(
            passed=long_enough,
            score=score,
            reasoning=explanation,
            recommendations=[],
        )

def initialize_llm(model_name: str):
    """Load a causal language model and wrap it in a small generation helper.

    Args:
        model_name: Identifier passed to ``from_pretrained`` for both the
            tokenizer and the model.

    Returns:
        A ``SimpleLLM`` instance exposing ``model``, ``tokenizer`` and
        ``generate(prompt, max_tokens=300)``.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # NOTE(review): newer transformers releases appear to prefer
    # ``quantization_config=BitsAndBytesConfig(load_in_8bit=True)`` over the
    # bare ``load_in_8bit`` keyword -- confirm against the pinned version.
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16,
        device_map="auto",
        load_in_8bit=True  # For memory efficiency
    )

    class SimpleLLM:
        """Thin wrapper pairing a model with its tokenizer."""

        def __init__(self, model, tokenizer):
            self.model = model
            self.tokenizer = tokenizer

        def generate(self, prompt: str, max_tokens: int = 300):
            """Generate text for a single *prompt*.

            Args:
                prompt: Input text to tokenize and feed to the model.
                max_tokens: Upper bound on newly generated tokens.

            Returns:
                The decoded first (and only) sequence returned by
                ``model.generate``, with special tokens removed.
                NOTE(review): for causal LMs this sequence presumably
                includes the prompt tokens as well -- confirm with callers.
            """
            inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
            with torch.no_grad():
                outputs = self.model.generate(
                    **inputs,
                    max_new_tokens=max_tokens,
                    # temperature/top_p only take effect in sampling mode;
                    # without do_sample=True they are ignored.
                    do_sample=True,
                    temperature=0.7,
                    top_p=0.9
                )
            # generate() returns a batch of sequences; decode() expects one
            # sequence of ids, so pick the single row for our single prompt.
            return self.tokenizer.decode(outputs[0], skip_special_tokens=True)

    return SimpleLLM(model, tokenizer)