GopalJee2745 committed on
Commit
cf19f11
·
0 Parent(s):

Prepare HF Docker deployment: add root app.py, Dockerfile, startup handling

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .DS_Store +0 -0
  2. Dockerfile +17 -0
  3. Readme.md +8 -0
  4. app.py +2 -0
  5. app/.DS_Store +0 -0
  6. app/__init__.py +0 -0
  7. app/__pycache__/__init__.cpython-310.pyc +0 -0
  8. app/__pycache__/__init__.cpython-311.pyc +0 -0
  9. app/__pycache__/__init__.cpython-313.pyc +0 -0
  10. app/__pycache__/main.cpython-310.pyc +0 -0
  11. app/__pycache__/main.cpython-311.pyc +0 -0
  12. app/__pycache__/main.cpython-313.pyc +0 -0
  13. app/app.py +49 -0
  14. app/config/.DS_Store +0 -0
  15. app/config/__init__.py +0 -0
  16. app/config/settings.py +0 -0
  17. app/data/.DS_Store +0 -0
  18. app/data/__init__.py +0 -0
  19. app/embeddings/__init__.py +0 -0
  20. app/embeddings/__pycache__/__init__.cpython-310.pyc +0 -0
  21. app/embeddings/__pycache__/__init__.cpython-311.pyc +0 -0
  22. app/embeddings/__pycache__/embedding_model.cpython-310.pyc +0 -0
  23. app/embeddings/__pycache__/embedding_model.cpython-311.pyc +0 -0
  24. app/embeddings/embedding_model.py +15 -0
  25. app/models/__init__.py +0 -0
  26. app/models/__pycache__/__init__.cpython-310.pyc +0 -0
  27. app/models/__pycache__/__init__.cpython-311.pyc +0 -0
  28. app/models/__pycache__/model1_classifier.cpython-310.pyc +0 -0
  29. app/models/__pycache__/model1_classifier.cpython-311.pyc +0 -0
  30. app/models/__pycache__/model2_risk.cpython-310.pyc +0 -0
  31. app/models/__pycache__/model2_risk.cpython-311.pyc +0 -0
  32. app/models/__pycache__/model3_action.cpython-310.pyc +0 -0
  33. app/models/__pycache__/model3_action.cpython-311.pyc +0 -0
  34. app/models/model1_classifier.py +112 -0
  35. app/models/model2_risk.py +143 -0
  36. app/models/model3_action.py +105 -0
  37. app/notebooks/__init__.py +0 -0
  38. app/pipelines/__init__.py +0 -0
  39. app/pipelines/__pycache__/__init__.cpython-310.pyc +0 -0
  40. app/pipelines/__pycache__/__init__.cpython-311.pyc +0 -0
  41. app/pipelines/__pycache__/security_pipeline.cpython-310.pyc +0 -0
  42. app/pipelines/__pycache__/security_pipeline.cpython-311.pyc +0 -0
  43. app/pipelines/security_pipeline.py +104 -0
  44. app/rag.py +0 -0
  45. app/rag/__init__.py +0 -0
  46. app/rag/__pycache__/__init__.cpython-310.pyc +0 -0
  47. app/rag/__pycache__/__init__.cpython-311.pyc +0 -0
  48. app/rag/__pycache__/knowledge_loader.cpython-310.pyc +0 -0
  49. app/rag/__pycache__/knowledge_loader.cpython-311.pyc +0 -0
  50. app/rag/__pycache__/rag_engine.cpython-310.pyc +0 -0
.DS_Store ADDED
Binary file (6.15 kB). View file
 
Dockerfile ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Slim Python base keeps the image small.
FROM python:3.10-slim

# Unbuffered stdout/stderr so logs show up immediately in the Space console.
ENV PYTHONUNBUFFERED=1

# set workdir
WORKDIR /code

# Install dependencies in their own layer BEFORE copying the repo, so the
# (slow) pip install layer is cached unless requirements.txt itself changes.
# Chaining into one RUN also avoids an extra intermediate layer.
COPY requirements.txt /code/requirements.txt
RUN pip install --no-cache-dir --upgrade pip \
 && pip install --no-cache-dir -r requirements.txt

# copy the rest of the repo
COPY . /code

# expose the HF required port
EXPOSE 7860

# run uvicorn pointing to the app object exposed at repo-root/app.py -> app.app:app would also work
CMD ["uvicorn", "app.app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]
Readme.md ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Email Security AI
3
+ emoji: "🛡️"
4
+ colorFrom: "blue"
5
+ colorTo: "purple"
6
+ sdk: docker
7
+ app_port: 7860
8
+ ---
app.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
# repo-root/app.py
# Re-export the FastAPI instance defined in app/app.py so that process
# managers can also target `app:app` at the repository root.
from app.app import app
app/.DS_Store ADDED
Binary file (10.2 kB). View file
 
app/__init__.py ADDED
File without changes
app/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (147 Bytes). View file
 
app/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (163 Bytes). View file
 
app/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (151 Bytes). View file
 
app/__pycache__/main.cpython-310.pyc ADDED
Binary file (1.72 kB). View file
 
app/__pycache__/main.cpython-311.pyc ADDED
Binary file (2.71 kB). View file
 
app/__pycache__/main.cpython-313.pyc ADDED
Binary file (2.38 kB). View file
 
app/app.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# app/main.py
"""FastAPI entry point for the Email Security AI service.

Wires CORS middleware, the /analyze router, a startup hook that warms the
model singletons, and a /health probe.
"""
import logging
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware

# import the router (assumes app/routes/analyze.py exists)
from app.routes.analyze import router as analyze_router

# import pipeline to warm up singletons on startup
import app.pipelines.security_pipeline as security_pipeline

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("email-security-ai")

app = FastAPI(
    title="Email Security AI",
    version="1.0",
    description="Model-1 -> Model-2 -> RAG -> Model-3 email security pipeline"
)

# Allow frontend access during development; tighten in prod.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # change to specific origins in production
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

app.include_router(analyze_router, prefix="")

# NOTE(review): @app.on_event is deprecated in recent FastAPI releases in
# favour of lifespan handlers — consider migrating once the FastAPI pin
# is confirmed to support `lifespan=`.
@app.on_event("startup")
async def startup_event():
    """
    Ensure model singletons are created and log basic info.
    Note: model initialization (importing security_pipeline) may trigger HF model downloads
    on first run — that is expected.
    """
    logger.info("Starting Email Security AI — warming models (may take time on first run).")
    try:
        # Load the newly decoupled pipeline models to memory before serving
        security_pipeline.load_models()
        logger.info("Model singletons initialized and ready for inference.")
    except Exception as e:
        # Warm-up failure is non-fatal: security_pipeline.analyze() calls
        # load_models() lazily, so the first request retries initialization.
        logger.exception("Error while warming models: %s", e)

@app.get("/health", tags=["health"])
def health():
    """Liveness/readiness probe used by the hosting platform."""
    return {"status": "ok", "ready": True}
app/config/.DS_Store ADDED
Binary file (6.15 kB). View file
 
app/config/__init__.py ADDED
File without changes
app/config/settings.py ADDED
File without changes
app/data/.DS_Store ADDED
Binary file (6.15 kB). View file
 
app/data/__init__.py ADDED
File without changes
app/embeddings/__init__.py ADDED
File without changes
app/embeddings/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (158 Bytes). View file
 
app/embeddings/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (174 Bytes). View file
 
app/embeddings/__pycache__/embedding_model.cpython-310.pyc ADDED
Binary file (734 Bytes). View file
 
app/embeddings/__pycache__/embedding_model.cpython-311.pyc ADDED
Binary file (953 Bytes). View file
 
app/embeddings/embedding_model.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_huggingface import HuggingFaceEmbeddings
2
+
3
+
4
class EmbeddingModel:
    """Thin wrapper around a HuggingFace sentence-transformer embedding model.

    Generalized: the model name is now a constructor parameter; the default
    preserves the original hard-coded behavior.
    """

    def __init__(self, model_name: str = "sentence-transformers/all-MiniLM-L6-v2"):
        # Loading the embeddings may download model weights on first use.
        self.model_name = model_name
        self.embedding = HuggingFaceEmbeddings(
            model_name=self.model_name
        )

    def get(self):
        """Return the underlying embeddings object (for vector-store use)."""
        return self.embedding
app/models/__init__.py ADDED
File without changes
app/models/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (154 Bytes). View file
 
app/models/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (170 Bytes). View file
 
app/models/__pycache__/model1_classifier.cpython-310.pyc ADDED
Binary file (2.92 kB). View file
 
app/models/__pycache__/model1_classifier.cpython-311.pyc ADDED
Binary file (2.44 kB). View file
 
app/models/__pycache__/model2_risk.cpython-310.pyc ADDED
Binary file (3.32 kB). View file
 
app/models/__pycache__/model2_risk.cpython-311.pyc ADDED
Binary file (4.98 kB). View file
 
app/models/__pycache__/model3_action.cpython-310.pyc ADDED
Binary file (3.74 kB). View file
 
app/models/__pycache__/model3_action.cpython-311.pyc ADDED
Binary file (7.51 kB). View file
 
app/models/model1_classifier.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import torch
3
+ from langchain_core.runnables import RunnableLambda
4
+ from transformers import pipeline
5
+
6
CANDIDATE_LABELS = [
    "phishing",
    "scam",
    "fraud",
    "malware",
    "spam",
    "benign"
]

class Model1Classifier:
    """Zero-shot email intent classifier backed by facebook/bart-large-mnli.

    Produces a softmax distribution over CANDIDATE_LABELS, with a heuristic
    boost for obvious prize/giveaway scam wording.
    """

    def __init__(self):
        self.model_name = "facebook/bart-large-mnli"
        self.model_version = "model1-v1"

        # Determine device: prefer Apple MPS, then CUDA, then CPU.
        if torch.backends.mps.is_available():
            device = "mps"
        elif torch.cuda.is_available():
            device = "cuda"
        else:
            device = "cpu"

        # Load HuggingFace pipeline with zero-shot classification
        self.hf_pipeline = pipeline(
            "zero-shot-classification",
            model=self.model_name,
            device=device
        )

    def _run_inference(self, input_text: str) -> dict:
        """Classify *input_text*; return {"top_labels": [{"label", "probability"}, ...]}.

        `multi_label=False` forces the probabilities to sum to 1.0 (softmax).
        """
        result = self.hf_pipeline(
            input_text,
            candidate_labels=CANDIDATE_LABELS,
            multi_label=False,
            hypothesis_template="This email is {}."
        )

        labels = result["labels"]
        scores = result["scores"]

        # --- Heuristic override for extremely short prize/giveaway scams ---
        text_lower = input_text.lower()
        if "won an iphone" in text_lower or (
            "congratulations" in text_lower and ("won" in text_lower or "prize" in text_lower)
        ):
            target_phishing_prob = 0.65

            try:
                p_index = labels.index("phishing")
                current_p_prob = scores[p_index]
            except ValueError:
                p_index = -1
                current_p_prob = 0.0

            # FIX: only rescale when "phishing" is actually among the returned
            # labels. The old code rescaled the other classes even when
            # p_index == -1, leaving the distribution summing to < 1.0.
            if p_index >= 0 and current_p_prob < target_phishing_prob:
                # Scale the remaining mass so the distribution still sums to 1.
                remaining_target = 1.0 - target_phishing_prob
                remaining_current = 1.0 - current_p_prob
                scale_factor = (remaining_target / remaining_current) if remaining_current > 0 else 0

                for i in range(len(scores)):
                    if i == p_index:
                        scores[i] = target_phishing_prob
                    else:
                        scores[i] = scores[i] * scale_factor

                # Re-sort lists post-override
                sorted_pairs = sorted(zip(labels, scores), key=lambda x: x[1], reverse=True)
                labels = [p[0] for p in sorted_pairs]
                scores = [p[1] for p in sorted_pairs]

        top_labels = [
            {"label": label, "probability": round(float(score), 4)}
            for label, score in list(zip(labels, scores))[:5]
        ]

        return {"top_labels": top_labels}

    def predict(self, payload):
        """Classify a payload with optional .subject and .content.text fields.

        Returns top_labels plus model_version and elapsed time_ms.
        """
        start = time.perf_counter()

        # Safely extract text fields
        subject = getattr(payload, "subject", "") or ""

        text = ""
        if getattr(payload, "content", None):
            text = getattr(payload.content, "text", "") or ""

        # Construct input text string; never feed an empty string to the pipeline.
        input_text = f"{subject} {text}".strip()
        if not input_text:
            input_text = "empty message"

        result_dict = self._run_inference(input_text)

        elapsed = int((time.perf_counter() - start) * 1000)

        return {
            "top_labels": result_dict["top_labels"],
            "model_version": self.model_version,
            "time_ms": elapsed
        }
app/models/model2_risk.py ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import torch
3
+ from typing import Dict, Any
4
+ from langchain_core.runnables import RunnableLambda
5
+ from transformers import pipeline
6
+
7
class Model2RiskEngine:
    """Hybrid risk scorer: Model-1 signal + DeBERTa zero-shot factors + heuristics.

    Produces an integer risk_score in [0, 100], a coarse risk_level bucket,
    and a de-duplicated list of human-readable reasons.
    """

    def __init__(self):
        self.model_name = "microsoft/deberta-v3-base"
        self.model_version = "model2-v3"

        # Prefer Apple MPS, then CUDA, then CPU.
        if torch.backends.mps.is_available():
            device = "mps"
        elif torch.cuda.is_available():
            device = "cuda"
        else:
            device = "cpu"

        # Zero-shot pipeline used to detect specific risk factors in the body.
        # Falls back to heuristics-only scoring when the model can't load.
        try:
            self.analyzer = pipeline(
                "zero-shot-classification",
                model=self.model_name,
                device=device
            )
        except Exception as e:
            print(f"Could not load zero-shot pipeline for {self.model_name}: {e}. Falling back to default.")
            self.analyzer = None

        self.candidate_reasons = [
            "Contains urgent financial request",
            "Suspicious domain detected",
            "Requests sensitive information"
        ]

    def _run_nlp_analysis(self, text: str) -> list:
        """Return candidate risk reasons whose zero-shot score exceeds 0.4."""
        reasons = []
        if self.analyzer is not None and text.strip():
            try:
                result = self.analyzer(text, candidate_labels=self.candidate_reasons)
                for label, score in zip(result['labels'], result['scores']):
                    if score > 0.4:
                        reasons.append(label)
            except Exception as e:
                # Best-effort: NLP factors are additive; failure only reduces signal.
                print(f"Model 2 inference error: {e}")
        return reasons

    def analyze(self, payload: Any, model1_result: Dict) -> Dict[str, Any]:
        """Score *payload* using the Model-1 result plus NLP and structural checks.

        Returns {"risk_score", "risk_level", "reasons", "model_version", "time_ms"}.
        """
        start = time.perf_counter()

        reasons = []
        risk_score = 0

        # FIX: use getattr for every optional payload field (consistent with
        # Model1Classifier.predict) so partially-populated payloads do not
        # raise AttributeError.
        content = getattr(payload, "content", None)
        text = (getattr(content, "text", "") or "").lower() if content else ""
        subject = (getattr(payload, "subject", "") or "").lower()

        full_text = f"{subject} {text}".strip()

        # ---------------------------------------------------------------
        # 1. Model 1 integration score
        # ---------------------------------------------------------------
        if model1_result and "top_labels" in model1_result and len(model1_result["top_labels"]) > 0:
            top_label = model1_result["top_labels"][0]["label"]
            top_prob = model1_result["top_labels"][0]["probability"]

            if top_label == "benign":
                # Inverse penalty if benign
                risk_score = max(0, 20 * (1 - top_prob))
            else:
                risk_score = top_prob * 60
                reasons.append(f"Message classified as potential {top_label}")

        # ---------------------------------------------------------------
        # 2. DeBERTa NLP analysis
        # ---------------------------------------------------------------
        nlp_reasons = self._run_nlp_analysis(full_text)
        for r in nlp_reasons:
            if r not in reasons:
                reasons.append(r)
                risk_score += 10  # Bump risk for each NLP factor identified

        # ---------------------------------------------------------------
        # 3. Structural heuristics
        # ---------------------------------------------------------------
        metadata = getattr(payload, "metadata", None)
        if metadata and getattr(metadata, "has_attachment", False):
            risk_score += 15
            reasons.append("Message contains attachment")

        # Link analysis: verification-style keywords inside any link domain.
        links = getattr(payload, "links", None)
        if links:
            for link in links:
                domain = (getattr(link, "domain", "") or "").lower()
                for keyword in ["login", "verify", "secure", "update", "account"]:
                    if keyword in domain:
                        if "Suspicious verification-style domain detected" not in reasons:
                            risk_score += 15
                            reasons.append("Suspicious verification-style domain detected")
                        break

        # Sender mismatch between actual email domain and declared domain.
        sender = getattr(payload, "sender", None)
        if sender and getattr(sender, "email", None) and getattr(sender, "domain", None):
            email_domain = sender.email.split("@")[-1].lower()
            declared_domain = sender.domain.lower()
            if email_domain != declared_domain:
                risk_score += 15
                reasons.append("Sender domain mismatch detected")

        # ---------------------------------------------------------------
        # Clamp risk score and classify bucket
        # ---------------------------------------------------------------
        risk_score = min(100, int(risk_score))

        if risk_score >= 75:
            risk_level = "high"
        elif risk_score >= 50:
            risk_level = "risky"
        elif risk_score >= 25:
            risk_level = "suspicious"
        else:
            risk_level = "safe"

        elapsed = int((time.perf_counter() - start) * 1000)

        # De-duplicate reasons while preserving first-seen order.
        unique_reasons = list(dict.fromkeys(reasons))

        return {
            "risk_score": risk_score,
            "risk_level": risk_level,
            "reasons": unique_reasons,
            "model_version": self.model_version,
            "time_ms": elapsed,
        }
app/models/model3_action.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import logging
3
+ from typing import List, Dict, Any, Optional
4
+
5
+ from langchain_core.prompts import PromptTemplate
6
+ from langchain_core.runnables import RunnableLambda
7
+ from transformers import pipeline
8
+
9
logger = logging.getLogger("email-security-ai.model3")
logger.addHandler(logging.NullHandler())

class Model3ActionGenerator:
    """
    Generates user safety actions using google/flan-t5-base
    via LangChain LCEL implementation.

    Builds a prompt from the risk analysis + RAG evidence, runs seq2seq
    generation, and parses the output into a short action list, falling
    back to canned advice when generation fails or yields nothing.
    """
    def __init__(self, model_name: str = "google/flan-t5-base", device: str = "cpu"):
        self.model_name = model_name
        self.model_version = "model3-v2"

        try:
            self.generator = pipeline(
                task="text2text-generation",
                model=self.model_name,
                device=device
            )
            logger.info("Model3 loaded successfully: %s", self.model_name)
        except Exception as e:
            logger.exception("Failed to load Model3: %s", e)
            self.generator = None  # generate() then serves fallback actions

        # Prompt template shared by every generate() call.
        self.prompt = PromptTemplate.from_template(
            "Based on a {risk_level} email risk with these reasons: {reasons_text}. "
            "Relevant cybersecurity knowledge: {evidence_text}. "
            "What should the user do? List 3 specific short actionable steps separated by commas."
        )

    def _run_generation(self, prompt_text: str) -> str:
        """Run the HF pipeline on *prompt_text*; return "" on any failure."""
        if self.generator is None:
            return ""
        try:
            result = self.generator(
                prompt_text.text if hasattr(prompt_text, "text") else str(prompt_text),
                max_new_tokens=60,
                do_sample=False,
                num_return_sequences=1,
                repetition_penalty=1.2,
                no_repeat_ngram_size=3
            )
            return result[0]['generated_text']
        except Exception as e:
            logger.exception("Model 3 generation failed: %s", e)
            return ""

    def _parse_output(self, generated_text: str) -> List[str]:
        """Split generated text into up to 5 unique, order-preserving actions."""
        if not generated_text:
            return []

        # Split by both commas and periods to break up run-on and repeating sentences
        raw_actions = [a.strip() for a in generated_text.replace(".", ",").split(",") if a.strip()]

        # FIX: case-insensitive de-duplication via a seen-set. The old version
        # rebuilt a lowercase copy of the accumulator for every item (O(n^2)).
        actions: List[str] = []
        seen = set()
        for a in raw_actions:
            key = a.lower()
            if key not in seen:
                seen.add(key)
                actions.append(a)

        return actions[:5]

    def _fallback_actions(self) -> List[str]:
        """Static advice used when the generator is unavailable or unhelpful."""
        return [
            "Do not click any links in the message.",
            "Verify the sender through the official website.",
            "Report the message as phishing.",
            "Delete the suspicious message."
        ]

    def generate(self, risk_analysis: Dict[str, Any], rag_evidence: List[str]) -> Dict[str, Any]:
        """Produce recommended actions for *risk_analysis* + *rag_evidence*.

        Returns {"actions": [...], "model_version": ..., "time_ms": ...}.
        """
        start = time.perf_counter()

        risk_level = risk_analysis.get("risk_level", "unknown")
        reasons = risk_analysis.get("reasons", [])

        reasons_text = ", ".join(reasons) if reasons else "None"
        evidence_text = " ".join(rag_evidence) if rag_evidence else "None"

        # Format prompt using LangChain and execute
        prompt_value = self.prompt.format(
            risk_level=risk_level,
            reasons_text=reasons_text,
            evidence_text=evidence_text
        )
        actions = self._parse_output(self._run_generation(prompt_value))

        if not actions:
            actions = self._fallback_actions()

        elapsed = int((time.perf_counter() - start) * 1000)

        return {
            "actions": actions,
            "model_version": self.model_version,
            "time_ms": elapsed
        }
app/notebooks/__init__.py ADDED
File without changes
app/pipelines/__init__.py ADDED
File without changes
app/pipelines/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (157 Bytes). View file
 
app/pipelines/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (173 Bytes). View file
 
app/pipelines/__pycache__/security_pipeline.cpython-310.pyc ADDED
Binary file (1.69 kB). View file
 
app/pipelines/__pycache__/security_pipeline.cpython-311.pyc ADDED
Binary file (2.68 kB). View file
 
app/pipelines/security_pipeline.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app/pipeline/security_pipeline.py
2
+ import time
3
+
4
+ from app.models.model1_classifier import Model1Classifier
5
+ from app.models.model2_risk import Model2RiskEngine
6
+ from app.models.model3_action import Model3ActionGenerator
7
+ from app.rag.rag_engine import RAGEngine
8
+
9
+ from app.schemas.response_schema import (
10
+ AnalyzeResponse,
11
+ Classification,
12
+ RiskAnalysis,
13
+ Metadata,
14
+ )
15
+
16
+ # -------- LAZY MODEL LOADING --------
17
+
18
+ model1 = None
19
+ model2 = None
20
+ model3 = None
21
+ rag_engine = None
22
+
23
+ def load_models():
24
+ global model1, model2, model3, rag_engine
25
+ if model1 is None:
26
+ import os
27
+ os.makedirs("data/vector_store", exist_ok=True)
28
+ model1 = Model1Classifier()
29
+ model2 = Model2RiskEngine()
30
+ model3 = Model3ActionGenerator()
31
+ rag_engine = RAGEngine()
32
+ # Optionally ensure index is loaded
33
+ if hasattr(rag_engine, "rebuild_index"):
34
+ rag_engine.rebuild_index()
35
+
36
+
37
+ def analyze(payload):
38
+ load_models()
39
+ start_total = time.perf_counter()
40
+
41
+ # -------------------------------------------------------------
42
+ # 1. RUN MODEL 1 (SPAM / INTENT CLASSIFICATION)
43
+ # -------------------------------------------------------------
44
+ model1_result = model1.predict(payload)
45
+
46
+ # -------------------------------------------------------------
47
+ # 2. RUN MODEL 2 (RISK ENGINE)
48
+ # -------------------------------------------------------------
49
+ model2_result = model2.analyze(
50
+ payload,
51
+ model1_result
52
+ )
53
+
54
+ # -------------------------------------------------------------
55
+ # 3. RAG RETRIEVAL (EVIDENCE GATHERING)
56
+ # -------------------------------------------------------------
57
+ text_content = payload.content.text if payload.content and payload.content.text else ""
58
+ subject = payload.subject if payload.subject else ""
59
+ query = f"{subject} {text_content}".strip()
60
+ rag_evidence = rag_engine.get_evidence(query) if getattr(rag_engine, "get_evidence", None) else []
61
+
62
+ # -------------------------------------------------------------
63
+ # 4. RUN MODEL 3 (ACTION GENERATION)
64
+ # -------------------------------------------------------------
65
+ model3_result = model3.generate(
66
+ {"risk_level": model2_result.get("risk_level", "unknown"), "reasons": model2_result.get("reasons", [])},
67
+ rag_evidence
68
+ )
69
+ recommended_actions = model3_result.get("actions", [])
70
+
71
+ # -------------------------------------------------------------
72
+ # TOTAL LATENCY METRICS
73
+ # -------------------------------------------------------------
74
+ total_time = int((time.perf_counter() - start_total) * 1000)
75
+
76
+ metadata = Metadata(
77
+ processing_time_ms=total_time,
78
+ models={
79
+ "model1": "facebook/bart-large-mnli",
80
+ "model2": "microsoft/deberta-v3-base",
81
+ "model3": "google/flan-t5-base"
82
+ }
83
+ )
84
+
85
+ # -------------------------------------------------------------
86
+ # BUILD RESPONSE
87
+ # -------------------------------------------------------------
88
+ response = AnalyzeResponse(
89
+ classification=Classification(
90
+ top_labels=model1_result["top_labels"],
91
+ model_version=model1_result["model_version"]
92
+ ),
93
+ risk_analysis=RiskAnalysis(
94
+ risk_score=model2_result["risk_score"],
95
+ risk_level=model2_result["risk_level"],
96
+ reasons=model2_result["reasons"],
97
+ model_version=model2_result["model_version"]
98
+ ),
99
+ recommended_actions=recommended_actions,
100
+ rag_evidence=rag_evidence,
101
+ metadata=metadata
102
+ )
103
+
104
+ return response
app/rag.py ADDED
File without changes
app/rag/__init__.py ADDED
File without changes
app/rag/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (151 Bytes). View file
 
app/rag/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (167 Bytes). View file
 
app/rag/__pycache__/knowledge_loader.cpython-310.pyc ADDED
Binary file (742 Bytes). View file
 
app/rag/__pycache__/knowledge_loader.cpython-311.pyc ADDED
Binary file (1.19 kB). View file
 
app/rag/__pycache__/rag_engine.cpython-310.pyc ADDED
Binary file (2.95 kB). View file