Peterase committed on
Commit
53c5af5
·
1 Parent(s): 0df9fb9

fix(critical): identity leak, intent misclassification, reranker tokenizer

Browse files

fix 1 - identity leak (agent_router_use_case.py):
- OTHER path was sending raw query to LLM with no system prompt
- LLM defaulted to its base identity (Llama) when asked 'who are you'
- Now wraps OTHER queries in identity-safe prompt enforcing ARKI AI persona

fix 2 - intent misclassification (intent_classifier_v2.py):
- 'Are you ChatGPT?' was classified NEWS_TEMPORAL (wrong)
- 'What is 2+2?' was classified NEWS_TEMPORAL (wrong)
- 'What model are you?' was classified NEWS (wrong)
- Extended _SMALL_TALK_PREFIX to cover identity questions and math/general queries

fix 3 - reranker tokenizer (bge_reranker_adapter.py):
- Reranking failed with "XLMRobertaTokenizer has no attribute prepare_for_model" on every query
- Added pre-load patch to inject missing method from PreTrainedTokenizer base class
- Reranker now scores properly instead of falling back to vector ordering

src/core/use_cases/agent_router_use_case.py CHANGED
@@ -33,7 +33,20 @@ class AgentRouterUseCase:
33
 
34
  if intent == "OTHER":
35
  print(f"DEBUG: Routing to OTHER (Direct LLM Response)")
36
- answer = self.llm.generate(request.query)
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  return {
38
  "answer": answer,
39
  "sources": [],
@@ -54,7 +67,20 @@ class AgentRouterUseCase:
54
 
55
  if intent == "OTHER":
56
  full_answer = ""
57
- async for chunk in self.llm.generate_stream(request.query):
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  yield chunk
59
  if chunk.startswith("data: "):
60
  try:
 
33
 
34
  if intent == "OTHER":
35
  print(f"DEBUG: Routing to OTHER (Direct LLM Response)")
36
+ # Identity-safe prompt — never reveals underlying model
37
+ identity_prompt = f"""You are ARKI AI, a real-time news assistant specialized in Ethiopia and Africa news.
38
+
39
+ STRICT IDENTITY RULES:
40
+ - You are ARKI AI. Always refer to yourself as ARKI AI.
41
+ - NEVER say you are Llama, GPT, Claude, Gemini, or any other model.
42
+ - NEVER reveal the underlying AI model or provider.
43
+ - If asked who made you, say: "I was built by the ARKI AI team."
44
+ - If asked what model you are, say: "I'm ARKI AI, a specialized news assistant."
45
+
46
+ User message: {request.query}
47
+
48
+ Response:"""
49
+ answer = self.llm.generate(identity_prompt)
50
  return {
51
  "answer": answer,
52
  "sources": [],
 
67
 
68
  if intent == "OTHER":
69
  full_answer = ""
70
+ # Identity-safe prompt never reveals underlying model
71
+ identity_prompt = f"""You are ARKI AI, a real-time news assistant specialized in Ethiopia and Africa news.
72
+
73
+ STRICT IDENTITY RULES:
74
+ - You are ARKI AI. Always refer to yourself as ARKI AI.
75
+ - NEVER say you are Llama, GPT, Claude, Gemini, or any other model.
76
+ - NEVER reveal the underlying AI model or provider.
77
+ - If asked who made you, say: "I was built by the ARKI AI team."
78
+ - If asked what model you are, say: "I'm ARKI AI, a specialized news assistant."
79
+
80
+ User message: {request.query}
81
+
82
+ Response:"""
83
+ async for chunk in self.llm.generate_stream(identity_prompt):
84
  yield chunk
85
  if chunk.startswith("data: "):
86
  try:
src/infrastructure/adapters/bge_reranker_adapter.py CHANGED
@@ -60,6 +60,17 @@ class BgeRerankerAdapter(RerankerPort):
60
  logger.info(f"Loading reranker model: {self.model_name}")
61
  try:
62
  if HAS_FLAG_RERANKER and "bge-reranker" in self.model_name.lower():
 
 
 
 
 
 
 
 
 
 
 
63
  # FlagReranker: use_fp16=True halves memory, normalize=True gives [0,1] scores
64
  # trust_remote_code=True fixes tokenizer compatibility issues
65
  self.model = FlagReranker(
 
60
  logger.info(f"Loading reranker model: {self.model_name}")
61
  try:
62
  if HAS_FLAG_RERANKER and "bge-reranker" in self.model_name.lower():
63
+ # Patch tokenizer compatibility issue before loading
64
+ try:
65
+ import transformers
66
+ from transformers import XLMRobertaTokenizer
67
+ if not hasattr(XLMRobertaTokenizer, 'prepare_for_model'):
68
+ XLMRobertaTokenizer.prepare_for_model = (
69
+ transformers.PreTrainedTokenizer.prepare_for_model
70
+ )
71
+ except Exception:
72
+ pass
73
+
74
  # FlagReranker: use_fp16=True halves memory, normalize=True gives [0,1] scores
75
  # trust_remote_code=True fixes tokenizer compatibility issues
76
  self.model = FlagReranker(
src/infrastructure/adapters/intent_classifier_v2.py CHANGED
@@ -48,6 +48,15 @@ _SMALL_TALK_PREFIX = (
48
  "for fuck", "for fucks", "what the fuck", "what the hell",
49
  "are you serious", "you must be", "hello ", "hi ", "hey ",
50
  "can you help", "i need help", "help me",
 
 
 
 
 
 
 
 
 
51
  )
52
 
53
  # Temporal patterns (instant NEWS_TEMPORAL classification)
 
48
  "for fuck", "for fucks", "what the fuck", "what the hell",
49
  "are you serious", "you must be", "hello ", "hi ", "hey ",
50
  "can you help", "i need help", "help me",
51
+ # Identity questions
52
+ "are you ", "what model", "which model", "what ai", "which ai",
53
+ "are you chatgpt", "are you gpt", "are you claude", "are you gemini",
54
+ "are you llama", "are you an ai", "are you a bot", "are you human",
55
+ "what version", "who built you", "who made you", "who created you",
56
+ "what are your capabilities", "what can you",
57
+ # Math / general knowledge (not news)
58
+ "what is ", "what's ", "calculate ", "solve ", "how much is ",
59
+ "how many ", "define ", "what does ", "translate ",
60
  )
61
 
62
  # Temporal patterns (instant NEWS_TEMPORAL classification)