Spaces:
Running
fix(3-bugs): Jina 401, reranker tokenizer, intent classifier v1 still running
Browse files
Fix 1 - Jina 401 Unauthorized (jina_reader_adapter.py):
- Jina now requires API key for most news sites
- Added Authorization: Bearer header when JINA_API_KEY is set
- Added JINA_API_KEY to config.py and .env template
- Logs warning if no key set (get free key at jina.ai, 1M tokens/month)
Fix 2 - Reranker tokenizer (bge_reranker_adapter.py):
- Error changed from prepare_for_model to build_inputs_with_special_tokens
- Now patches ALL potentially missing XLMRobertaTokenizer methods:
prepare_for_model, build_inputs_with_special_tokens,
create_token_type_ids_from_sequences, get_special_tokens_mask
- Iterates over method list instead of single hard-coded patch
Fix 3 - Intent classifier v1 still running (agent_router_use_case.py):
- AgentRouterUseCase was using old v1 binary classifier
- Now uses v2 LLM-powered classifier (Groq 8B -> Gemini -> OpenRouter -> HF)
- Falls back to v1 if v2 fails
- Replaced print(DEBUG) with logger.info([Router] prefix)
|
@@ -113,3 +113,8 @@ SEARXNG_MAX_RESULTS=10
|
|
| 113 |
# Get free key: https://openrouter.ai/keys (no credit card required)
|
| 114 |
# Free models: Llama 4, Qwen 3, DeepSeek, Gemma 3 and more
|
| 115 |
OPENROUTER_API_KEY=your-openrouter-api-key-here
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
# Get free key: https://openrouter.ai/keys (no credit card required)
|
| 114 |
# Free models: Llama 4, Qwen 3, DeepSeek, Gemma 3 and more
|
| 115 |
OPENROUTER_API_KEY=your-openrouter-api-key-here
|
| 116 |
+
|
| 117 |
+
# --- Jina AI Reader (Full Article Extraction) ---
|
| 118 |
+
# Get free key: https://jina.ai (1M tokens/month free)
|
| 119 |
+
# Without key: most news sites return 401 Unauthorized
|
| 120 |
+
JINA_API_KEY=your-jina-api-key-here
|
|
@@ -86,6 +86,7 @@ class Settings(BaseSettings):
|
|
| 86 |
ENABLE_JINA_READER: bool = os.getenv("ENABLE_JINA_READER", "true").lower() == "true"
|
| 87 |
JINA_READER_TIMEOUT: float = float(os.getenv("JINA_READER_TIMEOUT", "8.0"))
|
| 88 |
JINA_READER_MAX_CONCURRENT: int = int(os.getenv("JINA_READER_MAX_CONCURRENT", "5"))
|
|
|
|
| 89 |
|
| 90 |
# Cache Settings (TTL in seconds)
|
| 91 |
CACHE_RESPONSE_TTL: int = int(os.getenv("CACHE_RESPONSE_TTL", "300")) # 5 minutes
|
|
|
|
| 86 |
ENABLE_JINA_READER: bool = os.getenv("ENABLE_JINA_READER", "true").lower() == "true"
|
| 87 |
JINA_READER_TIMEOUT: float = float(os.getenv("JINA_READER_TIMEOUT", "8.0"))
|
| 88 |
JINA_READER_MAX_CONCURRENT: int = int(os.getenv("JINA_READER_MAX_CONCURRENT", "5"))
|
| 89 |
+
JINA_API_KEY: str = os.getenv("JINA_API_KEY", "") # Get free key at https://jina.ai
|
| 90 |
|
| 91 |
# Cache Settings (TTL in seconds)
|
| 92 |
CACHE_RESPONSE_TTL: int = int(os.getenv("CACHE_RESPONSE_TTL", "300")) # 5 minutes
|
|
@@ -63,9 +63,21 @@ class AgentRouterUseCase:
|
|
| 63 |
self.chat_history_db = chat_history_db
|
| 64 |
|
| 65 |
def _classify_intent(self, query: str) -> str:
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
|
| 70 |
def _detect_language(self, query: str) -> str:
|
| 71 |
"""
|
|
|
|
| 63 |
self.chat_history_db = chat_history_db
|
| 64 |
|
| 65 |
def _classify_intent(self, query: str) -> str:
|
| 66 |
+
"""Use v2 LLM-powered classifier (Groq 8B → Gemini → OpenRouter → HF → default)"""
|
| 67 |
+
try:
|
| 68 |
+
from src.infrastructure.adapters.intent_classifier_v2 import intent_classifier_v2
|
| 69 |
+
result = intent_classifier_v2.classify(query)
|
| 70 |
+
intent = "OTHER" if result.intent == "OTHER" else "NEWS"
|
| 71 |
+
logger.info(
|
| 72 |
+
f"[Router] Intent={result.intent} conf={result.confidence:.2f} "
|
| 73 |
+
f"method={result.method} time={result.inference_time_ms:.1f}ms"
|
| 74 |
+
)
|
| 75 |
+
return intent
|
| 76 |
+
except Exception as e:
|
| 77 |
+
logger.warning(f"[Router] v2 classifier failed: {e} — falling back to v1")
|
| 78 |
+
intent = intent_classifier.classify(query)
|
| 79 |
+
logger.info(f"[Router] Intent v1={intent} for query: '{query[:60]}'")
|
| 80 |
+
return intent
|
| 81 |
|
| 82 |
def _detect_language(self, query: str) -> str:
|
| 83 |
"""
|
|
@@ -60,16 +60,24 @@ class BgeRerankerAdapter(RerankerPort):
|
|
| 60 |
logger.info(f"Loading reranker model: {self.model_name}")
|
| 61 |
try:
|
| 62 |
if HAS_FLAG_RERANKER and "bge-reranker" in self.model_name.lower():
|
| 63 |
-
# Patch tokenizer compatibility
|
| 64 |
try:
|
| 65 |
import transformers
|
| 66 |
-
from transformers import XLMRobertaTokenizer
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
|
| 74 |
# FlagReranker: use_fp16=True halves memory, normalize=True gives [0,1] scores
|
| 75 |
# trust_remote_code=True fixes tokenizer compatibility issues
|
|
|
|
| 60 |
logger.info(f"Loading reranker model: {self.model_name}")
|
| 61 |
try:
|
| 62 |
if HAS_FLAG_RERANKER and "bge-reranker" in self.model_name.lower():
|
| 63 |
+
# Patch tokenizer compatibility issues before loading
|
| 64 |
try:
|
| 65 |
import transformers
|
| 66 |
+
from transformers import XLMRobertaTokenizer, PreTrainedTokenizer
|
| 67 |
+
# Patch all missing methods that different transformers versions may lack
|
| 68 |
+
for method_name in [
|
| 69 |
+
"prepare_for_model",
|
| 70 |
+
"build_inputs_with_special_tokens",
|
| 71 |
+
"create_token_type_ids_from_sequences",
|
| 72 |
+
"get_special_tokens_mask",
|
| 73 |
+
]:
|
| 74 |
+
if not hasattr(XLMRobertaTokenizer, method_name):
|
| 75 |
+
base_method = getattr(PreTrainedTokenizer, method_name, None)
|
| 76 |
+
if base_method:
|
| 77 |
+
setattr(XLMRobertaTokenizer, method_name, base_method)
|
| 78 |
+
logger.debug(f"Patched XLMRobertaTokenizer.{method_name}")
|
| 79 |
+
except Exception as patch_err:
|
| 80 |
+
logger.debug(f"Tokenizer patch skipped: {patch_err}")
|
| 81 |
|
| 82 |
# FlagReranker: use_fp16=True halves memory, normalize=True gives [0,1] scores
|
| 83 |
# trust_remote_code=True fixes tokenizer compatibility issues
|
|
@@ -58,14 +58,28 @@ class JinaReaderAdapter:
|
|
| 58 |
)
|
| 59 |
|
| 60 |
async def _ensure_client(self):
|
| 61 |
-
"""Lazy initialization of HTTP client"""
|
| 62 |
if self.client is None:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
self.client = httpx.AsyncClient(
|
| 64 |
timeout=self.timeout,
|
| 65 |
follow_redirects=True,
|
| 66 |
-
headers={
|
| 67 |
-
"User-Agent": "ARKI-AI-RAG/2.4 (Ethiopia News Assistant)"
|
| 68 |
-
}
|
| 69 |
)
|
| 70 |
|
| 71 |
async def extract_article(self, url: str) -> Dict[str, Any]:
|
|
|
|
| 58 |
)
|
| 59 |
|
| 60 |
async def _ensure_client(self):
|
| 61 |
+
"""Lazy initialization of HTTP client with optional API key auth"""
|
| 62 |
if self.client is None:
|
| 63 |
+
headers = {
|
| 64 |
+
"User-Agent": "ARKI-AI-RAG/2.4 (Ethiopia News Assistant)",
|
| 65 |
+
"Accept": "text/plain, text/markdown",
|
| 66 |
+
}
|
| 67 |
+
# Add Jina API key if available (required for most sites)
|
| 68 |
+
try:
|
| 69 |
+
from src.core.config import settings
|
| 70 |
+
jina_key = getattr(settings, "JINA_API_KEY", "")
|
| 71 |
+
if jina_key and jina_key not in ("", "your-jina-api-key-here"):
|
| 72 |
+
headers["Authorization"] = f"Bearer {jina_key}"
|
| 73 |
+
logger.info("Jina Reader: using API key authentication")
|
| 74 |
+
else:
|
| 75 |
+
logger.warning("Jina Reader: no API key set — most sites will return 401. Get free key at https://jina.ai")
|
| 76 |
+
except Exception:
|
| 77 |
+
pass
|
| 78 |
+
|
| 79 |
self.client = httpx.AsyncClient(
|
| 80 |
timeout=self.timeout,
|
| 81 |
follow_redirects=True,
|
| 82 |
+
headers=headers
|
|
|
|
|
|
|
| 83 |
)
|
| 84 |
|
| 85 |
async def extract_article(self, url: str) -> Dict[str, Any]:
|