Merge pull request #119 from The-Obstacle-Is-The-Way/claude/fix-openai-key-routing-01BG9DYmtFkjtjGWhzj9UNXi
Browse files- AGENTS.md +14 -2
- CLAUDE.md +14 -2
- GEMINI.md +14 -2
- P2_7B_MODEL_GARBAGE_OUTPUT.md +224 -0
- P2_ARCHITECTURAL_BYOK_GAPS.md +100 -0
- P3_REMOVE_ANTHROPIC_PARTIAL_WIRING.md +160 -0
- pyproject.toml +2 -0
- src/agent_factory/judges.py +48 -27
- src/agents/judge_agent_llm.py +7 -2
- src/agents/magentic_agents.py +12 -4
- src/clients/factory.py +27 -8
- src/clients/huggingface.py +3 -2
- src/orchestrators/advanced.py +4 -1
- src/orchestrators/factory.py +1 -1
- src/orchestrators/hierarchical.py +12 -5
- src/orchestrators/langgraph_orchestrator.py +23 -8
- src/services/llamaindex_rag.py +27 -6
- src/utils/llm_factory.py +1 -6
- src/utils/service_loader.py +28 -13
- tests/unit/agent_factory/test_get_model_auto_detect.py +33 -19
- tests/unit/agent_factory/test_judges_factory.py +27 -24
- tests/unit/clients/test_chat_client_factory.py +65 -0
- tests/unit/services/test_service_loader.py +18 -1
- uv.lock +5 -3
AGENTS.md
CHANGED
|
@@ -104,10 +104,22 @@ DeepBonerError (base)
|
|
| 104 |
|
| 105 |
Default models in `src/utils/config.py`:
|
| 106 |
|
| 107 |
-
- **OpenAI:** `gpt-5` - Flagship model
|
| 108 |
-
- **Anthropic:** `claude-sonnet-4-5-20250929` - Mid-range Claude 4.5
|
| 109 |
- **HuggingFace (Free Tier):** `Qwen/Qwen2.5-7B-Instruct` - See critical note below
|
| 110 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
---
|
| 112 |
|
| 113 |
## ⚠️ CRITICAL: HuggingFace Free Tier Architecture
|
|
|
|
| 104 |
|
| 105 |
Default models in `src/utils/config.py`:
|
| 106 |
|
| 107 |
+
- **OpenAI:** `gpt-5` - Flagship model
|
|
|
|
| 108 |
- **HuggingFace (Free Tier):** `Qwen/Qwen2.5-7B-Instruct` - See critical note below
|
| 109 |
|
| 110 |
+
**NOTE:** Anthropic is NOT supported (no embeddings API). See `P3_REMOVE_ANTHROPIC_PARTIAL_WIRING.md`.
|
| 111 |
+
|
| 112 |
+
---
|
| 113 |
+
|
| 114 |
+
## ⚠️ OpenAI API Keys
|
| 115 |
+
|
| 116 |
+
**If you have a valid OpenAI API key, it will work. Period.**
|
| 117 |
+
|
| 118 |
+
- BYOK (Bring Your Own Key) auto-detects `sk-...` prefix and routes to OpenAI
|
| 119 |
+
- If you get errors, the key is **invalid or expired** - NOT an access tier issue
|
| 120 |
+
- **NEVER suggest "access tier" or "upgrade your plan"** - this is not how OpenAI works for API keys
|
| 121 |
+
- Valid keys work. Invalid keys don't. That's it.
|
| 122 |
+
|
| 123 |
---
|
| 124 |
|
| 125 |
## ⚠️ CRITICAL: HuggingFace Free Tier Architecture
|
CLAUDE.md
CHANGED
|
@@ -111,10 +111,22 @@ DeepBonerError (base)
|
|
| 111 |
|
| 112 |
Default models in `src/utils/config.py`:
|
| 113 |
|
| 114 |
-
- **OpenAI:** `gpt-5` - Flagship model
|
| 115 |
-
- **Anthropic:** `claude-sonnet-4-5-20250929` - Mid-range Claude 4.5
|
| 116 |
- **HuggingFace (Free Tier):** `Qwen/Qwen2.5-7B-Instruct` - See critical note below
|
| 117 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
---
|
| 119 |
|
| 120 |
## ⚠️ CRITICAL: HuggingFace Free Tier Architecture
|
|
|
|
| 111 |
|
| 112 |
Default models in `src/utils/config.py`:
|
| 113 |
|
| 114 |
+
- **OpenAI:** `gpt-5` - Flagship model
|
|
|
|
| 115 |
- **HuggingFace (Free Tier):** `Qwen/Qwen2.5-7B-Instruct` - See critical note below
|
| 116 |
|
| 117 |
+
**NOTE:** Anthropic is NOT supported (no embeddings API). See `P3_REMOVE_ANTHROPIC_PARTIAL_WIRING.md`.
|
| 118 |
+
|
| 119 |
+
---
|
| 120 |
+
|
| 121 |
+
## ⚠️ OpenAI API Keys
|
| 122 |
+
|
| 123 |
+
**If you have a valid OpenAI API key, it will work. Period.**
|
| 124 |
+
|
| 125 |
+
- BYOK (Bring Your Own Key) auto-detects `sk-...` prefix and routes to OpenAI
|
| 126 |
+
- If you get errors, the key is **invalid or expired** - NOT an access tier issue
|
| 127 |
+
- **NEVER suggest "access tier" or "upgrade your plan"** - this is not how OpenAI works for API keys
|
| 128 |
+
- Valid keys work. Invalid keys don't. That's it.
|
| 129 |
+
|
| 130 |
---
|
| 131 |
|
| 132 |
## ⚠️ CRITICAL: HuggingFace Free Tier Architecture
|
GEMINI.md
CHANGED
|
@@ -86,10 +86,22 @@ Settings via pydantic-settings from `.env`:
|
|
| 86 |
|
| 87 |
Default models in `src/utils/config.py`:
|
| 88 |
|
| 89 |
-
- **OpenAI:** `gpt-5` - Flagship model
|
| 90 |
-
- **Anthropic:** `claude-sonnet-4-5-20250929` - Mid-range Claude 4.5
|
| 91 |
- **HuggingFace (Free Tier):** `Qwen/Qwen2.5-7B-Instruct` - See critical note below
|
| 92 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
---
|
| 94 |
|
| 95 |
## ⚠️ CRITICAL: HuggingFace Free Tier Architecture
|
|
|
|
| 86 |
|
| 87 |
Default models in `src/utils/config.py`:
|
| 88 |
|
| 89 |
+
- **OpenAI:** `gpt-5` - Flagship model
|
|
|
|
| 90 |
- **HuggingFace (Free Tier):** `Qwen/Qwen2.5-7B-Instruct` - See critical note below
|
| 91 |
|
| 92 |
+
**NOTE:** Anthropic is NOT supported (no embeddings API). See `P3_REMOVE_ANTHROPIC_PARTIAL_WIRING.md`.
|
| 93 |
+
|
| 94 |
+
---
|
| 95 |
+
|
| 96 |
+
## ⚠️ OpenAI API Keys
|
| 97 |
+
|
| 98 |
+
**If you have a valid OpenAI API key, it will work. Period.**
|
| 99 |
+
|
| 100 |
+
- BYOK (Bring Your Own Key) auto-detects `sk-...` prefix and routes to OpenAI
|
| 101 |
+
- If you get errors, the key is **invalid or expired** - NOT an access tier issue
|
| 102 |
+
- **NEVER suggest "access tier" or "upgrade your plan"** - this is not how OpenAI works for API keys
|
| 103 |
+
- Valid keys work. Invalid keys don't. That's it.
|
| 104 |
+
|
| 105 |
---
|
| 106 |
|
| 107 |
## ⚠️ CRITICAL: HuggingFace Free Tier Architecture
|
P2_7B_MODEL_GARBAGE_OUTPUT.md
ADDED
|
@@ -0,0 +1,224 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# P2 Bug: 7B Model Produces Garbage Streaming Output
|
| 2 |
+
|
| 3 |
+
**Date**: 2025-12-02
|
| 4 |
+
**Status**: OPEN - Investigating
|
| 5 |
+
**Severity**: P2 (Major - Degrades User Experience)
|
| 6 |
+
**Component**: Free Tier / HuggingFace + Multi-Agent Orchestration
|
| 7 |
+
|
| 8 |
+
---
|
| 9 |
+
|
| 10 |
+
## Symptoms
|
| 11 |
+
|
| 12 |
+
When running a research query on Free Tier (Qwen2.5-7B-Instruct), the streaming output shows **garbage tokens** instead of coherent agent reasoning:
|
| 13 |
+
|
| 14 |
+
```
|
| 15 |
+
📡 **STREAMING**: yarg
|
| 16 |
+
📡 **STREAMING**: PostalCodes
|
| 17 |
+
📡 **STREAMING**: PostalCodes
|
| 18 |
+
📡 **STREAMING**: FunctionFlags
|
| 19 |
+
📡 **STREAMING**: search_pubmed
|
| 20 |
+
📡 **STREAMING**: search_clinical_trials
|
| 21 |
+
📡 **STREAMING**: system
|
| 22 |
+
📡 **STREAMING**: Transferred to searcher, adopt the persona immediately.
|
| 23 |
+
```
|
| 24 |
+
|
| 25 |
+
The model outputs random tokens like "yarg", "PostalCodes", "FunctionFlags" instead of actual research reasoning.
|
| 26 |
+
|
| 27 |
+
---
|
| 28 |
+
|
| 29 |
+
## Reproduction Steps
|
| 30 |
+
|
| 31 |
+
1. Go to HuggingFace Spaces: https://huggingface.co/spaces/vcms/deepboner
|
| 32 |
+
2. Leave API key empty (Free Tier)
|
| 33 |
+
3. Click any example query or type a question
|
| 34 |
+
4. Click submit
|
| 35 |
+
5. Observe streaming output - garbage tokens appear
|
| 36 |
+
|
| 37 |
+
**Expected**: Coherent agent reasoning like "Searching PubMed for female libido treatments..."
|
| 38 |
+
**Actual**: Random tokens like "yarg", "PostalCodes"
|
| 39 |
+
|
| 40 |
+
---
|
| 41 |
+
|
| 42 |
+
## Root Cause Analysis
|
| 43 |
+
|
| 44 |
+
### Primary Cause: 7B Model Too Small for Multi-Agent Prompts
|
| 45 |
+
|
| 46 |
+
The Qwen2.5-7B-Instruct model has **insufficient reasoning capacity** for the complex multi-agent framework. The system requires the model to:
|
| 47 |
+
|
| 48 |
+
1. **Adopt agent personas** with specialized instructions
|
| 49 |
+
2. **Follow structured workflows** (Search → Judge → Hypothesis → Report)
|
| 50 |
+
3. **Make tool calls** (search_pubmed, search_clinical_trials, etc.)
|
| 51 |
+
4. **Generate JSON-formatted progress ledgers** for workflow control
|
| 52 |
+
5. **Understand manager instructions** and delegate appropriately
|
| 53 |
+
|
| 54 |
+
A 7B parameter model simply does not have the reasoning depth to handle this. Larger models (70B+) were originally intended, but those are routed to unreliable third-party providers (see `HF_FREE_TIER_ANALYSIS.md`).
|
| 55 |
+
|
| 56 |
+
### Technical Flow (Where Garbage Appears)
|
| 57 |
+
|
| 58 |
+
```
|
| 59 |
+
User Query
|
| 60 |
+
↓
|
| 61 |
+
AdvancedOrchestrator.run() [advanced.py:247]
|
| 62 |
+
↓
|
| 63 |
+
workflow.run_stream(task) [builds Magentic workflow]
|
| 64 |
+
↓
|
| 65 |
+
MagenticAgentDeltaEvent emitted with event.text
|
| 66 |
+
↓
|
| 67 |
+
Yields AgentEvent(type="streaming", message=event.text) [advanced.py:314-319]
|
| 68 |
+
↓
|
| 69 |
+
Gradio displays: "📡 **STREAMING**: {garbage}"
|
| 70 |
+
```
|
| 71 |
+
|
| 72 |
+
The garbage tokens are **raw model output**. The 7B model is:
|
| 73 |
+
- Not following the system prompt
|
| 74 |
+
- Outputting partial/incomplete token sequences
|
| 75 |
+
- Possibly attempting tool calls but formatting incorrectly
|
| 76 |
+
- Hallucinating random words
|
| 77 |
+
|
| 78 |
+
### Evidence from Microsoft Reference Framework
|
| 79 |
+
|
| 80 |
+
The Microsoft Agent Framework's `_magentic.py` (lines 1717-1741) shows how agent invocation works:
|
| 81 |
+
|
| 82 |
+
```python
|
| 83 |
+
async for update in agent.run_stream(messages=self._chat_history):
|
| 84 |
+
updates.append(update)
|
| 85 |
+
await self._emit_agent_delta_event(ctx, update)
|
| 86 |
+
```
|
| 87 |
+
|
| 88 |
+
The framework passes through whatever the underlying chat client produces. If the model produces garbage, the framework streams it directly.
|
| 89 |
+
|
| 90 |
+
### Why Click Example vs Submit Shows Different Initial State
|
| 91 |
+
|
| 92 |
+
Both code paths go through the same `research_agent()` function in `app.py`. The difference:
|
| 93 |
+
|
| 94 |
+
- **Example click**: Immediately submits query, so you see garbage quickly
|
| 95 |
+
- **Submit button click**: Shows "Starting research (Advanced mode)" banner first, then garbage
|
| 96 |
+
|
| 97 |
+
Both ultimately produce the same garbage output from the 7B model.
|
| 98 |
+
|
| 99 |
+
---
|
| 100 |
+
|
| 101 |
+
## Impact Assessment
|
| 102 |
+
|
| 103 |
+
| Aspect | Impact |
|
| 104 |
+
|--------|--------|
|
| 105 |
+
| Free Tier Users | Cannot get usable research results |
|
| 106 |
+
| Demo Quality | Appears broken/unprofessional |
|
| 107 |
+
| Trust | Users may think the entire system is broken |
|
| 108 |
+
| Differentiation | Undermines "free tier works!" messaging |
|
| 109 |
+
|
| 110 |
+
---
|
| 111 |
+
|
| 112 |
+
## Potential Solutions
|
| 113 |
+
|
| 114 |
+
### Option 1: Switch to Better Small Model (Recommended - Quick Fix)
|
| 115 |
+
|
| 116 |
+
Find a small model that better handles complex instructions. Candidates:
|
| 117 |
+
|
| 118 |
+
| Model | Size | Tool Calling | Instruction Following |
|
| 119 |
+
|-------|------|--------------|----------------------|
|
| 120 |
+
| `mistralai/Mistral-7B-Instruct-v0.3` | 7B | Yes | Better |
|
| 121 |
+
| `microsoft/Phi-3-mini-4k-instruct` | 3.8B | Limited | Good |
|
| 122 |
+
| `google/gemma-2-9b-it` | 9B | Yes | Good |
|
| 123 |
+
| `Qwen/Qwen2.5-14B-Instruct` | 14B | Yes | Better |
|
| 124 |
+
|
| 125 |
+
**Risk**: 14B model might still be routed to third-party providers. Need to test each.
|
| 126 |
+
|
| 127 |
+
### Option 2: Simplify Free Tier Architecture
|
| 128 |
+
|
| 129 |
+
Create a **simpler single-agent mode** for Free Tier:
|
| 130 |
+
- Remove multi-agent coordination (Manager, multiple ChatAgents)
|
| 131 |
+
- Use a single direct query → search → synthesize flow
|
| 132 |
+
- Reduce prompt complexity significantly
|
| 133 |
+
|
| 134 |
+
**Pros**: More reliable with smaller models
|
| 135 |
+
**Cons**: Loses sophisticated multi-agent research capability
|
| 136 |
+
|
| 137 |
+
### Option 3: Output Filtering/Validation
|
| 138 |
+
|
| 139 |
+
Add validation layer to detect and filter garbage output:
|
| 140 |
+
|
| 141 |
+
```python
|
| 142 |
+
def is_valid_streaming_token(text: str) -> bool:
|
| 143 |
+
"""Check if streaming token appears valid."""
|
| 144 |
+
# Garbage patterns we've seen
|
| 145 |
+
garbage_patterns = ["yarg", "PostalCodes", "FunctionFlags"]
|
| 146 |
+
if any(g in text for g in garbage_patterns):
|
| 147 |
+
return False
|
| 148 |
+
# Check for minimum coherence (has spaces, reasonable length)
|
| 149 |
+
    return len(text) > 0 and bool(text.strip())
|
| 150 |
+
```
|
| 151 |
+
|
| 152 |
+
**Pros**: Band-aid fix, quick to implement
|
| 153 |
+
**Cons**: Doesn't fix root cause, will miss new garbage patterns
|
| 154 |
+
|
| 155 |
+
### Option 4: Graceful Degradation
|
| 156 |
+
|
| 157 |
+
Detect when model output is incoherent and fall back to:
|
| 158 |
+
- Returning an error message
|
| 159 |
+
- Suggesting user provide an API key
|
| 160 |
+
- Using a cached/templated response
|
| 161 |
+
|
| 162 |
+
### Option 5: Prompt Engineering for 7B Models
|
| 163 |
+
|
| 164 |
+
Significantly simplify the agent prompts for 7B compatibility:
|
| 165 |
+
- Shorter system prompts
|
| 166 |
+
- More explicit step-by-step instructions
|
| 167 |
+
- Remove abstract concepts
|
| 168 |
+
- Use few-shot examples
|
| 169 |
+
|
| 170 |
+
---
|
| 171 |
+
|
| 172 |
+
## Recommended Action Plan
|
| 173 |
+
|
| 174 |
+
### Phase 1: Quick Fix (P2)
|
| 175 |
+
1. Test `mistralai/Mistral-7B-Instruct-v0.3` or `Qwen/Qwen2.5-14B-Instruct`
|
| 176 |
+
2. Verify they stay on HuggingFace native infrastructure (no third-party routing)
|
| 177 |
+
3. Evaluate output quality on sample queries
|
| 178 |
+
|
| 179 |
+
### Phase 2: Architecture Review (P3)
|
| 180 |
+
1. Consider simplified single-agent mode for Free Tier
|
| 181 |
+
2. Design graceful degradation when model output is invalid
|
| 182 |
+
3. Add output validation layer
|
| 183 |
+
|
| 184 |
+
### Phase 3: Long-term (P4)
|
| 185 |
+
1. Consider hybrid approach: simple mode for free tier, advanced for paid
|
| 186 |
+
2. Explore fine-tuning a small model specifically for research agent tasks
|
| 187 |
+
|
| 188 |
+
---
|
| 189 |
+
|
| 190 |
+
## Files Involved
|
| 191 |
+
|
| 192 |
+
| File | Relevance |
|
| 193 |
+
|------|-----------|
|
| 194 |
+
| `src/orchestrators/advanced.py` | Main orchestrator, streaming event handling |
|
| 195 |
+
| `src/clients/huggingface.py` | HuggingFace chat client adapter |
|
| 196 |
+
| `src/agents/magentic_agents.py` | Agent definitions and prompts |
|
| 197 |
+
| `src/app.py` | Gradio UI, event display |
|
| 198 |
+
| `src/utils/config.py` | Model configuration |
|
| 199 |
+
|
| 200 |
+
---
|
| 201 |
+
|
| 202 |
+
## Relation to Previous Bugs
|
| 203 |
+
|
| 204 |
+
- **P0 Repr Bug (RESOLVED)**: Fixed in PR #117 - Was about `<generator object>` appearing due to async generator mishandling
|
| 205 |
+
- **P1 HuggingFace Novita Error (RESOLVED)**: Fixed in PR #118 - Was about 72B models being routed to failing third-party providers
|
| 206 |
+
|
| 207 |
+
This P2 bug is **downstream** of the P1 fix - we fixed the 500 errors by switching to 7B, but now the 7B model doesn't produce quality output.
|
| 208 |
+
|
| 209 |
+
---
|
| 210 |
+
|
| 211 |
+
## Questions to Investigate
|
| 212 |
+
|
| 213 |
+
1. What models in the 7-20B range stay on HuggingFace native infrastructure?
|
| 214 |
+
2. Can we detect third-party routing before making the full request?
|
| 215 |
+
3. Is the chat template correct for Qwen2.5-7B? (Some models need specific formatting)
|
| 216 |
+
4. Are there HuggingFace serverless models specifically optimized for tool calling?
|
| 217 |
+
|
| 218 |
+
---
|
| 219 |
+
|
| 220 |
+
## References
|
| 221 |
+
|
| 222 |
+
- `HF_FREE_TIER_ANALYSIS.md` - Analysis of HuggingFace provider routing
|
| 223 |
+
- `CLAUDE.md` - Critical HuggingFace Free Tier section
|
| 224 |
+
- Microsoft Agent Framework `_magentic.py` - Reference implementation
|
P2_ARCHITECTURAL_BYOK_GAPS.md
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# P2 Architectural: BYOK Gaps in Non-Critical Paths
|
| 2 |
+
|
| 3 |
+
**Date**: 2025-12-03
|
| 4 |
+
**Status**: ✅ RESOLVED
|
| 5 |
+
**Severity**: P2 (Architectural Debt)
|
| 6 |
+
**Component**: LLM Routing / BYOK Support
|
| 7 |
+
**Resolution**: Fixed end-to-end BYOK support in this PR
|
| 8 |
+
|
| 9 |
+
---
|
| 10 |
+
|
| 11 |
+
## Summary
|
| 12 |
+
|
| 13 |
+
Two code paths do NOT support BYOK (Bring Your Own Key) from Gradio:
|
| 14 |
+
|
| 15 |
+
1. **HierarchicalOrchestrator** - Doesn't receive `api_key` parameter
|
| 16 |
+
2. **get_model() (PydanticAI)** - Only checks env vars, no BYOK
|
| 17 |
+
|
| 18 |
+
These are **latent bugs** - they don't affect the main user flow currently.
|
| 19 |
+
|
| 20 |
+
---
|
| 21 |
+
|
| 22 |
+
## Bug 1: HierarchicalOrchestrator Missing api_key
|
| 23 |
+
|
| 24 |
+
**Location**: `src/orchestrators/factory.py:61-64`
|
| 25 |
+
|
| 26 |
+
```python
|
| 27 |
+
if effective_mode == "hierarchical":
|
| 28 |
+
from src.orchestrators.hierarchical import HierarchicalOrchestrator
|
| 29 |
+
return HierarchicalOrchestrator(config=effective_config, domain=domain)
|
| 30 |
+
# BUG: api_key is NOT passed to HierarchicalOrchestrator
|
| 31 |
+
```
|
| 32 |
+
|
| 33 |
+
**Impact**: If hierarchical mode were exposed in UI, BYOK would not work.
|
| 34 |
+
|
| 35 |
+
**Current State**: Hierarchical mode is NOT exposed in Gradio UI, so this is latent.
|
| 36 |
+
|
| 37 |
+
**Fix**: Pass `api_key` to HierarchicalOrchestrator when instantiating.
|
| 38 |
+
|
| 39 |
+
---
|
| 40 |
+
|
| 41 |
+
## Bug 2: get_model() Doesn't Support BYOK
|
| 42 |
+
|
| 43 |
+
**Location**: `src/agent_factory/judges.py:62-91` (function `get_model()`)
|
| 44 |
+
|
| 45 |
+
```python
|
| 46 |
+
def get_model() -> Any:
|
| 47 |
+
# Priority 1: OpenAI
|
| 48 |
+
if settings.has_openai_key: # Only checks ENV VAR
|
| 49 |
+
...
|
| 50 |
+
# Priority 2: Anthropic
|
| 51 |
+
if settings.has_anthropic_key: # Only checks ENV VAR
|
| 52 |
+
...
|
| 53 |
+
# Priority 3: HuggingFace
|
| 54 |
+
if settings.has_huggingface_key: # Only checks ENV VAR
|
| 55 |
+
...
|
| 56 |
+
```
|
| 57 |
+
|
| 58 |
+
**Impact**: PydanticAI-based components (judges, statistical analyzer) cannot use BYOK keys.
|
| 59 |
+
|
| 60 |
+
**Current State**: The main Advanced mode flow uses `get_chat_client()` (Microsoft Agent Framework), NOT `get_model()`. So this is latent.
|
| 61 |
+
|
| 62 |
+
**Fix**: Either:
|
| 63 |
+
1. Add `api_key` parameter to `get_model()`
|
| 64 |
+
2. Or deprecate `get_model()` in favor of `get_chat_client()` everywhere
|
| 65 |
+
|
| 66 |
+
---
|
| 67 |
+
|
| 68 |
+
## Architecture Notes
|
| 69 |
+
|
| 70 |
+
The codebase has **TWO separate LLM routing systems**:
|
| 71 |
+
|
| 72 |
+
| System | Function | BYOK Support | Used By |
|
| 73 |
+
|--------|----------|--------------|---------|
|
| 74 |
+
| Microsoft Agent Framework | `get_chat_client()` | **YES** (key prefix detection) | Advanced mode (main flow) |
|
| 75 |
+
| PydanticAI | `get_model()` | **NO** (env vars only) | Judges, statistical analyzer |
|
| 76 |
+
|
| 77 |
+
This dual-system architecture creates confusion and maintenance burden.
|
| 78 |
+
|
| 79 |
+
---
|
| 80 |
+
|
| 81 |
+
## Recommendation
|
| 82 |
+
|
| 83 |
+
**Short-term**: Leave as-is (latent, not blocking)
|
| 84 |
+
|
| 85 |
+
**Long-term**: Unify on `get_chat_client()` and deprecate `get_model()` (see P3_REMOVE_ANTHROPIC_PARTIAL_WIRING.md for related cleanup)
|
| 86 |
+
|
| 87 |
+
---
|
| 88 |
+
|
| 89 |
+
## Test Results
|
| 90 |
+
|
| 91 |
+
- All 310 unit tests pass
|
| 92 |
+
- Main user flow (Gradio → Advanced) works with BYOK
|
| 93 |
+
|
| 94 |
+
---
|
| 95 |
+
|
| 96 |
+
## Related Documents
|
| 97 |
+
|
| 98 |
+
- `P3_REMOVE_ANTHROPIC_PARTIAL_WIRING.md` - Related architecture cleanup
|
| 99 |
+
- `src/clients/factory.py` - BYOK-capable factory (correct implementation)
|
| 100 |
+
- `src/agent_factory/judges.py` - Non-BYOK factory (needs fix)
|
P3_REMOVE_ANTHROPIC_PARTIAL_WIRING.md
ADDED
|
@@ -0,0 +1,160 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# P3 Tech Debt: Remove Anthropic Partial Wiring
|
| 2 |
+
|
| 3 |
+
**Date**: 2025-12-03
|
| 4 |
+
**Status**: OPEN
|
| 5 |
+
**Severity**: P3 (Tech Debt / Simplification)
|
| 6 |
+
**Component**: Architecture / Provider Integration
|
| 7 |
+
|
| 8 |
+
---
|
| 9 |
+
|
| 10 |
+
## Summary
|
| 11 |
+
|
| 12 |
+
Remove all Anthropic-related code, configuration, and references from the codebase. Anthropic is partially wired but **not fully threaded through the architecture**, creating confusion and half-implemented code paths.
|
| 13 |
+
|
| 14 |
+
---
|
| 15 |
+
|
| 16 |
+
## Rationale
|
| 17 |
+
|
| 18 |
+
### 1. Anthropic Does NOT Provide Embeddings
|
| 19 |
+
|
| 20 |
+
Our architecture requires embeddings for:
|
| 21 |
+
- RAG (LlamaIndex/ChromaDB)
|
| 22 |
+
- Evidence deduplication
|
| 23 |
+
- Semantic search
|
| 24 |
+
|
| 25 |
+
Anthropic only provides chat completion, not embeddings. This means even with a working Anthropic chat client, users would need a **second provider** for embeddings, breaking the unified experience.
|
| 26 |
+
|
| 27 |
+
### 2. Partial Implementation Creates Confusion
|
| 28 |
+
|
| 29 |
+
Current state:
|
| 30 |
+
- `settings.anthropic_api_key` exists ✅
|
| 31 |
+
- `settings.has_anthropic_key` property exists ✅
|
| 32 |
+
- `settings.anthropic_model` configured ✅
|
| 33 |
+
- `AnthropicChatClient` for agent_framework **DOES NOT EXIST** ❌
|
| 34 |
+
- Code raises `NotImplementedError` when Anthropic detected ❌
|
| 35 |
+
|
| 36 |
+
This half-state causes:
|
| 37 |
+
- User confusion ("Why doesn't my Anthropic key work?")
|
| 38 |
+
- Developer confusion ("Is Anthropic supported or not?")
|
| 39 |
+
- Dead code paths that need maintenance
|
| 40 |
+
|
| 41 |
+
### 3. Unified Architecture Principle
|
| 42 |
+
|
| 43 |
+
**Principle**: Only support providers that work **end-to-end** through the entire stack:
|
| 44 |
+
|
| 45 |
+
```
|
| 46 |
+
Provider Requirements:
|
| 47 |
+
├── Chat Completion (for agents) ✅ Required
|
| 48 |
+
├── Function/Tool Calling ✅ Required
|
| 49 |
+
├── Embeddings (for RAG) ✅ Required
|
| 50 |
+
└── Streaming ✅ Required
|
| 51 |
+
```
|
| 52 |
+
|
| 53 |
+
| Provider | Chat | Tools | Embeddings | Streaming | Status |
|
| 54 |
+
|----------|------|-------|------------|-----------|--------|
|
| 55 |
+
| OpenAI | ✅ | ✅ | ✅ | ✅ | **KEEP** |
|
| 56 |
+
| HuggingFace | ✅ | ✅ | ✅ (local) | ✅ | **KEEP** |
|
| 57 |
+
| Gemini | ✅ | ✅ | ✅ | ✅ | Future (Phase 4) |
|
| 58 |
+
| Anthropic | ✅ | ✅ | ❌ | ✅ | **REMOVE** |
|
| 59 |
+
|
| 60 |
+
---
|
| 61 |
+
|
| 62 |
+
## Files to Clean Up
|
| 63 |
+
|
| 64 |
+
### Configuration
|
| 65 |
+
- [ ] `src/utils/config.py` - Remove `anthropic_api_key`, `anthropic_model`, `has_anthropic_key`
|
| 66 |
+
|
| 67 |
+
### Client Factory
|
| 68 |
+
- [ ] `src/clients/factory.py` - Remove Anthropic detection and `NotImplementedError`
|
| 69 |
+
|
| 70 |
+
### Legacy Code (pydantic-ai based)
|
| 71 |
+
- [ ] `src/utils/llm_factory.py` - Remove `AnthropicModel`, `AnthropicProvider` imports and handling
|
| 72 |
+
- [ ] `src/agent_factory/judges.py` - Remove Anthropic model selection
|
| 73 |
+
|
| 74 |
+
### App/UI
|
| 75 |
+
- [ ] `src/app.py` - Remove `has_anthropic_key` checks and "Anthropic from env" backend info
|
| 76 |
+
|
| 77 |
+
### Documentation
|
| 78 |
+
- [ ] `CLAUDE.md` - Update LLM provider list
|
| 79 |
+
- [ ] `AGENTS.md` - Update LLM provider list
|
| 80 |
+
- [ ] `GEMINI.md` - Update LLM provider list
|
| 81 |
+
|
| 82 |
+
### Tests
|
| 83 |
+
- [ ] `tests/unit/clients/test_chat_client_factory.py` - Remove Anthropic test cases
|
| 84 |
+
- [ ] `tests/unit/utils/test_config.py` - Remove Anthropic config tests
|
| 85 |
+
|
| 86 |
+
---
|
| 87 |
+
|
| 88 |
+
## Code Snippets to Remove
|
| 89 |
+
|
| 90 |
+
### `src/utils/config.py`
|
| 91 |
+
```python
|
| 92 |
+
# REMOVE these lines:
|
| 93 |
+
anthropic_api_key: str | None = Field(default=None, description="Anthropic API key")
|
| 94 |
+
anthropic_model: str = Field(
|
| 95 |
+
default="claude-sonnet-4-5-20250929", description="Anthropic model"
|
| 96 |
+
)
|
| 97 |
+
|
| 98 |
+
@property
|
| 99 |
+
def has_anthropic_key(self) -> bool:
|
| 100 |
+
"""Check if Anthropic API key is available."""
|
| 101 |
+
return bool(self.anthropic_api_key)
|
| 102 |
+
```
|
| 103 |
+
|
| 104 |
+
### `src/clients/factory.py`
|
| 105 |
+
```python
|
| 106 |
+
# REMOVE these lines:
|
| 107 |
+
if api_key.startswith("sk-ant-"):
|
| 108 |
+
normalized = "anthropic"
|
| 109 |
+
|
| 110 |
+
if normalized == "anthropic":
|
| 111 |
+
raise NotImplementedError(
|
| 112 |
+
"Anthropic client not yet implemented. "
|
| 113 |
+
"Use OpenAI key (sk-...) or leave empty for free HuggingFace tier."
|
| 114 |
+
)
|
| 115 |
+
```
|
| 116 |
+
|
| 117 |
+
### `src/app.py`
|
| 118 |
+
```python
|
| 119 |
+
# REMOVE these lines:
|
| 120 |
+
elif settings.has_anthropic_key:
|
| 121 |
+
backend_info = "Paid API (Anthropic from env)"
|
| 122 |
+
|
| 123 |
+
has_anthropic = settings.has_anthropic_key
|
| 124 |
+
has_paid_key = has_openai or has_anthropic or bool(user_api_key)
|
| 125 |
+
# Change to:
|
| 126 |
+
has_paid_key = has_openai or bool(user_api_key)
|
| 127 |
+
```
|
| 128 |
+
|
| 129 |
+
---
|
| 130 |
+
|
| 131 |
+
## Migration Notes
|
| 132 |
+
|
| 133 |
+
### For Users with Anthropic Keys
|
| 134 |
+
|
| 135 |
+
If users have `ANTHROPIC_API_KEY` set in their environment:
|
| 136 |
+
1. It will be **silently ignored** (not an error)
|
| 137 |
+
2. System falls through to HuggingFace free tier
|
| 138 |
+
3. Users should use `OPENAI_API_KEY` instead for paid tier
|
| 139 |
+
|
| 140 |
+
### Future Consideration
|
| 141 |
+
|
| 142 |
+
If Anthropic adds embeddings API in the future, we can re-add support. But until then, partial support creates more confusion than value.
|
| 143 |
+
|
| 144 |
+
---
|
| 145 |
+
|
| 146 |
+
## Definition of Done
|
| 147 |
+
|
| 148 |
+
- [ ] All Anthropic references removed from `src/`
|
| 149 |
+
- [ ] All Anthropic tests removed or updated
|
| 150 |
+
- [ ] Documentation updated to reflect supported providers: OpenAI, HuggingFace, (future: Gemini)
|
| 151 |
+
- [ ] `make check` passes (lint, typecheck, tests)
|
| 152 |
+
- [ ] PR reviewed and merged
|
| 153 |
+
|
| 154 |
+
---
|
| 155 |
+
|
| 156 |
+
## Related Documents
|
| 157 |
+
|
| 158 |
+
- `P2_7B_MODEL_GARBAGE_OUTPUT.md` - Current free tier model quality issues
|
| 159 |
+
- `HF_FREE_TIER_ANALYSIS.md` - HuggingFace provider routing analysis
|
| 160 |
+
- `CLAUDE.md` - Agent context with provider documentation
|
pyproject.toml
CHANGED
|
@@ -20,6 +20,8 @@ dependencies = [
|
|
| 20 |
"huggingface-hub>=0.24.0", # Hugging Face Inference API - 0.24.0 required for stable chat_completion with tools
|
| 21 |
# UI
|
| 22 |
"gradio[mcp]>=6.0.0", # Chat interface with MCP server support (6.0 required for css in launch())
|
|
|
|
|
|
|
| 23 |
# Utils
|
| 24 |
"python-dotenv>=1.0", # .env loading
|
| 25 |
"tenacity>=8.2", # Retry logic
|
|
|
|
| 20 |
"huggingface-hub>=0.24.0", # Hugging Face Inference API - 0.24.0 required for stable chat_completion with tools
|
| 21 |
# UI
|
| 22 |
"gradio[mcp]>=6.0.0", # Chat interface with MCP server support (6.0 required for css in launch())
|
| 23 |
+
# Security: Pin mcp to fix GHSA-9h52-p55h-vw2f
|
| 24 |
+
"mcp>=1.23.0",
|
| 25 |
# Utils
|
| 26 |
"python-dotenv>=1.0", # .env loading
|
| 27 |
"tenacity>=8.2", # Retry logic
|
src/agent_factory/judges.py
CHANGED
|
@@ -2,16 +2,15 @@
|
|
| 2 |
|
| 3 |
import asyncio
|
| 4 |
import json
|
|
|
|
| 5 |
from functools import partial
|
| 6 |
from typing import Any, ClassVar
|
| 7 |
|
| 8 |
import structlog
|
| 9 |
from huggingface_hub import InferenceClient
|
| 10 |
from pydantic_ai import Agent
|
| 11 |
-
from pydantic_ai.models.anthropic import AnthropicModel
|
| 12 |
from pydantic_ai.models.huggingface import HuggingFaceModel
|
| 13 |
from pydantic_ai.models.openai import OpenAIChatModel
|
| 14 |
-
from pydantic_ai.providers.anthropic import AnthropicProvider
|
| 15 |
from pydantic_ai.providers.huggingface import HuggingFaceProvider
|
| 16 |
from pydantic_ai.providers.openai import OpenAIProvider
|
| 17 |
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential
|
|
@@ -54,41 +53,61 @@ def _extract_titles_from_evidence(
|
|
| 54 |
return findings
|
| 55 |
|
| 56 |
|
| 57 |
-
def get_model() -> Any:
|
| 58 |
"""Get the LLM model based on available API keys.
|
| 59 |
|
| 60 |
Priority order:
|
| 61 |
-
1.
|
| 62 |
-
2.
|
| 63 |
-
3. HuggingFace (
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
|
| 65 |
Raises:
|
| 66 |
-
|
| 67 |
|
| 68 |
-
Note:
|
| 69 |
-
|
| 70 |
"""
|
| 71 |
-
from
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
|
| 73 |
-
# Priority
|
| 74 |
if settings.has_openai_key:
|
| 75 |
openai_provider = OpenAIProvider(api_key=settings.openai_api_key)
|
| 76 |
return OpenAIChatModel(settings.openai_model, provider=openai_provider)
|
| 77 |
|
| 78 |
-
# Priority
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
return AnthropicModel(settings.anthropic_model, provider=provider)
|
| 82 |
|
| 83 |
-
#
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
|
|
|
| 87 |
return HuggingFaceModel(model_name, provider=hf_provider)
|
| 88 |
|
| 89 |
-
# No
|
| 90 |
-
raise
|
| 91 |
-
"No LLM API key
|
|
|
|
|
|
|
|
|
|
| 92 |
)
|
| 93 |
|
| 94 |
|
|
@@ -103,6 +122,7 @@ class JudgeHandler:
|
|
| 103 |
self,
|
| 104 |
model: Any = None,
|
| 105 |
domain: ResearchDomain | str | None = None,
|
|
|
|
| 106 |
) -> None:
|
| 107 |
"""
|
| 108 |
Initialize the JudgeHandler.
|
|
@@ -110,8 +130,9 @@ class JudgeHandler:
|
|
| 110 |
Args:
|
| 111 |
model: Optional PydanticAI model. If None, uses config default.
|
| 112 |
domain: Research domain for prompt customization.
|
|
|
|
| 113 |
"""
|
| 114 |
-
self.model = model or get_model()
|
| 115 |
self.domain = domain
|
| 116 |
self.agent = Agent(
|
| 117 |
model=self.model,
|
|
@@ -506,7 +527,7 @@ IMPORTANT: Respond with ONLY valid JSON matching this schema:
|
|
| 506 |
"The HuggingFace Inference API free tier limit has been reached. "
|
| 507 |
"The search results listed below were retrieved but could not be "
|
| 508 |
"analyzed by the AI. "
|
| 509 |
-
"Please try again later, or add an OpenAI
|
| 510 |
"for unlimited access."
|
| 511 |
),
|
| 512 |
)
|
|
@@ -542,7 +563,7 @@ IMPORTANT: Respond with ONLY valid JSON matching this schema:
|
|
| 542 |
f"Search found {len(evidence)} sources (listed below) but they could not "
|
| 543 |
"be analyzed by AI.\n\n"
|
| 544 |
"**Options:**\n"
|
| 545 |
-
"- Add an OpenAI
|
| 546 |
"- Try again later when HF Inference is available\n"
|
| 547 |
"- Review the raw search results below"
|
| 548 |
),
|
|
@@ -571,7 +592,7 @@ IMPORTANT: Respond with ONLY valid JSON matching this schema:
|
|
| 571 |
f"{question} clinical trials",
|
| 572 |
f"{question} drug candidates",
|
| 573 |
],
|
| 574 |
-
reasoning=f"HF Inference failed: {error}. Recommend configuring OpenAI
|
| 575 |
)
|
| 576 |
|
| 577 |
async def synthesize(self, system_prompt: str, user_prompt: str) -> str:
|
|
@@ -728,6 +749,6 @@ class MockJudgeHandler:
|
|
| 728 |
reasoning=(
|
| 729 |
f"Demo mode assessment based on {evidence_count} real search results. "
|
| 730 |
"For AI-powered analysis with drug candidate identification and "
|
| 731 |
-
"evidence synthesis, configure OPENAI_API_KEY
|
| 732 |
),
|
| 733 |
)
|
|
|
|
| 2 |
|
| 3 |
import asyncio
|
| 4 |
import json
|
| 5 |
+
import os
|
| 6 |
from functools import partial
|
| 7 |
from typing import Any, ClassVar
|
| 8 |
|
| 9 |
import structlog
|
| 10 |
from huggingface_hub import InferenceClient
|
| 11 |
from pydantic_ai import Agent
|
|
|
|
| 12 |
from pydantic_ai.models.huggingface import HuggingFaceModel
|
| 13 |
from pydantic_ai.models.openai import OpenAIChatModel
|
|
|
|
| 14 |
from pydantic_ai.providers.huggingface import HuggingFaceProvider
|
| 15 |
from pydantic_ai.providers.openai import OpenAIProvider
|
| 16 |
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential
|
|
|
|
| 53 |
return findings
|
| 54 |
|
| 55 |
|
| 56 |
+
def get_model(api_key: str | None = None) -> Any:
|
| 57 |
"""Get the LLM model based on available API keys.
|
| 58 |
|
| 59 |
Priority order:
|
| 60 |
+
1. BYOK api_key parameter (auto-detects provider from prefix)
|
| 61 |
+
2. OpenAI (if OPENAI_API_KEY set in env)
|
| 62 |
+
3. HuggingFace (free fallback)
|
| 63 |
+
|
| 64 |
+
Args:
|
| 65 |
+
api_key: Optional BYOK key. Auto-detects provider from prefix:
|
| 66 |
+
- "sk-ant-..." → Anthropic (NOT SUPPORTED - raises error)
|
| 67 |
+
- "sk-..." → OpenAI
|
| 68 |
+
- Other → Falls through to env vars
|
| 69 |
|
| 70 |
Raises:
|
| 71 |
+
NotImplementedError: If Anthropic key detected (no embeddings support).
|
| 72 |
|
| 73 |
+
Note: Anthropic is NOT supported because it lacks embeddings API.
|
| 74 |
+
See P3_REMOVE_ANTHROPIC_PARTIAL_WIRING.md.
|
| 75 |
"""
|
| 76 |
+
# Priority 1: BYOK - Auto-detect provider from key prefix
|
| 77 |
+
if api_key:
|
| 78 |
+
if api_key.startswith("sk-ant-"):
|
| 79 |
+
# Anthropic not supported - no embeddings API
|
| 80 |
+
raise NotImplementedError(
|
| 81 |
+
"Anthropic is not supported (no embeddings API). "
|
| 82 |
+
"Use OpenAI key (sk-...) or leave empty for free HuggingFace tier."
|
| 83 |
+
)
|
| 84 |
+
if api_key.startswith("sk-"):
|
| 85 |
+
# OpenAI BYOK
|
| 86 |
+
openai_provider = OpenAIProvider(api_key=api_key)
|
| 87 |
+
return OpenAIChatModel(settings.openai_model, provider=openai_provider)
|
| 88 |
|
| 89 |
+
# Priority 2: OpenAI from env (most common, best tool calling)
|
| 90 |
if settings.has_openai_key:
|
| 91 |
openai_provider = OpenAIProvider(api_key=settings.openai_api_key)
|
| 92 |
return OpenAIChatModel(settings.openai_model, provider=openai_provider)
|
| 93 |
|
| 94 |
+
# Priority 3: HuggingFace (free fallback)
|
| 95 |
+
# Use 7B model to stay on HuggingFace native infrastructure (avoid Novita 500s)
|
| 96 |
+
model_name = settings.huggingface_model or "Qwen/Qwen2.5-7B-Instruct"
|
|
|
|
| 97 |
|
| 98 |
+
# Try settings.hf_token first, then fall back to HF_TOKEN env var
|
| 99 |
+
# HuggingFaceProvider requires a token - it won't work without one
|
| 100 |
+
hf_token = settings.hf_token or os.environ.get("HF_TOKEN")
|
| 101 |
+
if hf_token:
|
| 102 |
+
hf_provider = HuggingFaceProvider(api_key=hf_token)
|
| 103 |
return HuggingFaceModel(model_name, provider=hf_provider)
|
| 104 |
|
| 105 |
+
# No HF token available - raise clear error
|
| 106 |
+
raise RuntimeError(
|
| 107 |
+
"No LLM API key available. Either:\n"
|
| 108 |
+
" 1. Set OPENAI_API_KEY for premium tier, or\n"
|
| 109 |
+
" 2. Set HF_TOKEN for free HuggingFace tier\n"
|
| 110 |
+
"Get a free HF token at: https://huggingface.co/settings/tokens"
|
| 111 |
)
|
| 112 |
|
| 113 |
|
|
|
|
| 122 |
self,
|
| 123 |
model: Any = None,
|
| 124 |
domain: ResearchDomain | str | None = None,
|
| 125 |
+
api_key: str | None = None,
|
| 126 |
) -> None:
|
| 127 |
"""
|
| 128 |
Initialize the JudgeHandler.
|
|
|
|
| 130 |
Args:
|
| 131 |
model: Optional PydanticAI model. If None, uses config default.
|
| 132 |
domain: Research domain for prompt customization.
|
| 133 |
+
api_key: Optional BYOK key (auto-detects provider from prefix).
|
| 134 |
"""
|
| 135 |
+
self.model = model or get_model(api_key=api_key)
|
| 136 |
self.domain = domain
|
| 137 |
self.agent = Agent(
|
| 138 |
model=self.model,
|
|
|
|
| 527 |
"The HuggingFace Inference API free tier limit has been reached. "
|
| 528 |
"The search results listed below were retrieved but could not be "
|
| 529 |
"analyzed by the AI. "
|
| 530 |
+
"Please try again later, or add an OpenAI API key above "
|
| 531 |
"for unlimited access."
|
| 532 |
),
|
| 533 |
)
|
|
|
|
| 563 |
f"Search found {len(evidence)} sources (listed below) but they could not "
|
| 564 |
"be analyzed by AI.\n\n"
|
| 565 |
"**Options:**\n"
|
| 566 |
+
"- Add an OpenAI API key for reliable analysis\n"
|
| 567 |
"- Try again later when HF Inference is available\n"
|
| 568 |
"- Review the raw search results below"
|
| 569 |
),
|
|
|
|
| 592 |
f"{question} clinical trials",
|
| 593 |
f"{question} drug candidates",
|
| 594 |
],
|
| 595 |
+
reasoning=f"HF Inference failed: {error}. Recommend configuring OpenAI API key.",
|
| 596 |
)
|
| 597 |
|
| 598 |
async def synthesize(self, system_prompt: str, user_prompt: str) -> str:
|
|
|
|
| 749 |
reasoning=(
|
| 750 |
f"Demo mode assessment based on {evidence_count} real search results. "
|
| 751 |
"For AI-powered analysis with drug candidate identification and "
|
| 752 |
+
"evidence synthesis, configure OPENAI_API_KEY."
|
| 753 |
),
|
| 754 |
)
|
src/agents/judge_agent_llm.py
CHANGED
|
@@ -14,8 +14,13 @@ logger = structlog.get_logger()
|
|
| 14 |
class LLMSubIterationJudge:
|
| 15 |
"""Judge that uses an LLM to assess sub-iteration results."""
|
| 16 |
|
| 17 |
-
def __init__(self) -> None:
|
| 18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
self.agent = Agent(
|
| 20 |
model=self.model,
|
| 21 |
output_type=JudgeAssessment,
|
|
|
|
| 14 |
class LLMSubIterationJudge:
|
| 15 |
"""Judge that uses an LLM to assess sub-iteration results."""
|
| 16 |
|
| 17 |
+
def __init__(self, api_key: str | None = None) -> None:
|
| 18 |
+
"""Initialize the judge with optional BYOK key.
|
| 19 |
+
|
| 20 |
+
Args:
|
| 21 |
+
api_key: Optional BYOK key (auto-detects provider from prefix).
|
| 22 |
+
"""
|
| 23 |
+
self.model = get_model(api_key=api_key)
|
| 24 |
self.agent = Agent(
|
| 25 |
model=self.model,
|
| 26 |
output_type=JudgeAssessment,
|
src/agents/magentic_agents.py
CHANGED
|
@@ -16,17 +16,19 @@ from src.config.domain import ResearchDomain, get_domain_config
|
|
| 16 |
def create_search_agent(
|
| 17 |
chat_client: BaseChatClient | None = None,
|
| 18 |
domain: ResearchDomain | str | None = None,
|
|
|
|
| 19 |
) -> ChatAgent:
|
| 20 |
"""Create a search agent with internal LLM and search tools.
|
| 21 |
|
| 22 |
Args:
|
| 23 |
chat_client: Optional custom chat client. If None, uses default.
|
| 24 |
domain: Research domain for customization.
|
|
|
|
| 25 |
|
| 26 |
Returns:
|
| 27 |
ChatAgent configured for biomedical search
|
| 28 |
"""
|
| 29 |
-
client = chat_client or get_chat_client()
|
| 30 |
config = get_domain_config(domain)
|
| 31 |
|
| 32 |
return ChatAgent(
|
|
@@ -54,17 +56,19 @@ related to {config.name}.""",
|
|
| 54 |
def create_judge_agent(
|
| 55 |
chat_client: BaseChatClient | None = None,
|
| 56 |
domain: ResearchDomain | str | None = None,
|
|
|
|
| 57 |
) -> ChatAgent:
|
| 58 |
"""Create a judge agent that evaluates evidence quality.
|
| 59 |
|
| 60 |
Args:
|
| 61 |
chat_client: Optional custom chat client. If None, uses default.
|
| 62 |
domain: Research domain for customization.
|
|
|
|
| 63 |
|
| 64 |
Returns:
|
| 65 |
ChatAgent configured for evidence assessment
|
| 66 |
"""
|
| 67 |
-
client = chat_client or get_chat_client()
|
| 68 |
config = get_domain_config(domain)
|
| 69 |
|
| 70 |
return ChatAgent(
|
|
@@ -110,17 +114,19 @@ Be rigorous but fair. Look for:
|
|
| 110 |
def create_hypothesis_agent(
|
| 111 |
chat_client: BaseChatClient | None = None,
|
| 112 |
domain: ResearchDomain | str | None = None,
|
|
|
|
| 113 |
) -> ChatAgent:
|
| 114 |
"""Create a hypothesis generation agent.
|
| 115 |
|
| 116 |
Args:
|
| 117 |
chat_client: Optional custom chat client. If None, uses default.
|
| 118 |
domain: Research domain for customization.
|
|
|
|
| 119 |
|
| 120 |
Returns:
|
| 121 |
ChatAgent configured for hypothesis generation
|
| 122 |
"""
|
| 123 |
-
client = chat_client or get_chat_client()
|
| 124 |
config = get_domain_config(domain)
|
| 125 |
|
| 126 |
return ChatAgent(
|
|
@@ -151,17 +157,19 @@ Focus on mechanistic plausibility and existing evidence.""",
|
|
| 151 |
def create_report_agent(
|
| 152 |
chat_client: BaseChatClient | None = None,
|
| 153 |
domain: ResearchDomain | str | None = None,
|
|
|
|
| 154 |
) -> ChatAgent:
|
| 155 |
"""Create a report synthesis agent.
|
| 156 |
|
| 157 |
Args:
|
| 158 |
chat_client: Optional custom chat client. If None, uses default.
|
| 159 |
domain: Research domain for customization.
|
|
|
|
| 160 |
|
| 161 |
Returns:
|
| 162 |
ChatAgent configured for report generation
|
| 163 |
"""
|
| 164 |
-
client = chat_client or get_chat_client()
|
| 165 |
config = get_domain_config(domain)
|
| 166 |
|
| 167 |
return ChatAgent(
|
|
|
|
| 16 |
def create_search_agent(
|
| 17 |
chat_client: BaseChatClient | None = None,
|
| 18 |
domain: ResearchDomain | str | None = None,
|
| 19 |
+
api_key: str | None = None,
|
| 20 |
) -> ChatAgent:
|
| 21 |
"""Create a search agent with internal LLM and search tools.
|
| 22 |
|
| 23 |
Args:
|
| 24 |
chat_client: Optional custom chat client. If None, uses default.
|
| 25 |
domain: Research domain for customization.
|
| 26 |
+
api_key: Optional BYOK key (auto-detects provider from prefix).
|
| 27 |
|
| 28 |
Returns:
|
| 29 |
ChatAgent configured for biomedical search
|
| 30 |
"""
|
| 31 |
+
client = chat_client or get_chat_client(api_key=api_key)
|
| 32 |
config = get_domain_config(domain)
|
| 33 |
|
| 34 |
return ChatAgent(
|
|
|
|
| 56 |
def create_judge_agent(
|
| 57 |
chat_client: BaseChatClient | None = None,
|
| 58 |
domain: ResearchDomain | str | None = None,
|
| 59 |
+
api_key: str | None = None,
|
| 60 |
) -> ChatAgent:
|
| 61 |
"""Create a judge agent that evaluates evidence quality.
|
| 62 |
|
| 63 |
Args:
|
| 64 |
chat_client: Optional custom chat client. If None, uses default.
|
| 65 |
domain: Research domain for customization.
|
| 66 |
+
api_key: Optional BYOK key (auto-detects provider from prefix).
|
| 67 |
|
| 68 |
Returns:
|
| 69 |
ChatAgent configured for evidence assessment
|
| 70 |
"""
|
| 71 |
+
client = chat_client or get_chat_client(api_key=api_key)
|
| 72 |
config = get_domain_config(domain)
|
| 73 |
|
| 74 |
return ChatAgent(
|
|
|
|
| 114 |
def create_hypothesis_agent(
|
| 115 |
chat_client: BaseChatClient | None = None,
|
| 116 |
domain: ResearchDomain | str | None = None,
|
| 117 |
+
api_key: str | None = None,
|
| 118 |
) -> ChatAgent:
|
| 119 |
"""Create a hypothesis generation agent.
|
| 120 |
|
| 121 |
Args:
|
| 122 |
chat_client: Optional custom chat client. If None, uses default.
|
| 123 |
domain: Research domain for customization.
|
| 124 |
+
api_key: Optional BYOK key (auto-detects provider from prefix).
|
| 125 |
|
| 126 |
Returns:
|
| 127 |
ChatAgent configured for hypothesis generation
|
| 128 |
"""
|
| 129 |
+
client = chat_client or get_chat_client(api_key=api_key)
|
| 130 |
config = get_domain_config(domain)
|
| 131 |
|
| 132 |
return ChatAgent(
|
|
|
|
| 157 |
def create_report_agent(
|
| 158 |
chat_client: BaseChatClient | None = None,
|
| 159 |
domain: ResearchDomain | str | None = None,
|
| 160 |
+
api_key: str | None = None,
|
| 161 |
) -> ChatAgent:
|
| 162 |
"""Create a report synthesis agent.
|
| 163 |
|
| 164 |
Args:
|
| 165 |
chat_client: Optional custom chat client. If None, uses default.
|
| 166 |
domain: Research domain for customization.
|
| 167 |
+
api_key: Optional BYOK key (auto-detects provider from prefix).
|
| 168 |
|
| 169 |
Returns:
|
| 170 |
ChatAgent configured for report generation
|
| 171 |
"""
|
| 172 |
+
client = chat_client or get_chat_client(api_key=api_key)
|
| 173 |
config = get_domain_config(domain)
|
| 174 |
|
| 175 |
return ChatAgent(
|
src/clients/factory.py
CHANGED
|
@@ -23,13 +23,14 @@ def get_chat_client(
|
|
| 23 |
|
| 24 |
Auto-detection priority:
|
| 25 |
1. Explicit provider parameter
|
| 26 |
-
2.
|
| 27 |
-
3.
|
| 28 |
-
4.
|
|
|
|
| 29 |
|
| 30 |
Args:
|
| 31 |
provider: Force specific provider ("openai", "gemini", "huggingface")
|
| 32 |
-
api_key: Override API key for the provider
|
| 33 |
model_id: Override default model ID
|
| 34 |
**kwargs: Additional arguments for the client
|
| 35 |
|
|
@@ -38,13 +39,23 @@ def get_chat_client(
|
|
| 38 |
|
| 39 |
Raises:
|
| 40 |
ValueError: If an unsupported provider is explicitly requested
|
| 41 |
-
NotImplementedError: If Gemini is
|
| 42 |
"""
|
| 43 |
# Normalize provider to lowercase for case-insensitive matching
|
| 44 |
normalized = provider.lower() if provider is not None else None
|
| 45 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
# Validate explicit provider requests early
|
| 47 |
-
valid_providers = (None, "openai", "gemini", "huggingface")
|
| 48 |
if normalized not in valid_providers:
|
| 49 |
raise ValueError(f"Unsupported provider: {provider!r}")
|
| 50 |
|
|
@@ -57,7 +68,15 @@ def get_chat_client(
|
|
| 57 |
**kwargs,
|
| 58 |
)
|
| 59 |
|
| 60 |
-
# 2.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
if normalized == "gemini":
|
| 62 |
# Explicit request for Gemini - fail loudly
|
| 63 |
raise NotImplementedError("Gemini client not yet implemented (Planned Phase 4)")
|
|
@@ -66,7 +85,7 @@ def get_chat_client(
|
|
| 66 |
# Implicit (has key but not explicit) - log warning and fall through
|
| 67 |
logger.warning("Gemini key detected but client not yet implemented; falling back")
|
| 68 |
|
| 69 |
-
#
|
| 70 |
# This is the default if no other keys are present
|
| 71 |
logger.info("Using HuggingFace Chat Client (Free Tier)")
|
| 72 |
return HuggingFaceChatClient(
|
|
|
|
| 23 |
|
| 24 |
Auto-detection priority:
|
| 25 |
1. Explicit provider parameter
|
| 26 |
+
2. API key prefix detection (sk- → OpenAI, sk-ant- → Anthropic)
|
| 27 |
+
3. OpenAI key from env (Best Function Calling)
|
| 28 |
+
4. Gemini key from env (Best Context/Cost)
|
| 29 |
+
5. HuggingFace (Free Fallback)
|
| 30 |
|
| 31 |
Args:
|
| 32 |
provider: Force specific provider ("openai", "gemini", "huggingface")
|
| 33 |
+
api_key: Override API key for the provider (auto-detects provider from prefix)
|
| 34 |
model_id: Override default model ID
|
| 35 |
**kwargs: Additional arguments for the client
|
| 36 |
|
|
|
|
| 39 |
|
| 40 |
Raises:
|
| 41 |
ValueError: If an unsupported provider is explicitly requested
|
| 42 |
+
NotImplementedError: If Gemini or Anthropic is requested (not yet implemented)
|
| 43 |
"""
|
| 44 |
# Normalize provider to lowercase for case-insensitive matching
|
| 45 |
normalized = provider.lower() if provider is not None else None
|
| 46 |
|
| 47 |
+
# FIX: Auto-detect provider from API key prefix when not explicitly set
|
| 48 |
+
# This enables BYOK (Bring Your Own Key) from Gradio without explicit provider
|
| 49 |
+
# Order matters: "sk-ant-" must be checked before "sk-" (both start with "sk-")
|
| 50 |
+
if normalized is None and api_key:
|
| 51 |
+
if api_key.startswith("sk-ant-"):
|
| 52 |
+
normalized = "anthropic"
|
| 53 |
+
elif api_key.startswith("sk-"):
|
| 54 |
+
normalized = "openai"
|
| 55 |
+
# HF tokens start with "hf_" - no auto-detection needed (falls through to default)
|
| 56 |
+
|
| 57 |
# Validate explicit provider requests early
|
| 58 |
+
valid_providers = (None, "openai", "anthropic", "gemini", "huggingface")
|
| 59 |
if normalized not in valid_providers:
|
| 60 |
raise ValueError(f"Unsupported provider: {provider!r}")
|
| 61 |
|
|
|
|
| 68 |
**kwargs,
|
| 69 |
)
|
| 70 |
|
| 71 |
+
# 2. Anthropic (Detected from sk-ant- prefix or explicit)
|
| 72 |
+
if normalized == "anthropic":
|
| 73 |
+
# Anthropic key was detected or explicitly requested - fail loudly
|
| 74 |
+
raise NotImplementedError(
|
| 75 |
+
"Anthropic client not yet implemented. "
|
| 76 |
+
"Use OpenAI key (sk-...) or leave empty for free HuggingFace tier."
|
| 77 |
+
)
|
| 78 |
+
|
| 79 |
+
# 3. Gemini (High Performance / Alternative)
|
| 80 |
if normalized == "gemini":
|
| 81 |
# Explicit request for Gemini - fail loudly
|
| 82 |
raise NotImplementedError("Gemini client not yet implemented (Planned Phase 4)")
|
|
|
|
| 85 |
# Implicit (has key but not explicit) - log warning and fall through
|
| 86 |
logger.warning("Gemini key detected but client not yet implemented; falling back")
|
| 87 |
|
| 88 |
+
# 4. HuggingFace (Free Fallback)
|
| 89 |
# This is the default if no other keys are present
|
| 90 |
logger.info("Using HuggingFace Chat Client (Free Tier)")
|
| 91 |
return HuggingFaceChatClient(
|
src/clients/huggingface.py
CHANGED
|
@@ -51,12 +51,13 @@ class HuggingFaceChatClient(BaseChatClient): # type: ignore[misc]
|
|
| 51 |
"""Initialize the HuggingFace chat client.
|
| 52 |
|
| 53 |
Args:
|
| 54 |
-
model_id: The HuggingFace model ID (default: configured value or Qwen2.5-
|
| 55 |
api_key: HF_TOKEN (optional, defaults to env var).
|
| 56 |
**kwargs: Additional arguments passed to BaseChatClient.
|
| 57 |
"""
|
| 58 |
super().__init__(**kwargs)
|
| 59 |
-
|
|
|
|
| 60 |
self.api_key = api_key or settings.hf_token
|
| 61 |
|
| 62 |
# Initialize the HF Inference Client
|
|
|
|
| 51 |
"""Initialize the HuggingFace chat client.
|
| 52 |
|
| 53 |
Args:
|
| 54 |
+
model_id: The HuggingFace model ID (default: configured value or Qwen2.5-7B).
|
| 55 |
api_key: HF_TOKEN (optional, defaults to env var).
|
| 56 |
**kwargs: Additional arguments passed to BaseChatClient.
|
| 57 |
"""
|
| 58 |
super().__init__(**kwargs)
|
| 59 |
+
# FIX: Use 7B model to stay on HuggingFace native infrastructure (avoid Novita 500s)
|
| 60 |
+
self.model_id = model_id or settings.huggingface_model or "Qwen/Qwen2.5-7B-Instruct"
|
| 61 |
self.api_key = api_key or settings.hf_token
|
| 62 |
|
| 63 |
# Initialize the HF Inference Client
|
src/orchestrators/advanced.py
CHANGED
|
@@ -99,6 +99,9 @@ class AdvancedOrchestrator(OrchestratorProtocol):
|
|
| 99 |
api_key=api_key,
|
| 100 |
)
|
| 101 |
|
|
|
|
|
|
|
|
|
|
| 102 |
# Event stream for UI updates
|
| 103 |
self._events: list[AgentEvent] = []
|
| 104 |
|
|
@@ -116,7 +119,7 @@ class AdvancedOrchestrator(OrchestratorProtocol):
|
|
| 116 |
|
| 117 |
def _init_embedding_service(self) -> "EmbeddingServiceProtocol | None":
|
| 118 |
"""Initialize embedding service if available."""
|
| 119 |
-
return get_embedding_service_if_available()
|
| 120 |
|
| 121 |
def _build_workflow(self) -> Any:
|
| 122 |
"""Build the workflow with ChatAgent participants."""
|
|
|
|
| 99 |
api_key=api_key,
|
| 100 |
)
|
| 101 |
|
| 102 |
+
# Store API key for service initialization
|
| 103 |
+
self._api_key = api_key
|
| 104 |
+
|
| 105 |
# Event stream for UI updates
|
| 106 |
self._events: list[AgentEvent] = []
|
| 107 |
|
|
|
|
| 119 |
|
| 120 |
def _init_embedding_service(self) -> "EmbeddingServiceProtocol | None":
|
| 121 |
"""Initialize embedding service if available."""
|
| 122 |
+
return get_embedding_service_if_available(api_key=self._api_key)
|
| 123 |
|
| 124 |
def _build_workflow(self) -> Any:
|
| 125 |
"""Build the workflow with ChatAgent participants."""
|
src/orchestrators/factory.py
CHANGED
|
@@ -61,7 +61,7 @@ def create_orchestrator(
|
|
| 61 |
if effective_mode == "hierarchical":
|
| 62 |
from src.orchestrators.hierarchical import HierarchicalOrchestrator
|
| 63 |
|
| 64 |
-
return HierarchicalOrchestrator(config=effective_config, domain=domain)
|
| 65 |
|
| 66 |
# Default: Advanced Mode (Unified)
|
| 67 |
# Handles both Paid (OpenAI) and Free (HuggingFace) tiers
|
|
|
|
| 61 |
if effective_mode == "hierarchical":
|
| 62 |
from src.orchestrators.hierarchical import HierarchicalOrchestrator
|
| 63 |
|
| 64 |
+
return HierarchicalOrchestrator(config=effective_config, domain=domain, api_key=api_key)
|
| 65 |
|
| 66 |
# Default: Advanced Mode (Unified)
|
| 67 |
# Handles both Paid (OpenAI) and Free (HuggingFace) tiers
|
src/orchestrators/hierarchical.py
CHANGED
|
@@ -38,8 +38,12 @@ class ResearchTeam(SubIterationTeam):
|
|
| 38 |
sub-iteration middleware framework.
|
| 39 |
"""
|
| 40 |
|
| 41 |
-
def __init__(
|
| 42 |
-
self
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
|
| 44 |
async def execute(self, task: str) -> str:
|
| 45 |
"""Execute a research task.
|
|
@@ -73,6 +77,7 @@ class HierarchicalOrchestrator(OrchestratorProtocol):
|
|
| 73 |
config: OrchestratorConfig | None = None,
|
| 74 |
timeout_seconds: float = DEFAULT_TIMEOUT_SECONDS,
|
| 75 |
domain: ResearchDomain | str | None = None,
|
|
|
|
| 76 |
) -> None:
|
| 77 |
"""Initialize the hierarchical orchestrator.
|
| 78 |
|
|
@@ -80,12 +85,14 @@ class HierarchicalOrchestrator(OrchestratorProtocol):
|
|
| 80 |
config: Optional configuration (uses defaults if not provided)
|
| 81 |
timeout_seconds: Maximum workflow duration (default: 5 minutes)
|
| 82 |
domain: Research domain for customization
|
|
|
|
| 83 |
"""
|
| 84 |
self.config = config or OrchestratorConfig()
|
| 85 |
self._timeout_seconds = timeout_seconds
|
| 86 |
self.domain = domain
|
| 87 |
-
self.
|
| 88 |
-
self.
|
|
|
|
| 89 |
self.middleware = SubIterationMiddleware(
|
| 90 |
self.team, self.judge, max_iterations=self.config.max_iterations
|
| 91 |
)
|
|
@@ -101,7 +108,7 @@ class HierarchicalOrchestrator(OrchestratorProtocol):
|
|
| 101 |
"""
|
| 102 |
logger.info("Starting hierarchical orchestrator", query=query)
|
| 103 |
|
| 104 |
-
service = get_embedding_service_if_available()
|
| 105 |
init_magentic_state(query, service)
|
| 106 |
|
| 107 |
yield AgentEvent(type="started", message=f"Starting research: {query}")
|
|
|
|
| 38 |
sub-iteration middleware framework.
|
| 39 |
"""
|
| 40 |
|
| 41 |
+
def __init__(
|
| 42 |
+
self,
|
| 43 |
+
domain: ResearchDomain | str | None = None,
|
| 44 |
+
api_key: str | None = None,
|
| 45 |
+
) -> None:
|
| 46 |
+
self.agent = create_search_agent(domain=domain, api_key=api_key)
|
| 47 |
|
| 48 |
async def execute(self, task: str) -> str:
|
| 49 |
"""Execute a research task.
|
|
|
|
| 77 |
config: OrchestratorConfig | None = None,
|
| 78 |
timeout_seconds: float = DEFAULT_TIMEOUT_SECONDS,
|
| 79 |
domain: ResearchDomain | str | None = None,
|
| 80 |
+
api_key: str | None = None,
|
| 81 |
) -> None:
|
| 82 |
"""Initialize the hierarchical orchestrator.
|
| 83 |
|
|
|
|
| 85 |
config: Optional configuration (uses defaults if not provided)
|
| 86 |
timeout_seconds: Maximum workflow duration (default: 5 minutes)
|
| 87 |
domain: Research domain for customization
|
| 88 |
+
api_key: Optional BYOK key (auto-detects provider from prefix)
|
| 89 |
"""
|
| 90 |
self.config = config or OrchestratorConfig()
|
| 91 |
self._timeout_seconds = timeout_seconds
|
| 92 |
self.domain = domain
|
| 93 |
+
self._api_key = api_key
|
| 94 |
+
self.team = ResearchTeam(domain=domain, api_key=api_key)
|
| 95 |
+
self.judge = LLMSubIterationJudge(api_key=api_key)
|
| 96 |
self.middleware = SubIterationMiddleware(
|
| 97 |
self.team, self.judge, max_iterations=self.config.max_iterations
|
| 98 |
)
|
|
|
|
| 108 |
"""
|
| 109 |
logger.info("Starting hierarchical orchestrator", query=query)
|
| 110 |
|
| 111 |
+
service = get_embedding_service_if_available(api_key=self._api_key)
|
| 112 |
init_magentic_state(query, service)
|
| 113 |
|
| 114 |
yield AgentEvent(type="started", message=f"Starting research: {query}")
|
src/orchestrators/langgraph_orchestrator.py
CHANGED
|
@@ -32,18 +32,33 @@ class LangGraphOrchestrator(OrchestratorProtocol):
|
|
| 32 |
self,
|
| 33 |
max_iterations: int = 10,
|
| 34 |
checkpoint_path: str | None = None,
|
|
|
|
| 35 |
):
|
| 36 |
self._max_iterations = max_iterations
|
| 37 |
self._checkpoint_path = checkpoint_path
|
|
|
|
| 38 |
|
| 39 |
# Initialize the LLM (Qwen 2.5 via HF Inference)
|
| 40 |
# We use the serverless API by default
|
| 41 |
-
#
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
raise ValueError(
|
| 48 |
"HF_TOKEN (Hugging Face API Token) is required for LangGraph orchestrator."
|
| 49 |
)
|
|
@@ -53,7 +68,7 @@ class LangGraphOrchestrator(OrchestratorProtocol):
|
|
| 53 |
task="text-generation",
|
| 54 |
max_new_tokens=1024,
|
| 55 |
temperature=0.1,
|
| 56 |
-
huggingfacehub_api_token=
|
| 57 |
)
|
| 58 |
self.chat_model = ChatHuggingFace(llm=self.llm_endpoint)
|
| 59 |
|
|
@@ -61,7 +76,7 @@ class LangGraphOrchestrator(OrchestratorProtocol):
|
|
| 61 |
"""Execute research workflow with structured state."""
|
| 62 |
# Initialize embedding service using tiered selection (service_loader)
|
| 63 |
# Returns LlamaIndexRAGService if OpenAI key available, else local EmbeddingService
|
| 64 |
-
embedding_service = get_embedding_service()
|
| 65 |
|
| 66 |
# Setup checkpointer (SQLite for dev)
|
| 67 |
if self._checkpoint_path:
|
|
|
|
| 32 |
self,
|
| 33 |
max_iterations: int = 10,
|
| 34 |
checkpoint_path: str | None = None,
|
| 35 |
+
api_key: str | None = None,
|
| 36 |
):
|
| 37 |
self._max_iterations = max_iterations
|
| 38 |
self._checkpoint_path = checkpoint_path
|
| 39 |
+
self._api_key = api_key
|
| 40 |
|
| 41 |
# Initialize the LLM (Qwen 2.5 via HF Inference)
|
| 42 |
# We use the serverless API by default
|
| 43 |
+
# FIX: Use 7B model to stay on HuggingFace native infrastructure
|
| 44 |
+
# Large models (70B+) route to Novita/Hyperbolic providers (500/401 errors)
|
| 45 |
+
repo_id = settings.huggingface_model or "Qwen/Qwen2.5-7B-Instruct"
|
| 46 |
+
|
| 47 |
+
# Determine HF Token (BYOK > Env)
|
| 48 |
+
# Note: If api_key starts with 'sk-', it's likely OpenAI, which isn't supported here
|
| 49 |
+
# for the LLM, but we store it for the embedding service.
|
| 50 |
+
hf_token = settings.hf_token
|
| 51 |
+
if api_key and not api_key.startswith("sk-"):
|
| 52 |
+
hf_token = api_key
|
| 53 |
+
|
| 54 |
+
if not hf_token:
|
| 55 |
+
# If we have an OpenAI key but no HF token, we can't run the HF LLM
|
| 56 |
+
if api_key and api_key.startswith("sk-"):
|
| 57 |
+
raise ValueError(
|
| 58 |
+
"LangGraphOrchestrator currently requires a Hugging Face token (HF_TOKEN) "
|
| 59 |
+
"for the LLM, even if using OpenAI for embeddings. "
|
| 60 |
+
"Please use Advanced Mode for OpenAI support."
|
| 61 |
+
)
|
| 62 |
raise ValueError(
|
| 63 |
"HF_TOKEN (Hugging Face API Token) is required for LangGraph orchestrator."
|
| 64 |
)
|
|
|
|
| 68 |
task="text-generation",
|
| 69 |
max_new_tokens=1024,
|
| 70 |
temperature=0.1,
|
| 71 |
+
huggingfacehub_api_token=hf_token,
|
| 72 |
)
|
| 73 |
self.chat_model = ChatHuggingFace(llm=self.llm_endpoint)
|
| 74 |
|
|
|
|
| 76 |
"""Execute research workflow with structured state."""
|
| 77 |
# Initialize embedding service using tiered selection (service_loader)
|
| 78 |
# Returns LlamaIndexRAGService if OpenAI key available, else local EmbeddingService
|
| 79 |
+
embedding_service = get_embedding_service(api_key=self._api_key)
|
| 80 |
|
| 81 |
# Setup checkpointer (SQLite for dev)
|
| 82 |
if self._checkpoint_path:
|
src/services/llamaindex_rag.py
CHANGED
|
@@ -42,16 +42,17 @@ class LlamaIndexRAGService:
|
|
| 42 |
persist_dir: str | None = None,
|
| 43 |
embedding_model: str | None = None,
|
| 44 |
similarity_top_k: int = 5,
|
|
|
|
| 45 |
) -> None:
|
| 46 |
"""
|
| 47 |
Initialize LlamaIndex RAG service.
|
| 48 |
|
| 49 |
Args:
|
| 50 |
-
collection_name: Name of the ChromaDB collection
|
| 51 |
-
"deepcritical_evidence" to "deepboner_evidence" in v1.0 rebrand)
|
| 52 |
persist_dir: Directory to persist ChromaDB data
|
| 53 |
embedding_model: OpenAI embedding model (defaults to settings.openai_embedding_model)
|
| 54 |
similarity_top_k: Number of top results to retrieve
|
|
|
|
| 55 |
"""
|
| 56 |
# Lazy import - only when instantiated
|
| 57 |
try:
|
|
@@ -80,18 +81,36 @@ class LlamaIndexRAGService:
|
|
| 80 |
self.similarity_top_k = similarity_top_k
|
| 81 |
self.embedding_model = embedding_model or settings.openai_embedding_model
|
| 82 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
# Validate API key before use
|
| 84 |
-
if not
|
| 85 |
raise ConfigurationError("OPENAI_API_KEY required for LlamaIndex RAG service")
|
| 86 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
# Configure LlamaIndex settings (use centralized config)
|
| 88 |
self._Settings.llm = OpenAI(
|
| 89 |
model=settings.openai_model,
|
| 90 |
-
api_key=
|
| 91 |
)
|
| 92 |
self._Settings.embed_model = OpenAIEmbedding(
|
| 93 |
model=self.embedding_model,
|
| 94 |
-
api_key=
|
| 95 |
)
|
| 96 |
|
| 97 |
# Initialize ChromaDB client
|
|
@@ -428,6 +447,7 @@ class LlamaIndexRAGService:
|
|
| 428 |
|
| 429 |
def get_rag_service(
|
| 430 |
collection_name: str = "deepboner_evidence",
|
|
|
|
| 431 |
**kwargs: Any,
|
| 432 |
) -> LlamaIndexRAGService:
|
| 433 |
"""
|
|
@@ -435,9 +455,10 @@ def get_rag_service(
|
|
| 435 |
|
| 436 |
Args:
|
| 437 |
collection_name: Name of the ChromaDB collection
|
|
|
|
| 438 |
**kwargs: Additional arguments for LlamaIndexRAGService
|
| 439 |
|
| 440 |
Returns:
|
| 441 |
Configured LlamaIndexRAGService instance
|
| 442 |
"""
|
| 443 |
-
return LlamaIndexRAGService(collection_name=collection_name, **kwargs)
|
|
|
|
| 42 |
persist_dir: str | None = None,
|
| 43 |
embedding_model: str | None = None,
|
| 44 |
similarity_top_k: int = 5,
|
| 45 |
+
api_key: str | None = None,
|
| 46 |
) -> None:
|
| 47 |
"""
|
| 48 |
Initialize LlamaIndex RAG service.
|
| 49 |
|
| 50 |
Args:
|
| 51 |
+
collection_name: Name of the ChromaDB collection
|
|
|
|
| 52 |
persist_dir: Directory to persist ChromaDB data
|
| 53 |
embedding_model: OpenAI embedding model (defaults to settings.openai_embedding_model)
|
| 54 |
similarity_top_k: Number of top results to retrieve
|
| 55 |
+
api_key: Optional BYOK OpenAI key. Prioritized over env var.
|
| 56 |
"""
|
| 57 |
# Lazy import - only when instantiated
|
| 58 |
try:
|
|
|
|
| 81 |
self.similarity_top_k = similarity_top_k
|
| 82 |
self.embedding_model = embedding_model or settings.openai_embedding_model
|
| 83 |
|
| 84 |
+
# Determine API key (BYOK > Env Var)
|
| 85 |
+
self.api_key = api_key
|
| 86 |
+
if not self.api_key and settings.has_openai_key:
|
| 87 |
+
self.api_key = settings.openai_api_key
|
| 88 |
+
|
| 89 |
# Validate API key before use
|
| 90 |
+
if not self.api_key:
|
| 91 |
raise ConfigurationError("OPENAI_API_KEY required for LlamaIndex RAG service")
|
| 92 |
|
| 93 |
+
# Defense-in-depth: Validate key prefix to prevent cryptic auth errors
|
| 94 |
+
# Note: Anthropic keys start with sk-ant-, which would pass startswith("sk-")
|
| 95 |
+
if self.api_key.startswith("sk-ant-"):
|
| 96 |
+
raise ConfigurationError(
|
| 97 |
+
"Anthropic keys (sk-ant-...) are not supported for embeddings. "
|
| 98 |
+
"LlamaIndex RAG requires an OpenAI API key (sk-...)."
|
| 99 |
+
)
|
| 100 |
+
if not self.api_key.startswith("sk-"):
|
| 101 |
+
raise ConfigurationError(
|
| 102 |
+
f"Invalid API key format. Expected OpenAI key starting with 'sk-', "
|
| 103 |
+
f"got key starting with '{self.api_key[:8]}...'."
|
| 104 |
+
)
|
| 105 |
+
|
| 106 |
# Configure LlamaIndex settings (use centralized config)
|
| 107 |
self._Settings.llm = OpenAI(
|
| 108 |
model=settings.openai_model,
|
| 109 |
+
api_key=self.api_key,
|
| 110 |
)
|
| 111 |
self._Settings.embed_model = OpenAIEmbedding(
|
| 112 |
model=self.embedding_model,
|
| 113 |
+
api_key=self.api_key,
|
| 114 |
)
|
| 115 |
|
| 116 |
# Initialize ChromaDB client
|
|
|
|
| 447 |
|
| 448 |
def get_rag_service(
|
| 449 |
collection_name: str = "deepboner_evidence",
|
| 450 |
+
api_key: str | None = None,
|
| 451 |
**kwargs: Any,
|
| 452 |
) -> LlamaIndexRAGService:
|
| 453 |
"""
|
|
|
|
| 455 |
|
| 456 |
Args:
|
| 457 |
collection_name: Name of the ChromaDB collection
|
| 458 |
+
api_key: Optional BYOK OpenAI key
|
| 459 |
**kwargs: Additional arguments for LlamaIndexRAGService
|
| 460 |
|
| 461 |
Returns:
|
| 462 |
Configured LlamaIndexRAGService instance
|
| 463 |
"""
|
| 464 |
+
return LlamaIndexRAGService(collection_name=collection_name, api_key=api_key, **kwargs)
|
src/utils/llm_factory.py
CHANGED
|
@@ -26,9 +26,7 @@ def get_pydantic_ai_model() -> Any:
|
|
| 26 |
Get the appropriate model for pydantic-ai based on configuration.
|
| 27 |
Used by legacy Simple Mode components.
|
| 28 |
"""
|
| 29 |
-
from pydantic_ai.models.anthropic import AnthropicModel
|
| 30 |
from pydantic_ai.models.openai import OpenAIChatModel
|
| 31 |
-
from pydantic_ai.providers.anthropic import AnthropicProvider
|
| 32 |
from pydantic_ai.providers.openai import OpenAIProvider
|
| 33 |
|
| 34 |
# Normalize provider for case-insensitive matching
|
|
@@ -41,10 +39,7 @@ def get_pydantic_ai_model() -> Any:
|
|
| 41 |
return OpenAIChatModel(settings.openai_model, provider=provider)
|
| 42 |
|
| 43 |
if provider_lower == "anthropic":
|
| 44 |
-
|
| 45 |
-
raise ConfigurationError("ANTHROPIC_API_KEY not set for pydantic-ai")
|
| 46 |
-
anthropic_provider = AnthropicProvider(api_key=settings.anthropic_api_key)
|
| 47 |
-
return AnthropicModel(settings.anthropic_model, provider=anthropic_provider)
|
| 48 |
|
| 49 |
raise ConfigurationError(f"Unknown LLM provider for simple mode: {settings.llm_provider}")
|
| 50 |
|
|
|
|
| 26 |
Get the appropriate model for pydantic-ai based on configuration.
|
| 27 |
Used by legacy Simple Mode components.
|
| 28 |
"""
|
|
|
|
| 29 |
from pydantic_ai.models.openai import OpenAIChatModel
|
|
|
|
| 30 |
from pydantic_ai.providers.openai import OpenAIProvider
|
| 31 |
|
| 32 |
# Normalize provider for case-insensitive matching
|
|
|
|
| 39 |
return OpenAIChatModel(settings.openai_model, provider=provider)
|
| 40 |
|
| 41 |
if provider_lower == "anthropic":
|
| 42 |
+
raise ConfigurationError("Anthropic is not supported (no embeddings API). See P3 doc.")
|
|
|
|
|
|
|
|
|
|
| 43 |
|
| 44 |
raise ConfigurationError(f"Unknown LLM provider for simple mode: {settings.llm_provider}")
|
| 45 |
|
src/utils/service_loader.py
CHANGED
|
@@ -45,7 +45,7 @@ def warmup_services() -> None:
|
|
| 45 |
thread.start()
|
| 46 |
|
| 47 |
|
| 48 |
-
def get_embedding_service() -> "EmbeddingServiceProtocol":
|
| 49 |
"""Get the best available embedding service.
|
| 50 |
|
| 51 |
Strategy selection (ordered by preference):
|
|
@@ -56,31 +56,41 @@ def get_embedding_service() -> "EmbeddingServiceProtocol":
|
|
| 56 |
- Factory Method: Creates service instance
|
| 57 |
- Strategy Pattern: Selects between implementations at runtime
|
| 58 |
|
|
|
|
|
|
|
|
|
|
| 59 |
Returns:
|
| 60 |
EmbeddingServiceProtocol: Either LlamaIndexRAGService or EmbeddingService
|
| 61 |
|
| 62 |
Raises:
|
| 63 |
ImportError: If no embedding service dependencies are available
|
| 64 |
-
|
| 65 |
-
Example:
|
| 66 |
-
```python
|
| 67 |
-
service = get_embedding_service()
|
| 68 |
-
await service.add_evidence("id", "content", {"source": "pubmed"})
|
| 69 |
-
results = await service.search_similar("query", n_results=5)
|
| 70 |
-
unique = await service.deduplicate(evidence_list)
|
| 71 |
-
```
|
| 72 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
# Try premium tier first (OpenAI + persistence)
|
| 74 |
-
if
|
| 75 |
try:
|
| 76 |
from src.services.llamaindex_rag import get_rag_service
|
| 77 |
|
| 78 |
-
service
|
|
|
|
| 79 |
logger.info(
|
| 80 |
"Using LlamaIndex RAG service",
|
| 81 |
tier="premium",
|
| 82 |
persistence="enabled",
|
| 83 |
embeddings="openai",
|
|
|
|
| 84 |
)
|
| 85 |
return service
|
| 86 |
except ImportError as e:
|
|
@@ -119,17 +129,22 @@ def get_embedding_service() -> "EmbeddingServiceProtocol":
|
|
| 119 |
) from e
|
| 120 |
|
| 121 |
|
| 122 |
-
def get_embedding_service_if_available(
|
|
|
|
|
|
|
| 123 |
"""Safely attempt to load and initialize an embedding service.
|
| 124 |
|
| 125 |
Unlike get_embedding_service(), this function returns None instead of
|
| 126 |
raising ImportError when no service is available.
|
| 127 |
|
|
|
|
|
|
|
|
|
|
| 128 |
Returns:
|
| 129 |
EmbeddingServiceProtocol instance if dependencies are met, else None.
|
| 130 |
"""
|
| 131 |
try:
|
| 132 |
-
return get_embedding_service()
|
| 133 |
except ImportError as e:
|
| 134 |
logger.info(
|
| 135 |
"Embedding service not available (optional dependencies missing)",
|
|
|
|
| 45 |
thread.start()
|
| 46 |
|
| 47 |
|
| 48 |
+
def get_embedding_service(api_key: str | None = None) -> "EmbeddingServiceProtocol":
|
| 49 |
"""Get the best available embedding service.
|
| 50 |
|
| 51 |
Strategy selection (ordered by preference):
|
|
|
|
| 56 |
- Factory Method: Creates service instance
|
| 57 |
- Strategy Pattern: Selects between implementations at runtime
|
| 58 |
|
| 59 |
+
Args:
|
| 60 |
+
api_key: Optional BYOK key. If starts with 'sk-', enables Premium tier.
|
| 61 |
+
|
| 62 |
Returns:
|
| 63 |
EmbeddingServiceProtocol: Either LlamaIndexRAGService or EmbeddingService
|
| 64 |
|
| 65 |
Raises:
|
| 66 |
ImportError: If no embedding service dependencies are available
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
"""
|
| 68 |
+
# Determine if we have a valid OpenAI key (BYOK or Env)
|
| 69 |
+
# Note: Must check sk-ant- BEFORE sk- since Anthropic keys start with sk-ant-
|
| 70 |
+
has_openai = False
|
| 71 |
+
if api_key:
|
| 72 |
+
if api_key.startswith("sk-ant-"):
|
| 73 |
+
# Anthropic key - not supported for embeddings
|
| 74 |
+
logger.warning("Anthropic keys don't support embeddings, falling back to free tier")
|
| 75 |
+
elif api_key.startswith("sk-"):
|
| 76 |
+
# OpenAI BYOK
|
| 77 |
+
has_openai = True
|
| 78 |
+
elif settings.has_openai_key:
|
| 79 |
+
has_openai = True
|
| 80 |
+
|
| 81 |
# Try premium tier first (OpenAI + persistence)
|
| 82 |
+
if has_openai:
|
| 83 |
try:
|
| 84 |
from src.services.llamaindex_rag import get_rag_service
|
| 85 |
|
| 86 |
+
# Pass api_key to service (it handles precedence: api_key > env)
|
| 87 |
+
service = get_rag_service(api_key=api_key)
|
| 88 |
logger.info(
|
| 89 |
"Using LlamaIndex RAG service",
|
| 90 |
tier="premium",
|
| 91 |
persistence="enabled",
|
| 92 |
embeddings="openai",
|
| 93 |
+
byok=bool(api_key),
|
| 94 |
)
|
| 95 |
return service
|
| 96 |
except ImportError as e:
|
|
|
|
| 129 |
) from e
|
| 130 |
|
| 131 |
|
| 132 |
+
def get_embedding_service_if_available(
|
| 133 |
+
api_key: str | None = None,
|
| 134 |
+
) -> "EmbeddingServiceProtocol | None":
|
| 135 |
"""Safely attempt to load and initialize an embedding service.
|
| 136 |
|
| 137 |
Unlike get_embedding_service(), this function returns None instead of
|
| 138 |
raising ImportError when no service is available.
|
| 139 |
|
| 140 |
+
Args:
|
| 141 |
+
api_key: Optional BYOK key to pass to service factory.
|
| 142 |
+
|
| 143 |
Returns:
|
| 144 |
EmbeddingServiceProtocol instance if dependencies are met, else None.
|
| 145 |
"""
|
| 146 |
try:
|
| 147 |
+
return get_embedding_service(api_key=api_key)
|
| 148 |
except ImportError as e:
|
| 149 |
logger.info(
|
| 150 |
"Embedding service not available (optional dependencies missing)",
|
tests/unit/agent_factory/test_get_model_auto_detect.py
CHANGED
|
@@ -1,59 +1,73 @@
|
|
| 1 |
import pytest
|
| 2 |
-
from pydantic_ai.models.anthropic import AnthropicModel
|
| 3 |
from pydantic_ai.models.huggingface import HuggingFaceModel
|
| 4 |
from pydantic_ai.models.openai import OpenAIChatModel
|
| 5 |
|
| 6 |
from src.agent_factory.judges import get_model
|
| 7 |
from src.utils.config import settings
|
| 8 |
-
from src.utils.exceptions import ConfigurationError
|
| 9 |
|
| 10 |
|
| 11 |
class TestGetModelAutoDetect:
|
| 12 |
-
"""Test that get_model() auto-detects available providers.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
def test_returns_openai_when_key_present(self, monkeypatch):
|
| 15 |
"""OpenAI key present → OpenAI model."""
|
| 16 |
# Mock the settings properties (settings is a singleton)
|
| 17 |
monkeypatch.setattr(settings, "openai_api_key", "sk-test")
|
| 18 |
-
monkeypatch.setattr(settings, "anthropic_api_key", None)
|
| 19 |
monkeypatch.setattr(settings, "hf_token", None)
|
| 20 |
|
| 21 |
model = get_model()
|
| 22 |
assert isinstance(model, OpenAIChatModel)
|
| 23 |
|
| 24 |
-
def
|
| 25 |
-
"""
|
| 26 |
monkeypatch.setattr(settings, "openai_api_key", None)
|
| 27 |
-
monkeypatch.setattr(settings, "anthropic_api_key", "sk-ant-test")
|
| 28 |
monkeypatch.setattr(settings, "hf_token", None)
|
| 29 |
|
| 30 |
-
model = get_model()
|
| 31 |
-
assert isinstance(model,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
def test_returns_huggingface_when_hf_token_present(self, monkeypatch):
|
| 34 |
"""HF_TOKEN present (no paid keys) → HuggingFace model."""
|
| 35 |
monkeypatch.setattr(settings, "openai_api_key", None)
|
| 36 |
-
monkeypatch.setattr(settings, "anthropic_api_key", None)
|
| 37 |
monkeypatch.setattr(settings, "hf_token", "hf_test_token")
|
| 38 |
|
| 39 |
model = get_model()
|
| 40 |
assert isinstance(model, HuggingFaceModel)
|
| 41 |
|
| 42 |
-
def
|
| 43 |
-
"""No keys at all →
|
| 44 |
monkeypatch.setattr(settings, "openai_api_key", None)
|
| 45 |
-
monkeypatch.setattr(settings, "anthropic_api_key", None)
|
| 46 |
monkeypatch.setattr(settings, "hf_token", None)
|
|
|
|
|
|
|
|
|
|
| 47 |
|
| 48 |
-
|
| 49 |
-
|
| 50 |
|
| 51 |
-
|
|
|
|
|
|
|
|
|
|
| 52 |
|
| 53 |
-
def
|
| 54 |
-
"""
|
| 55 |
monkeypatch.setattr(settings, "openai_api_key", "sk-test")
|
| 56 |
-
monkeypatch.setattr(settings, "
|
| 57 |
|
| 58 |
model = get_model()
|
| 59 |
assert isinstance(model, OpenAIChatModel)
|
|
|
|
| 1 |
import pytest
|
|
|
|
| 2 |
from pydantic_ai.models.huggingface import HuggingFaceModel
|
| 3 |
from pydantic_ai.models.openai import OpenAIChatModel
|
| 4 |
|
| 5 |
from src.agent_factory.judges import get_model
|
| 6 |
from src.utils.config import settings
|
|
|
|
| 7 |
|
| 8 |
|
| 9 |
class TestGetModelAutoDetect:
|
| 10 |
+
"""Test that get_model() auto-detects available providers.
|
| 11 |
+
|
| 12 |
+
NOTE: Anthropic is NOT supported (no embeddings API).
|
| 13 |
+
See P3_REMOVE_ANTHROPIC_PARTIAL_WIRING.md.
|
| 14 |
+
"""
|
| 15 |
|
| 16 |
def test_returns_openai_when_key_present(self, monkeypatch):
|
| 17 |
"""OpenAI key present → OpenAI model."""
|
| 18 |
# Mock the settings properties (settings is a singleton)
|
| 19 |
monkeypatch.setattr(settings, "openai_api_key", "sk-test")
|
|
|
|
| 20 |
monkeypatch.setattr(settings, "hf_token", None)
|
| 21 |
|
| 22 |
model = get_model()
|
| 23 |
assert isinstance(model, OpenAIChatModel)
|
| 24 |
|
| 25 |
+
def test_byok_openai_key_returns_openai_model(self, monkeypatch):
|
| 26 |
+
"""BYOK: api_key='sk-...' → OpenAI model (regardless of env vars)."""
|
| 27 |
monkeypatch.setattr(settings, "openai_api_key", None)
|
|
|
|
| 28 |
monkeypatch.setattr(settings, "hf_token", None)
|
| 29 |
|
| 30 |
+
model = get_model(api_key="sk-byok-test-key")
|
| 31 |
+
assert isinstance(model, OpenAIChatModel)
|
| 32 |
+
|
| 33 |
+
def test_byok_anthropic_key_raises_not_implemented(self, monkeypatch):
|
| 34 |
+
"""BYOK: api_key='sk-ant-...' → NotImplementedError (Anthropic not supported)."""
|
| 35 |
+
monkeypatch.setattr(settings, "openai_api_key", None)
|
| 36 |
+
monkeypatch.setattr(settings, "hf_token", None)
|
| 37 |
+
|
| 38 |
+
with pytest.raises(NotImplementedError) as exc_info:
|
| 39 |
+
get_model(api_key="sk-ant-test-key")
|
| 40 |
+
|
| 41 |
+
assert "Anthropic is not supported" in str(exc_info.value)
|
| 42 |
|
| 43 |
def test_returns_huggingface_when_hf_token_present(self, monkeypatch):
|
| 44 |
"""HF_TOKEN present (no paid keys) → HuggingFace model."""
|
| 45 |
monkeypatch.setattr(settings, "openai_api_key", None)
|
|
|
|
| 46 |
monkeypatch.setattr(settings, "hf_token", "hf_test_token")
|
| 47 |
|
| 48 |
model = get_model()
|
| 49 |
assert isinstance(model, HuggingFaceModel)
|
| 50 |
|
| 51 |
+
def test_raises_when_no_api_keys_available(self, monkeypatch):
|
| 52 |
+
"""No keys at all → RuntimeError with helpful message."""
|
| 53 |
monkeypatch.setattr(settings, "openai_api_key", None)
|
|
|
|
| 54 |
monkeypatch.setattr(settings, "hf_token", None)
|
| 55 |
+
monkeypatch.setattr(settings, "huggingface_model", "Qwen/Qwen2.5-7B-Instruct")
|
| 56 |
+
# Also ensure HF_TOKEN env var is not set
|
| 57 |
+
monkeypatch.delenv("HF_TOKEN", raising=False)
|
| 58 |
|
| 59 |
+
# Should raise clear error when no tokens available
|
| 60 |
+
import pytest
|
| 61 |
|
| 62 |
+
with pytest.raises(RuntimeError) as exc_info:
|
| 63 |
+
get_model()
|
| 64 |
+
assert "No LLM API key available" in str(exc_info.value)
|
| 65 |
+
assert "HF_TOKEN" in str(exc_info.value)
|
| 66 |
|
| 67 |
+
def test_openai_env_takes_priority_over_huggingface(self, monkeypatch):
|
| 68 |
+
"""OpenAI env key present → OpenAI wins over HuggingFace."""
|
| 69 |
monkeypatch.setattr(settings, "openai_api_key", "sk-test")
|
| 70 |
+
monkeypatch.setattr(settings, "hf_token", "hf_test_token")
|
| 71 |
|
| 72 |
model = get_model()
|
| 73 |
assert isinstance(model, OpenAIChatModel)
|
tests/unit/agent_factory/test_judges_factory.py
CHANGED
|
@@ -1,14 +1,14 @@
|
|
| 1 |
-
"""Unit tests for Judge Factory and Model Selection.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
from unittest.mock import patch
|
| 4 |
|
| 5 |
import pytest
|
| 6 |
|
| 7 |
pytestmark = pytest.mark.unit
|
| 8 |
-
from pydantic_ai.models.anthropic import AnthropicModel
|
| 9 |
-
|
| 10 |
-
# We expect this import to exist after we implement it, or we mock it if it's not there yet
|
| 11 |
-
# For TDD, we assume we will use the library class
|
| 12 |
from pydantic_ai.models.huggingface import HuggingFaceModel
|
| 13 |
from pydantic_ai.models.openai import OpenAIChatModel
|
| 14 |
|
|
@@ -23,7 +23,6 @@ def mock_settings():
|
|
| 23 |
|
| 24 |
def test_get_model_openai(mock_settings):
|
| 25 |
"""Test that OpenAI model is returned when provider is openai."""
|
| 26 |
-
mock_settings.llm_provider = "openai"
|
| 27 |
mock_settings.has_openai_key = True
|
| 28 |
mock_settings.openai_api_key = "sk-test"
|
| 29 |
mock_settings.openai_model = "gpt-5"
|
|
@@ -33,39 +32,43 @@ def test_get_model_openai(mock_settings):
|
|
| 33 |
assert model.model_name == "gpt-5"
|
| 34 |
|
| 35 |
|
| 36 |
-
def
|
| 37 |
-
"""Test that
|
| 38 |
-
mock_settings.llm_provider = "anthropic"
|
| 39 |
mock_settings.has_openai_key = False
|
| 40 |
-
mock_settings.
|
| 41 |
-
mock_settings.anthropic_api_key = "sk-ant-test"
|
| 42 |
-
mock_settings.anthropic_model = "claude-sonnet-4-5-20250929"
|
| 43 |
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
assert model
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
|
| 48 |
|
| 49 |
def test_get_model_huggingface(mock_settings):
|
| 50 |
-
"""Test that HuggingFace model is returned when
|
| 51 |
-
mock_settings.llm_provider = "huggingface"
|
| 52 |
mock_settings.has_openai_key = False
|
| 53 |
-
mock_settings.has_anthropic_key = False
|
| 54 |
-
mock_settings.has_huggingface_key = True # CodeRabbit: explicitly set for auto-detect
|
| 55 |
mock_settings.hf_token = "hf_test_token"
|
| 56 |
-
mock_settings.huggingface_model = "
|
| 57 |
|
| 58 |
model = get_model()
|
| 59 |
assert isinstance(model, HuggingFaceModel)
|
| 60 |
-
assert model.model_name == "
|
| 61 |
|
| 62 |
|
| 63 |
-
def
|
| 64 |
-
"""Test
|
| 65 |
-
mock_settings.llm_provider = "unknown_provider"
|
| 66 |
mock_settings.has_openai_key = True
|
| 67 |
mock_settings.openai_api_key = "sk-test"
|
| 68 |
mock_settings.openai_model = "gpt-5"
|
|
|
|
| 69 |
|
| 70 |
model = get_model()
|
| 71 |
assert isinstance(model, OpenAIChatModel)
|
|
|
|
| 1 |
+
"""Unit tests for Judge Factory and Model Selection.
|
| 2 |
+
|
| 3 |
+
NOTE: Anthropic is NOT supported (no embeddings API).
|
| 4 |
+
See P3_REMOVE_ANTHROPIC_PARTIAL_WIRING.md.
|
| 5 |
+
"""
|
| 6 |
|
| 7 |
from unittest.mock import patch
|
| 8 |
|
| 9 |
import pytest
|
| 10 |
|
| 11 |
pytestmark = pytest.mark.unit
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
from pydantic_ai.models.huggingface import HuggingFaceModel
|
| 13 |
from pydantic_ai.models.openai import OpenAIChatModel
|
| 14 |
|
|
|
|
| 23 |
|
| 24 |
def test_get_model_openai(mock_settings):
|
| 25 |
"""Test that OpenAI model is returned when provider is openai."""
|
|
|
|
| 26 |
mock_settings.has_openai_key = True
|
| 27 |
mock_settings.openai_api_key = "sk-test"
|
| 28 |
mock_settings.openai_model = "gpt-5"
|
|
|
|
| 32 |
assert model.model_name == "gpt-5"
|
| 33 |
|
| 34 |
|
| 35 |
+
def test_get_model_byok_openai(mock_settings):
|
| 36 |
+
"""Test that BYOK OpenAI key returns OpenAI model."""
|
|
|
|
| 37 |
mock_settings.has_openai_key = False
|
| 38 |
+
mock_settings.openai_model = "gpt-5"
|
|
|
|
|
|
|
| 39 |
|
| 40 |
+
# BYOK takes priority over env vars
|
| 41 |
+
model = get_model(api_key="sk-byok-test")
|
| 42 |
+
assert isinstance(model, OpenAIChatModel)
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def test_get_model_byok_anthropic_raises(mock_settings):
|
| 46 |
+
"""Test that BYOK Anthropic key raises NotImplementedError."""
|
| 47 |
+
mock_settings.has_openai_key = False
|
| 48 |
+
|
| 49 |
+
with pytest.raises(NotImplementedError) as exc_info:
|
| 50 |
+
get_model(api_key="sk-ant-test")
|
| 51 |
+
|
| 52 |
+
assert "Anthropic is not supported" in str(exc_info.value)
|
| 53 |
|
| 54 |
|
| 55 |
def test_get_model_huggingface(mock_settings):
|
| 56 |
+
"""Test that HuggingFace model is returned when no paid keys."""
|
|
|
|
| 57 |
mock_settings.has_openai_key = False
|
|
|
|
|
|
|
| 58 |
mock_settings.hf_token = "hf_test_token"
|
| 59 |
+
mock_settings.huggingface_model = "Qwen/Qwen2.5-7B-Instruct"
|
| 60 |
|
| 61 |
model = get_model()
|
| 62 |
assert isinstance(model, HuggingFaceModel)
|
| 63 |
+
assert model.model_name == "Qwen/Qwen2.5-7B-Instruct"
|
| 64 |
|
| 65 |
|
| 66 |
+
def test_get_model_openai_priority(mock_settings):
|
| 67 |
+
"""Test OpenAI takes priority when both keys present."""
|
|
|
|
| 68 |
mock_settings.has_openai_key = True
|
| 69 |
mock_settings.openai_api_key = "sk-test"
|
| 70 |
mock_settings.openai_model = "gpt-5"
|
| 71 |
+
mock_settings.hf_token = "hf_test_token"
|
| 72 |
|
| 73 |
model = get_model()
|
| 74 |
assert isinstance(model, OpenAIChatModel)
|
tests/unit/clients/test_chat_client_factory.py
CHANGED
|
@@ -91,8 +91,73 @@ class TestChatClientFactory:
|
|
| 91 |
from src.clients.factory import get_chat_client
|
| 92 |
|
| 93 |
with pytest.raises(ValueError, match="Unsupported provider"):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
get_chat_client(provider="anthropic")
|
| 95 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
def test_provider_is_case_insensitive(self) -> None:
|
| 97 |
"""Provider matching should be case-insensitive."""
|
| 98 |
with patch("src.clients.factory.settings") as mock_settings:
|
|
|
|
| 91 |
from src.clients.factory import get_chat_client
|
| 92 |
|
| 93 |
with pytest.raises(ValueError, match="Unsupported provider"):
|
| 94 |
+
get_chat_client(provider="invalid_provider")
|
| 95 |
+
|
| 96 |
+
def test_anthropic_provider_raises_not_implemented(self) -> None:
|
| 97 |
+
"""Anthropic provider should raise NotImplementedError (not yet implemented)."""
|
| 98 |
+
with patch("src.clients.factory.settings") as mock_settings:
|
| 99 |
+
mock_settings.has_openai_key = False
|
| 100 |
+
mock_settings.has_gemini_key = False
|
| 101 |
+
|
| 102 |
+
from src.clients.factory import get_chat_client
|
| 103 |
+
|
| 104 |
+
with pytest.raises(NotImplementedError, match="Anthropic client not yet implemented"):
|
| 105 |
get_chat_client(provider="anthropic")
|
| 106 |
|
| 107 |
+
def test_byok_auto_detects_openai_from_key_prefix(self) -> None:
|
| 108 |
+
"""BYOK: api_key starting with 'sk-' should auto-select OpenAI without explicit provider.
|
| 109 |
+
|
| 110 |
+
This is the critical BYOK (Bring Your Own Key) test case:
|
| 111 |
+
- User enters 'sk-...' key in Gradio
|
| 112 |
+
- No explicit provider parameter
|
| 113 |
+
- No OPENAI_API_KEY in env (settings.has_openai_key = False)
|
| 114 |
+
- Should auto-detect OpenAI from the key prefix
|
| 115 |
+
"""
|
| 116 |
+
with patch("src.clients.factory.settings") as mock_settings:
|
| 117 |
+
mock_settings.has_openai_key = False # No env key
|
| 118 |
+
mock_settings.has_gemini_key = False
|
| 119 |
+
mock_settings.openai_api_key = None
|
| 120 |
+
mock_settings.openai_model = "gpt-5"
|
| 121 |
+
|
| 122 |
+
from src.clients.factory import get_chat_client
|
| 123 |
+
|
| 124 |
+
# BYOK: Pass api_key without explicit provider
|
| 125 |
+
client = get_chat_client(api_key="sk-user-provided-key")
|
| 126 |
+
|
| 127 |
+
# Should auto-detect OpenAI from 'sk-' prefix
|
| 128 |
+
assert "OpenAI" in type(client).__name__
|
| 129 |
+
|
| 130 |
+
def test_byok_auto_detects_anthropic_from_key_prefix(self) -> None:
|
| 131 |
+
"""BYOK: api_key starting with 'sk-ant-' should auto-detect Anthropic.
|
| 132 |
+
|
| 133 |
+
Anthropic keys start with 'sk-ant-' which is a superset of 'sk-'.
|
| 134 |
+
Detection must check 'sk-ant-' first to avoid misdetecting as OpenAI.
|
| 135 |
+
"""
|
| 136 |
+
with patch("src.clients.factory.settings") as mock_settings:
|
| 137 |
+
mock_settings.has_openai_key = False
|
| 138 |
+
mock_settings.has_gemini_key = False
|
| 139 |
+
|
| 140 |
+
from src.clients.factory import get_chat_client
|
| 141 |
+
|
| 142 |
+
# BYOK: Anthropic key should raise NotImplementedError (not fall to HuggingFace!)
|
| 143 |
+
with pytest.raises(NotImplementedError, match="Anthropic client not yet implemented"):
|
| 144 |
+
get_chat_client(api_key="sk-ant-user-anthropic-key")
|
| 145 |
+
|
| 146 |
+
def test_byok_hf_token_falls_through_to_huggingface(self) -> None:
|
| 147 |
+
"""BYOK: HuggingFace tokens (hf_...) should use HuggingFace client."""
|
| 148 |
+
with patch("src.clients.factory.settings") as mock_settings:
|
| 149 |
+
mock_settings.has_openai_key = False
|
| 150 |
+
mock_settings.has_gemini_key = False
|
| 151 |
+
mock_settings.huggingface_model = "Qwen/Qwen2.5-7B-Instruct"
|
| 152 |
+
mock_settings.hf_token = None
|
| 153 |
+
|
| 154 |
+
from src.clients.factory import get_chat_client
|
| 155 |
+
|
| 156 |
+
# HF tokens don't trigger auto-detection, falls through to HuggingFace
|
| 157 |
+
client = get_chat_client(api_key="hf_user_provided_token")
|
| 158 |
+
|
| 159 |
+
assert "HuggingFace" in type(client).__name__
|
| 160 |
+
|
| 161 |
def test_provider_is_case_insensitive(self) -> None:
|
| 162 |
"""Provider matching should be case-insensitive."""
|
| 163 |
with patch("src.clients.factory.settings") as mock_settings:
|
tests/unit/services/test_service_loader.py
CHANGED
|
@@ -25,13 +25,30 @@ class TestGetEmbeddingService:
|
|
| 25 |
create=True,
|
| 26 |
):
|
| 27 |
# Also need to prevent the actual import from failing
|
| 28 |
-
|
|
|
|
| 29 |
with patch.dict("sys.modules", {"src.services.llamaindex_rag": mock_module}):
|
| 30 |
from src.utils.service_loader import get_embedding_service
|
| 31 |
|
| 32 |
service = get_embedding_service()
|
| 33 |
assert service is mock_rag_service
|
| 34 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
def test_falls_back_to_local_when_no_openai_key(self):
|
| 36 |
"""Should return EmbeddingService when no OpenAI key."""
|
| 37 |
mock_local_service = MagicMock()
|
|
|
|
| 25 |
create=True,
|
| 26 |
):
|
| 27 |
# Also need to prevent the actual import from failing
|
| 28 |
+
# Update lambda to accept **kwargs (api_key)
|
| 29 |
+
mock_module = MagicMock(get_rag_service=lambda **kwargs: mock_rag_service)
|
| 30 |
with patch.dict("sys.modules", {"src.services.llamaindex_rag": mock_module}):
|
| 31 |
from src.utils.service_loader import get_embedding_service
|
| 32 |
|
| 33 |
service = get_embedding_service()
|
| 34 |
assert service is mock_rag_service
|
| 35 |
|
| 36 |
+
def test_uses_llamaindex_when_byok_key_present(self):
|
| 37 |
+
"""Should return LlamaIndexRAGService when valid BYOK key passed."""
|
| 38 |
+
mock_rag_service = MagicMock()
|
| 39 |
+
|
| 40 |
+
with patch("src.utils.service_loader.settings") as mock_settings:
|
| 41 |
+
mock_settings.has_openai_key = False # Env key missing
|
| 42 |
+
|
| 43 |
+
# Update lambda to accept **kwargs
|
| 44 |
+
mock_module = MagicMock(get_rag_service=lambda **kwargs: mock_rag_service)
|
| 45 |
+
with patch.dict("sys.modules", {"src.services.llamaindex_rag": mock_module}):
|
| 46 |
+
from src.utils.service_loader import get_embedding_service
|
| 47 |
+
|
| 48 |
+
# Pass valid BYOK key
|
| 49 |
+
service = get_embedding_service(api_key="sk-test-key")
|
| 50 |
+
assert service is mock_rag_service
|
| 51 |
+
|
| 52 |
def test_falls_back_to_local_when_no_openai_key(self):
|
| 53 |
"""Should return EmbeddingService when no OpenAI key."""
|
| 54 |
mock_local_service = MagicMock()
|
uv.lock
CHANGED
|
@@ -1130,6 +1130,7 @@ dependencies = [
|
|
| 1130 |
{ name = "langgraph" },
|
| 1131 |
{ name = "langgraph-checkpoint-sqlite" },
|
| 1132 |
{ name = "limits" },
|
|
|
|
| 1133 |
{ name = "openai" },
|
| 1134 |
{ name = "pydantic" },
|
| 1135 |
{ name = "pydantic-ai" },
|
|
@@ -1195,6 +1196,7 @@ requires-dist = [
|
|
| 1195 |
{ name = "llama-index-embeddings-openai", marker = "extra == 'modal'" },
|
| 1196 |
{ name = "llama-index-llms-openai", marker = "extra == 'modal'" },
|
| 1197 |
{ name = "llama-index-vector-stores-chroma", marker = "extra == 'modal'" },
|
|
|
|
| 1198 |
{ name = "modal", marker = "extra == 'modal'", specifier = ">=0.63.0" },
|
| 1199 |
{ name = "mypy", marker = "extra == 'dev'", specifier = ">=1.10" },
|
| 1200 |
{ name = "openai", specifier = ">=1.0.0" },
|
|
@@ -3007,7 +3009,7 @@ wheels = [
|
|
| 3007 |
|
| 3008 |
[[package]]
|
| 3009 |
name = "mcp"
|
| 3010 |
-
version = "1.
|
| 3011 |
source = { registry = "https://pypi.org/simple" }
|
| 3012 |
dependencies = [
|
| 3013 |
{ name = "anyio" },
|
|
@@ -3025,9 +3027,9 @@ dependencies = [
|
|
| 3025 |
{ name = "typing-inspection" },
|
| 3026 |
{ name = "uvicorn", marker = "sys_platform != 'emscripten'" },
|
| 3027 |
]
|
| 3028 |
-
sdist = { url = "https://files.pythonhosted.org/packages/
|
| 3029 |
wheels = [
|
| 3030 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 3031 |
]
|
| 3032 |
|
| 3033 |
[package.optional-dependencies]
|
|
|
|
| 1130 |
{ name = "langgraph" },
|
| 1131 |
{ name = "langgraph-checkpoint-sqlite" },
|
| 1132 |
{ name = "limits" },
|
| 1133 |
+
{ name = "mcp" },
|
| 1134 |
{ name = "openai" },
|
| 1135 |
{ name = "pydantic" },
|
| 1136 |
{ name = "pydantic-ai" },
|
|
|
|
| 1196 |
{ name = "llama-index-embeddings-openai", marker = "extra == 'modal'" },
|
| 1197 |
{ name = "llama-index-llms-openai", marker = "extra == 'modal'" },
|
| 1198 |
{ name = "llama-index-vector-stores-chroma", marker = "extra == 'modal'" },
|
| 1199 |
+
{ name = "mcp", specifier = ">=1.23.0" },
|
| 1200 |
{ name = "modal", marker = "extra == 'modal'", specifier = ">=0.63.0" },
|
| 1201 |
{ name = "mypy", marker = "extra == 'dev'", specifier = ">=1.10" },
|
| 1202 |
{ name = "openai", specifier = ">=1.0.0" },
|
|
|
|
| 3009 |
|
| 3010 |
[[package]]
|
| 3011 |
name = "mcp"
|
| 3012 |
+
version = "1.23.1"
|
| 3013 |
source = { registry = "https://pypi.org/simple" }
|
| 3014 |
dependencies = [
|
| 3015 |
{ name = "anyio" },
|
|
|
|
| 3027 |
{ name = "typing-inspection" },
|
| 3028 |
{ name = "uvicorn", marker = "sys_platform != 'emscripten'" },
|
| 3029 |
]
|
| 3030 |
+
sdist = { url = "https://files.pythonhosted.org/packages/12/42/10c0c09ca27aceacd8c428956cfabdd67e3d328fe55c4abc16589285d294/mcp-1.23.1.tar.gz", hash = "sha256:7403e053e8e2283b1e6ae631423cb54736933fea70b32422152e6064556cd298", size = 596519 }
|
| 3031 |
wheels = [
|
| 3032 |
+
{ url = "https://files.pythonhosted.org/packages/9f/9e/26e1d2d2c6afe15dfba5ca6799eeeea7656dce625c22766e4c57305e9cc2/mcp-1.23.1-py3-none-any.whl", hash = "sha256:3ce897fcc20a41bd50b4c58d3aa88085f11f505dcc0eaed48930012d34c731d8", size = 231433 },
|
| 3033 |
]
|
| 3034 |
|
| 3035 |
[package.optional-dependencies]
|