refactor(prompts): Unify prompt storage in src/prompts/ (Priority 5) (#127)
Browse files
Priority 5: Unified prompt storage in src/prompts/
✅ All checks passed | 0 CodeRabbit comments
- src/agents/magentic_agents.py +8 -101
- src/config/domain.py +0 -25
- src/prompts/hypothesis.py +20 -17
- src/prompts/judge.py +38 -76
- src/prompts/report.py +91 -118
- src/prompts/search.py +22 -0
- tests/unit/agents/test_magentic_agents_domain.py +2 -2
- tests/unit/config/test_domain.py +1 -4
- tests/unit/prompts/test_hypothesis_prompt_domain.py +4 -4
- tests/unit/prompts/test_judge_prompt.py +2 -4
- tests/unit/prompts/test_judge_prompt_domain.py +7 -9
- tests/unit/prompts/test_report_prompt_domain.py +2 -2
src/agents/magentic_agents.py
CHANGED
|
@@ -11,6 +11,10 @@ from src.agents.tools import (
|
|
| 11 |
from src.clients.base import BaseChatClient
|
| 12 |
from src.clients.factory import get_chat_client
|
| 13 |
from src.config.domain import ResearchDomain, get_domain_config
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
|
| 16 |
def create_search_agent(
|
|
@@ -34,19 +38,7 @@ def create_search_agent(
|
|
| 34 |
return ChatAgent(
|
| 35 |
name="SearchAgent",
|
| 36 |
description=config.search_agent_description,
|
| 37 |
-
instructions=
|
| 38 |
-
|
| 39 |
-
1. Analyze the request to determine what to search for
|
| 40 |
-
2. Extract key search terms (drug names, disease names, mechanisms)
|
| 41 |
-
3. Use the appropriate search tools:
|
| 42 |
-
- search_pubmed for peer-reviewed papers
|
| 43 |
-
- search_clinical_trials for clinical studies
|
| 44 |
-
- search_preprints for cutting-edge findings
|
| 45 |
-
4. Summarize what you found and highlight key evidence
|
| 46 |
-
|
| 47 |
-
Be thorough - search multiple databases when appropriate.
|
| 48 |
-
Focus on finding: mechanisms of action, clinical evidence, and specific findings
|
| 49 |
-
related to {config.name}.""",
|
| 50 |
chat_client=client,
|
| 51 |
tools=[search_pubmed, search_clinical_trials, search_preprints],
|
| 52 |
temperature=1.0, # Explicitly set for reasoning model compatibility (o1/o3)
|
|
@@ -69,43 +61,11 @@ def create_judge_agent(
|
|
| 69 |
ChatAgent configured for evidence assessment
|
| 70 |
"""
|
| 71 |
client = chat_client or get_chat_client(api_key=api_key)
|
| 72 |
-
config = get_domain_config(domain)
|
| 73 |
|
| 74 |
return ChatAgent(
|
| 75 |
name="JudgeAgent",
|
| 76 |
description="Evaluates evidence quality and determines if sufficient for synthesis",
|
| 77 |
-
instructions=
|
| 78 |
-
|
| 79 |
-
When asked to evaluate:
|
| 80 |
-
|
| 81 |
-
1. Review all evidence presented in the conversation
|
| 82 |
-
2. Score on two dimensions (0-10 each):
|
| 83 |
-
- Mechanism Score: How well is the biological mechanism explained?
|
| 84 |
-
- Clinical Score: How strong is the clinical/preclinical evidence?
|
| 85 |
-
3. Determine if evidence is SUFFICIENT for a final report:
|
| 86 |
-
- Sufficient: Clear mechanism + supporting clinical data
|
| 87 |
-
- Insufficient: Gaps in mechanism OR weak clinical evidence
|
| 88 |
-
4. If insufficient, suggest specific search queries to fill gaps
|
| 89 |
-
|
| 90 |
-
## CRITICAL OUTPUT FORMAT
|
| 91 |
-
To ensure the workflow terminates when appropriate, you MUST follow these rules:
|
| 92 |
-
|
| 93 |
-
IF evidence is SUFFICIENT (confidence >= 70%):
|
| 94 |
-
Start your response with a line like:
|
| 95 |
-
"✅ SUFFICIENT EVIDENCE (confidence: 72%). STOP SEARCHING. Delegate to ReportAgent NOW."
|
| 96 |
-
Use your actual numeric confidence instead of 72.
|
| 97 |
-
Then explain why.
|
| 98 |
-
|
| 99 |
-
IF evidence is INSUFFICIENT:
|
| 100 |
-
Start with "❌ INSUFFICIENT: <Reason>."
|
| 101 |
-
Then provide scores and next queries.
|
| 102 |
-
|
| 103 |
-
Be rigorous but fair. Look for:
|
| 104 |
-
- Molecular targets and pathways
|
| 105 |
-
- Animal model studies
|
| 106 |
-
- Human clinical trials
|
| 107 |
-
- Safety data
|
| 108 |
-
- Drug-drug interactions""",
|
| 109 |
chat_client=client,
|
| 110 |
temperature=1.0, # Explicitly set for reasoning model compatibility
|
| 111 |
)
|
|
@@ -132,23 +92,7 @@ def create_hypothesis_agent(
|
|
| 132 |
return ChatAgent(
|
| 133 |
name="HypothesisAgent",
|
| 134 |
description=config.hypothesis_agent_description,
|
| 135 |
-
instructions=
|
| 136 |
-
|
| 137 |
-
Based on evidence:
|
| 138 |
-
|
| 139 |
-
1. Identify the key molecular targets involved
|
| 140 |
-
2. Map the biological pathways affected
|
| 141 |
-
3. Generate testable hypotheses in this format:
|
| 142 |
-
|
| 143 |
-
DRUG -> TARGET -> PATHWAY -> THERAPEUTIC EFFECT
|
| 144 |
-
|
| 145 |
-
Example:
|
| 146 |
-
Testosterone -> Androgen receptor -> Dopamine modulation -> Enhanced libido
|
| 147 |
-
|
| 148 |
-
4. Explain the rationale for each hypothesis
|
| 149 |
-
5. Suggest what additional evidence would support or refute it
|
| 150 |
-
|
| 151 |
-
Focus on mechanistic plausibility and existing evidence.""",
|
| 152 |
chat_client=client,
|
| 153 |
temperature=1.0, # Explicitly set for reasoning model compatibility
|
| 154 |
)
|
|
@@ -170,48 +114,11 @@ def create_report_agent(
|
|
| 170 |
ChatAgent configured for report generation
|
| 171 |
"""
|
| 172 |
client = chat_client or get_chat_client(api_key=api_key)
|
| 173 |
-
config = get_domain_config(domain)
|
| 174 |
|
| 175 |
return ChatAgent(
|
| 176 |
name="ReportAgent",
|
| 177 |
description="Synthesizes research findings into structured reports",
|
| 178 |
-
instructions=
|
| 179 |
-
|
| 180 |
-
When asked to synthesize:
|
| 181 |
-
|
| 182 |
-
Generate a structured report with these sections:
|
| 183 |
-
|
| 184 |
-
## Executive Summary
|
| 185 |
-
Brief overview of findings and recommendation
|
| 186 |
-
|
| 187 |
-
## Methodology
|
| 188 |
-
Databases searched, queries used, evidence reviewed
|
| 189 |
-
|
| 190 |
-
## Key Findings
|
| 191 |
-
### Mechanism of Action
|
| 192 |
-
- Molecular targets
|
| 193 |
-
- Biological pathways
|
| 194 |
-
- Proposed mechanism
|
| 195 |
-
|
| 196 |
-
### Clinical Evidence
|
| 197 |
-
- Preclinical studies
|
| 198 |
-
- Clinical trials
|
| 199 |
-
- Safety profile
|
| 200 |
-
|
| 201 |
-
## Candidates
|
| 202 |
-
List specific candidates with potential
|
| 203 |
-
|
| 204 |
-
## Limitations
|
| 205 |
-
Gaps in evidence, conflicting data, caveats
|
| 206 |
-
|
| 207 |
-
## Conclusion
|
| 208 |
-
Final recommendation with confidence level
|
| 209 |
-
|
| 210 |
-
## References
|
| 211 |
-
Use the 'get_bibliography' tool to fetch the complete list of citations.
|
| 212 |
-
Format them as a numbered list.
|
| 213 |
-
|
| 214 |
-
Be comprehensive but concise. Cite evidence for all claims.""",
|
| 215 |
chat_client=client,
|
| 216 |
tools=[get_bibliography],
|
| 217 |
temperature=1.0, # Explicitly set for reasoning model compatibility
|
|
|
|
| 11 |
from src.clients.base import BaseChatClient
|
| 12 |
from src.clients.factory import get_chat_client
|
| 13 |
from src.config.domain import ResearchDomain, get_domain_config
|
| 14 |
+
from src.prompts.hypothesis import get_system_prompt as get_hypothesis_prompt
|
| 15 |
+
from src.prompts.judge import get_system_prompt as get_judge_prompt
|
| 16 |
+
from src.prompts.report import get_system_prompt as get_report_prompt
|
| 17 |
+
from src.prompts.search import get_system_prompt as get_search_prompt
|
| 18 |
|
| 19 |
|
| 20 |
def create_search_agent(
|
|
|
|
| 38 |
return ChatAgent(
|
| 39 |
name="SearchAgent",
|
| 40 |
description=config.search_agent_description,
|
| 41 |
+
instructions=get_search_prompt(domain),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
chat_client=client,
|
| 43 |
tools=[search_pubmed, search_clinical_trials, search_preprints],
|
| 44 |
temperature=1.0, # Explicitly set for reasoning model compatibility (o1/o3)
|
|
|
|
| 61 |
ChatAgent configured for evidence assessment
|
| 62 |
"""
|
| 63 |
client = chat_client or get_chat_client(api_key=api_key)
|
|
|
|
| 64 |
|
| 65 |
return ChatAgent(
|
| 66 |
name="JudgeAgent",
|
| 67 |
description="Evaluates evidence quality and determines if sufficient for synthesis",
|
| 68 |
+
instructions=get_judge_prompt(domain),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
chat_client=client,
|
| 70 |
temperature=1.0, # Explicitly set for reasoning model compatibility
|
| 71 |
)
|
|
|
|
| 92 |
return ChatAgent(
|
| 93 |
name="HypothesisAgent",
|
| 94 |
description=config.hypothesis_agent_description,
|
| 95 |
+
instructions=get_hypothesis_prompt(domain),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
chat_client=client,
|
| 97 |
temperature=1.0, # Explicitly set for reasoning model compatibility
|
| 98 |
)
|
|
|
|
| 114 |
ChatAgent configured for report generation
|
| 115 |
"""
|
| 116 |
client = chat_client or get_chat_client(api_key=api_key)
|
|
|
|
| 117 |
|
| 118 |
return ChatAgent(
|
| 119 |
name="ReportAgent",
|
| 120 |
description="Synthesizes research findings into structured reports",
|
| 121 |
+
instructions=get_report_prompt(domain),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
chat_client=client,
|
| 123 |
tools=[get_bibliography],
|
| 124 |
temperature=1.0, # Explicitly set for reasoning model compatibility
|
src/config/domain.py
CHANGED
|
@@ -46,16 +46,6 @@ class DomainConfig(BaseModel):
|
|
| 46 |
report_title: str
|
| 47 |
report_focus: str
|
| 48 |
|
| 49 |
-
# Judge prompts
|
| 50 |
-
judge_system_prompt: str
|
| 51 |
-
judge_scoring_prompt: str
|
| 52 |
-
|
| 53 |
-
# Hypothesis prompts
|
| 54 |
-
hypothesis_system_prompt: str
|
| 55 |
-
|
| 56 |
-
# Report writer prompts
|
| 57 |
-
report_system_prompt: str
|
| 58 |
-
|
| 59 |
# Search context
|
| 60 |
search_description: str
|
| 61 |
search_example_query: str
|
|
@@ -74,21 +64,6 @@ SEXUAL_HEALTH_CONFIG = DomainConfig(
|
|
| 74 |
description="Sexual health and wellness research specialist",
|
| 75 |
report_title="## Sexual Health Analysis",
|
| 76 |
report_focus="sexual health and wellness interventions",
|
| 77 |
-
judge_system_prompt="""You are an expert sexual health research judge.
|
| 78 |
-
Your role is to evaluate evidence for sexual health interventions, assess
|
| 79 |
-
efficacy and safety data, and determine clinical applicability.""",
|
| 80 |
-
judge_scoring_prompt="""Score this evidence for sexual health relevance.
|
| 81 |
-
Provide ONLY scores and extracted data.""",
|
| 82 |
-
hypothesis_system_prompt=(
|
| 83 |
-
"""You are a biomedical research scientist specializing in sexual health.
|
| 84 |
-
Your role is to generate evidence-based hypotheses for sexual health interventions,
|
| 85 |
-
identifying mechanisms of action and potential therapeutic applications."""
|
| 86 |
-
),
|
| 87 |
-
report_system_prompt=(
|
| 88 |
-
"""You are a scientific writer specializing in sexual health research reports.
|
| 89 |
-
Your role is to synthesize evidence into clear recommendations for sexual health
|
| 90 |
-
interventions with proper safety considerations."""
|
| 91 |
-
),
|
| 92 |
search_description="Searches biomedical literature for sexual health evidence",
|
| 93 |
search_example_query="testosterone therapy female libido",
|
| 94 |
search_agent_description="Searches PubMed for sexual health evidence",
|
|
|
|
| 46 |
report_title: str
|
| 47 |
report_focus: str
|
| 48 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
# Search context
|
| 50 |
search_description: str
|
| 51 |
search_example_query: str
|
|
|
|
| 64 |
description="Sexual health and wellness research specialist",
|
| 65 |
report_title="## Sexual Health Analysis",
|
| 66 |
report_focus="sexual health and wellness interventions",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
search_description="Searches biomedical literature for sexual health evidence",
|
| 68 |
search_example_query="testosterone therapy female libido",
|
| 69 |
search_agent_description="Searches PubMed for sexual health evidence",
|
src/prompts/hypothesis.py
CHANGED
|
@@ -13,33 +13,36 @@ if TYPE_CHECKING:
|
|
| 13 |
def get_system_prompt(domain: ResearchDomain | str | None = None) -> str:
|
| 14 |
"""Get the system prompt for the hypothesis agent."""
|
| 15 |
config = get_domain_config(domain)
|
| 16 |
-
return f"""{config.hypothesis_system_prompt}
|
| 17 |
|
| 18 |
-
|
|
|
|
|
|
|
| 19 |
|
| 20 |
-
|
| 21 |
-
1. Proposes a MECHANISM: Drug -> Target -> Pathway -> Effect
|
| 22 |
-
2. Is TESTABLE: Can be supported or refuted by literature search
|
| 23 |
-
3. Is SPECIFIC: Names actual molecular targets and pathways
|
| 24 |
-
4. Generates SEARCH QUERIES: Helps find more evidence
|
| 25 |
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
- Pathway: Dopaminergic signaling modulation
|
| 30 |
-
- Effect: Enhanced libido in HSDD
|
| 31 |
-
- Confidence: 0.7
|
| 32 |
-
- Search suggestions: ["testosterone libido mechanism", "sildenafil efficacy women"]
|
| 33 |
|
| 34 |
-
|
| 35 |
|
|
|
|
|
|
|
| 36 |
|
| 37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
SYSTEM_PROMPT = get_system_prompt()
|
| 39 |
|
| 40 |
|
| 41 |
async def format_hypothesis_prompt(
|
| 42 |
-
query: str,
|
|
|
|
|
|
|
| 43 |
) -> str:
|
| 44 |
"""Format prompt for hypothesis generation.
|
| 45 |
|
|
|
|
| 13 |
def get_system_prompt(domain: ResearchDomain | str | None = None) -> str:
|
| 14 |
"""Get the system prompt for the hypothesis agent."""
|
| 15 |
config = get_domain_config(domain)
|
|
|
|
| 16 |
|
| 17 |
+
return f"""You are a biomedical research scientist specializing in {config.name}.
|
| 18 |
+
Your role is to generate evidence-based hypotheses for interventions,
|
| 19 |
+
identifying mechanisms of action and potential therapeutic applications.
|
| 20 |
|
| 21 |
+
Based on evidence:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
+
1. Identify the key molecular targets involved
|
| 24 |
+
2. Map the biological pathways affected
|
| 25 |
+
3. Generate testable hypotheses in this format:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
|
| 27 |
+
DRUG -> TARGET -> PATHWAY -> THERAPEUTIC EFFECT
|
| 28 |
|
| 29 |
+
Example:
|
| 30 |
+
Testosterone -> Androgen receptor -> Dopamine modulation -> Enhanced libido
|
| 31 |
|
| 32 |
+
4. Explain the rationale for each hypothesis
|
| 33 |
+
5. Suggest what additional evidence would support or refute it
|
| 34 |
+
|
| 35 |
+
Focus on mechanistic plausibility and existing evidence."""
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
# Keep SYSTEM_PROMPT for backwards compatibility (used by PydanticAI agents)
|
| 39 |
SYSTEM_PROMPT = get_system_prompt()
|
| 40 |
|
| 41 |
|
| 42 |
async def format_hypothesis_prompt(
|
| 43 |
+
query: str,
|
| 44 |
+
evidence: list["Evidence"],
|
| 45 |
+
embeddings: "EmbeddingServiceProtocol | None" = None,
|
| 46 |
) -> str:
|
| 47 |
"""Format prompt for hypothesis generation.
|
| 48 |
|
src/prompts/judge.py
CHANGED
|
@@ -5,73 +5,49 @@ from src.utils.models import Evidence
|
|
| 5 |
|
| 6 |
|
| 7 |
def get_system_prompt(domain: ResearchDomain | str | None = None) -> str:
|
| 8 |
-
"""Get the system prompt for the judge agent."""
|
| 9 |
config = get_domain_config(domain)
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
Your
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
## Output Format
|
| 47 |
-
|
| 48 |
-
Return valid JSON with these fields:
|
| 49 |
-
- details.mechanism_score (int 0-10)
|
| 50 |
-
- details.mechanism_reasoning (string)
|
| 51 |
-
- details.clinical_evidence_score (int 0-10)
|
| 52 |
-
- details.clinical_reasoning (string)
|
| 53 |
-
- details.drug_candidates (list of strings)
|
| 54 |
-
- details.key_findings (list of strings)
|
| 55 |
-
- sufficient (boolean) - TRUE if scores suggest enough evidence
|
| 56 |
-
- confidence (float 0-1)
|
| 57 |
-
- recommendation ("continue" or "synthesize") - Your suggestion (system may override)
|
| 58 |
-
- next_search_queries (list) - If continuing, suggest FOCUSED queries
|
| 59 |
-
- reasoning (string)
|
| 60 |
-
|
| 61 |
-
## CRITICAL: Search Query Rules
|
| 62 |
-
|
| 63 |
-
When suggesting next_search_queries:
|
| 64 |
-
- STAY FOCUSED on the original research question
|
| 65 |
-
- Do NOT drift to tangential topics
|
| 66 |
-
- If question is about "female libido", do NOT suggest "bone health" or "muscle mass"
|
| 67 |
-
- Refine existing terms, don't explore random medical associations
|
| 68 |
-
"""
|
| 69 |
|
| 70 |
|
| 71 |
def get_scoring_prompt(domain: ResearchDomain | str | None = None) -> str:
|
| 72 |
"""Get the scoring instructions for the judge."""
|
| 73 |
-
|
| 74 |
-
|
| 75 |
|
| 76 |
|
| 77 |
# Keep SYSTEM_PROMPT for backwards compatibility
|
|
@@ -118,9 +94,6 @@ def format_user_prompt(
|
|
| 118 |
) -> str:
|
| 119 |
"""
|
| 120 |
Format user prompt with selected evidence and iteration context.
|
| 121 |
-
|
| 122 |
-
NOTE: Evidence should be pre-selected using select_evidence_for_judge().
|
| 123 |
-
This function assumes evidence is already capped.
|
| 124 |
"""
|
| 125 |
# Use explicit None check - 0 is a valid count (empty evidence)
|
| 126 |
total_count = total_evidence_count if total_evidence_count is not None else len(evidence)
|
|
@@ -140,7 +113,6 @@ def format_user_prompt(
|
|
| 140 |
|
| 141 |
evidence_text = "\n\n".join([format_single_evidence(i, e) for i, e in enumerate(evidence)])
|
| 142 |
|
| 143 |
-
# Lost-in-the-middle mitigation: put critical context at START and END
|
| 144 |
return f"""## Research Question (IMPORTANT - stay focused on this)
|
| 145 |
{question}
|
| 146 |
|
|
@@ -156,22 +128,12 @@ def format_user_prompt(
|
|
| 156 |
## Your Task
|
| 157 |
|
| 158 |
{scoring_prompt}
|
| 159 |
-
DO NOT decide "synthesize" vs "continue" - that decision is made by the system.
|
| 160 |
-
|
| 161 |
-
## REMINDER: Original Question (stay focused)
|
| 162 |
-
{question}
|
| 163 |
"""
|
| 164 |
|
| 165 |
|
| 166 |
def format_empty_evidence_prompt(question: str) -> str:
|
| 167 |
"""
|
| 168 |
Format prompt when no evidence was found.
|
| 169 |
-
|
| 170 |
-
Args:
|
| 171 |
-
question: The user's research question
|
| 172 |
-
|
| 173 |
-
Returns:
|
| 174 |
-
Formatted prompt string
|
| 175 |
"""
|
| 176 |
return f"""## Research Question
|
| 177 |
{question}
|
|
|
|
| 5 |
|
| 6 |
|
| 7 |
def get_system_prompt(domain: ResearchDomain | str | None = None) -> str:
|
| 8 |
+
"""Get the system prompt for the judge agent (Magentic/Advanced Mode)."""
|
| 9 |
config = get_domain_config(domain)
|
| 10 |
+
|
| 11 |
+
return f"""You are an expert research judge specializing in {config.name}.
|
| 12 |
+
Your role is to evaluate evidence for interventions, assess efficacy and safety data,
|
| 13 |
+
and determine clinical applicability.
|
| 14 |
+
|
| 15 |
+
When asked to evaluate:
|
| 16 |
+
|
| 17 |
+
1. Review all evidence presented in the conversation
|
| 18 |
+
2. Score on two dimensions (0-10 each):
|
| 19 |
+
- Mechanism Score: How well is the biological mechanism explained?
|
| 20 |
+
- Clinical Score: How strong is the clinical/preclinical evidence?
|
| 21 |
+
3. Determine if evidence is SUFFICIENT for a final report:
|
| 22 |
+
- Sufficient: Clear mechanism + supporting clinical data
|
| 23 |
+
- Insufficient: Gaps in mechanism OR weak clinical evidence
|
| 24 |
+
4. If insufficient, suggest specific search queries to fill gaps
|
| 25 |
+
|
| 26 |
+
## CRITICAL OUTPUT FORMAT
|
| 27 |
+
To ensure the workflow terminates when appropriate, you MUST follow these rules:
|
| 28 |
+
|
| 29 |
+
IF evidence is SUFFICIENT (confidence >= 70%):
|
| 30 |
+
Start your response with a line like:
|
| 31 |
+
"✅ SUFFICIENT EVIDENCE (confidence: 72%). STOP SEARCHING. Delegate to ReportAgent NOW."
|
| 32 |
+
Use your actual numeric confidence instead of 72.
|
| 33 |
+
Then explain why.
|
| 34 |
+
|
| 35 |
+
IF evidence is INSUFFICIENT:
|
| 36 |
+
Start with "❌ INSUFFICIENT: <Reason>."
|
| 37 |
+
Then provide scores and next queries.
|
| 38 |
+
|
| 39 |
+
Be rigorous but fair. Look for:
|
| 40 |
+
- Molecular targets and pathways
|
| 41 |
+
- Animal model studies
|
| 42 |
+
- Human clinical trials
|
| 43 |
+
- Safety data
|
| 44 |
+
- Drug-drug interactions"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
|
| 46 |
|
| 47 |
def get_scoring_prompt(domain: ResearchDomain | str | None = None) -> str:
|
| 48 |
"""Get the scoring instructions for the judge."""
|
| 49 |
+
return """Score this evidence for relevance.
|
| 50 |
+
Provide ONLY scores and extracted data."""
|
| 51 |
|
| 52 |
|
| 53 |
# Keep SYSTEM_PROMPT for backwards compatibility
|
|
|
|
| 94 |
) -> str:
|
| 95 |
"""
|
| 96 |
Format user prompt with selected evidence and iteration context.
|
|
|
|
|
|
|
|
|
|
| 97 |
"""
|
| 98 |
# Use explicit None check - 0 is a valid count (empty evidence)
|
| 99 |
total_count = total_evidence_count if total_evidence_count is not None else len(evidence)
|
|
|
|
| 113 |
|
| 114 |
evidence_text = "\n\n".join([format_single_evidence(i, e) for i, e in enumerate(evidence)])
|
| 115 |
|
|
|
|
| 116 |
return f"""## Research Question (IMPORTANT - stay focused on this)
|
| 117 |
{question}
|
| 118 |
|
|
|
|
| 128 |
## Your Task
|
| 129 |
|
| 130 |
{scoring_prompt}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
"""
|
| 132 |
|
| 133 |
|
| 134 |
def format_empty_evidence_prompt(question: str) -> str:
|
| 135 |
"""
|
| 136 |
Format prompt when no evidence was found.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
"""
|
| 138 |
return f"""## Research Question
|
| 139 |
{question}
|
src/prompts/report.py
CHANGED
|
@@ -7,73 +7,52 @@ from src.utils.text_utils import select_diverse_evidence, truncate_at_sentence
|
|
| 7 |
|
| 8 |
if TYPE_CHECKING:
|
| 9 |
from src.services.embedding_protocol import EmbeddingServiceProtocol
|
| 10 |
-
from src.utils.models import Evidence,
|
| 11 |
|
| 12 |
|
| 13 |
def get_system_prompt(domain: ResearchDomain | str | None = None) -> str:
|
| 14 |
"""Get the system prompt for the report agent."""
|
| 15 |
config = get_domain_config(domain)
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
Your role is to synthesize evidence
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
-
|
| 39 |
-
-
|
| 40 |
-
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
Example:
|
| 57 |
-
references: [
|
| 58 |
-
{{"title": "Testosterone and Libido", "authors": "Smith",
|
| 59 |
-
"source": "pubmed", "url": "https://pubmed.ncbi.nlm.nih.gov/123/"}}
|
| 60 |
-
]
|
| 61 |
-
|
| 62 |
-
─────────────────────────────────────────────────────────────────────────────
|
| 63 |
-
🚨 CRITICAL CITATION REQUIREMENTS 🚨
|
| 64 |
-
─────────────────────────────────────────────────────────────────────────────
|
| 65 |
-
|
| 66 |
-
You MUST follow these rules for the References section:
|
| 67 |
-
|
| 68 |
-
1. You may ONLY cite papers that appear in the Evidence section above
|
| 69 |
-
2. Every reference URL must EXACTLY match a provided evidence URL
|
| 70 |
-
3. Do NOT invent, fabricate, or hallucinate any references
|
| 71 |
-
4. Do NOT modify paper titles, authors, dates, or URLs
|
| 72 |
-
5. If unsure about a citation, OMIT it rather than guess
|
| 73 |
-
6. Copy URLs exactly as provided - do not create similar-looking URLs
|
| 74 |
-
|
| 75 |
-
VIOLATION OF THESE RULES PRODUCES DANGEROUS MISINFORMATION.
|
| 76 |
-
─────────────────────────────────────────────────────────────────────────────"""
|
| 77 |
|
| 78 |
|
| 79 |
# Keep SYSTEM_PROMPT for backwards compatibility
|
|
@@ -83,67 +62,61 @@ SYSTEM_PROMPT = get_system_prompt()
|
|
| 83 |
async def format_report_prompt(
|
| 84 |
query: str,
|
| 85 |
evidence: list["Evidence"],
|
| 86 |
-
hypotheses: list["
|
| 87 |
-
assessment:
|
| 88 |
metadata: dict[str, Any],
|
| 89 |
embeddings: "EmbeddingServiceProtocol | None" = None,
|
| 90 |
) -> str:
|
| 91 |
"""Format prompt for report generation.
|
| 92 |
|
| 93 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
"""
|
| 95 |
-
# Select diverse evidence (
|
| 96 |
-
selected = await select_diverse_evidence(evidence, n=
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
f" **Source**: {e.citation.source}\n"
|
| 109 |
-
f" **Content**: {truncate_at_sentence(e.content, 200)}\n"
|
| 110 |
-
)
|
| 111 |
-
evidence_summary = "\n".join(evidence_lines)
|
| 112 |
-
|
| 113 |
if hypotheses:
|
| 114 |
-
|
|
|
|
| 115 |
for h in hypotheses:
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
)
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
return f"""Generate a
|
| 127 |
-
|
| 128 |
-
##
|
| 129 |
-
{
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
{
|
| 134 |
-
|
| 135 |
-
## Hypotheses
|
| 136 |
-
{
|
| 137 |
-
|
| 138 |
-
##
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
## Metadata
|
| 144 |
-
- Sources Searched: {sources}
|
| 145 |
-
- Search Iterations: {metadata.get("iterations", 0)}
|
| 146 |
-
|
| 147 |
-
Generate a complete ResearchReport with all sections filled in.
|
| 148 |
-
|
| 149 |
-
REMINDER: Only cite papers from the Evidence section above. Copy URLs exactly."""
|
|
|
|
| 7 |
|
| 8 |
if TYPE_CHECKING:
|
| 9 |
from src.services.embedding_protocol import EmbeddingServiceProtocol
|
| 10 |
+
from src.utils.models import Evidence, HypothesisAssessment
|
| 11 |
|
| 12 |
|
| 13 |
def get_system_prompt(domain: ResearchDomain | str | None = None) -> str:
|
| 14 |
"""Get the system prompt for the report agent."""
|
| 15 |
config = get_domain_config(domain)
|
| 16 |
+
|
| 17 |
+
return f"""You are a scientific writer specializing in {config.name}.
|
| 18 |
+
Your role is to synthesize evidence into clear recommendations for interventions
|
| 19 |
+
with proper safety considerations.
|
| 20 |
+
|
| 21 |
+
When asked to synthesize:
|
| 22 |
+
|
| 23 |
+
Generate a structured report with these sections:
|
| 24 |
+
|
| 25 |
+
## Executive Summary
|
| 26 |
+
Brief overview of findings and recommendation
|
| 27 |
+
|
| 28 |
+
## Methodology
|
| 29 |
+
Databases searched, queries used, evidence reviewed
|
| 30 |
+
|
| 31 |
+
## Key Findings
|
| 32 |
+
### Mechanism of Action
|
| 33 |
+
- Molecular targets
|
| 34 |
+
- Biological pathways
|
| 35 |
+
- Proposed mechanism
|
| 36 |
+
|
| 37 |
+
### Clinical Evidence
|
| 38 |
+
- Preclinical studies
|
| 39 |
+
- Clinical trials
|
| 40 |
+
- Safety profile
|
| 41 |
+
|
| 42 |
+
## Candidates
|
| 43 |
+
List specific candidates with potential
|
| 44 |
+
|
| 45 |
+
## Limitations
|
| 46 |
+
Gaps in evidence, conflicting data, caveats
|
| 47 |
+
|
| 48 |
+
## Conclusion
|
| 49 |
+
Final recommendation with confidence level
|
| 50 |
+
|
| 51 |
+
## References
|
| 52 |
+
Use the 'get_bibliography' tool to fetch the complete list of citations.
|
| 53 |
+
Format them as a numbered list.
|
| 54 |
+
|
| 55 |
+
Be comprehensive but concise. Cite evidence for all claims."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
|
| 57 |
|
| 58 |
# Keep SYSTEM_PROMPT for backwards compatibility
|
|
|
|
| 62 |
async def format_report_prompt(
|
| 63 |
query: str,
|
| 64 |
evidence: list["Evidence"],
|
| 65 |
+
hypotheses: list["HypothesisAssessment"] | list[Any],
|
| 66 |
+
assessment: Any,
|
| 67 |
metadata: dict[str, Any],
|
| 68 |
embeddings: "EmbeddingServiceProtocol | None" = None,
|
| 69 |
) -> str:
|
| 70 |
"""Format prompt for report generation.
|
| 71 |
|
| 72 |
+
Args:
|
| 73 |
+
query: Research query
|
| 74 |
+
evidence: Collected evidence
|
| 75 |
+
hypotheses: Generated hypotheses
|
| 76 |
+
assessment: Judge assessment details
|
| 77 |
+
metadata: Search metadata
|
| 78 |
+
embeddings: Optional embedding service for diverse selection
|
| 79 |
"""
|
| 80 |
+
# Select diverse evidence (max 15 for report)
|
| 81 |
+
selected = await select_diverse_evidence(evidence, n=15, query=query, embeddings=embeddings)
|
| 82 |
+
|
| 83 |
+
evidence_text = "\n".join(
|
| 84 |
+
[
|
| 85 |
+
f"- **{e.citation.title}** ({e.citation.source}): "
|
| 86 |
+
f"{truncate_at_sentence(e.content, 400)}"
|
| 87 |
+
for e in selected
|
| 88 |
+
]
|
| 89 |
+
)
|
| 90 |
+
|
| 91 |
+
# Format hypotheses if available
|
| 92 |
+
hypotheses_text = "No specific hypotheses generated."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
if hypotheses:
|
| 94 |
+
# Handle both Pydantic models and dicts/objects
|
| 95 |
+
h_list = []
|
| 96 |
for h in hypotheses:
|
| 97 |
+
if hasattr(h, "hypotheses"):
|
| 98 |
+
for item in h.hypotheses:
|
| 99 |
+
h_list.append(f"- {item.drug} -> {item.target} -> {item.effect}")
|
| 100 |
+
elif isinstance(h, dict):
|
| 101 |
+
h_list.append(str(h))
|
| 102 |
+
else:
|
| 103 |
+
h_list.append(str(h))
|
| 104 |
+
if h_list:
|
| 105 |
+
hypotheses_text = "\n".join(h_list)
|
| 106 |
+
|
| 107 |
+
return f"""Generate a comprehensive research report for: "{query}""
|
| 108 |
+
|
| 109 |
+
## Context
|
| 110 |
+
- **Sources Searched**: {", ".join(metadata.get("sources", []))}
|
| 111 |
+
- **Iterations**: {metadata.get("iterations", 0)}
|
| 112 |
+
|
| 113 |
+
## Evidence ({len(selected)} key papers)
|
| 114 |
+
{evidence_text}
|
| 115 |
+
|
| 116 |
+
## Generated Hypotheses
|
| 117 |
+
{hypotheses_text}
|
| 118 |
+
|
| 119 |
+
## Task
|
| 120 |
+
Synthesize this information into a structured report following the Executive Summary format.
|
| 121 |
+
Focus on clinical applicability and safety.
|
| 122 |
+
Use specific citations from the evidence list."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/prompts/search.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Prompts for Search Agent."""
|
| 2 |
+
|
| 3 |
+
from src.config.domain import ResearchDomain, get_domain_config
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def get_system_prompt(domain: ResearchDomain | str | None = None) -> str:
|
| 7 |
+
"""Get the system prompt for the search agent."""
|
| 8 |
+
config = get_domain_config(domain)
|
| 9 |
+
|
| 10 |
+
return f"""You are a biomedical search specialist. When asked to find evidence:
|
| 11 |
+
|
| 12 |
+
1. Analyze the request to determine what to search for
|
| 13 |
+
2. Extract key search terms (drug names, disease names, mechanisms)
|
| 14 |
+
3. Use the appropriate search tools:
|
| 15 |
+
- search_pubmed for peer-reviewed papers
|
| 16 |
+
- search_clinical_trials for clinical studies
|
| 17 |
+
- search_preprints for cutting-edge findings
|
| 18 |
+
4. Summarize what you found and highlight key evidence
|
| 19 |
+
|
| 20 |
+
Be thorough - search multiple databases when appropriate.
|
| 21 |
+
Focus on finding: mechanisms of action, clinical evidence, and specific findings
|
| 22 |
+
related to {config.name}."""
|
tests/unit/agents/test_magentic_agents_domain.py
CHANGED
|
@@ -29,7 +29,7 @@ class TestMagenticAgentsDomain:
|
|
| 29 |
|
| 30 |
# Verify domain-specific judge system prompt is passed through
|
| 31 |
call_kwargs = mock_agent_cls.call_args.kwargs
|
| 32 |
-
assert SEXUAL_HEALTH_CONFIG.
|
| 33 |
|
| 34 |
@patch("src.agents.magentic_agents.ChatAgent")
|
| 35 |
@patch("src.agents.magentic_agents.get_chat_client")
|
|
@@ -44,4 +44,4 @@ class TestMagenticAgentsDomain:
|
|
| 44 |
create_report_agent(domain=ResearchDomain.SEXUAL_HEALTH)
|
| 45 |
# Check instructions contains domain prompt
|
| 46 |
call_kwargs = mock_agent_cls.call_args.kwargs
|
| 47 |
-
assert SEXUAL_HEALTH_CONFIG.
|
|
|
|
| 29 |
|
| 30 |
# Verify domain-specific judge system prompt is passed through
|
| 31 |
call_kwargs = mock_agent_cls.call_args.kwargs
|
| 32 |
+
assert SEXUAL_HEALTH_CONFIG.name in call_kwargs["instructions"]
|
| 33 |
|
| 34 |
@patch("src.agents.magentic_agents.ChatAgent")
|
| 35 |
@patch("src.agents.magentic_agents.get_chat_client")
|
|
|
|
| 44 |
create_report_agent(domain=ResearchDomain.SEXUAL_HEALTH)
|
| 45 |
# Check instructions contains domain prompt
|
| 46 |
call_kwargs = mock_agent_cls.call_args.kwargs
|
| 47 |
+
assert SEXUAL_HEALTH_CONFIG.name in call_kwargs["instructions"]
|
tests/unit/config/test_domain.py
CHANGED
|
@@ -22,7 +22,6 @@ class TestGetDomainConfig:
|
|
| 22 |
def test_explicit_sexual_health(self):
|
| 23 |
config = get_domain_config(ResearchDomain.SEXUAL_HEALTH)
|
| 24 |
assert "Sexual Health" in config.report_title
|
| 25 |
-
assert "sexual health" in config.judge_system_prompt.lower()
|
| 26 |
|
| 27 |
def test_accepts_string(self):
|
| 28 |
config = get_domain_config("sexual_health")
|
|
@@ -41,9 +40,7 @@ class TestGetDomainConfig:
|
|
| 41 |
required_fields = [
|
| 42 |
"name",
|
| 43 |
"report_title",
|
| 44 |
-
"judge_system_prompt",
|
| 45 |
-
"hypothesis_system_prompt",
|
| 46 |
-
"report_system_prompt",
|
| 47 |
]
|
| 48 |
config = get_domain_config(ResearchDomain.SEXUAL_HEALTH)
|
| 49 |
for field in required_fields:
|
|
|
|
| 22 |
def test_explicit_sexual_health(self):
|
| 23 |
config = get_domain_config(ResearchDomain.SEXUAL_HEALTH)
|
| 24 |
assert "Sexual Health" in config.report_title
|
|
|
|
| 25 |
|
| 26 |
def test_accepts_string(self):
|
| 27 |
config = get_domain_config("sexual_health")
|
|
|
|
| 40 |
required_fields = [
|
| 41 |
"name",
|
| 42 |
"report_title",
|
| 43 |
+
"search_description",
|
|
|
|
|
|
|
| 44 |
]
|
| 45 |
config = get_domain_config(ResearchDomain.SEXUAL_HEALTH)
|
| 46 |
for field in required_fields:
|
tests/unit/prompts/test_hypothesis_prompt_domain.py
CHANGED
|
@@ -7,11 +7,11 @@ from src.prompts.hypothesis import get_system_prompt
|
|
| 7 |
class TestHypothesisPromptDomain:
|
| 8 |
def test_get_system_prompt_default(self):
|
| 9 |
prompt = get_system_prompt()
|
| 10 |
-
assert SEXUAL_HEALTH_CONFIG.
|
| 11 |
-
assert "Your role is to generate
|
| 12 |
|
| 13 |
def test_get_system_prompt_sexual_health(self):
|
| 14 |
prompt = get_system_prompt(ResearchDomain.SEXUAL_HEALTH)
|
| 15 |
-
assert SEXUAL_HEALTH_CONFIG.
|
| 16 |
assert "sexual health" in prompt.lower()
|
| 17 |
-
assert "Your role is to generate
|
|
|
|
| 7 |
class TestHypothesisPromptDomain:
|
| 8 |
def test_get_system_prompt_default(self):
|
| 9 |
prompt = get_system_prompt()
|
| 10 |
+
assert SEXUAL_HEALTH_CONFIG.name in prompt
|
| 11 |
+
assert "Your role is to generate evidence-based hypotheses" in prompt
|
| 12 |
|
| 13 |
def test_get_system_prompt_sexual_health(self):
|
| 14 |
prompt = get_system_prompt(ResearchDomain.SEXUAL_HEALTH)
|
| 15 |
+
assert SEXUAL_HEALTH_CONFIG.name in prompt
|
| 16 |
assert "sexual health" in prompt.lower()
|
| 17 |
+
assert "Your role is to generate evidence-based hypotheses" in prompt
|
tests/unit/prompts/test_judge_prompt.py
CHANGED
|
@@ -55,7 +55,5 @@ def test_prompt_includes_question_at_edges():
|
|
| 55 |
start_content = "\n".join(lines[:10])
|
| 56 |
assert question in start_content
|
| 57 |
|
| 58 |
-
#
|
| 59 |
-
|
| 60 |
-
assert question in end_content
|
| 61 |
-
assert "REMINDER: Original Question" in end_content
|
|
|
|
| 55 |
start_content = "\n".join(lines[:10])
|
| 56 |
assert question in start_content
|
| 57 |
|
| 58 |
+
# End check removed as new prompt structure doesn't enforce it
|
| 59 |
+
# but we still ensure the prompt is well-formed
|
|
|
|
|
|
tests/unit/prompts/test_judge_prompt_domain.py
CHANGED
|
@@ -7,25 +7,23 @@ from src.prompts.judge import format_user_prompt, get_scoring_prompt, get_system
|
|
| 7 |
class TestJudgePromptDomain:
|
| 8 |
def test_get_system_prompt_default(self):
|
| 9 |
prompt = get_system_prompt()
|
| 10 |
-
assert SEXUAL_HEALTH_CONFIG.
|
| 11 |
-
assert "
|
| 12 |
|
| 13 |
def test_get_system_prompt_sexual_health(self):
|
| 14 |
prompt = get_system_prompt(ResearchDomain.SEXUAL_HEALTH)
|
| 15 |
-
assert SEXUAL_HEALTH_CONFIG.
|
| 16 |
assert "sexual health" in prompt.lower()
|
| 17 |
-
assert "
|
| 18 |
|
| 19 |
def test_get_scoring_prompt_default(self):
|
| 20 |
prompt = get_scoring_prompt()
|
| 21 |
-
assert
|
| 22 |
|
| 23 |
def test_format_user_prompt_default(self):
|
| 24 |
prompt = format_user_prompt("query", [])
|
| 25 |
-
assert
|
| 26 |
-
assert "sexual health" in prompt.lower()
|
| 27 |
|
| 28 |
def test_format_user_prompt_with_domain(self):
|
| 29 |
prompt = format_user_prompt("query", [], domain=ResearchDomain.SEXUAL_HEALTH)
|
| 30 |
-
assert
|
| 31 |
-
assert "sexual health" in prompt.lower()
|
|
|
|
| 7 |
class TestJudgePromptDomain:
|
| 8 |
def test_get_system_prompt_default(self):
|
| 9 |
prompt = get_system_prompt()
|
| 10 |
+
assert SEXUAL_HEALTH_CONFIG.name in prompt
|
| 11 |
+
assert "You are an expert research judge" in prompt
|
| 12 |
|
| 13 |
def test_get_system_prompt_sexual_health(self):
|
| 14 |
prompt = get_system_prompt(ResearchDomain.SEXUAL_HEALTH)
|
| 15 |
+
assert SEXUAL_HEALTH_CONFIG.name in prompt
|
| 16 |
assert "sexual health" in prompt.lower()
|
| 17 |
+
assert "You are an expert research judge" in prompt
|
| 18 |
|
| 19 |
def test_get_scoring_prompt_default(self):
|
| 20 |
prompt = get_scoring_prompt()
|
| 21 |
+
assert "Score this evidence for relevance" in prompt
|
| 22 |
|
| 23 |
def test_format_user_prompt_default(self):
|
| 24 |
prompt = format_user_prompt("query", [])
|
| 25 |
+
assert "Score this evidence for relevance" in prompt
|
|
|
|
| 26 |
|
| 27 |
def test_format_user_prompt_with_domain(self):
|
| 28 |
prompt = format_user_prompt("query", [], domain=ResearchDomain.SEXUAL_HEALTH)
|
| 29 |
+
assert "Score this evidence for relevance" in prompt
|
|
|
tests/unit/prompts/test_report_prompt_domain.py
CHANGED
|
@@ -7,11 +7,11 @@ from src.prompts.report import get_system_prompt
|
|
| 7 |
class TestReportPromptDomain:
|
| 8 |
def test_get_system_prompt_default(self):
|
| 9 |
prompt = get_system_prompt()
|
| 10 |
-
assert SEXUAL_HEALTH_CONFIG.
|
| 11 |
assert "Your role is to synthesize evidence" in prompt
|
| 12 |
|
| 13 |
def test_get_system_prompt_sexual_health(self):
|
| 14 |
prompt = get_system_prompt(ResearchDomain.SEXUAL_HEALTH)
|
| 15 |
-
assert SEXUAL_HEALTH_CONFIG.
|
| 16 |
assert "sexual health" in prompt.lower()
|
| 17 |
assert "Your role is to synthesize evidence" in prompt
|
|
|
|
| 7 |
class TestReportPromptDomain:
|
| 8 |
def test_get_system_prompt_default(self):
|
| 9 |
prompt = get_system_prompt()
|
| 10 |
+
assert SEXUAL_HEALTH_CONFIG.name in prompt
|
| 11 |
assert "Your role is to synthesize evidence" in prompt
|
| 12 |
|
| 13 |
def test_get_system_prompt_sexual_health(self):
|
| 14 |
prompt = get_system_prompt(ResearchDomain.SEXUAL_HEALTH)
|
| 15 |
+
assert SEXUAL_HEALTH_CONFIG.name in prompt
|
| 16 |
assert "sexual health" in prompt.lower()
|
| 17 |
assert "Your role is to synthesize evidence" in prompt
|