|
|
from asgi_correlation_id import correlation_id |
|
|
from fastapi import APIRouter |
|
|
from fastapi.responses import StreamingResponse, JSONResponse |
|
|
|
|
|
from utils.bio_logger import bio_logger as logger |
|
|
from utils.i18n_util import ( |
|
|
get_language, |
|
|
create_error_response, |
|
|
) |
|
|
from utils.i18n_context import with_language |
|
|
|
|
|
from bio_requests.chat_request import ChatRequest |
|
|
|
|
|
from service.chat import ChatService |
|
|
|
|
|
# Router that groups the MCP endpoints under the "/mcp" URL prefix.
router = APIRouter(
    prefix="/mcp",
    tags=["MCP"],
)
|
|
|
|
|
|
|
|
@router.post("/bio_qa", response_model=None, operation_id="bio_qa_stream_chat")
async def bio_qa(
    query: str,
    lang: str = "en",
    is_pubmed: bool = True,
    is_web: bool = True,
):
    """
    Biomedical Q&A with Evidence-Based RAG System

    This MCP tool provides comprehensive, research-backed answers to biological and medical questions
    using a sophisticated Retrieval-Augmented Generation (RAG) system. The tool performs multi-source
    research and evidence-based synthesis to deliver accurate, well-cited responses.

    **Process Overview:**
    1. **Query Analysis & Rewriting** (30-45 seconds)
       - Analyzes the user's question and identifies key biomedical concepts
       - Performs intelligent query rewriting to improve search effectiveness
       - Generates multiple search variations to capture different aspects of the question
       - Optimizes search terms for both PubMed and web search engines

    2. **Multi-Source Literature Search** (60-90 seconds)
       - **PubMed Database Search**: Searches scientific literature database for peer-reviewed papers
       - **Web Search**: Conducts web searches for recent developments, clinical guidelines, and additional context
       - **Concurrent Processing**: Performs both searches simultaneously for efficiency
       - **Content Extraction**: Extracts and processes relevant content from search results

    3. **Intelligent Reranking** (30-45 seconds)
       - Ranks search results by relevance to the specific question
       - Filters out low-quality or irrelevant content
       - Prioritizes recent, authoritative, and highly relevant sources
       - Ensures diversity in source types (papers, guidelines, reviews, etc.)

    4. **Evidence-Based Answer Generation** (60-90 seconds)
       - Synthesizes information from multiple high-quality sources
       - Generates comprehensive, well-structured answers
       - Includes proper citations and references
       - Provides evidence-based explanations with source attribution

    **Input:**
    - query (string): A biological or medical question
      Examples: "What causes Alzheimer's disease?", "How do mRNA vaccines work?",
      "What are the latest treatments for diabetes?", "Explain CRISPR gene editing"
    - lang (string, optional): Language preference ("en" for English, "zh" for Chinese)
    - is_pubmed (boolean, optional): Enable PubMed scientific literature search (default: True)
      - When True: Searches peer-reviewed scientific papers for authoritative evidence
      - When False: Skips PubMed search to reduce processing time
    - is_web (boolean, optional): Enable web search for additional context (default: True)
      - When True: Searches web for recent developments, clinical guidelines, and additional context
      - When False: Skips web search to reduce processing time

    **Output:**
    - A comprehensive answer with the following components:
      * **Main Answer**: Evidence-based response to the question
      * **Citations**: Properly formatted references to source materials
      * **Source Links**: Direct links to PubMed papers and web sources
      * **Evidence Summary**: Overview of the evidence supporting the answer

    **Key Features:**
    - **Real-time Streaming**: Provides progress updates via Server-Sent Events (SSE)
    - **Multi-Source Research**: Combines PubMed scientific literature with web-based information
    - **Intelligent Query Processing**: Uses advanced query rewriting for better search results
    - **Quality Control**: Reranks results to ensure relevance and authority
    - **Evidence-Based Answers**: All claims are supported by cited sources
    - **Comprehensive Coverage**: Covers genetics, molecular biology, diseases, treatments, and more

    **Expected Duration:** 3 minutes (may vary based on query complexity and search configuration)

    **Performance Notes:**
    - Full search (is_pubmed=True, is_web=True): ~3 minutes with comprehensive coverage
    - PubMed only (is_pubmed=True, is_web=False): ~2 minutes, focused on scientific literature
    - Web only (is_pubmed=False, is_web=True): ~2 minutes, focused on recent developments
    - Minimal search (is_pubmed=False, is_web=False): ~1 minute, basic query processing only

    **Use Cases:**
    - Medical education and learning
    - Clinical decision support
    - Research background information
    - Patient education content
    - Healthcare professional training
    - Scientific literature exploration

    **Evidence Quality:**
    - Primary sources from peer-reviewed scientific journals
    - Recent clinical guidelines and recommendations
    - Authoritative medical websites and databases
    - Multiple source verification for key claims

    **Note:** This tool is specifically optimized for biomedical and healthcare questions.
    For best results, provide specific, well-defined questions about biological or medical topics.
    """
    # Prefix the log line with the request's correlation id.
    logger.info(f"{correlation_id.get()} Bio QA for {query}")

    # Forward the search toggles documented above. Both default to True, so
    # callers that omit them get the same request that was built before these
    # parameters existed (both sources enabled).
    chat_request = ChatRequest(
        query=query,
        language=lang,
        is_pubmed=is_pubmed,
        is_web=is_web,
    )

    # Resolve the language from the request object rather than the raw `lang`
    # argument, so any normalisation ChatRequest applies is respected.
    language = get_language(chat_request.language)

    # NOTE(review): this context manager exits as soon as the handler returns
    # the response object, which is presumably before the streaming body is
    # iterated by the server. Confirm that generate_stream does not depend on
    # with_language still being active while it produces chunks.
    with with_language(language):
        try:
            chat_service = ChatService()
            # Server-Sent Events response; the headers ask intermediaries not
            # to cache the stream and to keep the connection open.
            return StreamingResponse(
                chat_service.generate_stream(chat_request),
                media_type="text/event-stream",
                headers={
                    "Connection": "keep-alive",
                    "Cache-Control": "no-cache",
                },
            )
        except Exception as e:
            # Boundary handler: turn any failure raised while building the
            # service or the response into a localized JSON error payload.
            # NOTE(review): errors raised later, while the stream is being
            # consumed, would presumably not reach this handler — verify how
            # generate_stream reports its own failures.
            logger.error(f"{correlation_id.get()} Stream chat error: {e}")
            error_response = create_error_response(
                error_key="service_unavailable",
                details=str(e),
                error_code=500,
            )
            return JSONResponse(content=error_response, status_code=500)
|
|
|