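# NOTE: the three lines below are file-viewer residue, not Python source
# (apparently uploader avatar text, commit message, and short commit hash).
# jackkuo's picture
# reinit repo
# 82bf89e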
from asgi_correlation_id import correlation_id
from fastapi import APIRouter
from fastapi.responses import StreamingResponse, JSONResponse
from utils.bio_logger import bio_logger as logger
from utils.i18n_util import (
get_language,
create_error_response,
)
from utils.i18n_context import with_language
from bio_requests.chat_request import ChatRequest
from service.chat import ChatService
# Router for the endpoints in this module: every route registered on it is
# served under the "/mcp" path prefix and tagged "MCP" in the OpenAPI schema.
router = APIRouter(prefix="/mcp", tags=["MCP"])
@router.post("/bio_qa", response_model=None, operation_id="bio_qa_stream_chat")
async def bio_qa(
    query: str,
    lang: str = "en",
    is_pubmed: bool = True,
    is_web: bool = True,
):
    """
    Biomedical Q&A with Evidence-Based RAG System

    This MCP tool provides comprehensive, research-backed answers to biological and medical questions
    using a sophisticated Retrieval-Augmented Generation (RAG) system. The tool performs multi-source
    research and evidence-based synthesis to deliver accurate, well-cited responses.

    **Process Overview:**

    1. **Query Analysis & Rewriting** (30-45 seconds)
       - Analyzes the user's question and identifies key biomedical concepts
       - Performs intelligent query rewriting to improve search effectiveness
       - Generates multiple search variations to capture different aspects of the question
       - Optimizes search terms for both PubMed and web search engines

    2. **Multi-Source Literature Search** (60-90 seconds)
       - **PubMed Database Search**: Searches scientific literature database for peer-reviewed papers
       - **Web Search**: Conducts web searches for recent developments, clinical guidelines, and additional context
       - **Concurrent Processing**: Performs both searches simultaneously for efficiency
       - **Content Extraction**: Extracts and processes relevant content from search results

    3. **Intelligent Reranking** (30-45 seconds)
       - Ranks search results by relevance to the specific question
       - Filters out low-quality or irrelevant content
       - Prioritizes recent, authoritative, and highly relevant sources
       - Ensures diversity in source types (papers, guidelines, reviews, etc.)

    4. **Evidence-Based Answer Generation** (60-90 seconds)
       - Synthesizes information from multiple high-quality sources
       - Generates comprehensive, well-structured answers
       - Includes proper citations and references
       - Provides evidence-based explanations with source attribution

    **Input:**
    - query (string): A biological or medical question
      Examples: "What causes Alzheimer's disease?", "How do mRNA vaccines work?",
      "What are the latest treatments for diabetes?", "Explain CRISPR gene editing"
    - lang (string, optional): Language preference ("en" for English, "zh" for Chinese)
    - is_pubmed (boolean, optional): Enable PubMed scientific literature search (default: True)
      - When True: Searches peer-reviewed scientific papers for authoritative evidence
      - When False: Skips PubMed search to reduce processing time
    - is_web (boolean, optional): Enable web search for additional context (default: True)
      - When True: Searches web for recent developments, clinical guidelines, and additional context
      - When False: Skips web search to reduce processing time

    **Output:**
    - A comprehensive answer with the following components:
      * **Main Answer**: Evidence-based response to the question
      * **Citations**: Properly formatted references to source materials
      * **Source Links**: Direct links to PubMed papers and web sources
      * **Evidence Summary**: Overview of the evidence supporting the answer
    - If the stream cannot be started, a JSON error payload is returned with HTTP status 500.

    **Key Features:**
    - **Real-time Streaming**: Provides progress updates via Server-Sent Events (SSE)
    - **Multi-Source Research**: Combines PubMed scientific literature with web-based information
    - **Intelligent Query Processing**: Uses advanced query rewriting for better search results
    - **Quality Control**: Reranks results to ensure relevance and authority
    - **Evidence-Based Answers**: All claims are supported by cited sources
    - **Comprehensive Coverage**: Covers genetics, molecular biology, diseases, treatments, and more

    **Expected Duration:** 3 minutes (may vary based on query complexity and search configuration)

    **Performance Notes:**
    - Full search (is_pubmed=True, is_web=True): ~3 minutes with comprehensive coverage
    - PubMed only (is_pubmed=True, is_web=False): ~2 minutes, focused on scientific literature
    - Web only (is_pubmed=False, is_web=True): ~2 minutes, focused on recent developments
    - Minimal search (is_pubmed=False, is_web=False): ~1 minute, basic query processing only

    **Use Cases:**
    - Medical education and learning
    - Clinical decision support
    - Research background information
    - Patient education content
    - Healthcare professional training
    - Scientific literature exploration

    **Evidence Quality:**
    - Primary sources from peer-reviewed scientific journals
    - Recent clinical guidelines and recommendations
    - Authoritative medical websites and databases
    - Multiple source verification for key claims

    **Note:** This tool is specifically optimized for biomedical and healthcare questions.
    For best results, provide specific, well-defined questions about biological or medical topics.
    """
    logger.info(f"{correlation_id.get()} Bio QA for {query}")

    # Forward the caller's search switches instead of hard-coding them, so the
    # is_pubmed / is_web inputs described in the docstring actually take effect.
    # Both default to True, which is what the endpoint always used before.
    chat_request = ChatRequest(
        query=query,
        language=lang,
        is_pubmed=is_pubmed,
        is_web=is_web,
    )

    # Resolve the language setting from the request.
    language = get_language(chat_request.language)

    # Use the context manager to set the language while the response is built.
    # NOTE(review): the response is returned from inside this block, so the
    # context is exited before the stream body is consumed — confirm that
    # generate_stream does not rely on the language context while streaming.
    with with_language(language):
        try:
            chat_service = ChatService()
            return StreamingResponse(
                chat_service.generate_stream(chat_request),
                media_type="text/event-stream",
                headers={
                    "Connection": "keep-alive",
                    "Cache-Control": "no-cache",
                },
            )
        except Exception as e:
            # Boundary handler: covers failures while creating the service or
            # the response object. NOTE(review): errors raised later, while
            # the stream is being iterated, presumably do not reach this
            # handler — verify how generate_stream reports them.
            logger.error(f"{correlation_id.get()} Stream chat error: {e}")
            error_response = create_error_response(
                error_key="service_unavailable",
                details=str(e),
                error_code=500,
            )
            return JSONResponse(content=error_response, status_code=500)