# ai-search-agent / app.py
# leonsimon23's picture
# Update app.py
# 3f38bdd verified
import os
import requests
import google.generativeai as genai
import logging
import asyncio
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from typing import List, Dict, Any
# --- Configuration ---
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# The Gemini API key and the search-backend base URL come from the environment.
# The base URL defaults to an empty string and has surrounding whitespace removed.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
SEARCH_API_BASE_URL = os.environ.get("SEARCH_API_BASE_URL", "").strip()

# Configure the Google Gemini client and create the model handle shared by
# the AI helper functions in this module.
genai.configure(api_key=GEMINI_API_KEY)
gemini_model = genai.GenerativeModel('gemini-2.5-flash')
# --- FastAPI application setup ---
app = FastAPI(
    title="AI Search Agent",
    description="一个使用 Gemini-2.5-Flash 进行查询优化和中文结果摘要的智能中间层。",
    version="2.1.0",  # version bump
)

# CORS is fully open: every origin, method and header is accepted.
# NOTE(review): wildcard origins together with allow_credentials=True is very
# permissive -- confirm that credentialed cross-origin requests are required.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
    allow_credentials=True,
)
# --- 数据模型 ---
class SearchRequest(BaseModel):
    # Request body accepted by the POST /search endpoint.
    # (Documented with comments rather than a docstring so the generated
    # schema description stays exactly as it was.)

    # Forwarded unchanged as "platform" in the payload sent to the search backend.
    platform: str
    # The user's natural-language question; it is rewritten into keywords
    # before being sent to the search backend.
    query: str
    # Forwarded unchanged as "max_results" to the search backend; defaults to 10.
    max_results: int = 10
# --- 核心 AI 功能 ---
async def get_ai_keywords(natural_language_query: str) -> str:
    """
    Ask Gemini to rewrite a natural-language question as a boolean keyword string.

    The unmodified ``natural_language_query`` is returned whenever no API key
    is configured, the model replies with an empty string, or the Gemini call
    (or reading its text) raises.
    """
    if not GEMINI_API_KEY:
        logger.warning("GEMINI_API_KEY 未设置,将使用原始查询。")
        return natural_language_query

    keyword_prompt = f"""
You are an expert academic researcher. Your task is to convert a user's natural language query into a highly effective, concise, boolean-logic keyword string for searching academic databases like PubMed.
- Use boolean operators like AND, OR.
- Use parentheses for grouping.
- Focus on core concepts.
- Keep the string concise and in English.
- Do not add any explanation, markdown, or quotation marks. Just return the pure keyword string.
User Query: "{natural_language_query}"
Keyword String:
"""
    try:
        logger.info(f"向 Gemini 发送请求 [关键词提炼],查询: '{natural_language_query}'")
        reply = await gemini_model.generate_content_async(keyword_prompt)
        keywords = reply.text.strip()
        logger.info(f"原始查询: '{natural_language_query}' -> Gemini 优化关键词: '{keywords}'")
        if keywords:
            return keywords
        logger.warning("Gemini 返回空关键词,回退到原始查询。")
    except Exception as exc:
        logger.error(f"调用 Gemini API [关键词提炼] 失败: {exc}")
    # Shared fall-back for both the empty-reply and the failure path.
    return natural_language_query
# =================================================================
# BEGIN: 修改 - AI 摘要 Prompt
# =================================================================
async def summarize_results_with_ai(papers: List[Dict[str, Any]], original_query: str) -> str | None:
    """
    Use Gemini to write one combined Simplified-Chinese summary of the search results.

    Only the first five entries of ``papers`` are considered. The summary is
    best-effort: problems never raise to the caller, they yield ``None``.

    Args:
        papers: Search results. Each entry is read for ``title`` and for
            ``abstract`` (falling back to ``summary``); entries that are not
            dicts are skipped.
        original_query: The user's original question, embedded in the prompt.

    Returns:
        The stripped summary text, or ``None`` when no API key is configured,
        there are no usable papers, the model returns empty text, or the
        Gemini call fails.
    """
    if not GEMINI_API_KEY or not papers:
        return None

    # Skip malformed entries instead of letting ``.get`` raise outside the
    # try block below, which would turn a best-effort summary into an error.
    papers_for_summary = [paper for paper in papers[:5] if isinstance(paper, dict)]
    if not papers_for_summary:
        return None

    sections = []
    for index, paper in enumerate(papers_for_summary, start=1):
        # ``or`` chains (rather than ``dict.get`` defaults) so that keys which
        # are present but None/empty still fall through to the placeholder.
        title = paper.get('title') or 'No Title'
        abstract = paper.get('abstract') or paper.get('summary') or 'No Abstract Available.'
        sections.append(f"### Paper {index}: {title}\nAbstract: {abstract}\n\n")
    context = "".join(sections)

    # The prompt explicitly instructs the model to answer in Simplified Chinese.
    prompt = f"""
You are a professional medical research assistant. Based on the abstracts of the scientific papers provided below (which are in English), write a concise and easy-to-understand summary that directly answers the user's original research question.
**CRITICAL INSTRUCTION: Your entire response MUST be written in Simplified Chinese (简体中文).**
- Start with a direct introductory sentence in Chinese.
- Use bullet points (e.g., using '·' or '-') to list the key findings.
- Base your summary STRICTLY on the information given in the English abstracts. Do not add any outside knowledge.
- The summary should be in clear, accessible Chinese language.
USER'S ORIGINAL QUESTION: "{original_query}"
PROVIDED ENGLISH ABSTRACTS:
{context}
CONCISE SUMMARY (in Simplified Chinese):
"""
    try:
        logger.info(f"向 Gemini 发送请求 [中文结果摘要],基于 {len(papers_for_summary)} 篇论文。")
        response = await gemini_model.generate_content_async(prompt)
        summary = response.text.strip()
        if not summary:
            # Mirror get_ai_keywords: an empty reply counts as "no result".
            logger.warning("Gemini 返回空摘要。")
            return None
        logger.info("Gemini 中文摘要生成成功。")
        return summary
    except Exception as e:
        logger.error(f"调用 Gemini API [中文结果摘要] 失败: {e}")
        return None
# =================================================================
# END: 修改
# =================================================================
# --- API 端点 ---
@app.get("/")
def read_root():
    # Status endpoint: always answers with the same fixed payload.
    # (A comment rather than a docstring, so the route's generated API
    # description stays exactly as it was.)
    status_payload = {"status": "AI Search Agent is running"}
    return status_payload
@app.post("/search")
async def intelligent_search(request: SearchRequest):
    """
    Run an AI-assisted search.

    Steps: rewrite the query into keywords with Gemini, forward the search to
    the configured backend, optionally summarise the results in Chinese, and
    return everything in one response object.

    Returns:
        A dict with ``original_query``, ``optimized_query``, ``ai_summary``
        (``None`` when no summary was produced) and ``results``.

    Raises:
        HTTPException: 500 when ``SEARCH_API_BASE_URL`` is not configured;
            503 when the backend call fails, returns an error status, or
            returns a body that is not valid JSON.
    """
    if not SEARCH_API_BASE_URL:
        raise HTTPException(status_code=500, detail="SEARCH_API_BASE_URL 未配置")

    # 1. Keyword extraction
    optimized_query = await get_ai_keywords(request.query)
    search_payload = {
        "platform": request.platform,
        "query": optimized_query,
        "max_results": request.max_results
    }

    # 2. Call the search backend
    # Tolerate a trailing slash in the configured base URL ("//search" otherwise).
    search_url = f"{SEARCH_API_BASE_URL.rstrip('/')}/search"
    try:
        logger.info(f"向搜索后端发送请求: {search_payload}")
        # requests is blocking, so run it in a worker thread.
        response = await asyncio.to_thread(
            requests.post, search_url, json=search_payload, timeout=30
        )
        response.raise_for_status()
        search_results_data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers JSON decoding failures on requests versions where
        # that error is not a RequestException subclass.
        logger.error(f"调用搜索后端失败: {e}")
        raise HTTPException(status_code=503, detail=f"无法连接到搜索服务: {str(e)}") from e

    # The backend may answer with JSON null or a non-object body; treat that
    # as "no results" instead of failing on ``.get``.
    if isinstance(search_results_data, dict):
        results = search_results_data.get("results", [])
    else:
        logger.warning(f"搜索后端返回了非对象的 JSON 响应 (类型: {type(search_results_data).__name__}),按空结果处理。")
        results = []

    # 3. Generate the Chinese AI summary (only for a non-empty list of results)
    ai_summary = None
    if isinstance(results, list) and results:
        ai_summary = await summarize_results_with_ai(results, request.query)

    # 4. Assemble the final response
    final_response = {
        "original_query": request.query,
        "optimized_query": optimized_query,
        "ai_summary": ai_summary,
        "results": results
    }
    return final_response