QALoop / qa_annotate /api /analysis.py
jackkuo's picture
Seed demo LLM analysis reports by UI language.
4c341cd
Raw
History Blame Contribute Delete
7.52 kB
"""标注结果分析API"""
import logging
from fastapi import APIRouter, Depends, HTTPException
from fastapi import APIRouter, Depends, HTTPException, Query
from pydantic import BaseModel
from sqlalchemy.orm import Session
from qa_annotate.api.auth import get_current_superuser
from qa_annotate.database.base import get_db
from qa_annotate.database.crud import AnnotationResultAnalysisCRUD, LlmAnalysisCacheCRUD
from qa_annotate.schema.annotation import ProjectAnnotationAnalysis
from qa_annotate.schema.user import User
from qa_annotate.services.llm_service import (
build_notes_analysis_prompt,
call_llm_chat,
get_llm_config,
)
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Router for the endpoints in this module; every route declared on it is
# served under the "/analysis" prefix and tagged "analysis".
router = APIRouter(prefix="/analysis", tags=["analysis"])
# Response body of the "analyze-notes" endpoint: the freshly generated LLM
# report together with the model that produced it.
class LlmAnalysisResponse(BaseModel):
    analysis: str  # text returned by the LLM call
    model_name: str  # model name taken from the LLM configuration
    notes_count: int  # sum of the per-config note counts that were analysed
# Shape of a previously stored LLM report as returned by the
# "cached-analysis" endpoint.
class CachedAnalysisResponse(BaseModel):
    analysis: str  # stored report text
    model_name: str  # model name stored with the report
    notes_count: int  # note count stored with the report
    # Timestamps are filled from ``.isoformat()`` of the cached values and stay
    # None when the cache entry has no value for them.
    created_at: str | None = None
    updated_at: str | None = None
# Result of the LLM connectivity test endpoint.
class LlmTestResponse(BaseModel):
    success: bool  # True only when the test chat call returned without raising
    message: str  # localized, human-readable outcome (reply excerpt or error text)
    model_name: str | None = None  # left as None when the LLM config is incomplete
@router.get(
    "/projects/{project_id}/annotation-stats",
    response_model=ProjectAnnotationAnalysis,
)
def get_project_annotation_stats(
    project_id: int,
    db: Session = Depends(get_db),
):
    """Return the annotation-result statistics of a project.

    The statistics cover every dataset of the project:

    - Overall figures: dataset count, QA-pair count, annotation count and
      completion rate.
    - Per-config figures: detailed statistics for each annotation config.
    - Notes summary: all annotation reasons.

    Raises:
        HTTPException: 404 when the CRUD layer returns nothing for
            ``project_id``.
    """
    project_stats = AnnotationResultAnalysisCRUD.get_project_annotation_stats(
        db, project_id
    )
    if project_stats:
        return project_stats
    # A falsy result from the CRUD layer is reported as "project not found".
    raise HTTPException(status_code=404, detail="项目不存在")
def _note_field(item, field):
    """Read ``field`` from a notes-summary entry that is a dict or an object."""
    return item[field] if isinstance(item, dict) else getattr(item, field)


@router.post(
    "/projects/{project_id}/analyze-notes", response_model=LlmAnalysisResponse
)
async def analyze_notes_with_llm(
    project_id: int,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_superuser),
    lang: str = Query("zh", description="Output language: zh or en"),
):
    """Analyse a project's annotation notes with the configured LLM.

    The request must pass the ``get_current_superuser`` dependency;
    ``current_user`` is not otherwise used.

    Args:
        project_id: Project whose notes are analysed.
        db: Database session.
        current_user: Authenticated superuser (dependency only).
        lang: Requested report language. Any value starting with "en"
            (case-insensitive) selects English; everything else selects
            Chinese. This is the same rule the cached-analysis endpoint
            applies when it looks a report up.

    Returns:
        LlmAnalysisResponse with the report text, the model name and the
        total number of notes analysed.

    Raises:
        HTTPException: 400 when the LLM configuration is incomplete or the
            project has no notes, 404 when no statistics are found for the
            project, 502 when the LLM call raises.
    """
    # Normalise once so that the prompt language and the cache key agree with
    # the key used when the cached report is read back.
    language = "en" if lang.lower().startswith("en") else "zh"

    # Load the LLM configuration; all three values are required.
    llm_config = get_llm_config(db)
    api_key = llm_config.get("api_key")
    base_url = llm_config.get("base_url")
    model_name = llm_config.get("model_name")
    if not (api_key and base_url and model_name):
        raise HTTPException(
            status_code=400,
            detail="请先在系统配置中配置 LLM API Key、Base URL 和 Model Name",
        )

    # Fetch the statistics, which also carry the notes summary.
    stats = AnnotationResultAnalysisCRUD.get_project_annotation_stats(db, project_id)
    if not stats:
        raise HTTPException(status_code=404, detail="项目不存在")

    # Anything that is not a dict is treated as carrying no statistics.
    stats_dict = stats if isinstance(stats, dict) else {}
    notes_summary = stats_dict.get("notes_summary", [])
    if not notes_summary:
        raise HTTPException(status_code=400, detail="暂无标注备注可供分析")

    total_notes = sum(_note_field(item, "count") for item in notes_summary)
    notes_data = [
        {
            "config_name": _note_field(item, "config_name"),
            "count": _note_field(item, "count"),
            "notes": _note_field(item, "notes"),
        }
        for item in notes_summary
    ]

    # Build the prompt from the full statistics plus the notes.
    system_prompt, user_message = build_notes_analysis_prompt(
        notes_data, stats=stats_dict, language=language
    )

    try:
        analysis = await call_llm_chat(
            api_key=api_key,
            base_url=base_url,
            model_name=model_name,
            system_prompt=system_prompt,
            user_message=user_message,
        )
    except Exception as e:
        logger.error("LLM API 调用失败: %s", e)
        raise HTTPException(
            status_code=502, detail=f"LLM API 调用失败: {e}"
        ) from e

    # Caching is best-effort: a failure here must not hide the fresh report.
    try:
        LlmAnalysisCacheCRUD.save(
            db=db,
            project_id=project_id,
            analysis_text=analysis,
            model_name=model_name,
            notes_count=total_notes,
            language=language,
        )
    except Exception as e:
        logger.warning("保存分析缓存失败(不影响返回结果): %s", e)

    return LlmAnalysisResponse(
        analysis=analysis,
        model_name=model_name,
        notes_count=total_notes,
    )
@router.get(
    "/projects/{project_id}/cached-analysis",
    response_model=CachedAnalysisResponse,
)
def get_cached_analysis(
    project_id: int,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_superuser),
    lang: str = Query("zh", description="Report language: zh or en"),
):
    """Return the cached LLM analysis report of a project.

    The request must pass the ``get_current_superuser`` dependency;
    ``current_user`` is not otherwise used.

    Args:
        project_id: Project whose cached report is requested.
        db: Database session.
        current_user: Authenticated superuser (dependency only).
        lang: Requested report language. Any value starting with "en"
            (case-insensitive) selects the English report; everything else
            selects the Chinese one.

    Returns:
        CachedAnalysisResponse built from the cache entry, with timestamps
        rendered through ``isoformat()`` (or None when absent).

    Raises:
        HTTPException: 404 when no report is cached for the project and
            language.
    """
    language = "en" if lang.lower().startswith("en") else "zh"
    cached = LlmAnalysisCacheCRUD.get_by_project(
        db, project_id=project_id, language=language
    )
    if not cached:
        raise HTTPException(status_code=404, detail="暂无缓存的分析报告")

    created_at = cached["created_at"]
    updated_at = cached["updated_at"]
    return CachedAnalysisResponse(
        analysis=cached["analysis"],
        model_name=cached["model_name"],
        notes_count=cached["notes_count"],
        created_at=created_at.isoformat() if created_at else None,
        updated_at=updated_at.isoformat() if updated_at else None,
    )
@router.post("/test-llm-connection", response_model=LlmTestResponse)
async def test_llm_connection(
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_superuser),
    lang: str = Query("zh", description="Output language: zh or en"),
):
    """Check that the configured LLM API can be reached.

    Sends one short chat message using the stored configuration. Failures are
    reported in the response body (``success=False``) instead of being raised
    as HTTP errors.

    Args:
        db: Database session.
        current_user: Authenticated superuser (dependency only, otherwise
            unused).
        lang: Language of the returned messages. Values starting with "zh"
            (case-insensitive, so "ZH" and "zh-CN" count) produce Chinese
            text; any other value produces English text.

    Returns:
        LlmTestResponse describing the outcome. ``model_name`` is omitted
        when the configuration is incomplete.
    """
    llm_config = get_llm_config(db)
    api_key = llm_config.get("api_key")
    base_url = llm_config.get("base_url")
    model_name = llm_config.get("model_name")

    # Decide the language once; an exact comparison against "zh" would send
    # case or region variants such as "zh-CN" down the English branch.
    if lang.lower().startswith("zh"):
        not_configured_msg = "请先配置 LLM API Key、Base URL 和 Model Name"
        test_message = "请回复「连接成功」四个字。"
        success_prefix = "连接成功,模型回复:"
        fail_prefix = "连接失败:"
    else:
        not_configured_msg = (
            "Please configure LLM API Key, Base URL and Model Name first"
        )
        test_message = 'Please reply with the words "Connection successful".'
        success_prefix = "Connection successful, model replied: "
        fail_prefix = "Connection failed: "

    if not (api_key and base_url and model_name):
        return LlmTestResponse(
            success=False,
            message=not_configured_msg,
        )

    try:
        reply = await call_llm_chat(
            api_key=api_key,
            base_url=base_url,
            model_name=model_name,
            system_prompt="You are a helpful assistant.",
            user_message=test_message,
        )
        # Only the first 100 characters of the reply are echoed back. The
        # slice stays inside the try so an unexpected reply type is reported
        # as a failed test rather than an unhandled error.
        return LlmTestResponse(
            success=True,
            message=f"{success_prefix}{reply[:100]}",
            model_name=model_name,
        )
    except Exception as e:
        logger.error("LLM 连接测试失败: %s", e)
        return LlmTestResponse(
            success=False,
            message=f"{fail_prefix}{e}",
            model_name=model_name,
        )