File size: 7,524 Bytes
35e7795 4c341cd 35e7795 4c341cd 35e7795 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 | """标注结果分析API"""
import logging
from fastapi import APIRouter, Depends, HTTPException
from fastapi import APIRouter, Depends, HTTPException, Query
from pydantic import BaseModel
from sqlalchemy.orm import Session
from qa_annotate.api.auth import get_current_superuser
from qa_annotate.database.base import get_db
from qa_annotate.database.crud import AnnotationResultAnalysisCRUD, LlmAnalysisCacheCRUD
from qa_annotate.schema.annotation import ProjectAnnotationAnalysis
from qa_annotate.schema.user import User
from qa_annotate.services.llm_service import (
build_notes_analysis_prompt,
call_llm_chat,
get_llm_config,
)
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/analysis", tags=["analysis"])
class LlmAnalysisResponse(BaseModel):
analysis: str
model_name: str
notes_count: int
class CachedAnalysisResponse(BaseModel):
analysis: str
model_name: str
notes_count: int
created_at: str | None = None
updated_at: str | None = None
class LlmTestResponse(BaseModel):
success: bool
message: str
model_name: str | None = None
@router.get(
"/projects/{project_id}/annotation-stats",
response_model=ProjectAnnotationAnalysis,
)
def get_project_annotation_stats(
project_id: int,
db: Session = Depends(get_db),
):
"""获取项目的标注结果统计分析
返回项目下所有数据集的标注结果统计,包括:
- 总体统计:数据集数、QA对数、标注数、完成率
- 按配置统计:每个标注配置的详细统计
- Notes汇总:所有标注理由
"""
stats = AnnotationResultAnalysisCRUD.get_project_annotation_stats(db, project_id)
if not stats:
raise HTTPException(status_code=404, detail="项目不存在")
return stats
@router.post(
"/projects/{project_id}/analyze-notes", response_model=LlmAnalysisResponse
)
async def analyze_notes_with_llm(
project_id: int,
db: Session = Depends(get_db),
current_user: User = Depends(get_current_superuser),
lang: str = Query("zh", description="Output language: zh or en"),
):
"""使用 LLM 分析标注备注
需要 superuser 权限。
"""
# 获取 LLM 配置
llm_config = get_llm_config(db)
api_key = llm_config.get("api_key")
base_url = llm_config.get("base_url")
model_name = llm_config.get("model_name")
if not api_key or not base_url or not model_name:
raise HTTPException(
status_code=400,
detail="请先在系统配置中配置 LLM API Key、Base URL 和 Model Name",
)
# 获取统计数据和 notes
stats = AnnotationResultAnalysisCRUD.get_project_annotation_stats(db, project_id)
if not stats:
raise HTTPException(status_code=404, detail="项目不存在")
# stats 是 dict
notes_summary = stats.get("notes_summary", []) if isinstance(stats, dict) else []
if not notes_summary or len(notes_summary) == 0:
raise HTTPException(status_code=400, detail="暂无标注备注可供分析")
# 计算总备注数
total_notes = sum(
item["count"] if isinstance(item, dict) else item.count
for item in notes_summary
)
notes_data = [
{
"config_name": item["config_name"] if isinstance(item, dict) else item.config_name,
"count": item["count"] if isinstance(item, dict) else item.count,
"notes": item["notes"] if isinstance(item, dict) else item.notes,
}
for item in notes_summary
]
# 构造 prompt(传入完整统计信息 + 备注)
stats_dict = stats if isinstance(stats, dict) else {}
system_prompt, user_message = build_notes_analysis_prompt(notes_data, stats=stats_dict, language=lang)
# 调用 LLM
try:
analysis = await call_llm_chat(
api_key=api_key,
base_url=base_url,
model_name=model_name,
system_prompt=system_prompt,
user_message=user_message,
)
except Exception as e:
logger.error(f"LLM API 调用失败: {e}")
raise HTTPException(
status_code=502, detail=f"LLM API 调用失败: {e}"
) from e
# 保存到缓存
try:
LlmAnalysisCacheCRUD.save(
db=db,
project_id=project_id,
analysis_text=analysis,
model_name=model_name,
notes_count=total_notes,
language=lang,
)
except Exception as e:
logger.warning(f"保存分析缓存失败(不影响返回结果): {e}")
return LlmAnalysisResponse(
analysis=analysis,
model_name=model_name,
notes_count=total_notes,
)
@router.get("/projects/{project_id}/cached-analysis")
def get_cached_analysis(
project_id: int,
db: Session = Depends(get_db),
current_user: User = Depends(get_current_superuser),
lang: str = Query("zh", description="Report language: zh or en"),
):
"""获取项目缓存的 LLM 分析报告
需要 superuser 权限。
"""
language = "en" if lang.lower().startswith("en") else "zh"
cached = LlmAnalysisCacheCRUD.get_by_project(
db, project_id=project_id, language=language
)
if not cached:
raise HTTPException(status_code=404, detail="暂无缓存的分析报告")
return CachedAnalysisResponse(
analysis=cached["analysis"],
model_name=cached["model_name"],
notes_count=cached["notes_count"],
created_at=cached["created_at"].isoformat() if cached["created_at"] else None,
updated_at=cached["updated_at"].isoformat() if cached["updated_at"] else None,
)
@router.post("/test-llm-connection", response_model=LlmTestResponse)
async def test_llm_connection(
db: Session = Depends(get_db),
current_user: User = Depends(get_current_superuser),
lang: str = Query("zh", description="Output language: zh or en"),
):
"""测试 LLM API 连接是否正常"""
llm_config = get_llm_config(db)
api_key = llm_config.get("api_key")
base_url = llm_config.get("base_url")
model_name = llm_config.get("model_name")
not_configured_msg = (
"请先配置 LLM API Key、Base URL 和 Model Name"
if lang == "zh"
else "Please configure LLM API Key, Base URL and Model Name first"
)
if not api_key or not base_url or not model_name:
return LlmTestResponse(
success=False,
message=not_configured_msg,
)
if lang == "zh":
test_message = "请回复「连接成功」四个字。"
success_prefix = "连接成功,模型回复:"
fail_prefix = "连接失败:"
else:
test_message = 'Please reply with the words "Connection successful".'
success_prefix = "Connection successful, model replied: "
fail_prefix = "Connection failed: "
try:
reply = await call_llm_chat(
api_key=api_key,
base_url=base_url,
model_name=model_name,
system_prompt="You are a helpful assistant.",
user_message=test_message,
)
return LlmTestResponse(
success=True,
message=f"{success_prefix}{reply[:100]}",
model_name=model_name,
)
except Exception as e:
logger.error(f"LLM 连接测试失败: {e}")
return LlmTestResponse(
success=False,
message=f"{fail_prefix}{e}",
model_name=model_name,
)
|