Spaces:

youngtsai
/

career_app

Build error

Youngger9765

refactor: reorganize project structure and add new features

e66ee1b 7 months ago

13.1 kB

	"""API endpoints for case report generation"""

	import json
	from collections.abc import AsyncGenerator

	from fastapi import APIRouter, Depends
	from fastapi.responses import StreamingResponse
	from pydantic import BaseModel
	from sqlalchemy import Float, Integer, String, bindparam, text
	from sqlalchemy.ext.asyncio import AsyncSession

	from backend.database import get_db
	from backend.services.openai_service import OpenAIService

	# Router for the report endpoints; every route declared on it is served
	# under the /api/report prefix and grouped under the "report" tag.
	router = APIRouter(prefix="/api/report", tags=["report"])


	class ReportRequest(BaseModel):
	    """Body schema describing a report-generation request.

	    NOTE(review): no route visible in this file consumes this model; the
	    GET ``/generate`` endpoint accepts the same fields as query parameters.
	    """

	    # Raw text of the counselling session to analyse.
	    transcript: str
	    # Maximum number of theory chunks to retrieve.
	    top_k: int = 5
	    # Minimum similarity score a chunk must reach to be retrieved.
	    similarity_threshold: float = 0.5


	def _sse_event(payload: dict) -> str:
	    """Serialize *payload* as one Server-Sent Events ``data:`` frame."""
	    return f"data: {json.dumps(payload, ensure_ascii=False)}\n\n"


	def _extract_json_object(raw) -> "dict | None":
	    """Best-effort extraction of a JSON object from an LLM reply.

	    Tries the whole reply first, then the outermost ``{...}`` span (covers
	    replies wrapped in prose or code fences). Returns ``None`` when neither
	    candidate parses to a JSON object, so callers can fall back to defaults
	    instead of aborting the stream.
	    """
	    import re  # local import: this module does not import re at file level

	    if not isinstance(raw, str):
	        return None

	    candidates = [raw]
	    braced = re.search(r"\{.*\}", raw, re.DOTALL)
	    if braced:
	        candidates.append(braced.group(0))

	    for candidate in candidates:
	        try:
	            parsed = json.loads(candidate)
	        except json.JSONDecodeError:
	            continue
	        if isinstance(parsed, dict):
	            return parsed
	    return None


	def _as_str_list(value) -> list[str]:
	    """Coerce an LLM-supplied field into a list of strings.

	    The model is asked for JSON arrays but may answer with ``null`` or a
	    bare string; joining those directly would raise or join per character.
	    """
	    if value is None:
	        return []
	    if isinstance(value, str):
	        return [value] if value else []
	    if isinstance(value, (list, tuple)):
	        return [str(item) for item in value if item is not None]
	    return [str(value)]


	def _default_parsed_data() -> dict:
	    """Return the placeholder analysis used when the model's reply cannot be parsed."""
	    return {
	        "client_name": "未提供",
	        "gender": "未提及",
	        "age": "未提及",
	        "occupation": "未提及",
	        "education": "未提及",
	        "location": "未提及",
	        "economic_status": "未提及",
	        "family_relations": "未提及",
	        "other_info": [],
	        "main_concerns": [],
	        "counseling_goals": [],
	        "counselor_techniques": [],
	        "session_content": "無法解析",
	        "counselor_self_evaluation": "無法解析",
	    }


	async def _search_theories(
	    openai_service,
	    db: AsyncSession,
	    concerns: list[str],
	    top_k: int,
	    similarity_threshold: float,
	) -> list[dict]:
	    """Retrieve theory chunks similar to the first three concerns.

	    Returns a list of ``{"text", "document", "score"}`` dicts, at most
	    *top_k* long, each with a similarity score >= *similarity_threshold*.
	    Returns an empty list without touching the embedding service or the
	    database when there is nothing to search for.
	    """
	    search_query = " ".join(concerns[:3])  # Top 3 concerns
	    if not search_query.strip():
	        # An empty query has no meaning to embed, and the OpenAI embeddings
	        # endpoint rejects empty input; skipping keeps the report alive.
	        return []

	    query_embedding = await openai_service.create_embedding(search_query)
	    # Rendered as a bracketed literal so the SQL can CAST it to ``vector``.
	    embedding_str = "[" + ",".join(map(str, query_embedding)) + "]"

	    # NOTE(review): ``<=>`` is presumably pgvector's cosine-distance operator,
	    # making ``1 - distance`` the similarity score - confirm against the schema.
	    query_sql = text(
	        """
	SELECT
	c.id as chunk_id,
	c.text,
	d.title as document_title,
	1 - (e.embedding <=> CAST(:query_embedding AS vector)) as similarity_score
	FROM chunks c
	JOIN embeddings e ON c.id = e.chunk_id
	JOIN documents d ON c.doc_id = d.id
	WHERE 1 - (e.embedding <=> CAST(:query_embedding AS vector)) >= :threshold
	ORDER BY e.embedding <=> CAST(:query_embedding AS vector)
	LIMIT :top_k
	"""
	    ).bindparams(
	        bindparam("query_embedding", type_=String),
	        bindparam("threshold", type_=Float),
	        bindparam("top_k", type_=Integer),
	    )

	    result = await db.execute(
	        query_sql,
	        {
	            "query_embedding": embedding_str,
	            "threshold": similarity_threshold,
	            "top_k": top_k,
	        },
	    )

	    return [
	        {"text": row.text, "document": row.document_title, "score": float(row.similarity_score)}
	        for row in result.fetchall()
	    ]


	def _build_excerpt_prompt(transcript: str, num_participants: int) -> str:
	    """Build the prompt asking the model for 5-10 key dialogue excerpts as JSON."""
	    # The example snippets are plain strings, not f-strings, so braces must be
	    # single here: they are interpolated by value and doubled braces would
	    # reach the model literally.
	    if num_participants == 2:
	        speaker_instruction = (
	            '- speaker 使用 "speaker1"（通常為諮詢師）和 "speaker2"（通常為個案）'
	        )
	        speaker_example = """ "dialogues": [
	{"speaker": "speaker1", "order": 1, "text": "諮詢師的話"},
	{"speaker": "speaker2", "order": 2, "text": "個案的話"},
	{"speaker": "speaker1", "order": 3, "text": "諮詢師的話"}
	]"""
	    else:
	        speaker_labels = ", ".join(f'"speaker{i+1}"' for i in range(num_participants))
	        speaker_instruction = f"- speaker 使用 {speaker_labels}，根據逐字稿上下文判斷每位說話者"
	        speaker_example = """ "dialogues": [
	{"speaker": "speaker1", "order": 1, "text": "說話內容"},
	{"speaker": "speaker2", "order": 2, "text": "說話內容"},
	{"speaker": "speaker1", "order": 3, "text": "說話內容"}
	]"""

	    return f"""請從以下逐字稿中，挑選 5-10 句最能體現個案樣貌和諮詢重點的關鍵對話。

	逐字稿：
	{transcript}

	會談人數：{num_participants} 人

	請以 JSON 格式回答（只要 JSON，不要其他文字）：
	{{
	{speaker_example}
	}}

	注意：
	{speaker_instruction}
	- 請根據逐字稿的語境和內容，自動判斷每句話是誰說的
	- order 是對話順序編號
	- 挑選能展現個案核心議題、情緒狀態、或關鍵轉變的對話
	- 如果逐字稿中有明確標示說話者（如 Co:、Cl:、諮詢師：、個案：等），請參考這些標示
	"""


	async def generate_report_stream(
	    transcript: str,
	    top_k: int,
	    similarity_threshold: float,
	    num_participants: int,
	    db: AsyncSession,
	) -> AsyncGenerator[str, None]:
	    """Generate a case report, streaming progress as Server-Sent Events.

	    Each yielded string is one ``data: <json>\\n\\n`` frame whose JSON carries
	    ``step``, ``status``, ``message`` and, for some events, ``data``.

	    Steps:
	    1. Parse the transcript into structured client/session fields (LLM).
	    2. Report the main concerns and counselling techniques found in step 1.
	    3. Retrieve related theory chunks for the top three concerns.
	    4. Generate the report narrative (LLM); only a ``processing`` event is
	       emitted for this step.
	    5. Extract key dialogue excerpts (LLM); its ``completed`` event carries
	       the assembled report.
	    6. Final completion event.

	    Args:
	        transcript: Raw text of the counselling session.
	        top_k: Maximum number of theory chunks to retrieve.
	        similarity_threshold: Minimum similarity score for retrieved chunks.
	        num_participants: Number of speakers; selects the speaker labels used
	            in the excerpt prompt.
	        db: Async database session used for the similarity search.

	    Yields:
	        SSE frames. Any exception raised while generating is reported as a
	        single ``{"status": "error", ...}`` frame instead of propagating.
	    """

	    try:
	        openai_service = OpenAIService()

	        # Step 1: Parse transcript
	        yield _sse_event({"step": 1, "status": "processing", "message": "正在分析逐字稿結構..."})

	        parse_prompt = f"""請分析以下職涯諮詢逐字稿，提取關鍵資訊：

	逐字稿：
	{transcript}

	請以 JSON 格式回答（只要 JSON，不要其他文字）：
	{{
	"client_name": "案主化名",
	"gender": "性別",
	"age": "年齡（若未提及則填'未提及'）",
	"occupation": "部門/職業或學校科系",
	"education": "學歷（若未提及則填'未提及'）",
	"location": "現居地（若未提及則填'未提及'）",
	"economic_status": "經濟狀況描述（若未提及則填'未提及'）",
	"family_relations": "家庭關係描述",
	"other_info": ["其他重要資訊1", "其他重要資訊2"],
	"main_concerns": ["主訴問題1", "主訴問題2"],
	"counseling_goals": ["晤談目標1", "晤談目標2"],
	"counselor_techniques": ["使用的諮詢技巧1", "技巧2"],
	"session_content": "晤談內容概述",
	"counselor_self_evaluation": "諮詢師對本次晤談的自我評估"
	}}
	"""

	        parse_response = await openai_service.chat_completion(
	            messages=[{"role": "user", "content": parse_prompt}], temperature=0.3
	        )

	        # Fall back to placeholders rather than failing the whole report when
	        # the reply is not (or does not contain) a JSON object.
	        parsed_data = _extract_json_object(parse_response)
	        if parsed_data is None:
	            parsed_data = _default_parsed_data()

	        yield _sse_event(
	            {"step": 1, "status": "completed", "message": "逐字稿分析完成", "data": parsed_data}
	        )

	        # Step 2: Identify key issues
	        yield _sse_event({"step": 2, "status": "processing", "message": "正在識別關鍵議題和技巧..."})

	        # List-valued fields are normalized once so later joins cannot fail on
	        # null or scalar values returned by the model.
	        main_concerns = _as_str_list(parsed_data.get("main_concerns"))
	        techniques = _as_str_list(parsed_data.get("counselor_techniques"))
	        other_info = _as_str_list(parsed_data.get("other_info"))
	        counseling_goals = _as_str_list(parsed_data.get("counseling_goals"))

	        yield _sse_event(
	            {
	                "step": 2,
	                "status": "completed",
	                "message": f"識別到 {len(main_concerns)} 個關鍵議題",
	                "data": {"concerns": main_concerns, "techniques": techniques},
	            }
	        )

	        # Step 3: RAG search for relevant theories
	        yield _sse_event({"step": 3, "status": "processing", "message": "正在檢索相關理論..."})

	        theories = await _search_theories(
	            openai_service, db, main_concerns, top_k, similarity_threshold
	        )

	        yield _sse_event(
	            {
	                "step": 3,
	                "status": "completed",
	                "message": f"檢索到 {len(theories)} 個相關理論",
	                "data": {"theories": theories},
	            }
	        )

	        # Step 4: Generate structured report
	        yield _sse_event({"step": 4, "status": "processing", "message": "正在生成個案報告..."})

	        # Number the theories so the model can cite them as [1], [2], ...
	        context = "\n\n".join(
	            f"[{i+1}] {theory['text']}" for i, theory in enumerate(theories)
	        )

	        report_prompt = f"""你是一位專業的職涯諮詢督導。請根據以下資訊生成個案報告：

	案主基本資料：
	- 姓名（化名）：{parsed_data.get('client_name', '未提供')}
	- 性別：{parsed_data.get('gender', '未提及')}
	- 年齡：{parsed_data.get('age', '未提及')}
	- 部門/職業（學校科系）：{parsed_data.get('occupation', '未提及')}
	- 學歷：{parsed_data.get('education', '未提及')}
	- 現居地：{parsed_data.get('location', '未提及')}
	- 經濟狀況：{parsed_data.get('economic_status', '未提及')}
	- 家庭關係：{parsed_data.get('family_relations', '未提及')}
	- 其他重要資訊：{', '.join(other_info)}

	晤談內容概述：
	{parsed_data.get('session_content', '')}

	主訴問題：
	{', '.join(main_concerns)}

	晤談目標：
	{', '.join(counseling_goals)}

	使用的諮詢技巧：
	{', '.join(techniques)}

	相關理論參考：
	{context}

	請生成結構化的個案報告，包含以下部分：

	【主訴問題】
	個案說的，此次想要討論的議題

	【成因分析】
	諮詢師您認為，個案為何會有這些主訴問題，請結合引用的理論 [1], [2] 等進行分析

	【晤談目標（移動主訴）】
	諮詢師對個案諮詢目標的假設，須與個案確認

	【介入策略】
	諮詢師判斷會需要帶個案做的事，結合理論說明

	【目前成效評估】
	上述目標和策略達成的狀況如何，目前打算如何修正

	重要提醒：
	1. 請使用專業、客觀、具同理心的語氣
	2. 適當引用理論文獻 [1], [2] 等
	3. 不要使用 markdown 格式（如 ##, ###, **, - 等符號）
	4. 使用【標題】的格式來區分段落
	5. 內容直接書寫，不要用項目符號
	"""

	        report_content = await openai_service.chat_completion(
	            messages=[{"role": "user", "content": report_prompt}], temperature=0.6
	        )

	        # Step 5: Extract key dialogue excerpts (5-10 exchanges)
	        yield _sse_event({"step": 5, "status": "processing", "message": "正在提取關鍵對話片段..."})

	        excerpt_response = await openai_service.chat_completion(
	            messages=[
	                {"role": "user", "content": _build_excerpt_prompt(transcript, num_participants)}
	            ],
	            temperature=0.3,
	        )

	        # Excerpts are optional: an unparseable reply yields an empty list.
	        excerpt_data = _extract_json_object(excerpt_response) or {}
	        dialogues = excerpt_data.get("dialogues", [])
	        if not isinstance(dialogues, list):
	            dialogues = []

	        # Assemble the structured report delivered to the client
	        report = {
	            "client_info": {
	                "name": parsed_data.get("client_name", "未提供"),
	                "gender": parsed_data.get("gender", "未提及"),
	                "age": parsed_data.get("age", "未提及"),
	                "occupation": parsed_data.get("occupation", "未提及"),
	                "education": parsed_data.get("education", "未提及"),
	                "location": parsed_data.get("location", "未提及"),
	                "economic_status": parsed_data.get("economic_status", "未提及"),
	                "family_relations": parsed_data.get("family_relations", "未提及"),
	                "other_info": other_info,
	            },
	            "session_summary": {
	                "content": parsed_data.get("session_content", ""),
	                "self_evaluation": parsed_data.get("counselor_self_evaluation", ""),
	            },
	            "conceptualization": report_content,
	            "main_concerns": main_concerns,
	            "counseling_goals": counseling_goals,
	            "techniques": techniques,
	            "theories": theories,
	            "dialogue_excerpts": dialogues,
	        }

	        yield _sse_event(
	            {
	                "step": 5,
	                "status": "completed",
	                "message": "個案報告生成完成",
	                "data": {"report": report},
	            }
	        )

	        # Final message
	        yield _sse_event({"step": 6, "status": "completed", "message": "全部完成！"})

	    except Exception as e:
	        # Top-level boundary of the stream: the response has already started,
	        # so failures are reported in-band as an error event.
	        yield _sse_event({"status": "error", "message": f"發生錯誤: {str(e)}"})


	@router.get("/generate")
	async def generate_report(
	    transcript: str,
	    top_k: int = 5,
	    similarity_threshold: float = 0.5,
	    num_participants: int = 2,
	    db: AsyncSession = Depends(get_db),
	):
	    """Stream a case report for the given transcript.

	    All inputs arrive as query parameters; the database session is injected
	    through ``get_db``. The response body is a ``text/event-stream`` produced
	    by ``generate_report_stream``, one event per progress update.
	    """
	    # Headers that discourage caching and keep the connection open; the
	    # X-Accel-Buffering hint asks nginx-style proxies not to buffer the stream.
	    sse_headers = {
	        "Cache-Control": "no-cache",
	        "Connection": "keep-alive",
	        "X-Accel-Buffering": "no",
	    }
	    event_stream = generate_report_stream(
	        transcript, top_k, similarity_threshold, num_participants, db
	    )
	    return StreamingResponse(
	        event_stream,
	        media_type="text/event-stream",
	        headers=sse_headers,
	    )