Spaces:
Sleeping
Sleeping
Upload 64 files
Browse files. This view is limited to 50 files because it contains too many changes. See the raw diff.
- app/__init__.py +1 -0
- app/__pycache__/__init__.cpython-310.pyc +0 -0
- app/__pycache__/__init__.cpython-38.pyc +0 -0
- app/__pycache__/aggregator.cpython-310.pyc +0 -0
- app/__pycache__/aggregator.cpython-38.pyc +0 -0
- app/__pycache__/audio_utils.cpython-310.pyc +0 -0
- app/__pycache__/config.cpython-310.pyc +0 -0
- app/__pycache__/config.cpython-38.pyc +0 -0
- app/__pycache__/document_utils.cpython-310.pyc +0 -0
- app/__pycache__/evidence_builder.cpython-310.pyc +0 -0
- app/__pycache__/file_utils.cpython-310.pyc +0 -0
- app/__pycache__/file_utils.cpython-38.pyc +0 -0
- app/__pycache__/llm_client.cpython-310.pyc +0 -0
- app/__pycache__/llm_client.cpython-38.pyc +0 -0
- app/__pycache__/pipeline.cpython-310.pyc +0 -0
- app/__pycache__/pipeline.cpython-38.pyc +0 -0
- app/__pycache__/prompts.cpython-310.pyc +0 -0
- app/__pycache__/prompts_rag.cpython-310.pyc +0 -0
- app/__pycache__/rag_client.cpython-310.pyc +0 -0
- app/__pycache__/rag_reporter.cpython-310.pyc +0 -0
- app/__pycache__/reporter.cpython-310.pyc +0 -0
- app/__pycache__/retriever.cpython-310.pyc +0 -0
- app/__pycache__/schemas.cpython-310.pyc +0 -0
- app/__pycache__/schemas.cpython-38.pyc +0 -0
- app/__pycache__/video_payload.cpython-310.pyc +0 -0
- app/__pycache__/video_payload.cpython-38.pyc +0 -0
- app/__pycache__/video_preprocess.cpython-310.pyc +0 -0
- app/agents/__init__.py +1 -0
- app/agents/__pycache__/__init__.cpython-310.pyc +0 -0
- app/agents/__pycache__/audio_agent.cpython-310.pyc +0 -0
- app/agents/__pycache__/base.cpython-310.pyc +0 -0
- app/agents/__pycache__/document_agent.cpython-310.pyc +0 -0
- app/agents/__pycache__/inappropriate_agent.cpython-310.pyc +0 -0
- app/agents/__pycache__/look_agent.cpython-310.pyc +0 -0
- app/agents/__pycache__/point_agent.cpython-310.pyc +0 -0
- app/agents/__pycache__/respond_agent.cpython-310.pyc +0 -0
- app/agents/__pycache__/speak_agent.cpython-310.pyc +0 -0
- app/agents/__pycache__/timeline_agent.cpython-310.pyc +0 -0
- app/agents/audio_agent.py +209 -0
- app/agents/base.py +59 -0
- app/agents/document_agent.py +95 -0
- app/agents/inappropriate_agent.py +6 -0
- app/agents/look_agent.py +6 -0
- app/agents/point_agent.py +6 -0
- app/agents/respond_agent.py +6 -0
- app/agents/speak_agent.py +6 -0
- app/agents/timeline_agent.py +6 -0
- app/aggregator.py +86 -0
- app/audio_utils.py +114 -0
- app/config.py +121 -0
app/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# empty
|
app/__pycache__/__init__.cpython-310.pyc
ADDED
|
Binary file (127 Bytes). View file
|
|
|
app/__pycache__/__init__.cpython-38.pyc
ADDED
|
Binary file (125 Bytes). View file
|
|
|
app/__pycache__/aggregator.cpython-310.pyc
ADDED
|
Binary file (1.9 kB). View file
|
|
|
app/__pycache__/aggregator.cpython-38.pyc
ADDED
|
Binary file (1.87 kB). View file
|
|
|
app/__pycache__/audio_utils.cpython-310.pyc
ADDED
|
Binary file (3.37 kB). View file
|
|
|
app/__pycache__/config.cpython-310.pyc
ADDED
|
Binary file (4.1 kB). View file
|
|
|
app/__pycache__/config.cpython-38.pyc
ADDED
|
Binary file (3.23 kB). View file
|
|
|
app/__pycache__/document_utils.cpython-310.pyc
ADDED
|
Binary file (3.06 kB). View file
|
|
|
app/__pycache__/evidence_builder.cpython-310.pyc
ADDED
|
Binary file (1.17 kB). View file
|
|
|
app/__pycache__/file_utils.cpython-310.pyc
ADDED
|
Binary file (987 Bytes). View file
|
|
|
app/__pycache__/file_utils.cpython-38.pyc
ADDED
|
Binary file (899 Bytes). View file
|
|
|
app/__pycache__/llm_client.cpython-310.pyc
ADDED
|
Binary file (3.03 kB). View file
|
|
|
app/__pycache__/llm_client.cpython-38.pyc
ADDED
|
Binary file (2.89 kB). View file
|
|
|
app/__pycache__/pipeline.cpython-310.pyc
ADDED
|
Binary file (16.7 kB). View file
|
|
|
app/__pycache__/pipeline.cpython-38.pyc
ADDED
|
Binary file (7.21 kB). View file
|
|
|
app/__pycache__/prompts.cpython-310.pyc
ADDED
|
Binary file (3.58 kB). View file
|
|
|
app/__pycache__/prompts_rag.cpython-310.pyc
ADDED
|
Binary file (2.89 kB). View file
|
|
|
app/__pycache__/rag_client.cpython-310.pyc
ADDED
|
Binary file (2.23 kB). View file
|
|
|
app/__pycache__/rag_reporter.cpython-310.pyc
ADDED
|
Binary file (1.27 kB). View file
|
|
|
app/__pycache__/reporter.cpython-310.pyc
ADDED
|
Binary file (999 Bytes). View file
|
|
|
app/__pycache__/retriever.cpython-310.pyc
ADDED
|
Binary file (5.03 kB). View file
|
|
|
app/__pycache__/schemas.cpython-310.pyc
ADDED
|
Binary file (1.7 kB). View file
|
|
|
app/__pycache__/schemas.cpython-38.pyc
ADDED
|
Binary file (1.64 kB). View file
|
|
|
app/__pycache__/video_payload.cpython-310.pyc
ADDED
|
Binary file (1.58 kB). View file
|
|
|
app/__pycache__/video_payload.cpython-38.pyc
ADDED
|
Binary file (1.51 kB). View file
|
|
|
app/__pycache__/video_preprocess.cpython-310.pyc
ADDED
|
Binary file (3.72 kB). View file
|
|
|
app/agents/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# empty
|
app/agents/__pycache__/__init__.cpython-310.pyc
ADDED
|
Binary file (134 Bytes). View file
|
|
|
app/agents/__pycache__/audio_agent.cpython-310.pyc
ADDED
|
Binary file (6.5 kB). View file
|
|
|
app/agents/__pycache__/base.cpython-310.pyc
ADDED
|
Binary file (2.09 kB). View file
|
|
|
app/agents/__pycache__/document_agent.cpython-310.pyc
ADDED
|
Binary file (3.11 kB). View file
|
|
|
app/agents/__pycache__/inappropriate_agent.cpython-310.pyc
ADDED
|
Binary file (535 Bytes). View file
|
|
|
app/agents/__pycache__/look_agent.cpython-310.pyc
ADDED
|
Binary file (499 Bytes). View file
|
|
|
app/agents/__pycache__/point_agent.cpython-310.pyc
ADDED
|
Binary file (503 Bytes). View file
|
|
|
app/agents/__pycache__/respond_agent.cpython-310.pyc
ADDED
|
Binary file (511 Bytes). View file
|
|
|
app/agents/__pycache__/speak_agent.cpython-310.pyc
ADDED
|
Binary file (503 Bytes). View file
|
|
|
app/agents/__pycache__/timeline_agent.cpython-310.pyc
ADDED
|
Binary file (515 Bytes). View file
|
|
|
app/agents/audio_agent.py
ADDED
|
@@ -0,0 +1,209 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
| 5 |
+
from typing import Any
|
| 6 |
+
|
| 7 |
+
from app.llm_client import chat_completion_json, safe_json_loads, ApiKeyPool
|
| 8 |
+
from app.schemas import AgentResult, Finding
|
| 9 |
+
from app.audio_utils import audio_file_to_data_uri, AudioChunk
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
# System prompt for the audio agent. Frames the model as the "audio agent" of a
# child-ASD assisted-screening pipeline and demands strict JSON-only output.
# NOTE: the prompt text is runtime data sent to the model and is intentionally
# kept in Chinese — do not translate or reflow it.
AUDIO_ANALYSIS_SYSTEM = """
你是儿童孤独症谱系障碍辅助筛查系统中的“音频智能体”。
输入是结构化诊断对话视频中提取出的音频转写结果。
你的任务不是做正式诊断,而是从转写内容中提取与“五不”相关的语音/语言证据。
必须只输出严格 JSON,不要输出 markdown,不要解释。
""".strip()


# User prompt template for transcript analysis. The <<TRANSCRIPT_JSON>>
# placeholder is substituted (via str.replace) with the JSON-serialized list of
# per-chunk transcripts in AudioAgent.analyze_transcripts. The embedded JSON
# sketch defines the schema that safe_json_loads / Finding expect back.
AUDIO_ANALYSIS_USER_TEMPLATE = """
请根据以下分段音频转写内容,提取与“五不”相关的证据。

重点关注:
1. 不(少)应:
- 问答中响应不足
- 明显不回应
- 语言互动参与低
2. 不(少)语:
- 主动语言少
- 回答很短
- 交流性语言不足
3. 不当:
- 明显答非所问
- 重复性语言
- 明显异常语用

输出格式:
{
"findings": [
{
"warning_type": "不(少)应|不(少)语|不当",
"start_sec": 12.0,
"end_sec": 30.0,
"confidence": 0.82,
"evidence": "简洁说明",
"behavior_tags": ["标签1", "标签2"],
"clinical_note": "临床解释",
"clip_summary": "一句话摘要",
"modality_limit": "audio_transcript_based"
}
],
"clip_level_summary": "总体摘要"
}

转写输入如下:
<<TRANSCRIPT_JSON>>
""".strip()
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
class AudioAgent:
    """Audio-modality agent: transcribes extracted audio chunks via an ASR
    model, then asks an analysis model to extract "五不"-related findings
    from the transcripts.

    Two models are involved: ``asr_model`` for speech-to-text and ``model``
    for the JSON analysis step.
    """

    def __init__(self, model: str, asr_model: str):
        self.model = model          # chat model used for transcript analysis
        self.asr_model = asr_model  # ASR model used for transcription

    def agent_name(self) -> str:
        """Stable identifier used to label this agent's results."""
        return "audio_agent"

    def transcribe_chunk(self, client, chunk: AudioChunk) -> dict[str, Any]:
        """Transcribe one audio chunk and return it with its time bounds.

        The chunk file is inlined as a base64 data URI (audio/mpeg) in an
        OpenAI-compatible ``input_audio`` content part.
        """
        data_uri = audio_file_to_data_uri(chunk.path, mime_type="audio/mpeg")
        completion = client.chat.completions.create(
            model=self.asr_model,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "input_audio",
                            "input_audio": {
                                "data": data_uri
                            }
                        }
                    ]
                }
            ],
            stream=False,
            extra_body={
                "asr_options": {
                    # Inverse text normalization disabled: keep raw verbatim
                    # transcription for downstream linguistic analysis.
                    "enable_itn": False
                }
            },
            timeout=180,
        )
        # Guard against a None content field on the ASR response.
        text = completion.choices[0].message.content or ""
        return {
            "start_sec": chunk.start_sec,
            "end_sec": chunk.end_sec,
            "transcript": text,
        }

    def _transcribe_serial(self, key_pool: ApiKeyPool, chunks: list[AudioChunk]) -> list[dict[str, Any]]:
        """Transcribe chunks one by one; used when only one API key is valid."""
        transcripts = []
        for chunk in chunks:
            # A fresh client per chunk — presumably the pool rotates keys;
            # confirm in app.llm_client.ApiKeyPool.
            client = key_pool.get_client()
            transcripts.append(self.transcribe_chunk(client, chunk))
        transcripts.sort(key=lambda x: x["start_sec"])
        return transcripts

    def _transcribe_parallel(
        self,
        key_pool: ApiKeyPool,
        chunks: list[AudioChunk],
        max_workers: int,
    ) -> list[dict[str, Any]]:
        """Transcribe chunks concurrently with a thread pool.

        Results arrive out of order (as_completed), so they are re-sorted by
        start time before returning.
        """
        transcripts = []

        def run_one(chunk: AudioChunk):
            client = key_pool.get_client()
            return self.transcribe_chunk(client, chunk)

        with ThreadPoolExecutor(max_workers=min(max_workers, len(chunks))) as ex:
            futures = [ex.submit(run_one, chunk) for chunk in chunks]
            for fut in as_completed(futures):
                transcripts.append(fut.result())

        transcripts.sort(key=lambda x: x["start_sec"])
        return transcripts

    def transcribe_chunks(
        self,
        key_pool: ApiKeyPool,
        chunks: list[AudioChunk],
        valid_key_count: int,
        max_workers: int,
        log_cb=None,
    ) -> list[dict[str, Any]]:
        """Transcribe all chunks, choosing serial vs parallel mode.

        Serial mode is used when at most one valid API key is available;
        otherwise a concurrent, key-rotating mode is used. ``log_cb`` is an
        optional progress callback taking a single message string.
        """
        log_cb = log_cb or (lambda msg: None)

        if not chunks:
            return []

        if valid_key_count <= 1:
            log_cb("音频 ASR 使用串行模式。")
            return self._transcribe_serial(key_pool, chunks)

        log_cb("音频 ASR 使用并发轮转模式。")
        return self._transcribe_parallel(
            key_pool=key_pool,
            chunks=chunks,
            max_workers=max_workers,
        )

    def analyze_transcripts(self, client, transcripts: list[dict[str, Any]]) -> AgentResult:
        """Run the analysis model over the transcripts and parse its JSON.

        On any parsing/validation failure the result degrades to zero
        findings with a summary flagging the output for manual review.
        """
        user_prompt = AUDIO_ANALYSIS_USER_TEMPLATE.replace(
            "<<TRANSCRIPT_JSON>>",
            json.dumps(transcripts, ensure_ascii=False, indent=2),
        )

        raw_text = chat_completion_json(
            client=client,
            model=self.model,
            messages=[
                {"role": "system", "content": AUDIO_ANALYSIS_SYSTEM},
                {"role": "user", "content": user_prompt},
            ],
            temperature=0.1,
            timeout=180,
        )

        try:
            payload = safe_json_loads(raw_text)
            findings = [Finding(**f) for f in payload.get("findings", [])]
            clip_level_summary = payload.get("clip_level_summary", "")
        except Exception:
            findings = []
            clip_level_summary = "音频智能体输出解析失败,建议人工复核。"

        # The whole audio track is analyzed as one clip, so clip bounds are 0;
        # per-event timing lives in each finding's start_sec / end_sec.
        return AgentResult(
            agent_name=self.agent_name(),
            clip_start_sec=0.0,
            clip_end_sec=0.0,
            findings=findings,
            clip_level_summary=clip_level_summary,
            raw_text=raw_text,
        )

    def run(
        self,
        key_pool: ApiKeyPool,
        audio_chunks: list[AudioChunk],
        valid_key_count: int,
        max_workers: int,
        log_cb=None,
    ) -> tuple[AgentResult, list[dict[str, Any]]]:
        """Full pipeline: transcribe all chunks, then analyze the transcripts.

        Returns both the AgentResult and the raw per-chunk transcripts so
        callers can persist or display the intermediate ASR output.
        """
        log_cb = log_cb or (lambda msg: None)

        transcripts = self.transcribe_chunks(
            key_pool=key_pool,
            chunks=audio_chunks,
            valid_key_count=valid_key_count,
            max_workers=max_workers,
            log_cb=log_cb,
        )

        client = key_pool.get_client()
        result = self.analyze_transcripts(client, transcripts)
        return result, transcripts
|
| 207 |
+
|
| 208 |
+
|
| 209 |
+
|
app/agents/base.py
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from abc import ABC, abstractmethod
|
| 4 |
+
from typing import Any
|
| 5 |
+
|
| 6 |
+
from app.llm_client import chat_completion_json, safe_json_loads
|
| 7 |
+
from app.schemas import AgentResult, Finding
|
| 8 |
+
from app.prompts import build_agent_prompt
|
| 9 |
+
from app.video_payload import VideoPayload
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class BaseAgent(ABC):
    """Abstract base for video-analysis agents.

    Subclasses only supply :meth:`agent_name`; the base class builds the
    multimodal request (video + agent-specific prompt) and parses the model's
    JSON reply into an :class:`AgentResult`.
    """

    def __init__(self, model: str):
        self.model = model

    @abstractmethod
    def agent_name(self) -> str:
        """Return the key used to select this agent's prompt."""
        raise NotImplementedError

    def build_messages(self, video_payload: VideoPayload) -> list[dict[str, Any]]:
        """Assemble the single-turn multimodal message for this agent."""
        video_part = {
            "type": "video_url",
            "video_url": {
                "url": video_payload.value
            },
            "fps": video_payload.fps,
        }
        prompt_part = {
            "type": "text",
            "text": build_agent_prompt(self.agent_name()),
        }
        return [{"role": "user", "content": [video_part, prompt_part]}]

    def run(self, client, video_payload: VideoPayload) -> AgentResult:
        """Invoke the model once on the video and parse its JSON reply.

        Any parsing/validation failure degrades to zero findings with a
        summary that flags the output for manual review.
        """
        raw_text = chat_completion_json(
            client=client,
            model=self.model,
            messages=self.build_messages(video_payload),
        )

        try:
            parsed = safe_json_loads(raw_text)
            parsed_findings = [Finding(**item) for item in parsed.get("findings", [])]
            summary = parsed.get("clip_level_summary", "")
        except Exception:
            parsed_findings = []
            summary = "模型输出解析失败,建议人工复核。"

        # The whole video is sent as one unsegmented clip, so the clip bounds
        # are fixed at 0; real timing lives in each finding's
        # start_sec / end_sec fields.
        return AgentResult(
            agent_name=self.agent_name(),
            clip_start_sec=0.0,
            clip_end_sec=0.0,
            findings=parsed_findings,
            clip_level_summary=summary,
            raw_text=raw_text,
        )
|
| 59 |
+
|
app/agents/document_agent.py
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from app.llm_client import chat_completion_json, safe_json_loads
|
| 4 |
+
from app.schemas import AgentResult, Finding
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
# System prompt for the document agent: frames the model as the "document
# agent" of the ASD assisted-screening pipeline (case records, screening
# scales, questionnaires, observation notes) and demands strict JSON-only
# output. Runtime data — intentionally kept in Chinese; do not translate.
DOCUMENT_AGENT_SYSTEM = """
你是一个儿童孤独症谱系障碍辅助筛查系统中的“文档智能体”。
你的任务是分析患者/家属/医生提供的文档材料,包括:
- 病例资料
- 孤独症检测表/量表
- 问卷
- 观察记录
- 其他结构化或半结构化文档

你不是做正式诊断,而是抽取与“五不”辅助筛查相关的文档证据。
你必须只输出严格 JSON,不要输出 markdown,不要解释。
""".strip()


# User prompt template for document analysis. <<DOCUMENT_BUNDLE>> is
# substituted (via str.replace) with the concatenated document text in
# DocumentAgent.run. Document findings carry no video timestamps, so the
# schema pins start_sec/end_sec to 0 and modality_limit to "document_only".
DOCUMENT_AGENT_USER_TEMPLATE = """
请分析以下文档内容,并输出结构化 JSON。

要求:
1. 关注“五不”:
- 不(少)看
- 不(少)应
- 不(少)指
- 不(少)语
- 不当
2. 每条 finding 使用以下格式:
{
"warning_type": "不(少)看|不(少)应|不(少)指|不(少)语|不当",
"start_sec": 0,
"end_sec": 0,
"confidence": 0.0,
"evidence": "从文档提炼的关键证据",
"behavior_tags": ["标签1", "标签2"],
"clinical_note": "简短临床解释",
"clip_summary": "一句话摘要",
"modality_limit": "document_only"
}
3. 文档证据没有视频时间戳,因此 start_sec 和 end_sec 固定为 0。
4. 若某一维度没有足够证据,可以不输出。
5. 输出格式必须为:
{
"findings": [...],
"clip_level_summary": "文档总体摘要"
}

文档内容如下:
<<DOCUMENT_BUNDLE>>
""".strip()
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
class DocumentAgent:
    """Agent that mines uploaded document text (case records, scales,
    questionnaires) for "五不"-related screening evidence via one LLM call."""

    def __init__(self, model: str):
        self.model = model

    def agent_name(self) -> str:
        """Stable identifier used to label this agent's results."""
        return "document_agent"

    def run(self, client, document_bundle: str) -> AgentResult:
        """Analyze *document_bundle* and parse the model's JSON reply.

        Any parsing/validation failure degrades to zero findings with a
        summary that flags the output for manual review.
        """
        user_content = DOCUMENT_AGENT_USER_TEMPLATE.replace(
            "<<DOCUMENT_BUNDLE>>", document_bundle
        )
        raw_text = chat_completion_json(
            client=client,
            model=self.model,
            messages=[
                {"role": "system", "content": DOCUMENT_AGENT_SYSTEM},
                {"role": "user", "content": user_content},
            ],
            temperature=0.1,
            timeout=180,
        )

        try:
            parsed = safe_json_loads(raw_text)
            findings = [Finding(**item) for item in parsed.get("findings", [])]
            summary = parsed.get("clip_level_summary", "")
        except Exception:
            findings = []
            summary = "文档智能体输出解析失败,建议人工复核。"

        # Document evidence carries no video timestamps, so clip bounds are 0.
        return AgentResult(
            agent_name=self.agent_name(),
            findings=findings,
            clip_start_sec=0.0,
            clip_end_sec=0.0,
            clip_level_summary=summary,
            raw_text=raw_text,
        )
|
app/agents/inappropriate_agent.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from app.agents.base import BaseAgent
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
class InappropriateAgent(BaseAgent):
    """Video agent keyed as "inappropriate_agent".

    NOTE(review): presumably covers the 不当 (inappropriate behavior)
    screening dimension — confirm against app/prompts.
    """

    def agent_name(self) -> str:
        """Key used to select this agent's prompt and label its findings."""
        return "inappropriate_agent"
|
app/agents/look_agent.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from app.agents.base import BaseAgent
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
class LookAgent(BaseAgent):
    """Video agent keyed as "look_agent".

    NOTE(review): presumably covers the 不(少)看 (reduced looking/eye-gaze)
    screening dimension — confirm against app/prompts.
    """

    def agent_name(self) -> str:
        """Key used to select this agent's prompt and label its findings."""
        return "look_agent"
|
app/agents/point_agent.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from app.agents.base import BaseAgent
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
class PointAgent(BaseAgent):
    """Video agent keyed as "point_agent".

    NOTE(review): presumably covers the 不(少)指 (reduced pointing)
    screening dimension — confirm against app/prompts.
    """

    def agent_name(self) -> str:
        """Key used to select this agent's prompt and label its findings."""
        return "point_agent"
|
app/agents/respond_agent.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from app.agents.base import BaseAgent
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
class RespondAgent(BaseAgent):
    """Video agent keyed as "respond_agent".

    NOTE(review): presumably covers the 不(少)应 (reduced responding)
    screening dimension — confirm against app/prompts.
    """

    def agent_name(self) -> str:
        """Key used to select this agent's prompt and label its findings."""
        return "respond_agent"
|
app/agents/speak_agent.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from app.agents.base import BaseAgent
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
class SpeakAgent(BaseAgent):
    """Video agent keyed as "speak_agent".

    NOTE(review): presumably covers the 不(少)语 (reduced speech)
    screening dimension — confirm against app/prompts.
    """

    def agent_name(self) -> str:
        """Key used to select this agent's prompt and label its findings."""
        return "speak_agent"
|
app/agents/timeline_agent.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from app.agents.base import BaseAgent
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
class TimelineAgent(BaseAgent):
    """Video agent keyed as "timeline_agent".

    The prompt selected by this key is defined in app/prompts; its exact
    focus is not visible from this file.
    """

    def agent_name(self) -> str:
        """Key used to select this agent's prompt and label its findings."""
        return "timeline_agent"
|
app/aggregator.py
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from collections import defaultdict
|
| 4 |
+
from app.schemas import AgentResult, MergedEvent
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def _new_group(warning_type: str, agent_name: str, f) -> dict:
    """Start a new merge group seeded from a single finding.

    clinical_note is normalized to "" so later length comparisons are safe.
    """
    return {
        "warning_type": warning_type,
        "start_sec": f.start_sec,
        "end_sec": f.end_sec,
        "confidence_sum": f.confidence,
        "count": 1,
        "evidences": [f.evidence],
        "sources": [agent_name],
        "behavior_tags": set(f.behavior_tags),
        "clinical_note": f.clinical_note or "",
    }


def _finalize_group(group: dict) -> MergedEvent:
    """Convert an accumulated merge group into a MergedEvent.

    Confidence is the mean of the merged findings' confidences, capped at 1.0.
    """
    return MergedEvent(
        warning_type=group["warning_type"],
        start_sec=group["start_sec"],
        end_sec=group["end_sec"],
        confidence=min(1.0, group["confidence_sum"] / group["count"]),
        evidences=group["evidences"],
        sources=group["sources"],
        behavior_tags=sorted(group["behavior_tags"]),
        clinical_note=group["clinical_note"],
    )


def merge_overlapping_results(results: list[AgentResult], iou_gap: float = 2.0) -> list[MergedEvent]:
    """Merge per-agent findings of the same warning type into timeline events.

    Findings are grouped by warning_type, sorted by start time, and two
    findings are merged when the next one starts within ``iou_gap`` seconds
    of the current group's end. Merged events are returned sorted by
    (start_sec, warning_type).

    Args:
        results: Per-agent results whose findings should be fused.
        iou_gap: Maximum gap (seconds) between events that still counts as
            overlapping.

    Returns:
        The merged, chronologically sorted events.
    """
    grouped = defaultdict(list)
    for result in results:
        for finding in result.findings:
            grouped[finding.warning_type].append((result.agent_name, finding))

    merged: list[MergedEvent] = []

    for warning_type, items in grouped.items():
        items.sort(key=lambda x: x[1].start_sec)

        current = None
        for agent_name, f in items:
            if current is None:
                current = _new_group(warning_type, agent_name, f)
                continue

            if f.start_sec <= current["end_sec"] + iou_gap:
                # Extend the current group with this overlapping finding.
                current["end_sec"] = max(current["end_sec"], f.end_sec)
                current["confidence_sum"] += f.confidence
                current["count"] += 1
                current["evidences"].append(f.evidence)
                current["sources"].append(agent_name)
                current["behavior_tags"].update(f.behavior_tags)
                # BUGFIX: clinical_note may be None (the group-init path
                # already guarded with `or ""`); keep the longest note.
                note = f.clinical_note or ""
                if len(note) > len(current["clinical_note"]):
                    current["clinical_note"] = note
            else:
                merged.append(_finalize_group(current))
                current = _new_group(warning_type, agent_name, f)

        if current is not None:
            merged.append(_finalize_group(current))

    merged.sort(key=lambda x: (x.start_sec, x.warning_type))
    return merged
|
app/audio_utils.py
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import base64
|
| 4 |
+
import os
|
| 5 |
+
import shutil
|
| 6 |
+
import subprocess
|
| 7 |
+
from dataclasses import dataclass
|
| 8 |
+
from typing import List
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
@dataclass
class AudioChunk:
    """One fixed-length slice of an extracted audio track.

    Produced by split_audio_to_chunks; the time fields are offsets within the
    original media, in seconds.
    """

    # Filesystem path of the chunk file (an .mp3 written by ffmpeg).
    path: str
    # Start offset of this chunk within the source media, in seconds.
    start_sec: float
    # End offset of this chunk within the source media, in seconds.
    end_sec: float
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def check_ffmpeg_available() -> bool:
    """Report whether both ffmpeg and ffprobe executables are on PATH."""
    required_tools = ("ffmpeg", "ffprobe")
    return all(shutil.which(tool) is not None for tool in required_tools)
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def ensure_dir(path: str) -> None:
    """Create directory *path* (and any missing parents); no-op if it exists.

    NOTE(review): os.makedirs("") raises FileNotFoundError, so callers must
    pass a non-empty path.
    """
    os.makedirs(path, exist_ok=True)
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def get_media_duration(path: str) -> float:
    """Return the duration of a media file in seconds, via ffprobe.

    Raises:
        RuntimeError: If ffprobe exits with a non-zero status.
    """
    probe_cmd = [
        "ffprobe",
        "-v", "error",
        "-show_entries", "format=duration",
        "-of", "default=noprint_wrappers=1:nokey=1",
        path,
    ]
    result = subprocess.run(probe_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    if result.returncode != 0:
        raise RuntimeError(f"ffprobe 获取时长失败: {result.stderr}")
    return float(result.stdout.strip())
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def extract_audio_from_video(
    video_path: str,
    output_audio_path: str,
    bitrate: str = "64k",
) -> str:
    """Extract the audio track from a video as 16 kHz mono mp3 for ASR.

    Args:
        video_path: Source video file; must exist.
        output_audio_path: Destination mp3 path; its parent directory is
            created when the path has one.
        bitrate: Target mp3 bitrate passed to ffmpeg's ``-b:a``.

    Returns:
        ``output_audio_path``, for call chaining.

    Raises:
        FileNotFoundError: If the video does not exist.
        RuntimeError: If ffmpeg/ffprobe are missing or ffmpeg fails.
    """
    if not os.path.exists(video_path):
        raise FileNotFoundError(f"视频不存在: {video_path}")
    if not check_ffmpeg_available():
        raise RuntimeError("未检测到 ffmpeg/ffprobe,请先安装 ffmpeg。")

    # BUGFIX: os.path.dirname() returns "" when the output path has no
    # directory component, and os.makedirs("") raises FileNotFoundError —
    # only create the directory when there actually is one.
    out_dir = os.path.dirname(output_audio_path)
    if out_dir:
        ensure_dir(out_dir)

    cmd = [
        "ffmpeg",
        "-y",
        "-i", video_path,
        "-vn",            # drop the video stream
        "-ac", "1",       # downmix to mono
        "-ar", "16000",   # 16 kHz sample rate for ASR
        "-c:a", "mp3",
        "-b:a", bitrate,
        output_audio_path,
    ]
    proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    if proc.returncode != 0:
        raise RuntimeError(f"抽取音频失败:\n{proc.stderr}")
    return output_audio_path
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def split_audio_to_chunks(
    audio_path: str,
    output_dir: str,
    chunk_seconds: int = 290,
) -> List[AudioChunk]:
    """Cut the audio file into fixed-length chunks.

    Keeps each chunk under the single-request duration limit of
    qwen3-asr-flash. Chunks are written to *output_dir* as
    ``audio_chunk_NNN.mp3`` using ffmpeg stream copy (no re-encode).

    Raises:
        RuntimeError: If any ffmpeg invocation fails.
    """
    ensure_dir(output_dir)
    total_seconds = get_media_duration(audio_path)

    pieces: List[AudioChunk] = []
    offset = 0.0
    index = 0
    while offset < total_seconds:
        stop = min(total_seconds, offset + chunk_seconds)
        piece_path = os.path.join(output_dir, f"audio_chunk_{index:03d}.mp3")

        split_cmd = [
            "ffmpeg",
            "-y",
            "-i", audio_path,
            "-ss", str(offset),
            "-t", str(stop - offset),
            "-acodec", "copy",   # stream copy keeps splitting fast
            piece_path,
        ]
        proc = subprocess.run(split_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
        if proc.returncode != 0:
            raise RuntimeError(f"切分音频失败:\n{proc.stderr}")

        pieces.append(AudioChunk(path=piece_path, start_sec=offset, end_sec=stop))
        offset = stop
        index += 1

    return pieces
| 108 |
+
|
| 109 |
+
|
| 110 |
+
def audio_file_to_data_uri(audio_path: str, mime_type: str = "audio/mpeg") -> str:
    """Encode an audio file's bytes as a base64 ``data:`` URI."""
    with open(audio_path, "rb") as fh:
        raw = fh.read()
    encoded = base64.b64encode(raw).decode("utf-8")
    return "data:{};base64,{}".format(mime_type, encoded)
|
| 114 |
+
|
app/config.py
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
from dataclasses import dataclass
|
| 5 |
+
from dotenv import load_dotenv
|
| 6 |
+
|
| 7 |
+
load_dotenv()
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def _clean_env_value(value: str) -> str:
|
| 11 |
+
value = (value or "").strip()
|
| 12 |
+
if (value.startswith('"') and value.endswith('"')) or (value.startswith("'") and value.endswith("'")):
|
| 13 |
+
value = value[1:-1].strip()
|
| 14 |
+
return value
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def _to_bool(value: str, default: bool = False) -> bool:
|
| 18 |
+
if value is None:
|
| 19 |
+
return default
|
| 20 |
+
value = _clean_env_value(value).lower()
|
| 21 |
+
return value in {"1", "true", "yes", "y", "on"}
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
@dataclass
|
| 25 |
+
class Settings:
|
| 26 |
+
api_keys: list[str]
|
| 27 |
+
base_url: str
|
| 28 |
+
model: str
|
| 29 |
+
max_workers: int
|
| 30 |
+
video_input_mode: str
|
| 31 |
+
video_mime_type: str
|
| 32 |
+
video_fps: int
|
| 33 |
+
output_dir: str
|
| 34 |
+
|
| 35 |
+
enable_rag_final: bool
|
| 36 |
+
|
| 37 |
+
alibaba_cloud_access_key_id: str
|
| 38 |
+
alibaba_cloud_access_key_secret: str
|
| 39 |
+
bailian_workspace_id: str
|
| 40 |
+
bailian_index_id: str
|
| 41 |
+
bailian_retrieve_topn: int
|
| 42 |
+
bailian_retrieve_enable_rerank: bool
|
| 43 |
+
bailian_retrieve_dense_topk: int
|
| 44 |
+
bailian_retrieve_sparse_topk: int
|
| 45 |
+
bailian_retrieve_min_score: float
|
| 46 |
+
|
| 47 |
+
enable_video_preprocess: bool
|
| 48 |
+
video_preprocess_mode: str
|
| 49 |
+
video_preprocess_remove_audio: bool
|
| 50 |
+
preprocessed_video_dir: str
|
| 51 |
+
|
| 52 |
+
# 新增:音频智能体
|
| 53 |
+
enable_audio_agent: bool
|
| 54 |
+
audio_asr_model: str
|
| 55 |
+
audio_chunk_seconds: int
|
| 56 |
+
extracted_audio_dir: str
|
| 57 |
+
|
| 58 |
+
@staticmethod
|
| 59 |
+
def load() -> "Settings":
|
| 60 |
+
raw_keys = _clean_env_value(os.getenv("DASHSCOPE_API_KEYS", ""))
|
| 61 |
+
api_keys = []
|
| 62 |
+
for item in raw_keys.split(","):
|
| 63 |
+
k = _clean_env_value(item)
|
| 64 |
+
if k:
|
| 65 |
+
api_keys.append(k)
|
| 66 |
+
|
| 67 |
+
if not api_keys:
|
| 68 |
+
raise ValueError("DASHSCOPE_API_KEYS 为空,请在 .env 中配置至少一个有效 API Key。")
|
| 69 |
+
|
| 70 |
+
video_input_mode = _clean_env_value(os.getenv("VIDEO_INPUT_MODE", "base64")).lower()
|
| 71 |
+
if video_input_mode not in {"base64", "remote_url"}:
|
| 72 |
+
raise ValueError("VIDEO_INPUT_MODE 只能是 base64 或 remote_url。")
|
| 73 |
+
|
| 74 |
+
enable_rag_final = _to_bool(os.getenv("ENABLE_RAG_FINAL", "false"))
|
| 75 |
+
|
| 76 |
+
settings = Settings(
|
| 77 |
+
api_keys=api_keys,
|
| 78 |
+
base_url=_clean_env_value(os.getenv("DASHSCOPE_BASE_URL", "https://dashscope.aliyuncs.com/compatible-mode/v1")),
|
| 79 |
+
model=_clean_env_value(os.getenv("QWEN_MODEL", "qwen3.5-plus")),
|
| 80 |
+
max_workers=int(_clean_env_value(os.getenv("MAX_WORKERS", "6"))),
|
| 81 |
+
video_input_mode=video_input_mode,
|
| 82 |
+
video_mime_type=_clean_env_value(os.getenv("VIDEO_MIME_TYPE", "video/mp4")),
|
| 83 |
+
video_fps=int(_clean_env_value(os.getenv("VIDEO_FPS", "2"))),
|
| 84 |
+
output_dir=_clean_env_value(os.getenv("OUTPUT_DIR", "outputs")),
|
| 85 |
+
enable_rag_final=enable_rag_final,
|
| 86 |
+
alibaba_cloud_access_key_id=_clean_env_value(os.getenv("ALIBABA_CLOUD_ACCESS_KEY_ID", "")),
|
| 87 |
+
alibaba_cloud_access_key_secret=_clean_env_value(os.getenv("ALIBABA_CLOUD_ACCESS_KEY_SECRET", "")),
|
| 88 |
+
bailian_workspace_id=_clean_env_value(os.getenv("BAILIAN_WORKSPACE_ID", "")),
|
| 89 |
+
bailian_index_id=_clean_env_value(os.getenv("BAILIAN_INDEX_ID", "")),
|
| 90 |
+
bailian_retrieve_topn=int(_clean_env_value(os.getenv("BAILIAN_RETRIEVE_TOPN", "6"))),
|
| 91 |
+
bailian_retrieve_enable_rerank=_to_bool(os.getenv("BAILIAN_RETRIEVE_ENABLE_RERANK", "true")),
|
| 92 |
+
bailian_retrieve_dense_topk=int(_clean_env_value(os.getenv("BAILIAN_RETRIEVE_DENSE_TOPK", "20"))),
|
| 93 |
+
bailian_retrieve_sparse_topk=int(_clean_env_value(os.getenv("BAILIAN_RETRIEVE_SPARSE_TOPK", "20"))),
|
| 94 |
+
bailian_retrieve_min_score=float(_clean_env_value(os.getenv("BAILIAN_RETRIEVE_MIN_SCORE", "0.15"))),
|
| 95 |
+
enable_video_preprocess=_to_bool(os.getenv("ENABLE_VIDEO_PREPROCESS", "true")),
|
| 96 |
+
video_preprocess_mode=_clean_env_value(os.getenv("VIDEO_PREPROCESS_MODE", "analysis")),
|
| 97 |
+
video_preprocess_remove_audio=_to_bool(os.getenv("VIDEO_PREPROCESS_REMOVE_AUDIO", "false")),
|
| 98 |
+
preprocessed_video_dir=_clean_env_value(os.getenv("PREPROCESSED_VIDEO_DIR", "preprocessed_videos")),
|
| 99 |
+
|
| 100 |
+
enable_audio_agent=_to_bool(os.getenv("ENABLE_AUDIO_AGENT", "true")),
|
| 101 |
+
audio_asr_model=_clean_env_value(os.getenv("AUDIO_ASR_MODEL", "qwen3-asr-flash")),
|
| 102 |
+
audio_chunk_seconds=int(_clean_env_value(os.getenv("AUDIO_CHUNK_SECONDS", "290"))),
|
| 103 |
+
extracted_audio_dir=_clean_env_value(os.getenv("EXTRACTED_AUDIO_DIR", "extracted_audio")),
|
| 104 |
+
)
|
| 105 |
+
|
| 106 |
+
if settings.enable_rag_final:
|
| 107 |
+
missing = []
|
| 108 |
+
if not settings.alibaba_cloud_access_key_id:
|
| 109 |
+
missing.append("ALIBABA_CLOUD_ACCESS_KEY_ID")
|
| 110 |
+
if not settings.alibaba_cloud_access_key_secret:
|
| 111 |
+
missing.append("ALIBABA_CLOUD_ACCESS_KEY_SECRET")
|
| 112 |
+
if not settings.bailian_workspace_id:
|
| 113 |
+
missing.append("BAILIAN_WORKSPACE_ID")
|
| 114 |
+
if not settings.bailian_index_id:
|
| 115 |
+
missing.append("BAILIAN_INDEX_ID")
|
| 116 |
+
if missing:
|
| 117 |
+
raise ValueError(
|
| 118 |
+
"ENABLE_RAG_FINAL=true 时,以下环境变量必须配置:" + ", ".join(missing)
|
| 119 |
+
)
|
| 120 |
+
|
| 121 |
+
return settings
|