File size: 5,484 Bytes
04cc0de
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
from __future__ import annotations

import os
from dataclasses import dataclass
from dotenv import load_dotenv

# Runs at import time: load variables from a .env file into the process
# environment so the os.getenv() lookups in Settings.load() can see them.
load_dotenv()


def _clean_env_value(value: str) -> str:
    value = (value or "").strip()
    if (value.startswith('"') and value.endswith('"')) or (value.startswith("'") and value.endswith("'")):
        value = value[1:-1].strip()
    return value


def _to_bool(value: str, default: bool = False) -> bool:
    if value is None:
        return default
    value = _clean_env_value(value).lower()
    return value in {"1", "true", "yes", "y", "on"}


@dataclass
class Settings:
    """Application configuration assembled from environment variables.

    Instances are normally built with :meth:`load`, which reads every field
    from the process environment, applies the defaults noted next to each
    field and validates the result.
    """

    api_keys: list[str]  # DASHSCOPE_API_KEYS, comma-separated; load() rejects an empty list
    base_url: str  # DASHSCOPE_BASE_URL
    model: str  # QWEN_MODEL (default "qwen3.5-plus")
    max_workers: int  # MAX_WORKERS (default 2)
    video_input_mode: str  # VIDEO_INPUT_MODE: "base64" (default) or "remote_url"
    video_mime_type: str  # VIDEO_MIME_TYPE (default "video/mp4")
    video_fps: int  # VIDEO_FPS (default 2)
    output_dir: str  # OUTPUT_DIR (default "outputs")

    enable_rag_final: bool  # ENABLE_RAG_FINAL (default false)

    # The four identifiers below must be non-empty when enable_rag_final is true.
    alibaba_cloud_access_key_id: str  # ALIBABA_CLOUD_ACCESS_KEY_ID
    alibaba_cloud_access_key_secret: str  # ALIBABA_CLOUD_ACCESS_KEY_SECRET
    bailian_workspace_id: str  # BAILIAN_WORKSPACE_ID
    bailian_index_id: str  # BAILIAN_INDEX_ID
    bailian_retrieve_topn: int  # BAILIAN_RETRIEVE_TOPN (default 6)
    bailian_retrieve_enable_rerank: bool  # BAILIAN_RETRIEVE_ENABLE_RERANK (default true)
    bailian_retrieve_dense_topk: int  # BAILIAN_RETRIEVE_DENSE_TOPK (default 20)
    bailian_retrieve_sparse_topk: int  # BAILIAN_RETRIEVE_SPARSE_TOPK (default 20)
    bailian_retrieve_min_score: float  # BAILIAN_RETRIEVE_MIN_SCORE (default 0.15)

    enable_video_preprocess: bool  # ENABLE_VIDEO_PREPROCESS (default true)
    video_preprocess_mode: str  # VIDEO_PREPROCESS_MODE (default "preview"); not validated here
    video_preprocess_remove_audio: bool  # VIDEO_PREPROCESS_REMOVE_AUDIO (default false)
    preprocessed_video_dir: str  # PREPROCESSED_VIDEO_DIR (default "preprocessed_videos")

    enable_audio_agent: bool  # ENABLE_AUDIO_AGENT (default true)
    audio_asr_model: str  # AUDIO_ASR_MODEL (default "qwen3-asr-flash")
    audio_chunk_seconds: int  # AUDIO_CHUNK_SECONDS (default 290)
    extracted_audio_dir: str  # EXTRACTED_AUDIO_DIR (default "extracted_audio")

    @staticmethod
    def _env_text(name: str, default: str = "") -> str:
        """Return the cleaned value of environment variable ``name``."""
        return _clean_env_value(os.getenv(name, default))

    @staticmethod
    def _env_number(name: str, default: str, cast: type):
        """Parse environment variable ``name`` with ``cast`` (``int`` or ``float``).

        Raises:
            ValueError: If the cleaned value cannot be converted. The message
                names the variable so a bad entry is easy to locate.
        """
        raw = _clean_env_value(os.getenv(name, default))
        try:
            return cast(raw)
        except ValueError as exc:
            raise ValueError(
                f"环境变量 {name} 的值 {raw!r} 无法解析为 {cast.__name__}。"
            ) from exc

    @classmethod
    def load(cls) -> "Settings":
        """Build a ``Settings`` instance from the process environment.

        Returns:
            A fully populated, validated instance.

        Raises:
            ValueError: If ``DASHSCOPE_API_KEYS`` yields no usable key, if
                ``VIDEO_INPUT_MODE`` is neither ``base64`` nor ``remote_url``,
                if a numeric variable cannot be parsed, or if
                ``ENABLE_RAG_FINAL`` is true while any of the four required
                identifiers is empty.
        """
        text = cls._env_text
        number = cls._env_number

        # Split the comma-separated key list and drop entries that are empty
        # after cleaning (e.g. from a trailing comma).
        api_keys = [
            key
            for key in map(_clean_env_value, text("DASHSCOPE_API_KEYS").split(","))
            if key
        ]
        if not api_keys:
            raise ValueError("DASHSCOPE_API_KEYS 为空,请在 .env 中配置至少一个有效 API Key。")

        video_input_mode = text("VIDEO_INPUT_MODE", "base64").lower()
        if video_input_mode not in {"base64", "remote_url"}:
            raise ValueError("VIDEO_INPUT_MODE 只能是 base64 或 remote_url。")

        enable_rag_final = _to_bool(os.getenv("ENABLE_RAG_FINAL", "false"))

        settings = cls(
            api_keys=api_keys,
            base_url=text(
                "DASHSCOPE_BASE_URL", "https://dashscope.aliyuncs.com/compatible-mode/v1"
            ),
            model=text("QWEN_MODEL", "qwen3.5-plus"),

            # More stable in the cloud: default lowered from 6 to 2.
            max_workers=number("MAX_WORKERS", "2", int),

            video_input_mode=video_input_mode,
            video_mime_type=text("VIDEO_MIME_TYPE", "video/mp4"),
            video_fps=number("VIDEO_FPS", "2", int),
            output_dir=text("OUTPUT_DIR", "outputs"),

            enable_rag_final=enable_rag_final,

            alibaba_cloud_access_key_id=text("ALIBABA_CLOUD_ACCESS_KEY_ID"),
            alibaba_cloud_access_key_secret=text("ALIBABA_CLOUD_ACCESS_KEY_SECRET"),
            bailian_workspace_id=text("BAILIAN_WORKSPACE_ID"),
            bailian_index_id=text("BAILIAN_INDEX_ID"),
            bailian_retrieve_topn=number("BAILIAN_RETRIEVE_TOPN", "6", int),
            bailian_retrieve_enable_rerank=_to_bool(
                os.getenv("BAILIAN_RETRIEVE_ENABLE_RERANK", "true")
            ),
            bailian_retrieve_dense_topk=number("BAILIAN_RETRIEVE_DENSE_TOPK", "20", int),
            bailian_retrieve_sparse_topk=number("BAILIAN_RETRIEVE_SPARSE_TOPK", "20", int),
            bailian_retrieve_min_score=number("BAILIAN_RETRIEVE_MIN_SCORE", "0.15", float),

            enable_video_preprocess=_to_bool(os.getenv("ENABLE_VIDEO_PREPROCESS", "true")),

            # More stable in the cloud: default changed from analysis to preview.
            video_preprocess_mode=text("VIDEO_PREPROCESS_MODE", "preview"),

            video_preprocess_remove_audio=_to_bool(
                os.getenv("VIDEO_PREPROCESS_REMOVE_AUDIO", "false")
            ),
            preprocessed_video_dir=text("PREPROCESSED_VIDEO_DIR", "preprocessed_videos"),

            enable_audio_agent=_to_bool(os.getenv("ENABLE_AUDIO_AGENT", "true")),
            audio_asr_model=text("AUDIO_ASR_MODEL", "qwen3-asr-flash"),
            audio_chunk_seconds=number("AUDIO_CHUNK_SECONDS", "290", int),
            extracted_audio_dir=text("EXTRACTED_AUDIO_DIR", "extracted_audio"),
        )

        if settings.enable_rag_final:
            # Insertion order fixes the order in which names are reported.
            required = {
                "ALIBABA_CLOUD_ACCESS_KEY_ID": settings.alibaba_cloud_access_key_id,
                "ALIBABA_CLOUD_ACCESS_KEY_SECRET": settings.alibaba_cloud_access_key_secret,
                "BAILIAN_WORKSPACE_ID": settings.bailian_workspace_id,
                "BAILIAN_INDEX_ID": settings.bailian_index_id,
            }
            missing = [name for name, value in required.items() if not value]
            if missing:
                raise ValueError(
                    "ENABLE_RAG_FINAL=true 时,以下环境变量必须配置:" + ", ".join(missing)
                )
        return settings