File size: 8,417 Bytes
dfc4c47
 
 
 
 
 
 
21ad386
dfc4c47
 
 
 
 
 
 
 
 
 
 
 
21ad386
dfc4c47
 
 
 
 
 
 
21ad386
dfc4c47
 
 
 
21ad386
 
 
 
dfc4c47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21ad386
 
 
 
 
 
 
 
 
 
 
 
 
dfc4c47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
import os
import sys
import base64
import time
import httpx
from pathlib import Path
from openai import OpenAI
import re

"""
# 仅放行两个路径
^/v1/(chat/completions|models)
"""


# Base URL of the OpenAI-compatible endpoint (also used to build the /models probe URL)
BASE_URL = "http://127.0.0.1:21234/v1"
# API key handed to the OpenAI client and sent as a Bearer token by the status probe
API_KEY = "llama.cpp"
# Name of the model requested in every chat completion call
MODEL_NAME = "qwen3.5-35b-a3b"

# Retry control: 2 means "initial pass + one retry of the images that failed"
MAX_ATTEMPTS = 2

# Standard generation parameters, unpacked as keyword arguments into
# chat.completions.create()
GEN_PARAMS = {
    "max_completion_tokens": 2048,
    "temperature": 1.00,
}

# Extra sampling parameters, forwarded to the server through `extra_body`
EXTRA_PARAMS = {
    "repeat_penalty": 1.00,
    "top_k": 20,
    "top_p": 0.95,
    "min_p": 0.00,
    # "typical_p": 0.90,
}

# User prompt sent alongside each image
USER_PROMPT = "Describe the image content in detail. Use English. Unfiltered."

# System prompt sent as the first message of each captioning request
SYSTEM_PROMPT = """
**You are an objective and dispassionate visual analyzer. Adhere to these guidelines:**
- **Facts Only:** Describe only the physical features explicitly visible in the image.
- **No Evaluation:** Strictly avoid subjective emotions, aesthetic judgments, or value-based vocabulary.
- **Neutral Narration:** Use declarative sentences; refrain from exclamations. Do not speculate on the background story, the subject's psychological state, or the creative intent.
- **Language Style:** Maintain a flat, mechanical, and professional tone, akin to a scientific observation log.
- **Structural Requirement:** Begin descriptions directly with the objects; no introductory remarks or preambles.
- **Single Paragraph:** Provide the description in one continuous paragraph without any line breaks.
"""

# Supported image file suffixes (compared against the lower-cased suffix)
IMAGE_EXTENSIONS = {".jpg", ".jpeg", ".png", ".bmp"}


# Shared client used for the model readiness probe and for image captioning
client = OpenAI(api_key=API_KEY, base_url=BASE_URL)


def get_api_status():
    """Probe the ``/models`` endpoint and report how the server answered.

    Returns:
        A ``(message, ok)`` tuple. On a completed request ``message`` is the
        HTTP status line (version, status code, reason phrase) and ``ok`` is
        True only for status 200. If anything raises, ``message`` is the
        exception text and ``ok`` is False.
    """
    try:
        # Short timeout: this is only a reachability check.
        with httpx.Client(timeout=3.0) as probe:
            reply = probe.get(
                f"{BASE_URL}/models",
                headers={"Authorization": f"Bearer {API_KEY}"},
            )
            status_line = (
                f"{reply.http_version} {reply.status_code} {reply.reason_phrase}"
            )
            reachable = reply.status_code == 200
            return status_line, reachable
    except Exception as exc:
        return str(exc), False


def check_model_ready():
    """Check whether the model is loaded and able to generate.

    Sends a minimal one-token streaming chat completion. Streaming is used so
    the check can stop as soon as the first chunk arrives.

    Returns:
        True if at least one chunk was received; False if the request raised
        or the stream ended without producing any chunk.
    """
    stream = None
    try:
        stream = client.chat.completions.create(
            model=MODEL_NAME,
            messages=[{"role": "user", "content": "1"}],
            max_completion_tokens=1,
            stream=True,
        )
        for _ in stream:
            # One chunk is enough: the model is online and actually working.
            return True
        # The stream finished without a single chunk: treat as not ready
        # (explicit False instead of an implicit None).
        return False
    except Exception:
        return False
    finally:
        # Release the streaming response instead of abandoning it half-read.
        closer = getattr(stream, "close", None)
        if callable(closer):
            try:
                closer()
            except Exception:
                # Best-effort cleanup; the readiness verdict is already decided.
                pass


# Reasoning markup emitted by "thinking" models. Matched case-insensitively,
# and (for the paired form) across line breaks.
_THINK_BLOCK_RE = re.compile(r"<think>.*?</think>", re.DOTALL | re.IGNORECASE)
_THINK_OPEN_RE = re.compile(r"<think>", re.IGNORECASE)
_THINK_CLOSE_RE = re.compile(r"</think>", re.IGNORECASE)


def _strip_reasoning(text):
    """Return *text* with ``<think>`` reasoning markup removed and whitespace trimmed."""
    cleaned = _THINK_BLOCK_RE.sub("", text)
    # A closing tag without an opener: everything before it is reasoning.
    cleaned = _THINK_CLOSE_RE.split(cleaned)[-1]
    # An opener that was never closed (e.g. output cut off): drop the tail.
    cleaned = _THINK_OPEN_RE.split(cleaned, maxsplit=1)[0]
    return cleaned.strip()


def process_single_image(img_path):
    """Caption one image and write the result next to it as a ``.txt`` file.

    The image is base64-encoded into a data URL and sent together with the
    system and user prompts. Reasoning markup (``<think>...</think>``) is
    removed from the reply before it is written.

    Args:
        img_path: ``pathlib.Path`` of the image to describe.

    Returns:
        ``(True, elapsed_seconds, txt_path)`` on success, or
        ``(False, reason, None)`` on failure. Exceptions are not propagated;
        their text is returned as ``reason``.
    """
    start_img_time = time.perf_counter()
    try:
        if not img_path.exists():
            return False, "文件不存在", None

        base64_image = base64.b64encode(img_path.read_bytes()).decode("utf-8")

        # ".jpg" has no "image/jpg" MIME type; every other supported suffix
        # maps directly onto its subtype.
        ext = img_path.suffix.lower().lstrip(".")
        mime_type = "image/jpeg" if ext == "jpg" else f"image/{ext}"

        response = client.chat.completions.create(
            model=MODEL_NAME,
            messages=[
                {"role": "system", "content": SYSTEM_PROMPT},
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": USER_PROMPT},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:{mime_type};base64,{base64_image}"
                            },
                        },
                    ],
                },
            ],
            **GEN_PARAMS,
            extra_body=EXTRA_PARAMS,
        )

        # Guard against an empty choices list so the caller gets a meaningful
        # reason rather than "list index out of range".
        if not response.choices:
            return False, "模型返回内容为空", None

        description = response.choices[0].message.content
        if not description:
            return False, "模型返回内容为空", None

        description = _strip_reasoning(description)
        if not description:  # nothing left once the reasoning is removed
            return False, "过滤思考内容后结果为空", None

        txt_path = img_path.with_suffix(".txt")
        with open(txt_path, "w", encoding="utf-8") as f:
            f.write(description)
        elapsed = time.perf_counter() - start_img_time
        return True, elapsed, txt_path
    except Exception as e:
        # Catches network errors, unreadable files, server-side failures, etc.
        return False, str(e), None


def main():
    """Caption every image in the given directory that has no ``.txt`` yet.

    Steps:
        1. Validate the directory argument and probe the API endpoint;
           exit with status 1 if either check fails.
        2. Poll the model every 3 seconds until it answers (no upper bound).
        3. Collect image files whose sibling ``.txt`` does not exist.
        4. Process them, re-queuing failures, for up to ``MAX_ATTEMPTS`` rounds.
        5. Print a summary with success/failure counts and total elapsed time.
    """
    if len(sys.argv) < 2:
        print("usage: python xxx.py <images_dir_path>")
        sys.exit(1)

    target_dir = Path(sys.argv[1]).resolve()
    if not target_dir.is_dir():
        print(f"错误: 路径 '{target_dir}' 不是一个有效的目录")
        sys.exit(1)

    # 1. Basic endpoint check
    print("[*] 检测接口状态...")
    status_msg, is_ok = get_api_status()
    print(f'Status: {BASE_URL}/models "{status_msg}"')
    if not is_ok:
        print("\n[-] 无法连接到服务端。")
        sys.exit(1)

    print("\n[*] 检测模型状态...")
    retry_tick = 0
    while not check_model_ready():
        retry_tick += 1
        print(f"\r模型加载中... (已重试 {retry_tick} 次)", end="", flush=True)
        time.sleep(3)
    if retry_tick:
        # The progress line was written without a newline; end it so the
        # "loaded" message does not get appended to it.
        print()
    print(f"模型 '{MODEL_NAME}' 已完成加载。\n")

    # 2. Scan for files. Only regular files count, and sorting makes the
    #    processing order deterministic (iterdir() order is arbitrary).
    all_images = sorted(
        f
        for f in target_dir.iterdir()
        if f.is_file() and f.suffix.lower() in IMAGE_EXTENSIONS
    )
    to_process = [f for f in all_images if not f.with_suffix(".txt").exists()]

    initial_total = len(to_process)
    if initial_total == 0:
        print("[*] 没有需要处理的图片。")
        return

    print("[*] 开始图片处理...")
    print(f"目录 {target_dir.name} 待处理图片 {initial_total} 张。\n")

    # 3. Processing rounds
    attempt = 1
    total_success = 0
    start_total_time = time.perf_counter()
    current_queue = to_process
    # Zero-pad every counter to the width of the initial total.
    width = len(str(initial_total))
    total_str = str(initial_total).zfill(width)

    while attempt <= MAX_ATTEMPTS and current_queue:
        if attempt > 1:
            print("[*] 处理失败图片...")
            print(f"第 {attempt} 次处理,本次待处理图片 {len(current_queue)} 张。\n")

        failed_this_round = []

        for i, img_path in enumerate(current_queue, 1):
            if attempt == 1:
                # First pass: "01 / 30"
                seq_str = f"{str(i).zfill(width)} / {total_str}"
            else:
                # Retry pass: "01 / 28 / 30" (index / successes so far / total)
                seq_str = (
                    f"{str(i).zfill(width)} / "
                    f"{str(total_success).zfill(width)} / {total_str}"
                )

            print(seq_str)
            print(f"处理图片:{img_path.name}")

            success, info, txt_path = process_single_image(img_path)

            if success:
                total_success += 1
                print(f"处理完成,耗时:{info:.3f} 秒")
                print(f"输出写入:{txt_path}\n")
            else:
                failed_this_round.append(img_path)
                print(f"处理失败: {info}\n")

        # Only the failures go into the next round.
        current_queue = failed_this_round
        attempt += 1

    # 4. Final summary
    total_elapsed = time.perf_counter() - start_total_time
    final_failed_count = len(current_queue)

    print(
        f"[*] 全部图片处理完成。待处理图片数 {initial_total} 张,"
        f"本次处理 {total_success} 张,失败 {final_failed_count} 张。"
        f"处理总耗时:{total_elapsed:.3f} 秒。"
    )


if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        # os._exit() ends the process immediately, even if threads are still
        # blocked, but it does not flush stdio buffers. Flush explicitly so the
        # message is not lost when stdout is buffered (e.g. piped to a file).
        print("\n[!] 接收到中断信号 (Ctrl+C),脚本已停止。", flush=True)
        os._exit(0)