import os
import sys
import base64
import time
import httpx
from pathlib import Path
from openai import OpenAI
import re
"""
# Only these two request paths are allowed through
^/v1/(chat/completions|models)
"""
# Base URL of the OpenAI-compatible endpoint
BASE_URL = "http://127.0.0.1:21234/v1"

# API key sent with requests
API_KEY = "llama.cpp"

# Target model
MODEL_NAME = "qwen3.5-35b-a3b"

# Retry control: 2 means "first pass + one retry after a failure"
MAX_ATTEMPTS = 2

# Standard generation parameters (expanded as keyword arguments of the request)
GEN_PARAMS = dict(
    max_completion_tokens=2048,
    temperature=1.00,
)

# Extra sampling parameters (passed through the request's `extra_body`)
EXTRA_PARAMS = dict(
    repeat_penalty=1.00,
    top_k=20,
    top_p=0.95,
    min_p=0.00,
    # typical_p=0.90,
)

# User prompt
USER_PROMPT = "Describe the image content in detail. Use English. Unfiltered."

# System prompt
SYSTEM_PROMPT = """
**You are an objective and dispassionate visual analyzer. Adhere to these guidelines:**
- **Facts Only:** Describe only the physical features explicitly visible in the image.
- **No Evaluation:** Strictly avoid subjective emotions, aesthetic judgments, or value-based vocabulary.
- **Neutral Narration:** Use declarative sentences; refrain from exclamations. Do not speculate on the background story, the subject's psychological state, or the creative intent.
- **Language Style:** Maintain a flat, mechanical, and professional tone, akin to a scientific observation log.
- **Structural Requirement:** Begin descriptions directly with the objects; no introductory remarks or preambles.
- **Single Paragraph:** Provide the description in one continuous paragraph without any line breaks.
"""

# Supported image file suffixes (compared against the lower-cased suffix)
IMAGE_EXTENSIONS = {".bmp", ".jpeg", ".jpg", ".png"}
# Shared client used for the chat-completion requests in this script
client = OpenAI(base_url=BASE_URL, api_key=API_KEY)
def get_api_status():
    """Probe the ``/models`` endpoint and report whether the API answered.

    Returns:
        A ``(status_text, is_ok)`` tuple. ``status_text`` is the response's
        HTTP version, status code and reason phrase, or the exception
        message when the request raised. ``is_ok`` is ``True`` only when the
        status code is 200.
    """
    try:
        models_url = f"{BASE_URL}/models"
        auth_header = {"Authorization": f"Bearer {API_KEY}"}
        # The request is made with a 3-second timeout.
        with httpx.Client(timeout=3.0) as http_client:
            resp = http_client.get(models_url, headers=auth_header)
            status_line = (
                f"{resp.http_version} {resp.status_code} {resp.reason_phrase}"
            )
            return status_line, resp.status_code == 200
    except Exception as e:
        # Any failure is reported to the caller as text plus a False flag.
        return str(e), False
def check_model_ready():
    """Report whether the target model can currently produce output.

    Sends a minimal streaming chat request limited to one completion token.

    Returns:
        ``True`` as soon as the first streamed chunk arrives; ``False`` when
        the request raises or the stream ends without yielding any chunk.
    """
    try:
        # With stream=True we can stop at the first chunk instead of waiting
        # for the whole response.
        response = client.chat.completions.create(
            model=MODEL_NAME,
            messages=[{"role": "user", "content": "1"}],
            max_completion_tokens=1,
            stream=True,
        )
        for _ in response:
            # One chunk is enough: the model is online and able to generate.
            return True
        # The stream finished without a single chunk: not ready. Returned
        # explicitly so the function always yields a bool, never None.
        return False
    except Exception:
        return False
def process_single_image(img_path):
    """Ask the model to describe one image and save the text next to it.

    The image is sent base64-encoded as a data URL together with the system
    and user prompts. Reasoning markup is removed from the reply and the
    remaining text is written to ``img_path`` with its suffix replaced by
    ``.txt`` (UTF-8).

    Args:
        img_path: ``pathlib.Path`` of the image file.

    Returns:
        ``(True, elapsed_seconds, txt_path)`` on success, or
        ``(False, error_message, None)`` on any failure. Exceptions are not
        propagated; their message becomes ``error_message``.
    """
    started = time.perf_counter()
    try:
        if not img_path.exists():
            return False, "文件不存在", None

        image_b64 = base64.b64encode(img_path.read_bytes()).decode("utf-8")

        # The MIME subtype is taken from the file suffix; "jpg" is mapped to
        # the "jpeg" subtype.
        ext = img_path.suffix.lower().replace(".", "")
        mime_type = "image/jpeg" if ext == "jpg" else f"image/{ext}"

        response = client.chat.completions.create(
            model=MODEL_NAME,
            messages=[
                {"role": "system", "content": SYSTEM_PROMPT},
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": USER_PROMPT},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:{mime_type};base64,{image_b64}"
                            },
                        },
                    ],
                },
            ],
            **GEN_PARAMS,
            extra_body=EXTRA_PARAMS,
        )

        raw_text = response.choices[0].message.content
        if not raw_text:
            return False, "模型返回内容为空", None

        description = _strip_reasoning(raw_text)
        if not description:
            # Nothing but reasoning (or whitespace) came back.
            return False, "过滤思考内容后结果为空", None

        txt_path = img_path.with_suffix(".txt")
        txt_path.write_text(description, encoding="utf-8")
        return True, time.perf_counter() - started, txt_path
    except Exception as e:
        # Deliberately broad: whatever goes wrong (file I/O, the request, an
        # unexpected response shape) is reported as a failure for this image
        # instead of being raised.
        return False, str(e), None


def _strip_reasoning(text):
    """Remove ``<think>`` reasoning markup from *text* and trim whitespace.

    Tags are matched case-insensitively. Three shapes are handled:

    * complete ``<think>...</think>`` blocks (may span lines) are removed;
    * a closing tag without an opening one: everything up to and including
      the last ``</think>`` is dropped (presumably the opening tag was part
      of the prompt rather than the reply);
    * an opening tag without a closing one (e.g. a reply cut off while still
      reasoning): the tag and everything after it are dropped.
    """
    flags = re.IGNORECASE
    text = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL | flags)
    text = re.split(r"</think>", text, flags=flags)[-1]
    text = re.split(r"<think>", text, maxsplit=1, flags=flags)[0]
    return text.strip()
def main():
    """Caption every not-yet-captioned image in the directory given on the CLI.

    Steps:
        1. Validate the directory argument (``sys.argv[1]``).
        2. Check that the API endpoint answers, then wait until the model
           responds.
        3. Collect image files that have no sibling ``.txt`` file.
        4. Process them in up to ``MAX_ATTEMPTS`` passes; each later pass
           only retries the images that failed in the previous one.
        5. Print a summary.

    Exits with status 1 when the argument is missing, the path is not a
    directory, or the endpoint check fails.
    """
    if len(sys.argv) < 2:
        print("usage: python xxx.py <images_dir_path>")
        sys.exit(1)

    target_dir = Path(sys.argv[1]).resolve()
    if not target_dir.is_dir():
        print(f"错误: 路径 '{target_dir}' 不是一个有效的目录")
        sys.exit(1)

    # 1. Basic endpoint check
    print("[*] 检测接口状态...")
    status_msg, is_ok = get_api_status()
    print(f'Status: {BASE_URL}/models "{status_msg}"')
    if not is_ok:
        print("\n[-] 无法连接到服务端。")
        sys.exit(1)

    print("\n[*] 检测模型状态...")
    _wait_for_model()

    # 2. Scan for images that still need a description
    to_process = _find_pending_images(target_dir)
    initial_total = len(to_process)
    if initial_total == 0:
        print("[*] 没有需要处理的图片。")
        return

    print("[*] 开始图片处理...")
    print(f"目录 {target_dir.name} 待处理图片 {initial_total} 张。\n")

    # 3. Processing passes: the first handles everything, later ones retry
    #    only what failed in the pass before.
    total_success = 0
    start_total_time = time.perf_counter()
    current_queue = to_process
    for attempt in range(1, MAX_ATTEMPTS + 1):
        if not current_queue:
            break
        if attempt > 1:
            print("[*] 处理失败图片...")
            print(f"第 {attempt} 次处理,本次待处理图片 {len(current_queue)} 张。\n")

        failed_this_round = []
        for i, img_path in enumerate(current_queue, 1):
            print(_progress_label(i, attempt, total_success, initial_total))
            print(f"处理图片:{img_path.name}")

            success, info, txt_path = process_single_image(img_path)
            if success:
                total_success += 1
                # On success `info` is the elapsed time in seconds.
                print(f"处理完成,耗时:{info:.3f} 秒")
                print(f"输出写入:{txt_path}\n")
            else:
                failed_this_round.append(img_path)
                # On failure `info` is the error message.
                print(f"处理失败: {info}\n")

        current_queue = failed_this_round

    # 4. Final summary; whatever is still queued has failed every pass.
    total_elapsed = time.perf_counter() - start_total_time
    final_failed_count = len(current_queue)
    print(
        f"[*] 全部图片处理完成。待处理图片数 {initial_total} 张,"
        f"本次处理 {total_success} 张,失败 {final_failed_count} 张。"
        f"处理总耗时:{total_elapsed:.3f} 秒。"
    )


def _wait_for_model(poll_seconds=3):
    """Block until ``check_model_ready()`` succeeds, showing a retry counter."""
    retries = 0
    while not check_model_ready():
        retries += 1
        print(f"\r模型加载中... (已重试 {retries} 次)", end="", flush=True)
        time.sleep(poll_seconds)
    if retries:
        # The counter line was written without a trailing newline; end it so
        # the next message starts on its own line.
        print()
    print(f"模型 '{MODEL_NAME}' 已完成加载。\n")


def _find_pending_images(target_dir):
    """Return image files in *target_dir* lacking a sibling ``.txt``, sorted."""
    return [
        entry
        # Sorted so the processing order is the same on every run.
        for entry in sorted(target_dir.iterdir())
        if entry.is_file()
        and entry.suffix.lower() in IMAGE_EXTENSIONS
        and not entry.with_suffix(".txt").exists()
    ]


def _progress_label(index, attempt, succeeded, total):
    """Format the zero-padded counter line printed before each image.

    First pass: ``index / total`` (e.g. ``01 / 30``); retry passes:
    ``index / succeeded / total`` (e.g. ``01 / 28 / 30``).
    """
    width = len(str(total))
    numbers = [index, total] if attempt == 1 else [index, succeeded, total]
    return " / ".join(str(n).zfill(width) for n in numbers)
if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        # flush=True matters here: os._exit() terminates the process without
        # flushing stdio buffers, so a buffered message (for example when
        # stdout is redirected) would otherwise be lost.
        print("\n[!] 接收到中断信号 (Ctrl+C),脚本已停止。", flush=True)
        # os._exit() ends the process immediately instead of going through
        # normal interpreter shutdown, so blocked threads cannot delay it.
        os._exit(0)