Spaces:

Kirito-Lab
/

PaperX

Running

App Files Files Community

Laramie2 committed on Mar 10

Commit

338ff5b

verified ·

1 Parent(s): da7bfe5

Update src/refinement/refinement.py

Browse files

Files changed (1) hide show

src/refinement/refinement.py +41 -26

src/refinement/refinement.py CHANGED Viewed

@@ -5,6 +5,7 @@ import json
 import time
 import PIL.Image
 import shutil
 from PIL import Image
 from pathlib import Path
 from openai import OpenAI
@@ -401,8 +402,7 @@ def refine_one_slide(input_path, output_path, prompts, outline, max_iterations,
     take_screenshot(current_input, final_screenshot_path)
     print(f"\n📷 Final screenshot saved: {final_screenshot_path}")
-def refinement_ppt(input_index, prompts, max_iterations=3, model="gpt-4o", config=None):
     # 1. 定义路径
     outline_path = os.path.join(input_index, "outline.json")
     output_index = os.path.join(input_index, "final")
@@ -412,7 +412,6 @@ def refinement_ppt(input_index, prompts, max_iterations=3, model="gpt-4o", confi
     os.makedirs(output_index, exist_ok=True)
     # 将图片复制到final/images目录下
-    import shutil
     source_images_dir = os.path.join(input_index, "images")
     if os.path.exists(source_images_dir):
         shutil.copytree(source_images_dir, output_index_images, dirs_exist_ok=True)
@@ -441,25 +440,40 @@ def refinement_ppt(input_index, prompts, max_iterations=3, model="gpt-4o", confi
     # 3.2 定义排序 Key：直接提取开头的数字
     def get_file_number(filename):
-        # 因为上一步已经过滤过了，这里可以直接提取
         return int(filename.split('_')[0])
     # 3.3 执行排序 (这步是关键，确保 2 在 10 前面)
     sorted_files = sorted(target_files, key=get_file_number)
-    # Debug: 打印前几个文件确认顺序
-    print(f"👀 排序后文件列表前5个: {sorted_files[:5]}")
-    # 4. 遍历排序后的列表
     for file_name in sorted_files:
-        # 直接提取序号 (之前已经验证过格式了)
         num = str(get_file_number(file_name))
         # 获取当前 html 对应的 outline
         outline = outline_full.get(int(num)-1)
-        # 【容错逻辑】处理索引偏移 (例如文件是 1_ppt，但列表是从 0 开始)
-        # 如果 outline 为空，且 num-1 存在，则尝试自动回退
         if outline is None and str(int(num)-1) in outline_full:
             print(f"ℹ️ 尝试修正索引: 文件 {num} -> 使用大纲 {int(num)-1}")
             outline = outline_full.get(str(int(num)-1))
@@ -468,27 +482,28 @@ def refinement_ppt(input_index, prompts, max_iterations=3, model="gpt-4o", confi
             print(f"⚠️ 跳过 {file_name}: 在 outline.json 中找不到序号 {num} 或 {int(num)-1}")
             continue
-        # 构建路径
         html_file_path = os.path.join(input_index, file_name)
         html_file_path_refine = os.path.join(output_index, file_name)
-        print(f"📝 [顺序处理中] 正在优化: {file_name} (对应大纲 Key: {num})")
-        # 6. 调用优化函数
-        try:
-            refine_one_slide(
-                input_path=html_file_path,
-                output_path=html_file_path_refine,
-                prompts=prompts,
-                outline=outline,
-                max_iterations=max_iterations,
-                model=model,
-                config=config
-            )
-        except Exception as e:
-            print(f"❌ 处理 {file_name} 时出错: {e}")
-    print(f"✅ 所有文件处理完成，结果保存在: {output_index}")
 def refinement_poster(input_html_path, prompts, output_html_path, model, config=None):
     # ---------------- 0. 配置准备 ----------------

 import time
 import PIL.Image
 import shutil
+import concurrent.futures
 from PIL import Image
 from pathlib import Path
 from openai import OpenAI
     take_screenshot(current_input, final_screenshot_path)
     print(f"\n📷 Final screenshot saved: {final_screenshot_path}")
+def refinement_ppt(input_index, prompts, max_iterations=3, model="gpt-4o", config=None, max_workers=5):
     # 1. 定义路径
     outline_path = os.path.join(input_index, "outline.json")
     output_index = os.path.join(input_index, "final")
     os.makedirs(output_index, exist_ok=True)
     # 将图片复制到final/images目录下
     source_images_dir = os.path.join(input_index, "images")
     if os.path.exists(source_images_dir):
         shutil.copytree(source_images_dir, output_index_images, dirs_exist_ok=True)
     # 3.2 定义排序 Key：直接提取开头的数字
     def get_file_number(filename):
         return int(filename.split('_')[0])
     # 3.3 执行排序 (这步是关键，确保 2 在 10 前面)
     sorted_files = sorted(target_files, key=get_file_number)
+    print(f"👀 找到文件 {len(sorted_files)} 个，准备进行并发优化...")
+    # 定义单个任务的处理逻辑
+    def _worker(file_name, num, outline, html_input, html_output):
+        print(f"📝 [并发处理中] 正在优化: {file_name} (对应大纲 Key: {num})")
+        try:
+            refine_one_slide(
+                input_path=html_input,
+                output_path=html_output,
+                prompts=prompts,
+                outline=outline,
+                max_iterations=max_iterations,
+                model=model,
+                config=config
+            )
+            return file_name, True, None
+        except Exception as e:
+            return file_name, False, str(e)
+    # 4. 收集需要并发执行的任务
+    tasks = []
     for file_name in sorted_files:
+        # 直接提取序号
         num = str(get_file_number(file_name))
         # 获取当前 html 对应的 outline
         outline = outline_full.get(int(num)-1)
+        # 【容错逻辑】处理索引偏移
         if outline is None and str(int(num)-1) in outline_full:
             print(f"ℹ️ 尝试修正索引: 文件 {num} -> 使用大纲 {int(num)-1}")
             outline = outline_full.get(str(int(num)-1))
             print(f"⚠️ 跳过 {file_name}: 在 outline.json 中找不到序号 {num} 或 {int(num)-1}")
             continue
+        # 构建路径并添加到任务列表
         html_file_path = os.path.join(input_index, file_name)
         html_file_path_refine = os.path.join(output_index, file_name)
+        tasks.append((file_name, num, outline, html_file_path, html_file_path_refine))
+    # 5. 使用线程池并发执行任务
+    print(f"⚡ 启动线程池，最大并发数: {max_workers}")
+    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
+        # 提交所有任务
+        futures = {executor.submit(_worker, *task): task for task in tasks}
+        # 收集结果
+        for future in concurrent.futures.as_completed(futures):
+            file_name, success, error_msg = future.result()
+            if success:
+                print(f"✅ 成功完成: {file_name}")
+            else:
+                print(f"❌ 处理 {file_name} 时出错: {error_msg}")
+    print(f"🎉 所有文件处理完成，结果保存在: {output_index}")
 def refinement_poster(input_html_path, prompts, output_html_path, model, config=None):
     # ---------------- 0. 配置准备 ----------------