Spaces:

leonsimon23
/

exactEMR

Sleeping

App Files Community

leonsimon23 committed on Nov 3, 2025

Commit

251413d

verified ·

1 Parent(s): 052cb21

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -22

app.py CHANGED Viewed

@@ -149,6 +149,8 @@ def extract_information(api_key, prompt, examples_json, input_text, template_nam
     if not input_text.strip():
         raise gr.Error("⚠️ 请输入待提取的文本内容")
     try:
         examples_data = json.loads(examples_json)
         examples = [
@@ -159,7 +161,9 @@ def extract_information(api_key, prompt, examples_json, input_text, template_nam
         ]
     except (json.JSONDecodeError, KeyError) as e:
         raise gr.Error(f"❌ 示例JSON格式错误: {e}")
     try:
         os.environ['LANGEXTRACT_API_KEY'] = api_key
@@ -167,16 +171,14 @@ def extract_information(api_key, prompt, examples_json, input_text, template_nam
             text_or_documents=input_text,
             prompt_description=prompt,
             examples=examples,
-            model_id="gemini-2.5-flash",
         )
-        # langextract 返回的是列表，取第一个结果
         if isinstance(results, list) and len(results) > 0:
             result = results[0]
         else:
             result = results
-        # 手动构建 extraction 字典
         extractions_list = []
         if hasattr(result, 'extractions'):
             for ext in result.extractions:
@@ -188,33 +190,39 @@ def extract_information(api_key, prompt, examples_json, input_text, template_nam
                 }
                 extractions_list.append(ext_dict)
-        # 构建输出字典
         output_dict = {
             "source_text": result.text if hasattr(result, 'text') else input_text,
             "extractions": extractions_list
         }
-        # 保存文件
-        #with tempfile.NamedTemporaryFile(mode='w+', delete=False, suffix='.jsonl', encoding='utf-8') as tmp_file:
-            #if isinstance(results, list):
-                #lx.io.save_annotated_documents(results, file_path=tmp_file.name)
-            #else:
-                #lx.io.save_annotated_documents([results], file_path=tmp_file.name)
-            #download_path = tmp_file.name
-        # 保存文件
-        with tempfile.NamedTemporaryFile(mode='w+', delete=False, suffix='.jsonl', encoding='utf-8') as tmp_file:
-            if isinstance(results, list):
-                # 修正：直接传递路径作为第二个参数
-                lx.io.save_annotated_documents(results, tmp_file.name)
             else:
-                # 修正：直接传递路径作为第二个参数
-                lx.io.save_annotated_documents([results], tmp_file.name)
-            download_path = tmp_file.name
         stats = generate_statistics(output_dict)
         history = save_to_history(input_text, output_dict, template_name)
@@ -226,6 +234,7 @@ def extract_information(api_key, prompt, examples_json, input_text, template_nam
         error_detail = traceback.format_exc()
         print(f"详细错误信息:\n{error_detail}")
         raise gr.Error(f"❌ 提取失败: {str(e)}")
 def load_template(template_name):
     """加载预设模板"""

     if not input_text.strip():
         raise gr.Error("⚠️ 请输入待提取的文本内容")
     try:
         examples_data = json.loads(examples_json)
         examples = [
         ]
     except (json.JSONDecodeError, KeyError) as e:
         raise gr.Error(f"❌ 示例JSON格式错误: {e}")
+    # ... 其他代码保持不变 ...
     try:
         os.environ['LANGEXTRACT_API_KEY'] = api_key
             text_or_documents=input_text,
             prompt_description=prompt,
             examples=examples,
+            model_id="gemini-2.5-flash-latest", # 建议使用最新的模型
         )
         if isinstance(results, list) and len(results) > 0:
             result = results[0]
         else:
             result = results
         extractions_list = []
         if hasattr(result, 'extractions'):
             for ext in result.extractions:
                 }
                 extractions_list.append(ext_dict)
         output_dict = {
             "source_text": result.text if hasattr(result, 'text') else input_text,
             "extractions": extractions_list
         }
+        # --- START: 修改后的文件保存逻辑 ---
+        # 1. 创建一个临时的、持久化的文件，用于最终的下载
+        with tempfile.NamedTemporaryFile(mode='w+', delete=False, suffix='.jsonl', encoding='utf-8') as final_output_file:
+            download_path = final_output_file.name
+        # 2. 创建一个临时目录，供 langextract 库使用
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            # 准备好要保存的文档列表
+            documents_to_save = results if isinstance(results, list) else [results]
+            # 3. 让库将结果保存到这个临时目录中
+            lx.io.save_annotated_documents(documents_to_save, tmp_dir)
+            # 4. 库通常会生成一个名为 'output.jsonl' 的文件，找到它
+            source_file_path = os.path.join(tmp_dir, 'output.jsonl')
+            # 5. 将生成的文件内容，复制到我们为 Gradio 准备的最终文件中
+            if os.path.exists(source_file_path):
+                with open(source_file_path, 'r', encoding='utf-8') as src_file:
+                    with open(download_path, 'w', encoding='utf-8') as dest_file:
+                        dest_file.write(src_file.read())
             else:
+                # 如果没有生成文件，就直接保存我们构造的JSON字典
+                with open(download_path, 'w', encoding='utf-8') as dest_file:
+                    json.dump(output_dict, dest_file, ensure_ascii=False, indent=2)
+        # --- END: 修改后的文件保存逻辑 ---
         stats = generate_statistics(output_dict)
         history = save_to_history(input_text, output_dict, template_name)
         error_detail = traceback.format_exc()
         print(f"详细错误信息:\n{error_detail}")
         raise gr.Error(f"❌ 提取失败: {str(e)}")
 def load_template(template_name):
     """加载预设模板"""