Spaces:

leonsimon23
/

pharmextract

Sleeping

App Files Files Community

leonsimon23 committed on Nov 3, 2025

Commit

20b7ba6

verified ·

1 Parent(s): f1edc45

Update app.py

Browse files

Files changed (1) hide show

app.py +9 -11

app.py CHANGED Viewed

@@ -32,7 +32,7 @@ def get_extraction_config():
     ]
     return prompt_description, examples
-# --- 2. Gradio 的核心处理函数 (已修复) ---
 def process_input_and_visualize(input_text, input_file):
     """
     接收文本或文件输入，解析内容，调用 LangExtract 的高级功能进行处理，并返回结果。
@@ -69,14 +69,11 @@ def process_input_and_visualize(input_text, input_file):
     except Exception as e:
         raise gr.Error(f"信息提取过程中发生错误: {e}")
-    # --- 关键修复：过滤掉无法定位的实体 ---
-    # 只有那些 char_interval 不为 None 的实体才是有位置信息的，才能被排序和高亮
     grounded_extractions = [e for e in result.extractions if e.char_interval]
     # 1. 准备命名实体识别 (NER) 的高亮文本输出
     highlighted_text = []
     last_pos = 0
-    # 现在我们对过滤后的、保证有位置信息的列表进行排序
     sorted_extractions = sorted(grounded_extractions, key=lambda e: e.char_interval.start_pos)
     for entity in sorted_extractions:
         start, end = entity.char_interval.start_pos, entity.char_interval.end_pos
@@ -88,7 +85,6 @@ def process_input_and_visualize(input_text, input_file):
     # 2. 准备关系提取 (RE) 的结构化 Markdown 输出
     medication_groups = {}
-    # 注意：这里我们仍然遍历所有实体（包括未定位的），因为它们可能仍有有用的属性信息
     for extraction in result.extractions:
         group_name = extraction.attributes.get("medication_group", "未分组")
         medication_groups.setdefault(group_name, []).append(extraction)
@@ -99,12 +95,10 @@ def process_input_and_visualize(input_text, input_file):
     else:
         for med_name, extractions in medication_groups.items():
             structured_output += f"#### 药物组: {med_name}\n"
-            # 我们在显示时检查 char_interval 是否存在
             for extraction in sorted(extractions, key=lambda e: e.char_interval.start_pos if e.char_interval else -1):
                 pos_info = ""
                 if extraction.char_interval:
                     pos_info = f" (位置: {extraction.char_interval.start_pos}-{extraction.char_interval.end_pos})"
-                # 即使没有位置信息，我们仍然显示实体本身
                 structured_output += f"- **{extraction.extraction_class}**: {extraction.extraction_text}{pos_info}\n"
             structured_output += "\n"
@@ -115,7 +109,7 @@ def process_input_and_visualize(input_text, input_file):
     jsonl_filename = f"extraction_{session_id}.jsonl"
     jsonl_path = os.path.join(output_dir, jsonl_filename)
     html_path = os.path.join(output_dir, f"visualization_{session_id}.html")
-    lx.io.save_annotated_documents([result], output_name=jsonl_filename, output_dir=output_dir)
     html_content = lx.visualize(jsonl_path)
     with open(html_path, "w", encoding="utf-8") as f:
         f.write(html_content)
@@ -123,9 +117,8 @@ def process_input_and_visualize(input_text, input_file):
     return highlighted_text, structured_output, html_path, html_path, jsonl_path
-# --- 3. 创建 Gradio 应用界面 (保持不变) ---
 with gr.Blocks(theme=gr.themes.Soft(), title="药物信息提取器") as demo:
-    # ... (界面部分代码无需修改) ...
     gr.Markdown("# ⚕️ LangExtract 药物信息提取器")
     gr.Markdown("一个基于大型语言模型的智能工具，可从**临床文本**或 **PDF 文件**中自动提取药物、剂量等信息，并进行结构化关联。")
@@ -159,12 +152,17 @@ with gr.Blocks(theme=gr.themes.Soft(), title="药物信息提取器") as demo:
                 with gr.TabItem("📁 文件下载"):
                     gr.Markdown("### 下载提取结果")
                     download_html = gr.File(label="下载交互式 HTML 文件")
                     download_jsonl = gr.File(label="下载 JSONL 数据文件")
     submit_btn.click(
         fn=process_input_and_visualize,
         inputs=[input_textbox, input_file_uploader],
-        outputs=[output_highlight, output_structured, output_html_viewer, download_html, jsonl_path]
     ).then(
         lambda: (None, None),
         inputs=None,

     ]
     return prompt_description, examples
+# --- 2. Gradio 的核心处理函数 (保持不变) ---
 def process_input_and_visualize(input_text, input_file):
     """
     接收文本或文件输入，解析内容，调用 LangExtract 的高级功能进行处理，并返回结果。
     except Exception as e:
         raise gr.Error(f"信息提取过程中发生错误: {e}")
     grounded_extractions = [e for e in result.extractions if e.char_interval]
     # 1. 准备命名实体识别 (NER) 的高亮文本输出
     highlighted_text = []
     last_pos = 0
     sorted_extractions = sorted(grounded_extractions, key=lambda e: e.char_interval.start_pos)
     for entity in sorted_extractions:
         start, end = entity.char_interval.start_pos, entity.char_interval.end_pos
     # 2. 准备关系提取 (RE) 的结构化 Markdown 输出
     medication_groups = {}
     for extraction in result.extractions:
         group_name = extraction.attributes.get("medication_group", "未分组")
         medication_groups.setdefault(group_name, []).append(extraction)
     else:
         for med_name, extractions in medication_groups.items():
             structured_output += f"#### 药物组: {med_name}\n"
             for extraction in sorted(extractions, key=lambda e: e.char_interval.start_pos if e.char_interval else -1):
                 pos_info = ""
                 if extraction.char_interval:
                     pos_info = f" (位置: {extraction.char_interval.start_pos}-{extraction.char_interval.end_pos})"
                 structured_output += f"- **{extraction.extraction_class}**: {extraction.extraction_text}{pos_info}\n"
             structured_output += "\n"
     jsonl_filename = f"extraction_{session_id}.jsonl"
     jsonl_path = os.path.join(output_dir, jsonl_filename)
     html_path = os.path.join(output_dir, f"visualization_{session_id}.html")
+    lx.io.save_annotated_documents([result], output_name=jsonl_filename, output_dir=output_dir)
     html_content = lx.visualize(jsonl_path)
     with open(html_path, "w", encoding="utf-8") as f:
         f.write(html_content)
     return highlighted_text, structured_output, html_path, html_path, jsonl_path
+# --- 3. 创建 Gradio 应用界面 (已修复) ---
 with gr.Blocks(theme=gr.themes.Soft(), title="药物信息提取器") as demo:
     gr.Markdown("# ⚕️ LangExtract 药物信息提取器")
     gr.Markdown("一个基于大型语言模型的智能工具，可从**临床文本**或 **PDF 文件**中自动提取药物、剂量等信息，并进行结构化关联。")
                 with gr.TabItem("📁 文件下载"):
                     gr.Markdown("### 下载提取结果")
                     download_html = gr.File(label="下载交互式 HTML 文件")
+                    # 这是我们定义的用于接收 JSONL 文件的 Gradio 组件
                     download_jsonl = gr.File(label="下载 JSONL 数据文件")
+    # --- 关键修复 ---
+    # 原代码错误: ... outputs=[..., jsonl_path]
+    # 正确代码: ... outputs=[..., download_jsonl]
+    # 必须引用在上面 Blocks 布局中定义的 Gradio 组件变量。
     submit_btn.click(
         fn=process_input_and_visualize,
         inputs=[input_textbox, input_file_uploader],
+        outputs=[output_highlight, output_structured, output_html_viewer, download_html, download_jsonl]
     ).then(
         lambda: (None, None),
         inputs=None,