Spaces:

leonsimon23
/

exactEMR

Sleeping

App Files Files Community

leonsimon23 committed on Nov 3, 2025

Commit

e4819a3

verified ·

1 Parent(s): 16bb26c

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -32

app.py CHANGED Viewed

@@ -4,39 +4,52 @@ import json
 import os
 import tempfile
 import textwrap
-from datetime import datetime
-# --- 默认模板和示例 ---
-# 为了方便用户快速上手，我们提供一个默认的提取模板。
-# 用户可以在界面上自由修改。
 # 1. 默认提取指令 (Prompt)
 DEFAULT_PROMPT = textwrap.dedent("""\
-    请按顺序提取文本中出现的角色、他们的情绪以及他们之间的关系。
-    提取时请使用原文中的确切文本，不要转述或重叠实体。
-    为每个实体提供有意义的属性以增加上下文信息。""")
 # 2. 默认提取示例 (Examples)
-# Gradio 界面中我们使用 JSON 格式让用户输入，后端再将其转换为 LangExtract 需要的格式。
 DEFAULT_EXAMPLES_DICT = [
     {
-        "text": "罗密欧. 但是轻声！那边窗子里亮起来的是什么光？那就是东方，朱丽叶就是太阳。",
         "extractions": [
             {
-                "extraction_class": "character",
-                "extraction_text": "罗密欧",
-                "attributes": {"emotional_state": "惊奇"}
             },
             {
-                "extraction_class": "emotion",
-                "extraction_text": "但是轻声！",
-                "attributes": {"feeling": "温柔的敬畏"}
             },
             {
-                "extraction_class": "relationship",
-                "extraction_text": "朱丽叶就是太阳",
-                "attributes": {"type": "隐喻", "subject": "朱丽叶", "object": "太阳"}
             },
         ]
     }
 ]
@@ -45,8 +58,7 @@ DEFAULT_EXAMPLES_DICT = [
 DEFAULT_EXAMPLES_JSON = json.dumps(DEFAULT_EXAMPLES_DICT, ensure_ascii=False, indent=2)
-# --- 后端处理函数 ---
-# 这个函数是整个应用的核心，它接收前端输入并调用 LangExtract。
 def extract_information(api_key, prompt, examples_json, input_text):
     """
@@ -86,14 +98,12 @@ def extract_information(api_key, prompt, examples_json, input_text):
         )
         # 将结果转换为可序列化的字典以便在 Gradio 中显示
-        # 注意：result 是一个 AnnotatedDocument 对象
         output_for_display = {
             "source_text": result.source_text,
             "extractions": [ext.to_dict() for ext in result.extractions]
         }
         # 4. 创建可供下载的文件
-        # 使用临时文件来保存结果
         with tempfile.NamedTemporaryFile(mode='w+', delete=False, suffix='.jsonl', encoding='utf-8') as tmp_file:
             lx.io.save_annotated_documents([result], file_path=tmp_file.name)
             download_path = tmp_file.name
@@ -105,9 +115,9 @@ def extract_information(api_key, prompt, examples_json, input_text):
         raise gr.Error(f"提取过程中发生错误: {e}")
-# --- Gradio UI 界面 ---
-with gr.Blocks(theme=gr.themes.Soft(), title="LangExtract Web UI") as demo:
     gr.Markdown("# LangExtract 交互式信息提取工具")
     gr.Markdown(
         "在左侧定义您的提取规则和输入文本，然后点击“开始提取”在右侧查看结果。\n"
@@ -131,25 +141,23 @@ with gr.Blocks(theme=gr.themes.Soft(), title="LangExtract Web UI") as demo:
                 label="提取指令 (Prompt)",
                 value=DEFAULT_PROMPT,
                 lines=5,
-                # info="告诉模型您想提取什么，以及遵循什么规则。"  <- 已移除
             )
-            gr.Markdown("告诉模型您想提取什么，以及遵循什么规则。") # 使用 Markdown 替代 info
             examples_input = gr.Code(
                 label="提取示例 (JSON 格式)",
                 value=DEFAULT_EXAMPLES_JSON,
                 language="json",
-                lines=15,
-                # info="提供一两个高质量的示例，指导模型的输出格式。" <- 已移除
             )
-            gr.Markdown("提供一两个高质量的示例，指导模型的输出格式。") # 使用 Markdown 替代 info
             gr.Markdown("## 3. 输入待提取的文本")
             text_input = gr.Textbox(
                 label="源文本",
                 lines=10,
-                placeholder="在此处粘贴您要从中提取信息的文本..."
             )
             submit_btn = gr.Button("🚀 开始提取", variant="primary")
@@ -160,12 +168,10 @@ with gr.Blocks(theme=gr.themes.Soft(), title="LangExtract Web UI") as demo:
             json_output = gr.JSON(
                 label="结构化输出 (JSON)",
-                # info="这里显示从文本中提取出的结构化数据。" <- 已移除
             )
             file_output = gr.File(
                 label="⬇️ 下载结果文件",
-                # info="点击此处下载包含所有元数据的 .jsonl 文件。" <- 已移除
             )
     # --- 事件绑定 ---

 import os
 import tempfile
 import textwrap
+# --- 默认模板和示例 (已更新为临床影像报告场景) ---
 # 1. 默认提取指令 (Prompt)
 DEFAULT_PROMPT = textwrap.dedent("""\
+    请从影像检查报告中，按顺序提取关键的影像学发现、涉及的解剖部位、尺寸测量、影像学特征以及阴性发现。
+    - 提取时必须使用报告中的确切文本。
+    - 不要转述或概括。
+    - 为每个提取的实体提供详细的属性，以增加结构化信息。""")
 # 2. 默认提取示例 (Examples)
+# 提供一个高质量的CT报告提取示例
 DEFAULT_EXAMPLES_DICT = [
     {
+        "text": "腹部CT平扫增强检查显示：肝脏右叶可见一大小约3.2 x 2.8 cm的低密度占位灶，边缘清晰，增强扫描后呈轻度环形强化。胰腺及双肾未见明确异常。",
         "extractions": [
             {
+                "extraction_class": "anatomy",
+                "extraction_text": "肝脏右叶",
+                "attributes": {"organ": "肝脏", "lobe": "右叶"}
             },
             {
+                "extraction_class": "size_measurement",
+                "extraction_text": "3.2 x 2.8 cm",
+                "attributes": {"value": "3.2 x 2.8", "unit": "cm"}
             },
             {
+                "extraction_class": "finding",
+                "extraction_text": "低密度占位灶",
+                "attributes": {"density": "低密度", "type": "占位灶"}
             },
+            {
+                "extraction_class": "radiologic_feature",
+                "extraction_text": "边缘清晰",
+                "attributes": {"feature_type": "边缘", "description": "清晰"}
+            },
+            {
+                "extraction_class": "radiologic_feature",
+                "extraction_text": "轻度环形强化",
+                "attributes": {"feature_type": "增强扫描", "degree": "轻度", "pattern": "环形强化"}
+            },
+            {
+                "extraction_class": "normal_finding",
+                "extraction_text": "胰腺及双肾未见明确异常",
+                "attributes": {"organs": ["胰腺", "双肾"]}
+            }
         ]
     }
 ]
 DEFAULT_EXAMPLES_JSON = json.dumps(DEFAULT_EXAMPLES_DICT, ensure_ascii=False, indent=2)
+# --- 后端处理函数 (无需修改) ---
 def extract_information(api_key, prompt, examples_json, input_text):
     """
         )
         # 将结果转换为可序列化的字典以便在 Gradio 中显示
         output_for_display = {
             "source_text": result.source_text,
             "extractions": [ext.to_dict() for ext in result.extractions]
         }
         # 4. 创建可供下载的文件
         with tempfile.NamedTemporaryFile(mode='w+', delete=False, suffix='.jsonl', encoding='utf-8') as tmp_file:
             lx.io.save_annotated_documents([result], file_path=tmp_file.name)
             download_path = tmp_file.name
         raise gr.Error(f"提取过程中发生错误: {e}")
+# --- Gradio UI 界面 (无需修改) ---
+with gr.Blocks(theme=gr.themes.Soft(), title="LangExtract 交互式信息提取工具") as demo:
     gr.Markdown("# LangExtract 交互式信息提取工具")
     gr.Markdown(
         "在左侧定义您的提取规则和输入文本，然后点击“开始提取”在右侧查看结果。\n"
                 label="提取指令 (Prompt)",
                 value=DEFAULT_PROMPT,
                 lines=5,
             )
+            gr.Markdown("告诉模型您想提取什么，以及遵循什么规则。")
             examples_input = gr.Code(
                 label="提取示例 (JSON 格式)",
                 value=DEFAULT_EXAMPLES_JSON,
                 language="json",
+                lines=20, # 增加了行数以更好地显示复杂的JSON
             )
+            gr.Markdown("提供一两个高质量的示例，指导模型的输出格式。")
             gr.Markdown("## 3. 输入待提取的文本")
             text_input = gr.Textbox(
                 label="源文本",
                 lines=10,
+                placeholder="在此处粘贴您要从中提取信息的临床病历或影像报告..."
             )
             submit_btn = gr.Button("🚀 开始提取", variant="primary")
             json_output = gr.JSON(
                 label="结构化输出 (JSON)",
             )
             file_output = gr.File(
                 label="⬇️ 下载结果文件",
             )
     # --- 事件绑定 ---