Spaces:
Sleeping
Sleeping
Update app.py
Browse files
1 - Gradio's Dropdown component is used to create the dropdown option boxes, with filterable=True and allow_custom_value=True set.
2 - If the string entered by the user is in the set valid_strings, it is returned directly. Otherwise, the string in valid_strings with the smallest Levenshtein distance to the user's input is found and returned.
app.py
CHANGED
|
@@ -8,11 +8,41 @@ import os
|
|
| 8 |
import platform
|
| 9 |
print(platform.python_version())
|
| 10 |
|
|
|
|
| 11 |
import gradio as gr
|
| 12 |
from openai import OpenAI
|
| 13 |
|
| 14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
def demo(project_TCGA, output_language="Chinese"):
|
|
|
|
| 16 |
name_English, name_Chinese = project_name_TCGA[project_TCGA]
|
| 17 |
tcga_link = f"https://portal.gdc.cancer.gov/projects/{project_TCGA}"
|
| 18 |
output1, output2 = None, None
|
|
@@ -29,8 +59,8 @@ def demo(project_TCGA, output_language="Chinese"):
|
|
| 29 |
output1 = f"✍️ Abbreviation: {project_TCGA}\n❤️ Full name in Chinese: {name_Chinese}\n💛 Full Name in English: {name_English}\n🔗 Link: {tcga_link}"
|
| 30 |
system_instruction = f"You are an expert in the fields of public health, epidemiology, cancer research, and precision medicine, with a deep comprehension of {name_English}."
|
| 31 |
prompt_template = f"""
|
| 32 |
-
Your task is to analyze and write an in-depth summary
|
| 33 |
-
Specific content needs to include: 1 - Basic definition and overview of {name_English}, clinicopathologic features; 2 - Etiology and risk factors of {name_English}; 3 - Epidemiologic findings, prevalence, and mortality rates of {name_English}; 4 - Clinical signs and early recognition of {name_English}; 5 - Disease progression and metastasis of {name_English} and its closely related biomarkers and aberrant gene alterations; 6 - Survival and prognosis of {name_English}; and 7 -
|
| 34 |
""".strip()
|
| 35 |
|
| 36 |
try:
|
|
@@ -124,6 +154,8 @@ project_name_TCGA = {
|
|
| 124 |
"TCGA-UCS": ["uterine carcinosarcoma", "子宫癌肉瘤"],
|
| 125 |
"TCGA-UVM": ["uveal melanoma", "眼内(葡萄膜)黑色素瘤"],
|
| 126 |
}
|
|
|
|
|
|
|
| 127 |
# print(len(project_name_TCGA.keys()))
|
| 128 |
# input_query = input("请输入您要查询的 TCGA 项目名称:")
|
| 129 |
# print(project_name_TCGA[input_query])
|
|
@@ -149,7 +181,9 @@ my_demo = gr.Interface(
|
|
| 149 |
gr.Dropdown(
|
| 150 |
choices=[k for k in project_name_TCGA.keys()],
|
| 151 |
value="TCGA-READ",
|
| 152 |
-
allow_custom_value=
|
|
|
|
|
|
|
| 153 |
label="⌨️ 请输入您要查询的 TCGA 项目名称,如 TCGA-READ",
|
| 154 |
), # Please enter the name of the TCGA project you want to query, such as TCGA-READ.
|
| 155 |
gr.Dropdown(
|
|
|
|
| 8 |
import platform
|
| 9 |
print(platform.python_version())
|
| 10 |
|
| 11 |
+
import Levenshtein
|
| 12 |
import gradio as gr
|
| 13 |
from openai import OpenAI
|
| 14 |
|
| 15 |
|
| 16 |
+
def find_closest_string(user_input, valid_strings):
    """Return the entry of ``valid_strings`` closest to ``user_input``.

    Closeness is the edit distance computed by ``Levenshtein.distance``; the
    candidate with the smallest distance is returned. When several candidates
    share the smallest distance, the first one encountered while iterating
    ``valid_strings`` is kept.

    Args:
        user_input: The string entered by the user.
        valid_strings: Iterable of candidate strings to compare against.

    Returns:
        The candidate with the minimum edit distance to ``user_input``, or
        ``None`` when ``valid_strings`` is empty.
    """
    # NOTE: the running minimum must not start from a negative sentinel.
    # Edit distances are never negative, so `distance < -9999` could never be
    # true and the function would always return None. Letting min() track the
    # best candidate avoids needing any sentinel at all.
    return min(
        valid_strings,
        key=lambda candidate: Levenshtein.distance(user_input, candidate),
        default=None,
    )
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def process_input(user_input):
    """Map the user's input onto one of the valid strings.

    Returns ``user_input`` unchanged when it is already a member of the
    module-level ``valid_strings``; otherwise returns whatever
    ``find_closest_string`` selects from ``valid_strings`` for this input.
    """
    # Input outside the valid set is resolved by edit-distance lookup.
    if user_input not in valid_strings:
        return find_closest_string(user_input, valid_strings)
    # Exact match: hand the input back as-is.
    return user_input
|
| 42 |
+
|
| 43 |
+
|
| 44 |
def demo(project_TCGA, output_language="Chinese"):
|
| 45 |
+
project_TCGA = process_input(project_TCGA)
|
| 46 |
name_English, name_Chinese = project_name_TCGA[project_TCGA]
|
| 47 |
tcga_link = f"https://portal.gdc.cancer.gov/projects/{project_TCGA}"
|
| 48 |
output1, output2 = None, None
|
|
|
|
| 59 |
output1 = f"✍️ Abbreviation: {project_TCGA}\n❤️ Full name in Chinese: {name_Chinese}\n💛 Full Name in English: {name_English}\n🔗 Link: {tcga_link}"
|
| 60 |
system_instruction = f"You are an expert in the fields of public health, epidemiology, cancer research, and precision medicine, with a deep comprehension of {name_English}."
|
| 61 |
prompt_template = f"""
|
| 62 |
+
Your task is to analyze and write an in-depth summary about the complex disease of {name_English} that must be accurate, informative, logical, and readable, which is very important for the general public to understand this complex disease.
|
| 63 |
+
Specific content needs to include: 1 - Basic definition and overview of {name_English}, clinicopathologic features; 2 - Etiology and risk factors of {name_English}; 3 - Epidemiologic findings, prevalence, and mortality rates of {name_English}; 4 - Clinical signs and early recognition of {name_English}; 5 - Disease progression and metastasis of {name_English} and its closely related biomarkers and aberrant gene alterations; 6 - Survival and prognosis of {name_English}; and 7 - Diagnostics, therapeutic approaches, and future research of {name_English}.
|
| 64 |
""".strip()
|
| 65 |
|
| 66 |
try:
|
|
|
|
| 154 |
"TCGA-UCS": ["uterine carcinosarcoma", "子宫癌肉瘤"],
|
| 155 |
"TCGA-UVM": ["uveal melanoma", "眼内(葡萄膜)黑色素瘤"],
|
| 156 |
}
|
| 157 |
+
# Predefined set of valid strings: the keys of project_name_TCGA.
valid_strings = set(project_name_TCGA)
|
| 159 |
# print(len(project_name_TCGA.keys()))
|
| 160 |
# input_query = input("请输入您要查询的 TCGA 项目名称:")
|
| 161 |
# print(project_name_TCGA[input_query])
|
|
|
|
| 181 |
gr.Dropdown(
|
| 182 |
choices=[k for k in project_name_TCGA.keys()],
|
| 183 |
value="TCGA-READ",
|
| 184 |
+
allow_custom_value=True,
|
| 185 |
+
filterable=True,
|
| 186 |
+
interactive=True,
|
| 187 |
label="⌨️ 请输入您要查询的 TCGA 项目名称,如 TCGA-READ",
|
| 188 |
), # Please enter the name of the TCGA project you want to query, such as TCGA-READ.
|
| 189 |
gr.Dropdown(
|