Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -149,6 +149,8 @@ def extract_information(api_key, prompt, examples_json, input_text, template_nam
|
|
| 149 |
if not input_text.strip():
|
| 150 |
raise gr.Error("⚠️ 请输入待提取的文本内容")
|
| 151 |
|
|
|
|
|
|
|
| 152 |
try:
|
| 153 |
examples_data = json.loads(examples_json)
|
| 154 |
examples = [
|
|
@@ -159,7 +161,9 @@ def extract_information(api_key, prompt, examples_json, input_text, template_nam
|
|
| 159 |
]
|
| 160 |
except (json.JSONDecodeError, KeyError) as e:
|
| 161 |
raise gr.Error(f"❌ 示例JSON格式错误: {e}")
|
|
|
|
| 162 |
|
|
|
|
| 163 |
try:
|
| 164 |
os.environ['LANGEXTRACT_API_KEY'] = api_key
|
| 165 |
|
|
@@ -167,16 +171,14 @@ def extract_information(api_key, prompt, examples_json, input_text, template_nam
|
|
| 167 |
text_or_documents=input_text,
|
| 168 |
prompt_description=prompt,
|
| 169 |
examples=examples,
|
| 170 |
-
model_id="gemini-2.5-flash",
|
| 171 |
)
|
| 172 |
|
| 173 |
-
# langextract 返回的是列表,取第一个结果
|
| 174 |
if isinstance(results, list) and len(results) > 0:
|
| 175 |
result = results[0]
|
| 176 |
else:
|
| 177 |
result = results
|
| 178 |
|
| 179 |
-
# 手动构建 extraction 字典
|
| 180 |
extractions_list = []
|
| 181 |
if hasattr(result, 'extractions'):
|
| 182 |
for ext in result.extractions:
|
|
@@ -188,33 +190,39 @@ def extract_information(api_key, prompt, examples_json, input_text, template_nam
|
|
| 188 |
}
|
| 189 |
extractions_list.append(ext_dict)
|
| 190 |
|
| 191 |
-
# 构建输出字典
|
| 192 |
output_dict = {
|
| 193 |
"source_text": result.text if hasattr(result, 'text') else input_text,
|
| 194 |
"extractions": extractions_list
|
| 195 |
}
|
| 196 |
|
| 197 |
-
#
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
#lx.io.save_annotated_documents([results], file_path=tmp_file.name)
|
| 203 |
-
#download_path = tmp_file.name
|
| 204 |
-
|
| 205 |
|
| 206 |
-
#
|
| 207 |
-
with tempfile.
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 211 |
else:
|
| 212 |
-
#
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
|
| 217 |
-
|
| 218 |
|
| 219 |
stats = generate_statistics(output_dict)
|
| 220 |
history = save_to_history(input_text, output_dict, template_name)
|
|
@@ -226,6 +234,7 @@ def extract_information(api_key, prompt, examples_json, input_text, template_nam
|
|
| 226 |
error_detail = traceback.format_exc()
|
| 227 |
print(f"详细错误信息:\n{error_detail}")
|
| 228 |
raise gr.Error(f"❌ 提取失败: {str(e)}")
|
|
|
|
| 229 |
|
| 230 |
def load_template(template_name):
|
| 231 |
"""加载预设模板"""
|
|
|
|
| 149 |
if not input_text.strip():
|
| 150 |
raise gr.Error("⚠️ 请输入待提取的文本内容")
|
| 151 |
|
| 152 |
+
|
| 153 |
+
|
| 154 |
try:
|
| 155 |
examples_data = json.loads(examples_json)
|
| 156 |
examples = [
|
|
|
|
| 161 |
]
|
| 162 |
except (json.JSONDecodeError, KeyError) as e:
|
| 163 |
raise gr.Error(f"❌ 示例JSON格式错误: {e}")
|
| 164 |
+
|
| 165 |
|
| 166 |
+
# ... 其他代码保持不变 ...
|
| 167 |
try:
|
| 168 |
os.environ['LANGEXTRACT_API_KEY'] = api_key
|
| 169 |
|
|
|
|
| 171 |
text_or_documents=input_text,
|
| 172 |
prompt_description=prompt,
|
| 173 |
examples=examples,
|
| 174 |
+
model_id="gemini-2.5-flash-latest", # 建议使用最新的模型
|
| 175 |
)
|
| 176 |
|
|
|
|
| 177 |
if isinstance(results, list) and len(results) > 0:
|
| 178 |
result = results[0]
|
| 179 |
else:
|
| 180 |
result = results
|
| 181 |
|
|
|
|
| 182 |
extractions_list = []
|
| 183 |
if hasattr(result, 'extractions'):
|
| 184 |
for ext in result.extractions:
|
|
|
|
| 190 |
}
|
| 191 |
extractions_list.append(ext_dict)
|
| 192 |
|
|
|
|
| 193 |
output_dict = {
|
| 194 |
"source_text": result.text if hasattr(result, 'text') else input_text,
|
| 195 |
"extractions": extractions_list
|
| 196 |
}
|
| 197 |
|
| 198 |
+
# --- START: 修改后的文件保存逻辑 ---
|
| 199 |
+
|
| 200 |
+
# 1. 创建一个临时的、持久化的文件,用于最终的下载
|
| 201 |
+
with tempfile.NamedTemporaryFile(mode='w+', delete=False, suffix='.jsonl', encoding='utf-8') as final_output_file:
|
| 202 |
+
download_path = final_output_file.name
|
|
|
|
|
|
|
|
|
|
| 203 |
|
| 204 |
+
# 2. 创建一个临时目录,供 langextract 库使用
|
| 205 |
+
with tempfile.TemporaryDirectory() as tmp_dir:
|
| 206 |
+
# 准备好要保存的文档列表
|
| 207 |
+
documents_to_save = results if isinstance(results, list) else [results]
|
| 208 |
+
|
| 209 |
+
# 3. 让库将结果保存到这个临时目录中
|
| 210 |
+
lx.io.save_annotated_documents(documents_to_save, tmp_dir)
|
| 211 |
+
|
| 212 |
+
# 4. 库通常会生成一个名为 'output.jsonl' 的文件,找到它
|
| 213 |
+
source_file_path = os.path.join(tmp_dir, 'output.jsonl')
|
| 214 |
+
|
| 215 |
+
# 5. 将生成的文件内容,复制到我们为 Gradio 准备的最终文件中
|
| 216 |
+
if os.path.exists(source_file_path):
|
| 217 |
+
with open(source_file_path, 'r', encoding='utf-8') as src_file:
|
| 218 |
+
with open(download_path, 'w', encoding='utf-8') as dest_file:
|
| 219 |
+
dest_file.write(src_file.read())
|
| 220 |
else:
|
| 221 |
+
# 如果没有生成文件,就直接保存我们构造的JSON字典
|
| 222 |
+
with open(download_path, 'w', encoding='utf-8') as dest_file:
|
| 223 |
+
json.dump(output_dict, dest_file, ensure_ascii=False, indent=2)
|
|
|
|
| 224 |
|
| 225 |
+
# --- END: 修改后的文件保存逻辑 ---
|
| 226 |
|
| 227 |
stats = generate_statistics(output_dict)
|
| 228 |
history = save_to_history(input_text, output_dict, template_name)
|
|
|
|
| 234 |
error_detail = traceback.format_exc()
|
| 235 |
print(f"详细错误信息:\n{error_detail}")
|
| 236 |
raise gr.Error(f"❌ 提取失败: {str(e)}")
|
| 237 |
+
|
| 238 |
|
| 239 |
def load_template(template_name):
|
| 240 |
"""加载预设模板"""
|