leonsimon23 committed on
Commit
251413d
·
verified ·
1 Parent(s): 052cb21

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -22
app.py CHANGED
@@ -149,6 +149,8 @@ def extract_information(api_key, prompt, examples_json, input_text, template_nam
149
  if not input_text.strip():
150
  raise gr.Error("⚠️ 请输入待提取的文本内容")
151
 
 
 
152
  try:
153
  examples_data = json.loads(examples_json)
154
  examples = [
@@ -159,7 +161,9 @@ def extract_information(api_key, prompt, examples_json, input_text, template_nam
159
  ]
160
  except (json.JSONDecodeError, KeyError) as e:
161
  raise gr.Error(f"❌ 示例JSON格式错误: {e}")
 
162
 
 
163
  try:
164
  os.environ['LANGEXTRACT_API_KEY'] = api_key
165
 
@@ -167,16 +171,14 @@ def extract_information(api_key, prompt, examples_json, input_text, template_nam
167
  text_or_documents=input_text,
168
  prompt_description=prompt,
169
  examples=examples,
170
- model_id="gemini-2.5-flash",
171
  )
172
 
173
- # langextract 返回的是列表,取第一个结果
174
  if isinstance(results, list) and len(results) > 0:
175
  result = results[0]
176
  else:
177
  result = results
178
 
179
- # 手动构建 extraction 字典
180
  extractions_list = []
181
  if hasattr(result, 'extractions'):
182
  for ext in result.extractions:
@@ -188,33 +190,39 @@ def extract_information(api_key, prompt, examples_json, input_text, template_nam
188
  }
189
  extractions_list.append(ext_dict)
190
 
191
- # 构建输出字典
192
  output_dict = {
193
  "source_text": result.text if hasattr(result, 'text') else input_text,
194
  "extractions": extractions_list
195
  }
196
 
197
- # 保存文件
198
- #with tempfile.NamedTemporaryFile(mode='w+', delete=False, suffix='.jsonl', encoding='utf-8') as tmp_file:
199
- #if isinstance(results, list):
200
- #lx.io.save_annotated_documents(results, file_path=tmp_file.name)
201
- #else:
202
- #lx.io.save_annotated_documents([results], file_path=tmp_file.name)
203
- #download_path = tmp_file.name
204
-
205
 
206
- # 保存文件
207
- with tempfile.NamedTemporaryFile(mode='w+', delete=False, suffix='.jsonl', encoding='utf-8') as tmp_file:
208
- if isinstance(results, list):
209
- # 修正:直接传递路径作为第二个参数
210
- lx.io.save_annotated_documents(results, tmp_file.name)
 
 
 
 
 
 
 
 
 
 
 
211
  else:
212
- # 修正:直接传递路径作为第二个参数
213
- lx.io.save_annotated_documents([results], tmp_file.name)
214
- download_path = tmp_file.name
215
-
216
 
217
-
218
 
219
  stats = generate_statistics(output_dict)
220
  history = save_to_history(input_text, output_dict, template_name)
@@ -226,6 +234,7 @@ def extract_information(api_key, prompt, examples_json, input_text, template_nam
226
  error_detail = traceback.format_exc()
227
  print(f"详细错误信息:\n{error_detail}")
228
  raise gr.Error(f"❌ 提取失败: {str(e)}")
 
229
 
230
  def load_template(template_name):
231
  """加载预设模板"""
 
149
  if not input_text.strip():
150
  raise gr.Error("⚠️ 请输入待提取的文本内容")
151
 
152
+
153
+
154
  try:
155
  examples_data = json.loads(examples_json)
156
  examples = [
 
161
  ]
162
  except (json.JSONDecodeError, KeyError) as e:
163
  raise gr.Error(f"❌ 示例JSON格式错误: {e}")
164
+
165
 
166
+ # ... 其他代码保持不变 ...
167
  try:
168
  os.environ['LANGEXTRACT_API_KEY'] = api_key
169
 
 
171
  text_or_documents=input_text,
172
  prompt_description=prompt,
173
  examples=examples,
174
+ model_id="gemini-2.5-flash-latest", # 建议使用最新的模型
175
  )
176
 
 
177
  if isinstance(results, list) and len(results) > 0:
178
  result = results[0]
179
  else:
180
  result = results
181
 
 
182
  extractions_list = []
183
  if hasattr(result, 'extractions'):
184
  for ext in result.extractions:
 
190
  }
191
  extractions_list.append(ext_dict)
192
 
 
193
  output_dict = {
194
  "source_text": result.text if hasattr(result, 'text') else input_text,
195
  "extractions": extractions_list
196
  }
197
 
198
+ # --- START: 修改后的文件保存逻辑 ---
199
+
200
+ # 1. 创建一个临时的、持久化的文件,用于最终的下载
201
+ with tempfile.NamedTemporaryFile(mode='w+', delete=False, suffix='.jsonl', encoding='utf-8') as final_output_file:
202
+ download_path = final_output_file.name
 
 
 
203
 
204
+ # 2. 创建一个临时目录,供 langextract 库使用
205
+ with tempfile.TemporaryDirectory() as tmp_dir:
206
+ # 准备好要保存的文档列表
207
+ documents_to_save = results if isinstance(results, list) else [results]
208
+
209
+ # 3. 让库将结果保存到这个临时目录中
210
+ lx.io.save_annotated_documents(documents_to_save, tmp_dir)
211
+
212
+ # 4. 库通常会生成一个名为 'output.jsonl' 的文件,找到它
213
+ source_file_path = os.path.join(tmp_dir, 'output.jsonl')
214
+
215
+ # 5. 将生成的文件内容,复制到我们为 Gradio 准备的最终文件中
216
+ if os.path.exists(source_file_path):
217
+ with open(source_file_path, 'r', encoding='utf-8') as src_file:
218
+ with open(download_path, 'w', encoding='utf-8') as dest_file:
219
+ dest_file.write(src_file.read())
220
  else:
221
+ # 如果没有生成文件,就直接保存我们构造的JSON字典
222
+ with open(download_path, 'w', encoding='utf-8') as dest_file:
223
+ json.dump(output_dict, dest_file, ensure_ascii=False, indent=2)
 
224
 
225
+ # --- END: 修改后的文件保存逻辑 ---
226
 
227
  stats = generate_statistics(output_dict)
228
  history = save_to_history(input_text, output_dict, template_name)
 
234
  error_detail = traceback.format_exc()
235
  print(f"详细错误信息:\n{error_detail}")
236
  raise gr.Error(f"❌ 提取失败: {str(e)}")
237
+
238
 
239
  def load_template(template_name):
240
  """加载预设模板"""