Spaces:
Sleeping
Sleeping
Fix knowledge map to scan upload directory when no session data is available
Browse files - gemini_chatbot.py (+96 -2)
gemini_chatbot.py
CHANGED
|
@@ -454,6 +454,13 @@ def upload_file():
|
|
| 454 |
file.save(file_path)
|
| 455 |
logging.info(f"File saved: {file_path}")
|
| 456 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 457 |
# Extract text
|
| 458 |
text_content = None
|
| 459 |
if file_ext.lower() == '.pdf':
|
|
@@ -483,6 +490,10 @@ def upload_file():
|
|
| 483 |
summary = "Could not generate summary, but you can still ask questions about the document."
|
| 484 |
tags = []
|
| 485 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 486 |
# Store session data - uploaded file context
|
| 487 |
if 'uploaded_files_context' not in session:
|
| 488 |
session['uploaded_files_context'] = []
|
|
@@ -509,8 +520,23 @@ def upload_file():
|
|
| 509 |
|
| 510 |
# Ensure session changes are saved
|
| 511 |
session.modified = True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 512 |
logging.info(f"Stored context for {safe_filename} in session with {len(tags)} tags")
|
| 513 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 514 |
return jsonify({
|
| 515 |
"success": True,
|
| 516 |
"filename": safe_filename,
|
|
@@ -576,7 +602,7 @@ def get_knowledge_map():
|
|
| 576 |
uploaded_files = session.get('uploaded_files', [])
|
| 577 |
|
| 578 |
logging.debug(f"Knowledge map API - Session data keys: {session.keys()}")
|
| 579 |
-
logging.debug(f"Knowledge map API - Files found: {len(uploaded_files)}")
|
| 580 |
|
| 581 |
# 如果没有上传文件,检查context数据作为备选
|
| 582 |
if not uploaded_files and 'uploaded_files_context' in session:
|
|
@@ -590,9 +616,77 @@ def get_knowledge_map():
|
|
| 590 |
})
|
| 591 |
logging.debug(f"Used context data as fallback, found {len(uploaded_files)} files")
|
| 592 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 593 |
# 如果还是没有上传文件,返回空数据
|
| 594 |
if not uploaded_files:
|
| 595 |
-
logging.warning("No uploaded files found in session for knowledge map")
|
| 596 |
return jsonify({
|
| 597 |
"centralTopic": "Knowledge Center",
|
| 598 |
"documents": []
|
|
|
|
| 454 |
file.save(file_path)
|
| 455 |
logging.info(f"File saved: {file_path}")
|
| 456 |
|
| 457 |
+
# 检查文件是否真的已经保存
|
| 458 |
+
if not os.path.exists(file_path):
|
| 459 |
+
logging.error(f"文件保存失败,路径不存在: {file_path}")
|
| 460 |
+
return jsonify({"error": "Failed to save file", "success": False}), 500
|
| 461 |
+
else:
|
| 462 |
+
logging.info(f"文件成功保存,大小: {os.path.getsize(file_path)} 字节")
|
| 463 |
+
|
| 464 |
# Extract text
|
| 465 |
text_content = None
|
| 466 |
if file_ext.lower() == '.pdf':
|
|
|
|
| 490 |
summary = "Could not generate summary, but you can still ask questions about the document."
|
| 491 |
tags = []
|
| 492 |
|
| 493 |
+
# 记录当前会话状态
|
| 494 |
+
logging.debug(f"当前会话ID: {session.sid if hasattr(session, 'sid') else '未知'}")
|
| 495 |
+
logging.debug(f"当前会话内容 (before update): {list(session.keys())}")
|
| 496 |
+
|
| 497 |
# Store session data - uploaded file context
|
| 498 |
if 'uploaded_files_context' not in session:
|
| 499 |
session['uploaded_files_context'] = []
|
|
|
|
| 520 |
|
| 521 |
# Ensure session changes are saved
|
| 522 |
session.modified = True
|
| 523 |
+
|
| 524 |
+
# 打印更新后的会话信息
|
| 525 |
+
logging.debug(f"会话更新后的内容: {list(session.keys())}")
|
| 526 |
+
logging.debug(f"uploaded_files_context 长度: {len(session.get('uploaded_files_context', []))}")
|
| 527 |
+
logging.debug(f"uploaded_files 长度: {len(session.get('uploaded_files', []))}")
|
| 528 |
+
|
| 529 |
logging.info(f"Stored context for {safe_filename} in session with {len(tags)} tags")
|
| 530 |
|
| 531 |
+
# 添加对上传目录的列表,确认文件已保存
|
| 532 |
+
try:
|
| 533 |
+
upload_dir = app.config['UPLOAD_FOLDER']
|
| 534 |
+
if os.path.exists(upload_dir):
|
| 535 |
+
files_in_dir = os.listdir(upload_dir)
|
| 536 |
+
logging.info(f"上传目录中的文件: {files_in_dir}")
|
| 537 |
+
except Exception as e:
|
| 538 |
+
logging.error(f"列出上传目录文件失败: {e}")
|
| 539 |
+
|
| 540 |
return jsonify({
|
| 541 |
"success": True,
|
| 542 |
"filename": safe_filename,
|
|
|
|
| 602 |
uploaded_files = session.get('uploaded_files', [])
|
| 603 |
|
| 604 |
logging.debug(f"Knowledge map API - Session data keys: {session.keys()}")
|
| 605 |
+
logging.debug(f"Knowledge map API - Files found in session: {len(uploaded_files)}")
|
| 606 |
|
| 607 |
# 如果没有上传文件,检查context数据作为备选
|
| 608 |
if not uploaded_files and 'uploaded_files_context' in session:
|
|
|
|
| 616 |
})
|
| 617 |
logging.debug(f"Used context data as fallback, found {len(uploaded_files)} files")
|
| 618 |
|
| 619 |
+
# 如果会话中仍然没有文件信息,直接从上传目录扫描文件
|
| 620 |
+
if not uploaded_files:
|
| 621 |
+
logging.info("No files found in session, scanning upload directory...")
|
| 622 |
+
try:
|
| 623 |
+
upload_dir = app.config['UPLOAD_FOLDER']
|
| 624 |
+
if os.path.exists(upload_dir):
|
| 625 |
+
files_in_dir = os.listdir(upload_dir)
|
| 626 |
+
logging.info(f"Files found in upload directory: {files_in_dir}")
|
| 627 |
+
|
| 628 |
+
# 过滤出PDF和TXT文件
|
| 629 |
+
valid_files = [f for f in files_in_dir if f.lower().endswith(('.pdf', '.txt'))]
|
| 630 |
+
|
| 631 |
+
for filename in valid_files:
|
| 632 |
+
# 从文件名中提取原始文件名(去除UUID前缀)
|
| 633 |
+
original_filename = re.sub(r'^[a-f0-9\-]+_', '', filename)
|
| 634 |
+
|
| 635 |
+
# 尝试从文件中提取内容并生成摘要
|
| 636 |
+
file_path = os.path.join(upload_dir, filename)
|
| 637 |
+
try:
|
| 638 |
+
# 提取文本内容
|
| 639 |
+
text_content = None
|
| 640 |
+
if filename.lower().endswith('.pdf'):
|
| 641 |
+
text_content = extract_text_from_pdf(file_path)
|
| 642 |
+
elif filename.lower().endswith('.txt'):
|
| 643 |
+
text_content = extract_text_from_txt(file_path)
|
| 644 |
+
|
| 645 |
+
# 如果成功提取了文本,生成摘要和标签
|
| 646 |
+
if text_content:
|
| 647 |
+
try:
|
| 648 |
+
summary, tags = get_summary_and_tags_from_gemini(text_content)
|
| 649 |
+
except Exception as e:
|
| 650 |
+
logging.error(f"Error generating summary for scanned file: {e}")
|
| 651 |
+
summary = "Auto-generated summary for " + original_filename
|
| 652 |
+
tags = ["Uncategorized"]
|
| 653 |
+
|
| 654 |
+
# 将文件信息添加到上传文件列表中
|
| 655 |
+
uploaded_files.append({
|
| 656 |
+
'filename': original_filename,
|
| 657 |
+
'summary': summary,
|
| 658 |
+
'tags': tags
|
| 659 |
+
})
|
| 660 |
+
|
| 661 |
+
# 同时更新会话中的文件信息,以便后续使用
|
| 662 |
+
if 'uploaded_files_context' not in session:
|
| 663 |
+
session['uploaded_files_context'] = []
|
| 664 |
+
|
| 665 |
+
# 检查文件是否已经在会话中
|
| 666 |
+
file_exists = False
|
| 667 |
+
for existing_file in session.get('uploaded_files_context', []):
|
| 668 |
+
if existing_file.get('unique_filename') == filename:
|
| 669 |
+
file_exists = True
|
| 670 |
+
break
|
| 671 |
+
|
| 672 |
+
if not file_exists:
|
| 673 |
+
session['uploaded_files_context'].append({
|
| 674 |
+
'filename': original_filename,
|
| 675 |
+
'unique_filename': filename,
|
| 676 |
+
'summary': summary,
|
| 677 |
+
'tags': tags
|
| 678 |
+
})
|
| 679 |
+
|
| 680 |
+
# 确保会话更改被保存
|
| 681 |
+
session.modified = True
|
| 682 |
+
except Exception as e:
|
| 683 |
+
logging.error(f"Error processing scanned file {filename}: {e}")
|
| 684 |
+
except Exception as e:
|
| 685 |
+
logging.error(f"Error scanning upload directory: {e}")
|
| 686 |
+
|
| 687 |
# 如果还是没有上传文件,返回空数据
|
| 688 |
if not uploaded_files:
|
| 689 |
+
logging.warning("No uploaded files found in session or directory for knowledge map")
|
| 690 |
return jsonify({
|
| 691 |
"centralTopic": "Knowledge Center",
|
| 692 |
"documents": []
|