roxqtang committed on
Commit
9ea16ce
·
1 Parent(s): ede9ff2

Fix knowledge map to scan upload directory when no session data is available

Browse files
Files changed (1) hide show
  1. gemini_chatbot.py +96 -2
gemini_chatbot.py CHANGED
@@ -454,6 +454,13 @@ def upload_file():
454
  file.save(file_path)
455
  logging.info(f"File saved: {file_path}")
456
 
 
 
 
 
 
 
 
457
  # Extract text
458
  text_content = None
459
  if file_ext.lower() == '.pdf':
@@ -483,6 +490,10 @@ def upload_file():
483
  summary = "Could not generate summary, but you can still ask questions about the document."
484
  tags = []
485
 
 
 
 
 
486
  # Store session data - uploaded file context
487
  if 'uploaded_files_context' not in session:
488
  session['uploaded_files_context'] = []
@@ -509,8 +520,23 @@ def upload_file():
509
 
510
  # Ensure session changes are saved
511
  session.modified = True
 
 
 
 
 
 
512
  logging.info(f"Stored context for {safe_filename} in session with {len(tags)} tags")
513
 
 
 
 
 
 
 
 
 
 
514
  return jsonify({
515
  "success": True,
516
  "filename": safe_filename,
@@ -576,7 +602,7 @@ def get_knowledge_map():
576
  uploaded_files = session.get('uploaded_files', [])
577
 
578
  logging.debug(f"Knowledge map API - Session data keys: {session.keys()}")
579
- logging.debug(f"Knowledge map API - Files found: {len(uploaded_files)}")
580
 
581
  # 如果没有上传文件,检查context数据作为备选
582
  if not uploaded_files and 'uploaded_files_context' in session:
@@ -590,9 +616,77 @@ def get_knowledge_map():
590
  })
591
  logging.debug(f"Used context data as fallback, found {len(uploaded_files)} files")
592
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
593
  # 如果还是没有上传文件,返回空数据
594
  if not uploaded_files:
595
- logging.warning("No uploaded files found in session for knowledge map")
596
  return jsonify({
597
  "centralTopic": "Knowledge Center",
598
  "documents": []
 
454
  file.save(file_path)
455
  logging.info(f"File saved: {file_path}")
456
 
457
+ # 检查文件是否真的已经保存
458
+ if not os.path.exists(file_path):
459
+ logging.error(f"文件保存失败,路径不存在: {file_path}")
460
+ return jsonify({"error": "Failed to save file", "success": False}), 500
461
+ else:
462
+ logging.info(f"文件成功保存,大小: {os.path.getsize(file_path)} 字节")
463
+
464
  # Extract text
465
  text_content = None
466
  if file_ext.lower() == '.pdf':
 
490
  summary = "Could not generate summary, but you can still ask questions about the document."
491
  tags = []
492
 
493
+ # 记录当前会话状态
494
+ logging.debug(f"当前会话ID: {session.sid if hasattr(session, 'sid') else '未知'}")
495
+ logging.debug(f"当前会话内容 (before update): {list(session.keys())}")
496
+
497
  # Store session data - uploaded file context
498
  if 'uploaded_files_context' not in session:
499
  session['uploaded_files_context'] = []
 
520
 
521
  # Ensure session changes are saved
522
  session.modified = True
523
+
524
+ # 打印更新后的会话信息
525
+ logging.debug(f"会话更新后的内容: {list(session.keys())}")
526
+ logging.debug(f"uploaded_files_context 长度: {len(session.get('uploaded_files_context', []))}")
527
+ logging.debug(f"uploaded_files 长度: {len(session.get('uploaded_files', []))}")
528
+
529
  logging.info(f"Stored context for {safe_filename} in session with {len(tags)} tags")
530
 
531
+ # 添加对上传目录的列表,确认文件已保存
532
+ try:
533
+ upload_dir = app.config['UPLOAD_FOLDER']
534
+ if os.path.exists(upload_dir):
535
+ files_in_dir = os.listdir(upload_dir)
536
+ logging.info(f"上传目录中的文件: {files_in_dir}")
537
+ except Exception as e:
538
+ logging.error(f"列出上传目录文件失败: {e}")
539
+
540
  return jsonify({
541
  "success": True,
542
  "filename": safe_filename,
 
602
  uploaded_files = session.get('uploaded_files', [])
603
 
604
  logging.debug(f"Knowledge map API - Session data keys: {session.keys()}")
605
+ logging.debug(f"Knowledge map API - Files found in session: {len(uploaded_files)}")
606
 
607
  # 如果没有上传文件,检查context数据作为备选
608
  if not uploaded_files and 'uploaded_files_context' in session:
 
616
  })
617
  logging.debug(f"Used context data as fallback, found {len(uploaded_files)} files")
618
 
619
+ # 如果会话中仍然没有文件信息,直接从上传目录扫描文件
620
+ if not uploaded_files:
621
+ logging.info("No files found in session, scanning upload directory...")
622
+ try:
623
+ upload_dir = app.config['UPLOAD_FOLDER']
624
+ if os.path.exists(upload_dir):
625
+ files_in_dir = os.listdir(upload_dir)
626
+ logging.info(f"Files found in upload directory: {files_in_dir}")
627
+
628
+ # 过滤出PDF和TXT文件
629
+ valid_files = [f for f in files_in_dir if f.lower().endswith(('.pdf', '.txt'))]
630
+
631
+ for filename in valid_files:
632
+ # 从文件名中提取原始文件名(去除UUID前缀)
633
+ original_filename = re.sub(r'^[a-f0-9\-]+_', '', filename)
634
+
635
+ # 尝试从文件中提取内容并生成摘要
636
+ file_path = os.path.join(upload_dir, filename)
637
+ try:
638
+ # 提取文本内容
639
+ text_content = None
640
+ if filename.lower().endswith('.pdf'):
641
+ text_content = extract_text_from_pdf(file_path)
642
+ elif filename.lower().endswith('.txt'):
643
+ text_content = extract_text_from_txt(file_path)
644
+
645
+ # 如果成功提取了文本,生成摘要和标签
646
+ if text_content:
647
+ try:
648
+ summary, tags = get_summary_and_tags_from_gemini(text_content)
649
+ except Exception as e:
650
+ logging.error(f"Error generating summary for scanned file: {e}")
651
+ summary = "Auto-generated summary for " + original_filename
652
+ tags = ["Uncategorized"]
653
+
654
+ # 将文件信息添加到上传文件列表中
655
+ uploaded_files.append({
656
+ 'filename': original_filename,
657
+ 'summary': summary,
658
+ 'tags': tags
659
+ })
660
+
661
+ # 同时更新会话中的文件信息,以便后续使用
662
+ if 'uploaded_files_context' not in session:
663
+ session['uploaded_files_context'] = []
664
+
665
+ # 检查文件是否已经在会话中
666
+ file_exists = False
667
+ for existing_file in session.get('uploaded_files_context', []):
668
+ if existing_file.get('unique_filename') == filename:
669
+ file_exists = True
670
+ break
671
+
672
+ if not file_exists:
673
+ session['uploaded_files_context'].append({
674
+ 'filename': original_filename,
675
+ 'unique_filename': filename,
676
+ 'summary': summary,
677
+ 'tags': tags
678
+ })
679
+
680
+ # 确保会话更改被保存
681
+ session.modified = True
682
+ except Exception as e:
683
+ logging.error(f"Error processing scanned file {filename}: {e}")
684
+ except Exception as e:
685
+ logging.error(f"Error scanning upload directory: {e}")
686
+
687
  # 如果还是没有上传文件,返回空数据
688
  if not uploaded_files:
689
+ logging.warning("No uploaded files found in session or directory for knowledge map")
690
  return jsonify({
691
  "centralTopic": "Knowledge Center",
692
  "documents": []