lanny xu committed on
Commit
ae2e9ee
·
1 Parent(s): 47b875d

delete vectara

Browse files
Files changed (1) hide show
  1. kaggle_simple_multimodal.py +49 -2
kaggle_simple_multimodal.py CHANGED
@@ -108,6 +108,42 @@ def query_with_multimodal(rag_system: AdaptiveRAGSystem, query: str, image_paths
108
  print(f"❌ 查询失败: {e}")
109
  return None
110
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
  def main():
112
  """主函数"""
113
  print("🚀 Kaggle简化多模态测试")
@@ -116,12 +152,15 @@ def main():
116
  # 设置环境
117
  setup_kaggle_environment()
118
 
119
- # 检查上传的文件
 
 
 
120
  working_dir = '/kaggle/working'
121
  pdf_files = [f for f in os.listdir(working_dir) if f.endswith('.pdf')]
122
  image_files = [f for f in os.listdir(working_dir) if any(f.lower().endswith(ext) for ext in ['.jpg', '.jpeg', '.png', '.gif', '.bmp'])]
123
 
124
- print(f"\n📁 发现文件:")
125
  print(f" - PDF文件: {len(pdf_files)} 个")
126
  for pdf in pdf_files:
127
  print(f" * {pdf}")
@@ -130,6 +169,14 @@ def main():
130
  for img in image_files:
131
  print(f" * {img}")
132
 
 
 
 
 
 
 
 
 
133
  # 处理文件
134
  pdf_path = os.path.join(working_dir, pdf_files[0]) if pdf_files else None
135
  image_paths = [os.path.join(working_dir, img) for img in image_files] if image_files else None
 
108
  print(f"❌ 查询失败: {e}")
109
  return None
110
 
111
def scan_and_copy_files(input_dir='/kaggle/input', working_dir='/kaggle/working'):
    """Copy every PDF and image found under ``input_dir`` into ``working_dir``.

    The input tree is walked recursively and matching files are copied flat
    (by base name) into ``working_dir``. PDFs are matched on a lowercase
    ``.pdf`` suffix; images are matched case-insensitively on
    ``.jpg``/``.jpeg``/``.png``/``.gif``/``.bmp``.

    Args:
        input_dir: Directory to scan. Defaults to ``/kaggle/input``.
        working_dir: Directory that receives the copies. It is created if
            missing. Defaults to ``/kaggle/working``.

    Returns:
        None. Progress and a summary are printed to stdout. If ``input_dir``
        is not a directory, a notice is printed and nothing is copied.
    """
    import shutil

    image_exts = ('.jpg', '.jpeg', '.png', '.gif', '.bmp')

    if not os.path.isdir(input_dir):
        print(f"⚠️ {input_dir}/ 目录不存在,跳过文件扫描")
        return

    print(f"📂 扫描 {input_dir}/ 目录...")

    # The copy target must exist before shutil.copy is called.
    os.makedirs(working_dir, exist_ok=True)

    copied_pdfs = []
    copied_images = []
    # Base names already copied during THIS run. The copy is flat, so two
    # files with the same name in different sub-folders would otherwise
    # overwrite each other silently and be counted twice.
    seen_names = set()

    for root, dirs, files in os.walk(input_dir):
        # Sorting in place makes the traversal order (and therefore which
        # of several same-named files is kept) deterministic.
        dirs.sort()
        for name in sorted(files):
            if name.endswith('.pdf'):
                action, bucket = "复制 PDF", copied_pdfs
            elif name.lower().endswith(image_exts):
                action, bucket = "复制图片", copied_images
            else:
                continue

            src = os.path.join(root, name)
            if name in seen_names:
                print(f" ⚠️ 跳过同名文件: {src}")
                continue

            try:
                shutil.copy(src, os.path.join(working_dir, name))
            except OSError as exc:
                # One unreadable file should not abort the whole scan.
                print(f" ❌ 复制失败: {src} ({exc})")
                continue

            seen_names.add(name)
            bucket.append(name)
            print(f" ✅ {action}: {name}")

    if copied_pdfs or copied_images:
        print(f"\n📁 复制完成: {len(copied_pdfs)} 个 PDF, {len(copied_images)} 张图片")
    else:
        print("⚠️ 未找到 PDF 或图片文件")
146
+
147
  def main():
148
  """主函数"""
149
  print("🚀 Kaggle简化多模态测试")
 
152
  # 设置环境
153
  setup_kaggle_environment()
154
 
155
+ # 从 /kaggle/input/ 复制文件到 /kaggle/working/
156
+ scan_and_copy_files()
157
+
158
+ # 检查文件
159
  working_dir = '/kaggle/working'
160
  pdf_files = [f for f in os.listdir(working_dir) if f.endswith('.pdf')]
161
  image_files = [f for f in os.listdir(working_dir) if any(f.lower().endswith(ext) for ext in ['.jpg', '.jpeg', '.png', '.gif', '.bmp'])]
162
 
163
+ print(f"\n📁 /kaggle/working/ 中的文件:")
164
  print(f" - PDF文件: {len(pdf_files)} 个")
165
  for pdf in pdf_files:
166
  print(f" * {pdf}")
 
169
  for img in image_files:
170
  print(f" * {img}")
171
 
172
+ if not pdf_files and not image_files:
173
+ print("\n💡 使用说明:")
174
+ print(" 1. 在 Kaggle Notebook 右侧点击 '+ Add data'")
175
+ print(" 2. 选择 'Upload' 标签")
176
+ print(" 3. 上传你的 PDF 和图片文件")
177
+ print(" 4. 重新运行此脚本")
178
+ return
179
+
180
  # 处理文件
181
  pdf_path = os.path.join(working_dir, pdf_files[0]) if pdf_files else None
182
  image_paths = [os.path.join(working_dir, img) for img in image_files] if image_files else None