financial-rag-chatbot / scripts /run_on_codespaces.sh
Claude
Add remote indexing options and pipeline testing tools
047f43e unverified
raw
history blame contribute delete
638 Bytes
#!/bin/bash
# Script intended to be run on GitHub Codespaces.
# Downloads the PDFs from cloud storage (manual step, see section 1), runs the
# indexer, then commits and pushes the resulting vector database.
#
# All paths below are relative to the current working directory
# (presumably the repository root -- run the script from there).

# Abort on the first failing command (and on unset variables / failed pipeline
# stages) so that a failed indexing run is never committed and pushed.
set -euo pipefail

echo "클라우드 환경에서 인덱싱"

# 1. Download the PDFs (e.g. from Google Drive, Dropbox, ...)
#    using a tool such as rclone or gdown.
# Google Drive example:
# pip install gdown
# gdown --folder YOUR_GOOGLE_DRIVE_FOLDER_ID -O data/pdfs/
# Dropbox example:
# wget "YOUR_DROPBOX_SHARE_LINK&dl=1" -O pdfs.zip
# unzip pdfs.zip -d data/pdfs/

# 2. Run the indexer
python scripts/index_pdfs.py

# 3. Push to GitHub automatically
git add data/chroma_db/

# `git commit` exits non-zero when the index holds no changes, which would
# abort the script under `set -e`. `git diff --cached --quiet` exits 0 exactly
# in that case, so the commit is only attempted when something is staged.
if git diff --cached --quiet; then
  echo "No staged changes; skipping commit." >&2
else
  git commit -m "Add vector database"
fi

# Push in both cases so that commits created earlier still reach the remote.
git push

echo "완료!"