# Spaces:
# Runtime error
# Runtime error
# Batch driver: runs the crawl step of sheet.py once for each numbered CSV
# file, pausing between runs to avoid hitting the API rate limit.
# Only POSIX sh constructs are used, so it runs the same under sh and bash.

# Stop on unset variables and on any command failure that is not handled
# explicitly below.
set -eu

# Base name of the input files; the loop appends "<index>.csv" to it.
# base_file="output_0402_1_篩選結果.xlsx - Sheet1_"
# base_file="output_0402_2_篩選結果.xlsx - Sheet1_"
# base_file="output_0402_3_篩選結果.xlsx - Sheet1_"
base_file="output_0402_4_篩選結果.xlsx - Sheet1_"

# Index range to process. Both bounds are inclusive, so start_index=0 and
# total_files=17 yields 18 runs (indices 0 through 17).
# NOTE(review): if total_files is meant to be a count rather than the last
# index, the upper bound should be total_files - 1 -- confirm against the
# files actually present in data/production.
start_index=0
total_files=17

# Per-run consumption and API limit settings.
# consumption_per_run=1000
# api_rate_limit=3000
# api_rate_limit is informational only: nothing in this script reads it.
api_rate_limit=20000
wait_time_in_seconds=60 # 1500 # 25 mins

# Number of files whose python run exited non-zero.
failed_count=0

# Main loop: one python invocation per file index.
i=$start_index
while [ "$i" -le "$total_files" ]; do
  # Build the per-index input file name and crawl-cache path.
  file_name="${base_file}${i}.csv"
  crawled_file_path="${base_file}${i}/crawled_results.joblib"

  # Run the crawl. A failure is recorded and reported, but the remaining
  # files are still attempted (the original script also kept going).
  if python sheet.py --data_path "data/production/${file_name}" --task new \
    --step crawl \
    --output_dir data/gpt-4o-mini \
    --n_processes 4 \
    --serp_provider serp \
    --crawled_file_path "${crawled_file_path}" \
    --extraction_provider openai \
    --extraction_model gpt-4o-mini \
    --regularization_provider openai \
    --regularization_model gpt-4o-mini; then
    progress="Completed task for ${file_name}."
  else
    status=$?
    failed_count=$((failed_count + 1))
    echo "Task failed for ${file_name} (exit status ${status})." >&2
    progress="Continuing after failed task for ${file_name}."
  fi

  # Wait to avoid the API rate limit. The pause is skipped after the last
  # file because no further request follows it.
  if [ "$i" -lt "$total_files" ]; then
    echo "${progress} Waiting for ${wait_time_in_seconds} seconds..."
    sleep "$wait_time_in_seconds"
  else
    echo "${progress}"
  fi

  i=$((i + 1))
done

# Surface failures through the exit status so callers can detect them.
if [ "$failed_count" -gt 0 ]; then
  echo "All tasks attempted; ${failed_count} failed." >&2
  exit 1
fi

echo "All tasks completed."