"""Pre-download every dataset used by the project into a shared cache.

Running this file calls ``load_dataset`` once per (repository, subset) pair
so that the data ends up under ``save_root``. Nothing is returned or kept in
memory.

How to load a training dataset::

    from datasets import load_dataset
    db = load_dataset(...)["train"]
    for x in db:
        # x is a dict, e.g.
        # {"corpus-id": "6519.png",
        #  "image": <PIL.PngImagePlugin.PngImageFile
        #            image mode=RGBA size=1263x700 at 0x7F0303CD6AD0>}
        ...

How to load a test dataset::

    from datasets import load_dataset
    dbcorpus = load_dataset(..., "corpus")["train"]
    dbqrels = load_dataset(..., "qrels")["train"]
    dbqueries = load_dataset(..., "queries")["train"]

    # Image corpora
    for x in dbcorpus:
        # x is a dict, e.g.
        # {"corpus-id": "<image id>",
        #  "image": <PIL.PngImagePlugin.PngImageFile
        #            image mode=RGBA size=1263x700 at 0x7F0303CD6AD0>}
        ...
    for x in dbqrels:
        # x is a dict, e.g.
        # {"query-id": "<question id>", "corpus-id": "<image id>"}
        ...
    for x in dbqueries:
        # x is a dict, e.g.
        # {"query-id": "<question id>", "query": "<question>",
        #  "answer": "<answer to the question>"}
        ...

    # OCR (text) corpora
    for x in dbcorpus:
        # x is a dict, e.g.
        # {"corpus-id": "6519.png", "text": "string to describe a photo"}
        ...
    for x in dbqrels:
        # x is a dict, e.g.
        # {"query-id": "<question id>", "corpus-id": "<image id>"}
        ...
    for x in dbqueries:
        # x is a dict, e.g.
        # {"query-id": "<question id>", "query": "<question>",
        #  "answer": "<answer to the question>"}
        ...
"""

from datasets import load_dataset

# Cache directory that receives every download.
save_root = r"/group-volume/Human-Action-Analysis/users/hsiang.chen/Robust/datasets/"

# Training repositories; loaded without a subset name.
_TRAIN_REPOS = (
    "openbmb/VisRAG-Ret-Train-In-domain-data",
    "openbmb/VisRAG-Ret-Train-Synthetic-data",
)

# Benchmark names shared by the VisRAG test repositories and the
# "exo3-original-*" corpus variants derived from them.
_BENCHMARKS = (
    "PlotQA",
    "SlideVQA",
    "InfoVQA",
    "ArxivQA",
    "ChartQA",
    "MP-DocVQA",
)

# Self-contained image repositories (corpus, qrels and queries in one repo).
_REALWORLD_IMAGE_REPOS = (
    "rweics5cs7/exo7-realworld-db-combined",
    "rweics5cs7/exo7-realworld-db-combined-deg",
    "rweics5cs7/exo9-realworld-db-combined",
    "rweics5cs7/exo10-realworld-db-combined",
)

# Self-contained "text" repositories.
# NOTE(review): presumably the OCR counterparts of the image repos - confirm.
_REALWORLD_TEXT_REPOS = (
    "rweics5cs7/exo8-realworld-db-combined-text-v3",
    "rweics5cs7/exo8-realworld-db-combined-text-deg-v3",
    "rweics5cs7/exo9-realworld-db-combined-text-v3",
    "rweics5cs7/exo10-realworld-db-combined-text-v3",
)


def _visrag_test_repo(benchmark):
    """Return the openbmb VisRAG test repository id for *benchmark*."""
    return f"openbmb/VisRAG-Ret-Test-{benchmark}"


def _fetch_retrieval_set(corpus_repo, qa_repo=None):
    """Load the corpus, qrels and queries subsets of one retrieval dataset.

    Args:
        corpus_repo: Repository id that provides the "corpus" subset.
        qa_repo: Repository id that provides the "qrels" and "queries"
            subsets. Defaults to ``corpus_repo``.

    Raises:
        KeyError: If a loaded subset has no "train" split.
    """
    if qa_repo is None:
        qa_repo = corpus_repo
    plan = (
        (corpus_repo, "corpus"),
        (qa_repo, "qrels"),
        (qa_repo, "queries"),
    )
    for repo, subset in plan:
        # The result is discarded; indexing "train" only checks the split exists.
        load_dataset(repo, subset, cache_dir=save_root)["train"]


def _fetch_exo3_variant(suffix):
    """Load every "exo3-original-<benchmark><suffix>" corpus.

    The qrels and queries subsets come from the matching openbmb VisRAG test
    repository, as in the original call list.
    """
    for benchmark in _BENCHMARKS:
        _fetch_retrieval_set(
            f"rweics5cs7/exo3-original-{benchmark}{suffix}",
            _visrag_test_repo(benchmark),
        )


def _download_all():
    """Issue every download, in the same order as the original call list."""
    for repo in _TRAIN_REPOS:
        load_dataset(repo, cache_dir=save_root)["train"]

    # Image test sets.
    for benchmark in _BENCHMARKS:
        _fetch_retrieval_set(_visrag_test_repo(benchmark))
    _fetch_exo3_variant("-deg")
    for repo in _REALWORLD_IMAGE_REPOS:
        _fetch_retrieval_set(repo)

    # Text test sets.
    _fetch_exo3_variant("-text-v3")
    _fetch_exo3_variant("-text-deg-v3")
    for repo in _REALWORLD_TEXT_REPOS:
        _fetch_retrieval_set(repo)


# Runs at module level, as the original flat script did.
_download_all()