| # GUI-Shift Requirements |
| # Core dependencies for VLM training and evaluation |
|
|
| # Deep Learning & Transformers |
| torch>=2.1.0 |
| transformers>=4.40.0 |
| trl>=0.13.0 |
| accelerate>=0.28.0 |
| deepspeed>=0.14.0 |
|
|
| # Vision & Multimodal |
| qwen-vl-utils |
| Pillow>=10.0.0 |
|
|
| # Data Processing |
| datasets>=2.18.0 |
| numpy>=1.24.0 |
| jsonlines>=4.0.0 |
|
|
| # Training Utilities (experiment tracking and logging) |
| wandb>=0.16.0 |
| trackio>=0.5.0 |
|
|
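| # VLM-R1 Framework: not installed by this file. The line below is commented out, |
| # so install it from GitHub separately, or uncomment the line to have pip fetch it. |
| # git+https://github.com/om-ai-lab/VLM-R1.git |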
|
|
| # Evaluation |
| scikit-learn>=1.4.0 |
|
|
| # Utilities |
| tqdm>=4.66.0 |
| python-dotenv>=1.0.0 |
| babel>=2.15.0 |
| levenshtein>=0.25.0 |
| json-repair>=0.20.0 |
| math-verify>=0.2.0 |
|
|
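| # Optional: Flash Attention. Left commented out on purpose; hub kernels are used instead. |
| # Uncomment the line below only if you need the flash-attn package itself. |
| # flash-attn>=2.5.0 |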
|
|