Image-Text-to-Text
Transformers
English
vision-language-model
vlm
surveillance
iot
gemma
vl-jepa
multimodal
object-detection
video-analytics
Instructions to use hardiksa/arcisvlm with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use hardiksa/arcisvlm with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("image-text-to-text", model="hardiksa/arcisvlm")

# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("hardiksa/arcisvlm", dtype="auto")
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use hardiksa/arcisvlm with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "hardiksa/arcisvlm"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "hardiksa/arcisvlm",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
Use Docker
docker model run hf.co/hardiksa/arcisvlm
- SGLang
How to use hardiksa/arcisvlm with SGLang:
Install from pip and serve model
# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "hardiksa/arcisvlm" \
  --host 0.0.0.0 \
  --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "hardiksa/arcisvlm",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
Use Docker images
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "hardiksa/arcisvlm" \
    --host 0.0.0.0 \
    --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "hardiksa/arcisvlm",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
- Docker Model Runner
How to use hardiksa/arcisvlm with Docker Model Runner:
docker model run hf.co/hardiksa/arcisvlm
| """ | |
| Data validation utilities for ArcisVLM training. | |
| Call validate_dataset() before ANY training run to ensure data is real and clean. | |
| Catches: dummy images, blank questions, list-string answers, data poisoning. | |
| """ | |
| import json | |
| import os | |
| import re | |
| import torch | |
| from torch.utils.data import Dataset | |
def validate_dataset(dataset: Dataset, stage_name: str, min_samples: int = 100):
    """Validate that a dataset contains real, clean image data.

    Checks:
    1. The dataset has at least ``min_samples`` samples and is not empty.
    2. None of the first (up to 10) images is a solid color (std < 0.001).
    3. When more than 3 images were checked, their std values are not all
       identical (spread of at least 0.0001).

    Args:
        dataset: Collection supporting ``len()`` and integer indexing whose
            items expose ``.get("image")`` returning a ``torch.Tensor``.
        stage_name: Label used in error and status messages.
        min_samples: Minimum number of samples the dataset must contain.

    Returns:
        True when every check passes.

    Raises:
        RuntimeError: If any check fails.
    """
    n = len(dataset)
    if n < min_samples:
        raise RuntimeError(
            f"FATAL: {stage_name} dataset has only {n} samples (need {min_samples}+).\n"
            f"Download real data: python3 scripts/download_all_data.py"
        )
    if n == 0:
        # Only reachable when min_samples <= 0; without this guard the
        # max()/min() calls below would fail on an empty list.
        raise RuntimeError(
            f"FATAL: {stage_name} dataset is empty; nothing to validate."
        )

    check_count = min(10, n)
    stds = []
    for i in range(check_count):
        sample = dataset[i]
        img = sample.get("image", None)
        if img is None:
            raise RuntimeError(f"FATAL: {stage_name} sample {i} has no image tensor.")
        if not isinstance(img, torch.Tensor):
            raise RuntimeError(f"FATAL: {stage_name} sample {i} image is {type(img)}, expected torch.Tensor.")

        # torch.std only accepts floating point / complex tensors, so integer
        # and bool images (e.g. uint8 pixels) are cast before measuring.
        if not (img.is_floating_point() or img.is_complex()):
            img = img.float()
        std = img.std().item()
        stds.append(std)
        if std < 0.001:
            raise RuntimeError(
                f"FATAL: {stage_name} sample {i} appears to be solid color (std={std:.6f}).\n"
                f"This looks like dummy data. Download real images first."
            )

    # Identical per-image statistics across several samples suggest that the
    # same (or a trivially generated) image is repeated.
    std_range = max(stds) - min(stds)
    if std_range < 0.0001 and check_count > 3:
        raise RuntimeError(
            f"FATAL: {stage_name} all {check_count} checked images have identical statistics.\n"
            f"std range: {std_range:.6f}. This looks like dummy/synthetic data."
        )

    print(f" Data validation PASSED for {stage_name}: "
          f"{n} samples, image std range={std_range:.4f}")
    return True
def validate_jsonl_data(jsonl_dir: str, stage_name: str):
    """Validate JSONL training data BEFORE loading into Dataset.

    Scans every ``*.jsonl`` file in ``jsonl_dir`` (in sorted name order) and
    inspects the ``"question"`` and ``"answer"`` fields of each record.

    Fatal issues (raise):
    1. More than 5% of samples have blank/empty questions.
    2. More than 5% of answers look like Python list strings
       (``['a', 'b', ...]``).
    3. A single answer accounts for more than 30% of the samples.

    Warnings only (printed):
    - Any empty question or list-string answer below the 5% threshold.
    - The most common answer exceeds 10% of the samples.
    - The most common question exceeds 20% of the samples.
    - Lines that are not valid JSON objects (these are skipped).

    Answers and questions are compared on their first 50 characters.

    Args:
        jsonl_dir: Directory containing ``.jsonl`` files. If it does not
            exist, the function returns without checking anything.
        stage_name: Label used in printed output and error messages.

    Returns:
        None.

    Raises:
        RuntimeError: If any fatal issue listed above is found.
    """
    if not os.path.exists(jsonl_dir):
        # A missing directory means there is nothing to validate.
        return

    from collections import Counter

    total = 0
    empty_questions = 0
    list_string_answers = 0
    unreadable_lines = 0
    answer_counts = Counter()
    question_counts = Counter()

    for fname in sorted(os.listdir(jsonl_dir)):
        if not fname.endswith('.jsonl'):
            continue
        fpath = os.path.join(jsonl_dir, fname)
        # Explicit UTF-8 so results do not depend on the machine's locale.
        with open(fpath, encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    # Blank separator lines are not records.
                    continue
                try:
                    item = json.loads(line)
                except json.JSONDecodeError:
                    unreadable_lines += 1
                    continue
                if not isinstance(item, dict):
                    # Valid JSON but not an object: has no question/answer.
                    unreadable_lines += 1
                    continue

                total += 1
                q = item.get("question", "")
                a = item.get("answer", "")

                # Check 1: Empty questions (falsy value or whitespace-only
                # string); non-string truthy values count as non-empty.
                if not q or (isinstance(q, str) and not q.strip()):
                    empty_questions += 1

                # Check 2: Answer looks like Python list string
                if isinstance(a, str) and a.startswith("[") and "'" in a:
                    list_string_answers += 1

                # Track distributions
                answer_counts[str(a)[:50]] += 1
                question_counts[str(q)[:50]] += 1

    issues = []
    # max(total, 1) keeps the percentages defined when no records were read.
    denom = max(total, 1)

    # Report
    print(f"\n [{stage_name}] Data Quality Check: {total} samples across {jsonl_dir}")

    if unreadable_lines > 0:
        print(f" WARNING: {unreadable_lines} lines could not be parsed as JSON objects and were skipped")

    if empty_questions > 0:
        pct = empty_questions / denom * 100
        msg = f" WARNING: {empty_questions} samples ({pct:.1f}%) have EMPTY questions"
        print(msg)
        if pct > 5:
            issues.append(msg)

    if list_string_answers > 0:
        pct = list_string_answers / denom * 100
        msg = f" WARNING: {list_string_answers} samples ({pct:.1f}%) have Python list-string answers"
        print(msg)
        if pct > 5:
            issues.append(msg)

    # Check dominant answers
    if answer_counts:
        top_answer, top_count = answer_counts.most_common(1)[0]
        top_pct = top_count / denom * 100
        if top_pct > 10:
            msg = f" WARNING: Most common answer '{top_answer}' appears {top_count}x ({top_pct:.1f}%)"
            print(msg)
            if top_pct > 30:
                issues.append(msg)

    # Check dominant questions (reported only; never fatal)
    if question_counts:
        top_q, top_q_count = question_counts.most_common(1)[0]
        top_q_pct = top_q_count / denom * 100
        if top_q_pct > 20:
            msg = f" WARNING: Most common question '{top_q[:40]}' appears {top_q_count}x ({top_q_pct:.1f}%)"
            print(msg)

    if issues:
        print(f"\n [{stage_name}] DATA QUALITY ISSUES FOUND:")
        for issue in issues:
            print(f" {issue}")
        raise RuntimeError(
            f"FATAL: {stage_name} has data quality issues that will poison training.\n"
            f"Fix the download script and re-download: python3 scripts/download_all_data.py\n"
            f"Issues:\n" + "\n".join(issues)
        )
    else:
        print(f" [{stage_name}] Data quality PASSED")