| { | |
| "phase6_text_to_text": { | |
| "Our 3B Model": { | |
| "adjective_density": 3.62, | |
| "model_size": "3B", | |
| "cost": "Local", | |
| "inference_speed_ms": 2.1 | |
| }, | |
| "Claude Sonnet": { | |
| "adjective_density": 2.0, | |
| "model_size": "70B", | |
| "cost": "API", | |
| "inference_speed_ms": 1500 | |
| }, | |
| "GPT-4": { | |
| "adjective_density": 2.8, | |
| "model_size": "~1.7T", | |
| "cost": "API", | |
| "inference_speed_ms": 2000 | |
| } | |
| }, | |
| "current_comprehensive": { | |
| "Visual Narrator VLM": { | |
| "adjective_density": 0.494, | |
| "spatial_accuracy": 0.833, | |
| "multi_object_reasoning": 1.0, | |
| "inference_speed_ms": 2.5, | |
| "integration_quality": 0.622, | |
| "cost_efficiency": 0.95, | |
| "model_size": "3B", | |
| "deployment": "Local", | |
| "training_cost": 344.69 | |
| }, | |
| "GPT-4 Turbo": { | |
| "adjective_density": 0.049, | |
| "spatial_accuracy": 1.0, | |
| "multi_object_reasoning": 0.633, | |
| "inference_speed_ms": 5403.1, | |
| "integration_quality": 0.149, | |
| "cost_efficiency": 0.006, | |
| "model_size": "~1.7T", | |
| "deployment": "API", | |
| "training_cost": "Millions+" | |
| }, | |
| "Claude 3.5 Sonnet": { | |
| "adjective_density": 0.233, | |
| "spatial_accuracy": 0.74, | |
| "multi_object_reasoning": 0.797, | |
| "inference_speed_ms": 2000, | |
| "integration_quality": 0.309, | |
| "cost_efficiency": 0.09, | |
| "model_size": "70B", | |
| "deployment": "API", | |
| "training_cost": "Millions+" | |
| }, | |
| "BLIP-2": { | |
| "adjective_density": 0.118, | |
| "spatial_accuracy": 0.551, | |
| "multi_object_reasoning": 0.579, | |
| "inference_speed_ms": 100, | |
| "integration_quality": 0.341, | |
| "cost_efficiency": 0.533, | |
| "model_size": "3.4B", | |
| "deployment": "Local", | |
| "training_cost": "~$50K" | |
| }, | |
| "LLaVA": { | |
| "adjective_density": 0.205, | |
| "spatial_accuracy": 0.636, | |
| "multi_object_reasoning": 0.704, | |
| "inference_speed_ms": 800, | |
| "integration_quality": 0.316, | |
| "cost_efficiency": 0.35, | |
| "model_size": "7B", | |
| "deployment": "Local", | |
| "training_cost": "~$100K" | |
| } | |
| }, | |
| "metadata": { | |
| "report_date": "2025-11-07T12:23:26.231847", | |
| "training_cost_total": 344.69, | |
| "models_compared": [ | |
| "Visual Narrator VLM", | |
| "GPT-4 Turbo", | |
| "Claude 3.5 Sonnet", | |
| "BLIP-2", | |
| "LLaVA" | |
| ] | |
| } | |
| } |