Spaces: Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -5,7 +5,7 @@ import spaces
|
|
| 5 |
from transformers import pipeline
|
| 6 |
|
| 7 |
BASE_MODEL_ID = "HuggingFaceTB/SmolVLM2-500M-Video-Instruct"
|
| 8 |
-
FINE_TUNED_MODEL_ID = "ninjals/FoodExtract-Vision-SmolVLM2-500M-fine-tune-v1
|
| 9 |
OUTPUT_TOKENS = 256
|
| 10 |
|
| 11 |
# Load original base model (no fine-tuning)
|
|
@@ -54,7 +54,7 @@ def extract_foods_from_image(input_image):
|
|
| 54 |
demo_title = "🥑➡️📝 FoodExtract-Vision with a fine-tuned SmolVLM2-500M"
|
| 55 |
demo_description = """* **Base model:** https://huggingface.co/HuggingFaceTB/SmolVLM-500M-Instruct
|
| 56 |
* **Fine-tuning dataset:** https://huggingface.co/datasets/mrdbourke/FoodExtract-1k-Vision (1k food images and 500 not food images)
|
| 57 |
-
* **Fine-tuned model:** https://huggingface.co/ninjals/FoodExtract-Vision-SmolVLM2-500M-fine-tune-v1
|
| 58 |
## Overview
|
| 59 |
Extract food and drink items in a structured way from images.
|
| 60 |
The original model outputs fail to capture the desired structure. But the fine-tuned model sticks to the output structure quite well.
|
|
|
|
| 5 |
from transformers import pipeline
|
| 6 |
|
| 7 |
BASE_MODEL_ID = "HuggingFaceTB/SmolVLM2-500M-Video-Instruct"
|
| 8 |
+
FINE_TUNED_MODEL_ID = "ninjals/FoodExtract-Vision-SmolVLM2-500M-fine-tune-v1"
|
| 9 |
OUTPUT_TOKENS = 256
|
| 10 |
|
| 11 |
# Load original base model (no fine-tuning)
|
|
|
|
| 54 |
demo_title = "🥑➡️📝 FoodExtract-Vision with a fine-tuned SmolVLM2-500M"
|
| 55 |
demo_description = """* **Base model:** https://huggingface.co/HuggingFaceTB/SmolVLM-500M-Instruct
|
| 56 |
* **Fine-tuning dataset:** https://huggingface.co/datasets/mrdbourke/FoodExtract-1k-Vision (1k food images and 500 not food images)
|
| 57 |
+
* **Fine-tuned model:** https://huggingface.co/ninjals/FoodExtract-Vision-SmolVLM2-500M-fine-tune-v1
|
| 58 |
## Overview
|
| 59 |
Extract food and drink items in a structured way from images.
|
| 60 |
The original model outputs fail to capture the desired structure. But the fine-tuned model sticks to the output structure quite well.
|