Spaces:

ninjals
/

FoodExtract-Vision-v1

Running on Zero

ninjals committed 23 days ago

Commit

f5c862c

verified ·

1 Parent(s): 77f345f

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -8,16 +8,13 @@ BASE_MODEL_ID = "HuggingFaceTB/SmolVLM2-500M-Video-Instruct"
 FINE_TUNED_MODEL_ID = "mrdbourke/FoodExtract-Vision-SmolVLM2-500M-fine-tune-v1"
 OUTPUT_TOKENS = 256
-# Change this in your app.py to match your training bf16=True setting
-DTYPE = torch.float16
 # Load original base model (no fine-tuning)
 print(f"[INFO] Loading Original Model")
 original_pipeline = pipeline(
     "image-text-to-text",
     model=BASE_MODEL_ID,
-    torch_dtype=DTYPE,
-    device_map="auto",
 )
 # Load fine-tuned model
@@ -25,14 +22,10 @@ print(f"[INFO] Loading Fine-tuned Model")
 ft_pipe = pipeline(
     "image-text-to-text",
     model=FINE_TUNED_MODEL_ID,
-    torch_dtype=DTYPE,
-    device_map="auto",
 )
-print(f"[INFO] Original Model Datatype: {original_pipeline.model.dtype}")
-print(f"[INFO] Fine-tuned Model Datatype: {ft_pipe.model.dtype}")
 def create_message(input_image):
     return [{'role': 'user',
  'content': [{'type': 'image',
@@ -100,3 +93,5 @@ demo = gr.Interface(
 if __name__ == "__main__":
     demo.launch(share=False)

 FINE_TUNED_MODEL_ID = "mrdbourke/FoodExtract-Vision-SmolVLM2-500M-fine-tune-v1"
 OUTPUT_TOKENS = 256
 # Load original base model (no fine-tuning)
 print(f"[INFO] Loading Original Model")
 original_pipeline = pipeline(
     "image-text-to-text",
     model=BASE_MODEL_ID,
+    dtype=torch.bfloat16,
+    device_map="auto"
 )
 # Load fine-tuned model
 ft_pipe = pipeline(
     "image-text-to-text",
     model=FINE_TUNED_MODEL_ID,
+    dtype=torch.bfloat16,
+    device_map="auto"
 )
 def create_message(input_image):
     return [{'role': 'user',
  'content': [{'type': 'image',
 if __name__ == "__main__":
     demo.launch(share=False)