Spaces:
Sleeping
Sleeping
Upload 7 files
Browse files- .gitattributes +1 -0
- BiteVision101_e20.pth +3 -0
- app.py +50 -0
- bitevision_model.py +22 -0
- examples/1976.jpg +0 -0
- examples/25525.jpg +3 -0
- examples/2586.jpg +0 -0
- requirements.txt +3 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
examples/25525.jpg filter=lfs diff=lfs merge=lfs -text
|
BiteVision101_e20.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8b3e4f6618d6bc1d1794f312b840d31e855bc4d604a61292dfb2a423ac4d5f4f
|
| 3 |
+
size 31824378
|
app.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import bitevision_model
|
| 3 |
+
import torch
|
| 4 |
+
from timeit import default_timer as timer
|
| 5 |
+
import os
|
| 6 |
+
|
| 7 |
+
effnetb2, effnetb2_transforms=bitevision_model.create_effnetb2(101,42)
|
| 8 |
+
|
| 9 |
+
# Load the trained weights onto the CPU. `weights_only=True` restricts
# unpickling to tensors/primitive containers; the previously passed
# `only_weights` keyword is not a `torch.load` parameter and was dropped.
effnetb2.load_state_dict(torch.load("BiteVision101_e20.pth", map_location=torch.device("cpu"), weights_only=True))
|
| 10 |
+
|
| 11 |
+
classes=['apple_pie','baby_back_ribs','baklava','beef_carpaccio','beef_tartare','beet_salad','beignets',
|
| 12 |
+
'bibimbap','bread_pudding','breakfast_burrito','bruschetta','caesar_salad','cannoli','caprese_salad','carrot_cake','ceviche','cheese_plate',
|
| 13 |
+
'cheesecake','chicken_curry','chicken_quesadilla','chicken_wings','chocolate_cake','chocolate_mousse','churros','clam_chowder',
|
| 14 |
+
'club_sandwich','crab_cakes','creme_brulee','croque_madame','cup_cakes','deviled_eggs','donuts','dumplings','edamame','eggs_benedict',
|
| 15 |
+
'escargots','falafel','filet_mignon','fish_and_chips','foie_gras','french_fries','french_onion_soup','french_toast','fried_calamari',
|
| 16 |
+
'fried_rice','frozen_yogurt','garlic_bread','gnocchi','greek_salad','grilled_cheese_sandwich','grilled_salmon','guacamole','gyoza','hamburger','hot_and_sour_soup',
|
| 17 |
+
'hot_dog','huevos_rancheros','hummus','ice_cream','lasagna','lobster_bisque','lobster_roll_sandwich','macaroni_and_cheese','macarons',
|
| 18 |
+
'miso_soup','mussels','nachos','omelette','onion_rings','oysters','pad_thai','paella','pancakes','panna_cotta','peking_duck','pho','pizza',
|
| 19 |
+
'pork_chop','poutine','prime_rib','pulled_pork_sandwich','ramen','ravioli','red_velvet_cake','risotto','samosa','sashimi','scallops','seaweed_salad','shrimp_and_grits','spaghetti_bolognese',
|
| 20 |
+
'spaghetti_carbonara','spring_rolls','steak','strawberry_shortcake','sushi','tacos','takoyaki','tiramisu','tuna_tartare','waffles']
|
| 21 |
+
|
| 22 |
+
def make_pred_and_timeit(img):
    """Classify a food image and measure how long the prediction takes.

    Args:
        img: The image supplied by the Gradio interface; it is passed
            directly to ``effnetb2_transforms``.

    Returns:
        A ``(pred_probs, pred_time)`` tuple. ``pred_probs`` maps each name in
        ``classes`` to its softmax probability rounded to 3 decimals;
        ``pred_time`` is the elapsed time in seconds as measured by
        ``timer()``.
    """
    start_timer = timer()

    # Add a leading batch dimension so the single image forms a batch of one.
    transformed_image = effnetb2_transforms(img).unsqueeze(dim=0)

    # Put the model in eval mode *before* the only forward pass so dropout is
    # disabled; running the model first (in training mode) would waste a full
    # forward pass and could disturb normalization statistics.
    effnetb2.eval()
    with torch.inference_mode():
        pred_logits = effnetb2(transformed_image)
        probs = torch.softmax(pred_logits, dim=1).squeeze()

    # Pair every class name with its probability as a plain Python float.
    pred_probs = {
        class_name: round(prob, 3)
        for class_name, prob in zip(classes, probs.tolist())
    }
    pred_time = timer() - start_timer

    return pred_probs, pred_time
|
| 38 |
+
|
| 39 |
+
# Each Gradio example is a one-element list holding a path inside the
# "examples" directory; join with the path separator so the file resolves.
examples=[[os.path.join("examples", example)] for example in os.listdir("examples")]
|
| 40 |
+
|
| 41 |
+
title="BiteVision101: FoodImage Classification Model π π π₯"
|
| 42 |
+
description="π BiteVision101 π is the ultimate food detective! π΅οΈββοΈπ½οΈ Simply upload an image of any food, and it will instantly identify the dish and tell us its name! π₯β¨ From a mouthwatering pizza π to a juicy burger π or a vibrant salad π₯, BiteVision101 has got it covered! ππΈ No need to guess, just snap a pic and let the magic happen! β¨π"
|
| 43 |
+
article="BiteVision101 is a feature extraction model trained on the Food-101 dataset, leveraging EfficientNetB2 as its backbone. With a total of 7843303 parameters, BiteVision101 delivers powerful performance in food image recognition."
|
| 44 |
+
demo=gr.Interface(fn=make_pred_and_timeit, inputs=gr.Image(type="pil"), outputs=[gr.Label(num_top_classes=3, label="Predictions"),
|
| 45 |
+
gr.Number(label="Prediction Time(s)")], examples=examples,
|
| 46 |
+
title=title, description=description, article=article)
|
| 47 |
+
|
| 48 |
+
demo.launch()
|
| 49 |
+
|
| 50 |
+
|
bitevision_model.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torchvision
|
| 3 |
+
|
| 4 |
+
def create_effnetb2(num_classes: int = 101, seed: int = 42, device: str = "cpu"):
    """Build an EfficientNet-B2 feature extractor with a new classifier head.

    Args:
        num_classes: Number of output units of the new linear classifier.
        seed: Value passed to ``torch.manual_seed`` right before the new
            classifier is created, so its initial weights are reproducible.
        device: Device the finished model (backbone and new head) is moved to.

    Returns:
        A ``(model, transforms)`` tuple: the EfficientNet-B2 model whose
        pretrained parameters are frozen, and the preprocessing transforms
        obtained from the default pretrained weights.
    """
    weights = torchvision.models.EfficientNet_B2_Weights.DEFAULT
    effnetb2 = torchvision.models.efficientnet_b2(weights=weights)

    # Freeze every pretrained parameter; only the head created below trains.
    for param in effnetb2.parameters():
        param.requires_grad = False

    torch.manual_seed(seed)
    effnetb2.classifier = torch.nn.Sequential(
        torch.nn.Dropout(p=0.3, inplace=True),
        # Honour `num_classes` instead of a hard-coded 101 outputs.
        torch.nn.Linear(in_features=1408, out_features=num_classes),
    )

    # Move the model only after the head is attached so the new classifier
    # ends up on the same device as the backbone.
    effnetb2 = effnetb2.to(device)

    effnetb2_transforms = weights.transforms()

    return effnetb2, effnetb2_transforms
|
| 21 |
+
|
| 22 |
+
|
examples/1976.jpg
ADDED
|
examples/25525.jpg
ADDED
|
Git LFS Details
|
examples/2586.jpg
ADDED
|
requirements.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
torch==2.5.1
|
| 2 |
+
torchvision==0.20.1
|
| 3 |
+
gradio==5.20.1
|