add requirements + gradio demo
Browse files- app.py +128 -0
- requirements.txt +5 -10
app.py
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from huggingface_hub import hf_hub_download
|
| 3 |
+
|
| 4 |
+
import os
|
| 5 |
+
import pickle
|
| 6 |
+
import torch
|
| 7 |
+
|
| 8 |
+
from argparse import Namespace
|
| 9 |
+
from diffusers import StableDiffusionPipeline, StableDiffusionImg2ImgPipeline
|
| 10 |
+
from io import BytesIO
|
| 11 |
+
from model import get_model
|
| 12 |
+
from src.utils.output_utils import prepare_output
|
| 13 |
+
from torchvision import transforms
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
# Hugging Face Hub repository that the vocabularies and checkpoint are fetched from.
REPO_ID = "Launchpad/inversecooking"
# Hub access token taken from the environment; None when HF_TOKEN is not set.
HF_TOKEN = os.environ.get("HF_TOKEN")

# Use the GPU only when it is both requested and actually present.
use_gpu = True
if torch.cuda.is_available() and use_gpu:
    device = torch.device('cuda')
    # No remapping needed: checkpoint tensors stay where they were saved.
    map_loc = None
else:
    device = torch.device('cpu')
    # Force checkpoint tensors onto the CPU when loading.
    map_loc = 'cpu'
|
| 22 |
+
|
| 23 |
+
# Inverse Cooking
def _load_vocab(filename):
    """Download a pickled vocabulary from REPO_ID and return the unpickled object.

    Args:
        filename: Path of the pickle file inside the Hub repository.

    Returns:
        Whatever object the pickle file contains.
    """
    # The token must be passed by keyword: the third positional slot of
    # hf_hub_download is not the auth token.
    local_path = hf_hub_download(REPO_ID, filename, token=HF_TOKEN)
    # pickle.load needs an open binary file object, not a path plus a mode.
    # NOTE: unpickling executes code embedded in the file; this is only
    # acceptable because the file comes from the project's own repository.
    with open(local_path, 'rb') as fh:
        return pickle.load(fh)


ingrs_vocab = _load_vocab('data/ingr_vocab.pkl')
vocab = _load_vocab('data/instr_vocab.pkl')

# Vocabulary sizes are handed to get_model() when the network is built.
ingr_vocab_size = len(ingrs_vocab)
instrs_vocab_size = len(vocab)
|
| 33 |
+
|
| 34 |
+
# Hardcoded args
# Fixed configuration passed to get_model(). The original code set
# maxseqlen=15 and ingrs_only=False right after construction; those final
# values are written directly into the call here.
args = Namespace(
    aux_data_dir='../data',
    batch_size=128,
    beam=-1,
    crop_size=224,
    decay_lr=True,
    dropout_decoder_i=0.3,
    dropout_decoder_r=0.3,
    dropout_encoder=0.3,
    embed_size=512,
    es_metric='loss',
    eval_split='val',
    finetune_after=-1,
    get_perplexity=False,
    greedy=False,
    image_model='resnet50',
    image_size=256,
    ingrs_only=False,
    label_smoothing_ingr=0.1,
    learning_rate=0.001,
    log_step=10,
    log_term=False,
    loss_weight=[1.0, 0.0, 0.0, 0.0],
    lr_decay_every=1,
    lr_decay_rate=0.99,
    max_eval=4096,
    maxnumims=5,
    maxnuminstrs=10,
    maxnumlabels=20,
    maxseqlen=15,
    model_name='model',
    n_att=8,
    n_att_ingrs=4,
    num_epochs=400,
    num_workers=8,
    numgens=3,
    patience=50,
    project_name='inversecooking',
    recipe1m_dir='path/to/recipe1m',
    recipe_only=False,
    resume=False,
    save_dir='path/to/save/models',
    scale_learning_rate_cnn=0.01,
    suff='',
    temperature=1.0,
    tensorboard=True,
    transf_layers=16,
    transf_layers_ingrs=4,
    transfer_from='',
    use_lmdb=True,
    use_true_ingrs=False,
    weight_decay=0.0,
)
|
| 55 |
+
|
| 56 |
+
# Load the trained model parameters
model = get_model(args, ingr_vocab_size, instrs_vocab_size)
# The auth token must be passed by keyword; positionally it would land in a
# different parameter of hf_hub_download.
checkpoint_path = hf_hub_download(REPO_ID, 'data/modelbest.ckpt', token=HF_TOKEN)
# map_loc is 'cpu' when no usable GPU is present, so the checkpoint's tensors
# are remapped to the CPU on load.
model.load_state_dict(torch.load(checkpoint_path, map_location=map_loc))
model = model.to(device)
# Inference only: switch off training-time behaviour such as dropout.
model.eval()
# Flags set on the model so that neither ingredient-only nor recipe-only mode
# is selected.
model.ingrs_only = False
model.recipe_only = False
|
| 65 |
+
|
| 66 |
+
# Preprocessing applied to every uploaded photo before it reaches the model:
# resize the shorter side to 256, take the central 224x224 crop, convert to a
# tensor and normalise each channel.
# NOTE(review): the mean/std values look like the usual ImageNet statistics - confirm.
transform_list = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
]
transform = transforms.Compose(transform_list)
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
# Sampling settings read by generate_image(). `greedy` and `beam` are indexed
# by generation number, so only the first `numgens` entries are ever used.
numgens = 1
temperature = 1.0
beam = [-1] * 4
greedy = [True] + [False] * 3
|
| 78 |
+
|
| 79 |
+
# StableDiffusion
# Place the pipeline on the same device chosen for the Inverse Cooking model
# rather than hard-coding 'cuda', which fails on hosts without a GPU.
pipe = StableDiffusionPipeline.from_pretrained('CompVis/stable-diffusion-v1-4').to(device)
|
| 81 |
+
|
| 82 |
+
def generate_image(input_img):
    """Turn a food photo into a Stable Diffusion image of the predicted dish.

    The photo is run through the Inverse Cooking model to predict a recipe
    title and an ingredient list. Those are formatted into a text prompt,
    which is then passed to the Stable Diffusion pipeline.

    Args:
        input_img: The uploaded picture. The UI builds its input with a plain
            ``gr.Image()``, which delivers a numpy array by default; an
            object that already offers ``convert`` (a PIL image) is accepted
            as well.

    Returns:
        The first image produced by ``pipe`` for the generated prompt.

    Raises:
        gr.Error: If no image was supplied.
    """
    if input_img is None:
        raise gr.Error("Please upload an image of food first.")

    # The Resize/CenterCrop steps in `transform` reject raw numpy arrays, so
    # convert to a PIL image first.
    if not hasattr(input_img, 'convert'):
        input_img = transforms.ToPILImage()(input_img)
    # Normalize is configured with three channel statistics; drop alpha and
    # expand grayscale so the tensor always has three channels.
    input_img = input_img.convert('RGB')

    # Inverse Cooking
    image_tensor = transform(input_img).unsqueeze(0).to(device)

    # Only the final sample is used below, exactly as before; with the
    # module-level numgens of 1 there is a single pass.
    for i in range(numgens):
        with torch.no_grad():
            outputs = model.sample(image_tensor, greedy=greedy[i],
                                   temperature=temperature, beam=beam[i], true_ingrs=None)

    ingr_ids = outputs['ingr_ids'].cpu().numpy()
    recipe_ids = outputs['recipe_ids'].cpu().numpy()

    # The second return value (validity info) is not used by this demo.
    outs, _ = prepare_output(recipe_ids[0], ingr_ids[0], ingrs_vocab, vocab)

    recipe_name = outs['title']
    ingredients = outs['ingrs']  # ingredient list

    # Create hardcoded StableDiffusion prompt
    ingredients = ', '.join(ingredients)
    prompt = "Fancy food plating of " + recipe_name + " with ingredients " + ingredients
    print(prompt)

    # StableDiffusion
    new_image = pipe(prompt).images[0]
    return new_image
|
| 110 |
+
|
| 111 |
+
# Static content for the page header.
_LOGO_URL = "https://www.ocf.berkeley.edu/~launchpad/media/uploads/project_logos/414478903_2298162417059609_260250523028403756_n_yt9pGFm.png"
_ABOUT_MD = """Lunchpad is a [Launchpad](https://launchpad.studentorg.berkeley.edu/) project (Spring 2023) that transforms pictures of food to fancy plated versions through a novel transformer architecture and latent diffusion models.
<br/><br/>
**Model**: [Inverse Cooking](https://arxiv.org/abs/1812.06164), [Stable-Diffusion-v1-4](https://huggingface.co/CompVis/stable-diffusion-v1-4)
<br/>
**Developed by**: Sebastian Zhao, Annabelle Park, Nikhil Pitta, Tanush Talati, Rahul Vijay, Jade Wang, Tony Xin
"""

# NOTE(review): the nesting below was reconstructed from the statement order
# because the original indentation is not visible - confirm against the file.
with gr.Blocks() as demo:
    # Header row: logo in a narrow column, project description in a wide one.
    with gr.Row():
        with gr.Column(scale=1):
            gr.Image(_LOGO_URL, elem_id="logo-img", show_label=False, show_share_button=False, show_download_button=False)

        with gr.Column(scale=3):
            gr.Markdown(_ABOUT_MD)
    # Main row: image in, generated image out.
    with gr.Row():
        gr.Interface(generate_image, gr.Image(), "image")

# Start the web server only when run as a script, not when imported.
if __name__ == '__main__':
    demo.launch()
|
requirements.txt
CHANGED
|
@@ -1,11 +1,6 @@
|
|
| 1 |
numpy
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
Pillow
|
| 8 |
-
tqdm
|
| 9 |
-
lmdb
|
| 10 |
-
tensorflow
|
| 11 |
-
tensorboardX
|
|
|
|
| 1 |
numpy
|
| 2 |
+
torch
|
| 3 |
+
torchvision
|
| 4 |
+
diffusers
|
| 5 |
+
transformers
|
| 6 |
+
tokenizers
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|