# FoodExtract-v1 / app.py
# Janushi's picture
# Uploading FoodExtract demo app.py
# fd7b742 verified
import time
import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
# Hugging Face Hub repository id of the fine-tuned checkpoint used by this demo.
MODEL_PATH = "Janushi/FoodExtract-gemma-3-270m-fine-tune-v1"

# Model weights and tokenizer are both read from the same repository.
# dtype and device placement are left to the library ("auto").
loaded_model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    dtype="auto",
    device_map="auto",
    attn_implementation="eager",
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)

# Single text-generation pipeline shared by every request handled by the app.
loaded_model_pipeline = pipeline(
    task="text-generation",
    model=loaded_model,
    tokenizer=tokenizer,
)
def pred_on_text(input_text):
    """Run the food-extraction pipeline on one piece of text.

    Args:
        input_text: Text sent to the model as the content of a single
            ``user`` chat message.

    Returns:
        A tuple ``(generated_text, raw_output, total_time)`` where
        ``generated_text`` is the ``content`` of the last message in the
        conversation returned by the pipeline, ``raw_output`` is the
        unmodified pipeline result, and ``total_time`` is the elapsed
        generation time in seconds rounded to 4 decimal places.
    """
    messages = [{"role": "user", "content": input_text}]

    # perf_counter() is a monotonic, high-resolution clock, so the measured
    # interval cannot be skewed by system clock adjustments the way
    # time.time() can.
    start_time = time.perf_counter()
    raw_output = loaded_model_pipeline(
        text_inputs=messages,
        max_new_tokens=256,
        disable_compile=True,
    )
    total_time = round(time.perf_counter() - start_time, 4)

    # For chat-style input, "generated_text" holds the conversation with the
    # model's reply appended after the prompt messages; take the last entry
    # rather than a fixed position tied to the prompt length.
    generated_text = raw_output[0]["generated_text"][-1]["content"]
    return generated_text, raw_output, total_time
# Markdown blurb displayed under the app title; one literal per rendered line.
description = (
    "Extract food and drink items from text using a fine-tuned Gemma-3-270M.\n"
    "Fine-tuned on mrdbourke/FoodExtract-1k dataset.\n"
    "**Input:** Any text or image caption\n"
    "**Output:** Structured food/drink extraction\n"
    "**Example:**\n"
    '- Input: "eggs, bacon and toast with orange juice"\n'
    "- Output: food_or_drink: 1, foods: eggs, bacon, toast, drinks: orange juice\n"
)
# Gradio UI: one text input wired to pred_on_text, whose three return values
# fill the three output components in order (generated text, raw pipeline
# output, generation time).
demo = gr.Interface(
    fn=pred_on_text,
    inputs=gr.TextArea(lines=4, label="Input Text"),
    outputs=[
        gr.TextArea(lines=4, label="Generated Text"),
        gr.TextArea(lines=7, label="Raw Output"),
        gr.Number(label="Generation Time (s)"),
    ],
    # The separator was stored as the mojibake "β€”" (a UTF-8 em dash decoded
    # with the wrong code page); restored to the intended character.
    title="🍳 BiteSight — Food Extraction with Fine-Tuned Gemma-3-270M",
    description=description,
    # Clickable sample inputs; the last one contains no food or drink.
    examples=[
        ["A plate of grilled tofu, salad with avocado and tomatoes"],
        ["Indian breakfast with roti, tea and fried potatoes"],
        ["cheese tacos"],
        ["A photo of a dog sitting on a beach"],
    ],
)
# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    # share=False: no public share link is requested.
    demo.launch(share=False)