"""Gradio demo: extract food and drink items from text with a fine-tuned Gemma model."""

import time

import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Load model
MODEL_PATH = "Janushi/FoodExtract-gemma-3-270m-fine-tune-v1"

loaded_model = AutoModelForCausalLM.from_pretrained(
    pretrained_model_name_or_path=MODEL_PATH,
    dtype="auto",
    device_map="auto",
    attn_implementation="eager",
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)

# Built once at import time so every request reuses the same loaded weights.
loaded_model_pipeline = pipeline(
    "text-generation",
    model=loaded_model,
    tokenizer=tokenizer,
)


def pred_on_text(input_text: str) -> tuple[str, list, float]:
    """Run the text-generation pipeline on *input_text* and time the call.

    Args:
        input_text: Free-form text sent to the model as a single user message.

    Returns:
        A 3-tuple ``(generated_text, raw_output, total_time)``:
        the content of the last message in the returned chat, the unmodified
        pipeline output, and the elapsed seconds rounded to 4 decimal places.
        Empty or whitespace-only input short-circuits to ``("", [], 0.0)``
        without invoking the model.
    """
    # Nothing to extract from blank input; skip the (slow) model call.
    if not input_text or not input_text.strip():
        return "", [], 0.0

    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # system clock adjustments the way time.time() can.
    start_time = time.perf_counter()
    raw_output = loaded_model_pipeline(
        text_inputs=[{"role": "user", "content": input_text}],
        max_new_tokens=256,
        disable_compile=True,
    )
    total_time = round(time.perf_counter() - start_time, 4)

    # The generated chat is indexed from the end: the reply is the final
    # message, which avoids assuming the prompt is exactly one message long.
    generated_text = raw_output[0]["generated_text"][-1]["content"]
    return generated_text, raw_output, total_time


description = """Extract food and drink items from text using a fine-tuned Gemma-3-270M.

Fine-tuned on mrdbourke/FoodExtract-1k dataset.

**Input:** Any text or image caption

**Output:** Structured food/drink extraction

**Example:**
- Input: "eggs, bacon and toast with orange juice"
- Output: food_or_drink: 1, foods: eggs, bacon, toast, drinks: orange juice
"""

demo = gr.Interface(
    fn=pred_on_text,
    inputs=gr.TextArea(lines=4, label="Input Text"),
    outputs=[
        gr.TextArea(lines=4, label="Generated Text"),
        gr.TextArea(lines=7, label="Raw Output"),
        gr.Number(label="Generation Time (s)"),
    ],
    title="🍳 BiteSight — Food Extraction with Fine-Tuned Gemma-3-270M",
    description=description,
    examples=[
        ["A plate of grilled tofu, salad with avocado and tomatoes"],
        ["Indian breakfast with roti, tea and fried potatoes"],
        ["cheese tacos"],
        ["A photo of a dog sitting on a beach"],
    ],
)

if __name__ == "__main__":
    demo.launch(share=False)