# NOTE: removed Hugging Face Space page residue (status badge, file size,
# commit hashes, line-number gutter) that was accidentally captured with
# this source file — it was not valid Python.
# Load dependencies
import time
import transformers
import torch
import spaces # Optional: run our model on the GPU (this will be much faster inference)
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import pipeline
@spaces.GPU  # Optional: run our model on the GPU (this will be much faster inference)
def pred_on_text(input_text):
    """Run the fine-tuned food-extraction pipeline on a single user message.

    Args:
        input_text: Raw user text (or an image caption) to extract
            food/drink items from.

    Returns:
        tuple: ``(generated_text, raw_output, total_time)`` where
            ``generated_text`` (str) is the assistant's reply content,
            ``raw_output`` is the full pipeline output (for debugging),
            and ``total_time`` (float) is the generation time in seconds.
    """
    # perf_counter() is monotonic, so it is the right clock for measuring
    # elapsed durations (time.time() can jump with system clock changes).
    start_time = time.perf_counter()
    raw_output = loaded_model_pipeline(
        text_inputs=[{"role": "user", "content": input_text}],
        max_new_tokens=256,
        disable_compile=True,
    )
    total_time = round(time.perf_counter() - start_time, 4)
    # The chat pipeline returns the whole conversation; index 1 is the
    # assistant turn appended after the user message at index 0.
    generated_text = raw_output[0]["generated_text"][1]["content"]
    return generated_text, raw_output, total_time
# Model checkpoint on the Hugging Face Hub (our fine-tuned repo).
# Note: you may need to swap the `objects76` username for your own.
MODEL_PATH = "objects76/FoodExtract-gemma-3-270m-fine-tune-v1"

# Tokenizer that matches the fine-tuned checkpoint.
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
# Build the text-generation pipeline directly from the checkpoint path:
# the pipeline then handles model loading, dtype selection and device
# placement itself (previously the model was loaded manually with
# AutoModelForCausalLM and passed in; the path form is equivalent and shorter).
loaded_model_pipeline = pipeline(
    "text-generation",
    model=MODEL_PATH,
    tokenizer=tokenizer,
    torch_dtype="auto",
    device_map="auto",
    # Forwarded to from_pretrained(); mirrors the eager-attention setting
    # used when the model was loaded manually.
    model_kwargs={"attn_implementation": "eager"},
)
# Markdown description shown in the Gradio UI.
# (The Korean text was mis-encoded in a previous revision — UTF-8 bytes
# rendered through a Greek single-byte codepage — and has been restored.)
description = """
텍스트에서 음식과 음료 항목을 추출하는 파인튜닝된 SLM(Small Language Model)
- basemodel: [Gemma 3 270M](https://huggingface.co/google/gemma-3-270m-it)
- dataset: [FoodExtract-1k 데이터셋](https://huggingface.co/datasets/objects76/FoodExtract-1k)

* 입력 (str): 음식 텍스트 문자열 또는 이미지 캡션
(예: "테이블 위에 있는 감자 사진" 또는 "베이컨, 계란, 토스트가 있는 아침 식사")
* 출력 (str): 음식/비음식 분류와 추출된 명사형 음식 및 음료 항목, 다양한 음식 태그가 포함된 생성 텍스트

For example:

* Input: "For breakfast I had eggs, bacon and toast and a glass of orange juice"
* Output:

```
food_or_drink: 1
tags: fi, di
foods: eggs, bacon, toast
drinks: orange juice
```
"""
# Wire the prediction function into a simple Gradio interface.
# (The title emoji and the Korean example were mis-encoded in a previous
# revision and have been restored to proper UTF-8.)
demo = gr.Interface(
    fn=pred_on_text,
    inputs=gr.TextArea(lines=4, label="Input Text"),
    outputs=[
        gr.TextArea(lines=4, label="Generated Text"),
        gr.TextArea(lines=7, label="Raw Output"),
        gr.Number(label="Generation Time (s)"),
    ],
    title="🍳 Structured FoodExtract with a Fine-Tuned Gemma 3 270M",
    description=description,
    examples=[
        ["Hello world! This is my first fine-tuned LLM!"],
        ["그릴에 구운 바라문디와 아보카도, 올리브, 토마토, 이탈리안 드레싱이 곁들여진 샐러드가 있는 한 접시 음식"],
        ["British Breakfast with baked beans, fried eggs, black pudding, sausages, bacon, mushrooms, a cup of tea and toast and fried tomatoes"],
        ["Steak tacos"],
        ["A photo of a dog sitting on a beach"],
    ],
)

if __name__ == "__main__":
    # share=False: serve locally only (Spaces provides its own public URL).
    demo.launch(share=False)