File size: 4,125 Bytes
36a42d4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
289bf28
1752bb9
 
 
 
289bf28
36a42d4
 
1752bb9
36a42d4
 
1752bb9
36a42d4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
from fastapi import FastAPI, File, UploadFile, Request
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates

import os
import json
import base64
import re
from dotenv import load_dotenv
import requests
import torch
from diffusers import StableDiffusionPipeline
from PIL import Image
from io import BytesIO
from imghdr import what

# Load variables from a .env file (if one is found) into the process environment.
load_dotenv()
# API key sent to the Gemini endpoint in extract_menu_from_image();
# os.getenv returns None when GOOGLE_API is not set.
api_key = os.getenv("GOOGLE_API")

app = FastAPI()

# Jinja2 templates are resolved from the "templates" directory.
templates = Jinja2Templates(directory="templates")

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Point the Hugging Face cache locations at /tmp.
# NOTE(review): these are assigned after torch/diffusers have already been
# imported; libraries that read them at import time may not see the values —
# confirm. The explicit cache_dir below covers the pipeline download itself.
os.environ["HF_HOME"] = "/tmp/huggingface"
os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface/transformers"
os.environ["HF_DATASETS_CACHE"] = "/tmp/huggingface/datasets"
os.environ["DIFFUSERS_CACHE"] = "/tmp/huggingface/diffusers"

# Load the Stable Diffusion v1.5 pipeline once, at import time, so every
# request reuses it. float16 is used on CUDA, float32 on CPU.
pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    cache_dir="/tmp/huggingface",
    torch_dtype=torch.float16 if device == "cuda" else torch.float32
)

# Move the pipeline to the selected device.
pipe.to(device)


def clean_filename(text):
    """Turn free text into a lowercase, underscore-separated file name.

    The text is stripped and lowercased, spaces become underscores, and any
    remaining character that is not a word character, hyphen, underscore,
    dot or space is replaced with an underscore.
    """
    lowered = text.strip().lower()
    underscored = lowered.replace(" ", "_")
    disallowed = re.compile(r'[^\w\-_\. ]')
    return disallowed.sub('_', underscored)


def generate_image_base64(food_name):
    """Generate a picture of a dish and return it as base64-encoded PNG text.

    The dish name is inserted into a fixed food-photography prompt, the
    module-level ``pipe`` is called with that prompt, and the first image it
    returns is serialised to PNG in memory and base64-encoded.
    """
    prompt = f"Professional food photography of {food_name}, top-down view, realistic lighting"
    picture = pipe(prompt).images[0]

    # Serialise to PNG entirely in memory; nothing is written to disk.
    with BytesIO() as png_buffer:
        picture.save(png_buffer, format="PNG")
        png_bytes = png_buffer.getvalue()

    return base64.b64encode(png_bytes).decode("utf-8")

def get_mime_type(image_bytes):
    """Guess the MIME type of an image from its leading magic bytes.

    The signature check is done directly instead of through ``imghdr``, so
    this function does not depend on that module, and only standard
    ``image/*`` types are ever returned.

    Args:
        image_bytes: Raw image content (bytes-like).

    Returns:
        One of "image/png", "image/jpeg", "image/gif", "image/webp",
        "image/bmp" or "image/tiff". Unrecognised or empty input falls back
        to "image/jpeg".
    """
    header = bytes(image_bytes[:16])

    if header.startswith(b"\x89PNG\r\n\x1a\n"):
        return "image/png"
    if header.startswith(b"\xff\xd8\xff"):
        return "image/jpeg"
    if header.startswith((b"GIF87a", b"GIF89a")):
        return "image/gif"
    # WebP is a RIFF container: "RIFF" + 4-byte size + "WEBP".
    if header.startswith(b"RIFF") and header[8:12] == b"WEBP":
        return "image/webp"
    if header.startswith(b"BM"):
        return "image/bmp"
    # TIFF: little-endian ("II*\0") or big-endian ("MM\0*") byte-order mark.
    if header.startswith((b"II*\x00", b"MM\x00*")):
        return "image/tiff"

    # Same default the function has always used for unknown content.
    return "image/jpeg"

def extract_menu_from_image(image_bytes):
    """Ask the Gemini API to read a menu photo and return its items.

    The image is sent inline (base64) together with a prompt requesting a
    JSON array of menu entries. This is a best-effort call: every failure
    is reported with ``print`` and results in an empty list.

    Args:
        image_bytes: Raw bytes of the uploaded menu image.

    Returns:
        The list decoded from the model's JSON reply (the prompt asks for
        dicts with "food", "description", "price" and "category"), or ``[]``
        when the API key is missing, the request fails or times out, or the
        reply is not a JSON array.
    """
    if not api_key:
        print("Error extracting menu: GOOGLE_API is not configured")
        return []

    base64_image = base64.b64encode(image_bytes).decode('utf-8')
    # The key is sent in a header rather than the query string so it cannot
    # end up in exception messages (which include the request URL) or logs.
    url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent"

    prompt = """

    Extract the menu items from this image and return ONLY a JSON array like:

    [

      {

        "food": "Dish Name",

        "description": "Short description or empty string",

        "price": 10,

        "category": "Category"

      }

    ]

    """

    payload = {
        "contents": [
            {
                "parts": [
                    {"text": prompt},
                    {
                        "inline_data": {
                            "mime_type": get_mime_type(image_bytes),
                            "data": base64_image
                        }
                    }
                ]
            }
        ],
        "generationConfig": {
            "responseMimeType": "application/json"
        }
    }

    headers = {
        'Content-Type': 'application/json',
        'x-goog-api-key': api_key,
    }

    try:
        # Without a timeout, requests waits forever on a stalled connection.
        res = requests.post(url, headers=headers, json=payload, timeout=60)
        res.raise_for_status()
        text = res.json()['candidates'][0]['content']['parts'][0]['text']
        menu = json.loads(text)
    except (requests.RequestException, KeyError, IndexError, TypeError, ValueError) as e:
        # Network/HTTP errors, an unexpected response shape, or invalid JSON.
        print("Error extracting menu:", e)
        return []

    if not isinstance(menu, list):
        # Callers iterate over the result; anything but an array is unusable.
        print("Error extracting menu: response was not a JSON array")
        return []
    return menu


@app.get("/", response_class=HTMLResponse)
async def form(request: Request):
    """Render the ``index.html`` template for the site root."""
    context = {"request": request}
    return templates.TemplateResponse("index.html", context)


def _render_menu_card(item):
    """Return the HTML card for one menu item, with all text fields escaped."""
    from html import escape  # local import keeps this block self-contained

    # The text comes from a model reading an untrusted image, so it must be
    # escaped before being placed in the page.
    food = escape(str(item.get("food") or ""))
    description = escape(str(item.get("description") or ""))
    raw_price = item.get("price")
    price = "" if raw_price is None else escape(str(raw_price))
    # Base64 output only uses characters that are safe inside the attribute.
    img_base64 = item.get("img_base64", "")

    return f"""

        <div style='border:1px solid #ccc; margin:10px; width:220px; text-align:center; padding:10px; border-radius:10px; box-shadow:2px 2px 5px #aaa;'>

            <img src='data:image/png;base64,{img_base64}' width='200'><br>

            <h3>{food}</h3>

            <p><b>${price}</b></p>

            <p>{description}</p>

        </div>

        """


@app.post("/upload", response_class=HTMLResponse)
async def upload(request: Request, menu_image: UploadFile = File(...)):
    """Build an illustrated HTML menu from an uploaded menu photo.

    The uploaded image is passed to ``extract_menu_from_image``; for every
    extracted entry that is a dict with a non-empty "food" value an image is
    generated with ``generate_image_base64`` and a card is rendered. Entries
    that do not meet that condition are skipped instead of raising.

    Args:
        request: The incoming request (unused, kept for the route signature).
        menu_image: The uploaded menu image file.

    Returns:
        An ``HTMLResponse`` containing one card per menu item and a link
        back to the upload form.
    """
    image_bytes = await menu_image.read()
    extracted = extract_menu_from_image(image_bytes)
    if not isinstance(extracted, list):
        extracted = []

    menu_items = [
        item for item in extracted
        if isinstance(item, dict) and item.get("food")
    ]

    # NOTE(review): both the extraction above and the image generation below
    # are synchronous calls inside an async handler, so the event loop is
    # blocked while they run.
    for item in menu_items:
        item["img_base64"] = generate_image_base64(item["food"])

    page_parts = ["<h2>🍽️ AI Food Menu</h2><div style='display:flex; flex-wrap:wrap;'>"]
    page_parts.extend(_render_menu_card(item) for item in menu_items)
    page_parts.append("</div><br><a href='/'>Upload Another</a>")
    return HTMLResponse(content="".join(page_parts))