# Hugging Face Space app.py — uploaded by Jack1808 ("Update app.py", commit 67b0f48, verified)
import os

# Set a writable cache directory BEFORE importing any Hugging Face library:
# huggingface_hub resolves HF_HOME at import time, so assigning it after the
# `diffusers` import (as this file previously did) has no effect on where the
# model weights are cached.
os.environ["HF_HOME"] = "/tmp/hf_cache"

import base64
import io
import json
import re

import google.generativeai as genai
import torch
from diffusers import StableDiffusionPipeline
from dotenv import load_dotenv
from flask import Flask, render_template, request, jsonify
from PIL import Image

app = Flask(__name__)

# Load variables from a local .env file (e.g. GEMINI_API_KEY) into os.environ.
load_dotenv()
# Configure the Gemini client from the environment. A missing key only warns
# here so the server still boots; /extract_menu reports a clear 500 at request
# time instead (see extract_menu).
api_key = os.getenv("GEMINI_API_KEY")
if api_key:
    genai.configure(api_key=api_key)
else:
    print("⚠️ GEMINI_API_KEY not found in environment variables!")
# Load Stable Diffusion once at startup. On failure `pipe` stays None and
# /generate_image returns a 500 instead of crashing the whole app.
pipe = None
try:
    pipe = StableDiffusionPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5",
        cache_dir=os.environ["HF_HOME"],
        # NOTE(review): safety_checker=None disables NSFW filtering — confirm
        # this is intentional for a public deployment.
        safety_checker=None,
        # fp16 halves memory on GPU; CPU inference requires fp32.
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    )
    pipe = pipe.to("cuda" if torch.cuda.is_available() else "cpu")
    # Slices the attention computation to lower peak VRAM at a small speed cost.
    pipe.enable_attention_slicing()
    print("✅ Stable Diffusion pipeline loaded successfully.")
except Exception as e:
    print(f"❌ Error loading Stable Diffusion: {e}")
    pipe = None
@app.route('/')
def index():
    """Serve the single-page UI shell; all interaction happens via the JSON routes."""
    return render_template('index.html')
@app.route('/extract_menu', methods=['POST'])
def extract_menu():
    """Extract structured menu data from a menu photo using Gemini.

    Accepts either a multipart file upload under 'image' or a JSON body with
    'image_data' (base64, optionally a ``data:image/...;base64,`` URL).

    Returns:
        200 {'success': True, 'menu': [...]} on success,
        400 for missing/unreadable input, 500 for configuration/model errors,
        502 when the model's reply is not valid JSON.
    """
    try:
        # get_json(silent=True) returns None instead of raising when the body
        # is not JSON (e.g. a multipart upload). The previous code touched
        # request.json directly, which raised a TypeError / 415 in that case.
        payload = request.get_json(silent=True) or {}

        if 'image' not in request.files and 'image_data' not in payload:
            return jsonify({'error': 'No image provided'}), 400

        image = None
        if 'image' in request.files:
            file = request.files['image']
            if file.filename == '':
                return jsonify({'error': 'No file selected'}), 400
            image = Image.open(file.stream)
        elif 'image_data' in payload:
            image_data = payload['image_data']
            # Strip the "data:image/...;base64," prefix; maxsplit=1 keeps any
            # further commas inside the payload intact.
            if image_data.startswith('data:image'):
                image_data = image_data.split(',', 1)[1]
            image_bytes = base64.b64decode(image_data)
            image = Image.open(io.BytesIO(image_bytes))

        if image is None:
            return jsonify({'error': 'Could not process image'}), 400
        if not api_key:
            return jsonify({'error': 'Gemini API key not configured'}), 500

        model = genai.GenerativeModel('gemini-1.5-flash')
        prompt = """
        You are given an image of a restaurant menu.
        Extract all menu categories and their items. For each item, also extract its description and price if available.
        Return only valid JSON in this format:
        [
          {
            "category": "Category Name",
            "items": [
              {
                "name": "Item 1",
                "description": "Description of Item 1",
                "price": "Price of Item 1 (if available, else null or empty string)"
              }
            ]
          }
        ]
        If nothing is found, return an empty array.
        """
        response = model.generate_content([prompt, image])
        response_text = response.text

        # Gemini often wraps its answer in a fenced code block; accept both
        # ```json and bare ``` fences, falling back to the raw text.
        json_match = re.search(r"```(?:json)?\s*(.*?)\s*```", response_text, re.DOTALL)
        json_data = json_match.group(1) if json_match else response_text
        menu = json.loads(json_data)

        return jsonify({
            'success': True,
            'menu': menu
        })
    except json.JSONDecodeError:
        # The upstream model replied with something that is not JSON.
        return jsonify({'error': 'Model did not return valid JSON'}), 502
    except Exception as e:
        return jsonify({'error': str(e)}), 500
@app.route('/generate_image', methods=['POST'])
def generate_image():
    """Generate a food photo for a menu item with Stable Diffusion.

    Expects JSON {'name': ..., 'description': ...}; both fields default to
    generic placeholders when absent.

    Returns:
        200 {'success': True, 'image': 'data:image/png;base64,...'} on success,
        500 when the model is unavailable or generation fails.
    """
    try:
        if pipe is None:
            return jsonify({'error': 'Image generation model not available'}), 500

        # silent=True -> None (not an exception) when the body is not JSON;
        # request.json here previously raised before data.get could run.
        data = request.get_json(silent=True) or {}
        print("📥 Received data:", data)

        item_name = data.get('name', 'Delicious Food Item')
        item_description = data.get('description', 'A beautiful and tasty dish')
        prompt = (
            f"High-quality professional food photography of {item_name}, "
            f"{item_description}. "
            "Plated beautifully on a restaurant table, vibrant colors, natural lighting, shallow depth of field, "
            "garnished, appetizing, trending on food blogs, ultra-realistic, 4k."
        )
        print("🧠 Generated prompt:", prompt)

        # 20 inference steps trades some quality for request latency.
        result = pipe(prompt, num_inference_steps=20)
        generated_image = result.images[0]

        # Encode the PIL image as a base64 PNG data URL for the browser.
        buffered = io.BytesIO()
        generated_image.save(buffered, format="PNG")
        img_str = base64.b64encode(buffered.getvalue()).decode()

        return jsonify({
            'success': True,
            'image': f"data:image/png;base64,{img_str}"
        })
    except Exception as e:
        print("❌ Error during image generation:", e)
        return jsonify({'error': str(e)}), 500
if __name__ == '__main__':
    # Bind to all interfaces for container hosting (HF Spaces convention:
    # port 7860). debug=True enables the Werkzeug interactive debugger, which
    # is remote code execution when the server is publicly reachable — so it
    # is now opt-in via FLASK_DEBUG=1 instead of hard-coded on.
    app.run(debug=os.getenv("FLASK_DEBUG") == "1", host='0.0.0.0', port=7860)