# plant / app.py — BLIP-2 (FLAN-T5) image-captioning Hugging Face Space
import os
import torch
from PIL import Image
from transformers import Blip2Processor, Blip2ForConditionalGeneration
import gradio as gr
# ---------------------------------------------------------
# ENV FIX (optional, but keeps CPU thread usage tidy)
# ---------------------------------------------------------
os.environ["OMP_NUM_THREADS"] = "1"

# ---------------------------------------------------------
# DEVICE
# ---------------------------------------------------------
device = "cuda" if torch.cuda.is_available() else "cpu"
# fp16 halves memory on GPU; CPU kernels generally need fp32.
dtype = torch.float16 if device == "cuda" else torch.float32
print(f"🚀 Device: {device} | dtype: {dtype}")

# ---------------------------------------------------------
# MODEL
# ---------------------------------------------------------
MODEL_NAME = "Salesforce/blip2-flan-t5-xl"
# If the Space keeps crashing (OOM), fall back to the smaller checkpoint:
# MODEL_NAME = "Salesforce/blip2-flan-t5-base"

print("⏳ Model yükleniyor...")
processor = Blip2Processor.from_pretrained(
    MODEL_NAME,
    use_fast=True,
)
model = Blip2ForConditionalGeneration.from_pretrained(
    MODEL_NAME,
    torch_dtype=dtype,
    # device_map="auto" lets accelerate place/shard the model on GPU;
    # on CPU we skip it and move the model manually below.
    device_map="auto" if device == "cuda" else None,
)
# BUG FIX: calling .to(device) on a model dispatched with device_map="auto"
# raises a RuntimeError in recent transformers/accelerate ("You can't move a
# model that has been dispatched ..."). Only move manually when no device_map
# was used (i.e. the CPU path).
if device != "cuda":
    model.to(device)
model.eval()
print("✅ Model hazır!")
# ---------------------------------------------------------
# INFERENCE FUNCTION
# ---------------------------------------------------------
def generate_caption(image: Image.Image) -> str:
    """Generate a caption for *image* with the module-level BLIP-2 model.

    Args:
        image: PIL image from the Gradio input, or ``None`` when the user
            submitted without uploading anything.

    Returns:
        The generated caption string, or a (Turkish) error message when no
        image was provided.
    """
    if image is None:
        return "❌ Lütfen bir görsel yükleyin."
    # BLIP-2 expects 3-channel input; uploads may be RGBA/grayscale/palette.
    image = image.convert("RGB")
    # BUG FIX: also cast to the model dtype, not just the device. On CUDA the
    # model weights are float16 but the processor emits float32 pixel_values,
    # which makes generate() fail with a dtype mismatch. BatchFeature.to(
    # device, dtype) casts only floating-point tensors, so input_ids stay long.
    inputs = processor(image, return_tensors="pt").to(device, dtype)
    with torch.no_grad():  # inference only — no autograd graph needed
        output = model.generate(
            **inputs,
            max_new_tokens=40,
        )
    caption = processor.decode(
        output[0],
        skip_special_tokens=True,
    )
    return caption
# ---------------------------------------------------------
# GRADIO UI + API
# ---------------------------------------------------------
demo = gr.Interface(
    fn=generate_caption,
    inputs=gr.Image(type="pil", label="📷 Görsel Yükle"),
    outputs=gr.Textbox(label="📝 Üretilen Açıklama"),
    api_name="generate_caption",  # name of the API route for programmatic calls
    title="BLIP-2 Image Captioning",
    description="BLIP-2 FLAN-T5 ile Image → Text",
)

# FIX: guard the launch so importing this module (e.g. from tests or another
# app) does not start the server. HF Spaces executes app.py directly, so the
# deployed behavior is unchanged.
if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",  # bind all interfaces — required inside the Space container
        server_port=7860,       # the port Spaces expects the app to listen on
        show_error=True,        # surface tracebacks in the UI for easier debugging
    )