| | import os |
| | import torch |
| | from PIL import Image |
| | from transformers import Blip2Processor, Blip2ForConditionalGeneration |
| | import gradio as gr |
| |
|
| | |
| | |
| | |
# Cap OpenMP to a single thread to avoid CPU oversubscription when
# torch/tokenizers spawn their own worker pools.
os.environ["OMP_NUM_THREADS"] = "1"
| |
|
| | |
| | |
| | |
# Pick the compute device once at startup: prefer CUDA when available,
# otherwise fall back to CPU.
use_cuda = torch.cuda.is_available()
device = "cuda" if use_cuda else "cpu"
# Half precision halves GPU memory use; CPU inference needs full float32.
dtype = torch.float16 if use_cuda else torch.float32

print(f"🚀 Device: {device} | dtype: {dtype}")
| |
|
| | |
| | |
| | |
# Hugging Face Hub identifier of the BLIP-2 checkpoint to serve.
MODEL_NAME = "Salesforce/blip2-flan-t5-xl"

print("⏳ Model yükleniyor...")

# use_fast=True selects the Rust-backed tokenizer where one exists.
processor = Blip2Processor.from_pretrained(
    MODEL_NAME,
    use_fast=True,
)

# On CUDA, device_map="auto" lets accelerate place/shard the weights
# across the available GPU(s); on CPU we load normally and move the
# model ourselves below.
model = Blip2ForConditionalGeneration.from_pretrained(
    MODEL_NAME,
    torch_dtype=dtype,
    device_map="auto" if device == "cuda" else None,
)

# BUG FIX: calling .to(device) on a model loaded with device_map="auto"
# raises a RuntimeError in accelerate — the dispatch hooks already manage
# placement. Move the model manually only in the CPU path.
if device == "cpu":
    model.to(device)
model.eval()

print("✅ Model hazır!")
| |
|
| | |
| | |
| | |
def generate_caption(image: Image.Image) -> str:
    """Generate a caption for *image* with BLIP-2.

    Args:
        image: Uploaded picture in any PIL mode, or ``None`` when the
            user submitted the form without an image.

    Returns:
        The decoded caption string, or an error message (Turkish, to
        match the UI) when no image was provided.
    """
    if image is None:
        return "❌ Lütfen bir görsel yükleyin."

    # BLIP-2's vision encoder expects 3-channel RGB; this normalizes
    # palette/greyscale/RGBA uploads.
    image = image.convert("RGB")

    # BUG FIX: cast floating-point inputs to the model dtype as well as
    # moving them to its device — with a float16 model on CUDA, float32
    # pixel values raise a dtype-mismatch error inside the encoder.
    # (BatchFeature.to only casts float tensors, so token ids are safe.)
    inputs = processor(image, return_tensors="pt").to(device, dtype)

    # Inference only — no gradient bookkeeping.
    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=40,
        )

    return processor.decode(
        output[0],
        skip_special_tokens=True,
    )
| |
|
| | |
| | |
| | |
# Gradio UI: one image in, one caption string out.
image_input = gr.Image(type="pil", label="📷 Görsel Yükle")
caption_output = gr.Textbox(label="📝 Üretilen Açıklama")

demo = gr.Interface(
    fn=generate_caption,
    inputs=image_input,
    outputs=caption_output,
    api_name="generate_caption",
    title="BLIP-2 Image Captioning",
    description="BLIP-2 FLAN-T5 ile Image → Text",
)

# Bind on all interfaces so the app is reachable from outside a
# container; 7860 is Gradio's conventional port.
demo.launch(
    server_name="0.0.0.0",
    server_port=7860,
    show_error=True,
)
| |
|