Spaces:

kodzonee
/

image-caption-japanese

Sleeping

App Files Files Community

kodzonee committed on Nov 9, 2025

Commit

f9ab28e

verified ·

1 Parent(s): b465399

Update app.py

Browse files

Files changed (1) hide show

app.py +92 -43

app.py CHANGED Viewed

@@ -1,60 +1,109 @@
-import torch
-from transformers import BlipProcessor, BlipForConditionalGeneration, AutoTokenizer, AutoModelForSeq2SeqLM
 import gradio as gr
 from PIL import Image
-# -------------------------------
-# 1️⃣ MODEL YÜKLEME (Optimizeli)
-# -------------------------------
 device = "cuda" if torch.cuda.is_available() else "cpu"
-# İngilizce açıklama üretmek için BLIP base modeli
-processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
-blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(device)
-# İngilizce açıklamayı Japonca'ya çevirmek için T5-base model
-translator_tokenizer = AutoTokenizer.from_pretrained("staka/fugumt-en-ja")
-translator_model = AutoModelForSeq2SeqLM.from_pretrained("staka/fugumt-en-ja").to(device)
-# -------------------------------
-# 2️⃣ FONKSİYONLAR
-# -------------------------------
-def generate_caption(image):
-    """Resimden İngilizce açıklama oluşturur."""
-    inputs = processor(images=image, return_tensors="pt").to(device)
-    output = blip_model.generate(**inputs, max_new_tokens=50)
-    english_caption = processor.decode(output[0], skip_special_tokens=True)
-    return english_caption
 def translate_to_japanese(text):
-    """İngilizce metni Japoncaya çevirir."""
     inputs = translator_tokenizer(text, return_tensors="pt", padding=True).to(device)
-    translated = translator_model.generate(**inputs, max_new_tokens=100)
     japanese_text = translator_tokenizer.decode(translated[0], skip_special_tokens=True)
     return japanese_text
-def process_image(image):
-    """Resimden Japonca açıklama üretir."""
-    english_caption = generate_caption(image)
     japanese_caption = translate_to_japanese(english_caption)
-    return f"🇺🇸 **English:** {english_caption}\n\n🇯🇵 **Japanese:** {japanese_caption}"
-# -------------------------------
-# 3️⃣ GRADIO ARAYÜZÜ
-# -------------------------------
-with gr.Blocks() as demo:
-    gr.Markdown("## 🏯 Japanese Image Caption Generator")
-    gr.Markdown("Upload an image, and this app will describe it naturally in **Japanese and English**.")
     with gr.Row():
-        image_input = gr.Image(type="pil", label="Upload an image")
-        caption_output = gr.Markdown(label="Result")
-    generate_button = gr.Button("Generate Caption")
-    generate_button.click(fn=process_image, inputs=image_input, outputs=caption_output)
-# -------------------------------
-# 4️⃣ UYGULAMA ÇALIŞTIR
-# -------------------------------
-if __name__ == "__main__":
-    demo.launch()

 import gradio as gr
+from transformers import (
+    BlipProcessor,
+    BlipForConditionalGeneration,
+    AutoTokenizer,
+    AutoModelForSeq2SeqLM
+)
 from PIL import Image
+import torch
+# =============== Model Load ===============
+# Both models are created by module-level statements and moved to `device`
+# (CUDA when torch reports it available, otherwise CPU); the functions below
+# read them as module globals.
 device = "cuda" if torch.cuda.is_available() else "cpu"
+# --- Image Captioning Model (English) ---
+caption_model_name = "Salesforce/blip-image-captioning-large"
+caption_processor = BlipProcessor.from_pretrained(caption_model_name)
+caption_model = BlipForConditionalGeneration.from_pretrained(caption_model_name).to(device)
+# --- English → Japanese Translation Model ---
+translator_model_name = "staka/fugumt-en-ja"
+translator_tokenizer = AutoTokenizer.from_pretrained(translator_model_name)
+translator_model = AutoModelForSeq2SeqLM.from_pretrained(translator_model_name).to(device)
+# =============== Core Functions ===============
+def generate_english_caption(image):
+    """Generate an English caption for an image.
+
+    Args:
+        image: Image handed to ``caption_processor`` as ``images``. The UI
+            supplies it from a ``gr.Image(type="pil")`` component.
+
+    Returns:
+        The first generated sequence decoded to text, with special tokens
+        skipped.
+    """
+    # Preprocess the image into model inputs and move them to the model's device.
+    inputs = caption_processor(images=image, return_tensors="pt").to(device)
+    output = caption_model.generate(
+        **inputs,
+        max_new_tokens=80,
+        num_beams=5,
+        # NOTE(review): no do_sample=True is passed, so temperature presumably
+        # has no effect under beam search -- confirm against the transformers
+        # generation docs.
+        temperature=0.7,
+        repetition_penalty=2.0
+    )
+    # Only the first returned sequence is decoded.
+    caption = caption_processor.decode(output[0], skip_special_tokens=True)
+    return caption
 def translate_to_japanese(text):
+    """Translate English text to Japanese with the module-level translator model.
+
+    Args:
+        text: English text to translate; passed directly to
+            ``translator_tokenizer``.
+
+    Returns:
+        The first generated sequence decoded to text, with special tokens
+        skipped.
+    """
     inputs = translator_tokenizer(text, return_tensors="pt", padding=True).to(device)
+    # Beam search with a repetition penalty; output is capped at 80 new tokens.
+    translated = translator_model.generate(
+        **inputs,
+        max_new_tokens=80,
+        num_beams=5,
+        early_stopping=True,
+        repetition_penalty=2.5
+    )
     japanese_text = translator_tokenizer.decode(translated[0], skip_special_tokens=True)
     return japanese_text
+def caption_image(image, detail_level):
+    """Caption an image in English and Japanese at the requested detail level.
+
+    Args:
+        image: Image forwarded to ``generate_english_caption`` and, in the
+            detailed branch, to ``caption_processor``. The UI supplies it from
+            a ``gr.Image(type="pil")`` component.
+        detail_level: Selected caption style. Only the exact string
+            "Detailed / 詳細" selects the detailed branch; any other value
+            yields the simple caption.
+
+    Returns:
+        A single string holding the English caption and its Japanese
+        translation, separated by a blank line and labelled with ``**``
+        emphasis markers.
+    """
+    # NOTE(review): there is no guard for a missing image; presumably gr.Image
+    # passes None when nothing is uploaded -- confirm and handle if so.
+    english_caption = generate_english_caption(image)
+    # NOTE(review): this translation is only used by the simple branch; in the
+    # detailed branch it is computed and then never read.
     japanese_caption = translate_to_japanese(english_caption)
+    if detail_level == "Detailed / 詳細":
+        # Second pass: feed the first caption back as a text prompt together
+        # with the image, using a longer token budget and more beams.
+        # NOTE(review): assumes the captioning model follows the instruction in
+        # the prompt; it may instead treat the text as a caption prefix, in
+        # which case the decoded output would echo the prompt -- TODO confirm.
+        prompt = f"The image shows: {english_caption}. Describe it vividly in English."
+        inputs = caption_processor(text=prompt, images=image, return_tensors="pt").to(device)
+        detailed_output = caption_model.generate(
+            **inputs,
+            max_new_tokens=120,
+            num_beams=7,
+            # NOTE(review): as in generate_english_caption, no do_sample=True is
+            # passed, so temperature presumably has no effect -- confirm.
+            temperature=0.8
+        )
+        detailed_caption = caption_processor.decode(detailed_output[0], skip_special_tokens=True)
+        japanese_detailed = translate_to_japanese(detailed_caption)
+        return f"🇺🇸 **English (Detailed):** {detailed_caption}\n\n🇯🇵 **日本語 (詳細):** {japanese_detailed}"
+    else:
+        return f"🇺🇸 **English:** {english_caption}\n\n🇯🇵 **日本語:** {japanese_caption}"
+# =============== Gradio UI ===============
+# Layout: a bilingual heading and description, then a row holding the image
+# upload and the caption-style selector, then the output box and the button.
+with gr.Blocks(title="Japanese Image Captioning") as demo:
+    gr.Markdown("## 🏞️ Japanese Image Captioning / 日本語画像キャプション生成")
+    gr.Markdown("""
+    **Upload an image and generate a natural Japanese caption.**
+    画像をアップロードして、自然な日本語の説明文を生成します。
+    """)
     with gr.Row():
+        image_input = gr.Image(label="Upload Image / 画像をアップロード", type="pil")
+        # The choice strings here must match the literal compared in
+        # caption_image ("Detailed / 詳細") for the detailed branch to run.
+        detail_choice = gr.Radio(
+            ["Simple / シンプル", "Detailed / 詳細"],
+            label="Choose Caption Style / キャプションのスタイルを選択",
+            value="Simple / シンプル"
+        )
+    # NOTE(review): caption_image returns text containing ** emphasis markers,
+    # but the output component is a plain Textbox (the previous version used
+    # gr.Markdown); the markers presumably show up literally -- confirm.
+    output_text = gr.Textbox(
+        label="Generated Caption / 生成されたキャプション",
+        lines=6,
+        max_lines=8,
+        interactive=False
+    )
+    generate_btn = gr.Button("Generate Caption / キャプションを生成")
+    # Clicking the button runs caption_image on the uploaded image and the
+    # selected style, and writes the returned string to the output box.
+    generate_btn.click(
+        caption_image,
+        inputs=[image_input, detail_choice],
+        outputs=output_text
+    )
+# NOTE(review): launch() now runs unconditionally at module level; the previous
+# version guarded it with `if __name__ == "__main__":`.
+demo.launch()