kodzonee committed on
Commit
f9ab28e
·
verified ·
1 Parent(s): b465399

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +92 -43
app.py CHANGED
@@ -1,60 +1,109 @@
1
- import torch
2
- from transformers import BlipProcessor, BlipForConditionalGeneration, AutoTokenizer, AutoModelForSeq2SeqLM
3
  import gradio as gr
 
 
 
 
 
 
4
  from PIL import Image
 
5
 
6
- # -------------------------------
7
- # 1️⃣ MODEL YÜKLEME (Optimizeli)
8
- # -------------------------------
9
  device = "cuda" if torch.cuda.is_available() else "cpu"
10
 
11
- # İngilizce açıklama üretmek için BLIP base modeli
12
- processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
13
- blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(device)
 
 
 
 
 
 
 
14
 
15
- # İngilizce açıklamayı Japonca'ya çevirmek için T5-base model
16
- translator_tokenizer = AutoTokenizer.from_pretrained("staka/fugumt-en-ja")
17
- translator_model = AutoModelForSeq2SeqLM.from_pretrained("staka/fugumt-en-ja").to(device)
 
 
 
 
 
 
 
 
 
 
18
 
19
- # -------------------------------
20
- # 2️⃣ FONKSİYONLAR
21
- # -------------------------------
22
- def generate_caption(image):
23
- """Resimden İngilizce açıklama oluşturur."""
24
- inputs = processor(images=image, return_tensors="pt").to(device)
25
- output = blip_model.generate(**inputs, max_new_tokens=50)
26
- english_caption = processor.decode(output[0], skip_special_tokens=True)
27
- return english_caption
28
 
29
  def translate_to_japanese(text):
30
- """İngilizce metni Japoncaya çevirir."""
31
  inputs = translator_tokenizer(text, return_tensors="pt", padding=True).to(device)
32
- translated = translator_model.generate(**inputs, max_new_tokens=100)
 
 
 
 
 
 
33
  japanese_text = translator_tokenizer.decode(translated[0], skip_special_tokens=True)
34
  return japanese_text
35
 
36
- def process_image(image):
37
- """Resimden Japonca açıklama üretir."""
38
- english_caption = generate_caption(image)
 
39
  japanese_caption = translate_to_japanese(english_caption)
40
- return f"🇺🇸 **English:** {english_caption}\n\n🇯🇵 **Japanese:** {japanese_caption}"
41
-
42
- # -------------------------------
43
- # 3️⃣ GRADIO ARAYÜZÜ
44
- # -------------------------------
45
- with gr.Blocks() as demo:
46
- gr.Markdown("## 🏯 Japanese Image Caption Generator")
47
- gr.Markdown("Upload an image, and this app will describe it naturally in **Japanese and English**.")
48
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  with gr.Row():
50
- image_input = gr.Image(type="pil", label="Upload an image")
51
- caption_output = gr.Markdown(label="Result")
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
- generate_button = gr.Button("Generate Caption")
54
- generate_button.click(fn=process_image, inputs=image_input, outputs=caption_output)
 
 
 
55
 
56
- # -------------------------------
57
- # 4️⃣ UYGULAMA ÇALIŞTIR
58
- # -------------------------------
59
- if __name__ == "__main__":
60
- demo.launch()
 
 
 
1
  import gradio as gr
2
+ from transformers import (
3
+ BlipProcessor,
4
+ BlipForConditionalGeneration,
5
+ AutoTokenizer,
6
+ AutoModelForSeq2SeqLM
7
+ )
8
  from PIL import Image
9
+ import torch
10
 
11
+ # =============== Model Load ===============
 
 
12
  device = "cuda" if torch.cuda.is_available() else "cpu"
13
 
14
+ # --- Image Captioning Model (English) ---
15
+ caption_model_name = "Salesforce/blip-image-captioning-large"
16
+ caption_processor = BlipProcessor.from_pretrained(caption_model_name)
17
+ caption_model = BlipForConditionalGeneration.from_pretrained(caption_model_name).to(device)
18
+
19
+ # --- English → Japanese Translation Model ---
20
+ translator_model_name = "staka/fugumt-en-ja"
21
+ translator_tokenizer = AutoTokenizer.from_pretrained(translator_model_name)
22
+ translator_model = AutoModelForSeq2SeqLM.from_pretrained(translator_model_name).to(device)
23
+
24
 
25
+ # =============== Core Functions ===============
26
+ def generate_english_caption(image):
27
+ """Generate an English caption for an image."""
28
+ inputs = caption_processor(images=image, return_tensors="pt").to(device)
29
+ output = caption_model.generate(
30
+ **inputs,
31
+ max_new_tokens=80,
32
+ num_beams=5,
33
+ temperature=0.7,
34
+ repetition_penalty=2.0
35
+ )
36
+ caption = caption_processor.decode(output[0], skip_special_tokens=True)
37
+ return caption
38
 
 
 
 
 
 
 
 
 
 
39
 
40
  def translate_to_japanese(text):
41
+ """Translate English text to natural Japanese."""
42
  inputs = translator_tokenizer(text, return_tensors="pt", padding=True).to(device)
43
+ translated = translator_model.generate(
44
+ **inputs,
45
+ max_new_tokens=80,
46
+ num_beams=5,
47
+ early_stopping=True,
48
+ repetition_penalty=2.5
49
+ )
50
  japanese_text = translator_tokenizer.decode(translated[0], skip_special_tokens=True)
51
  return japanese_text
52
 
53
+
54
+ def caption_image(image, detail_level):
55
+ """Generate Japanese captions with different detail levels."""
56
+ english_caption = generate_english_caption(image)
57
  japanese_caption = translate_to_japanese(english_caption)
58
+
59
+ if detail_level == "Detailed / 詳細":
60
+ # Add descriptive depth
61
+ prompt = f"The image shows: {english_caption}. Describe it vividly in English."
62
+ inputs = caption_processor(text=prompt, images=image, return_tensors="pt").to(device)
63
+ detailed_output = caption_model.generate(
64
+ **inputs,
65
+ max_new_tokens=120,
66
+ num_beams=7,
67
+ temperature=0.8
68
+ )
69
+ detailed_caption = caption_processor.decode(detailed_output[0], skip_special_tokens=True)
70
+ japanese_detailed = translate_to_japanese(detailed_caption)
71
+ return f"🇺🇸 **English (Detailed):** {detailed_caption}\n\n🇯🇵 **日本語 (詳細):** {japanese_detailed}"
72
+
73
+ else:
74
+ return f"🇺🇸 **English:** {english_caption}\n\n🇯🇵 **日本語:** {japanese_caption}"
75
+
76
+
77
+ # =============== Gradio UI ===============
78
+ with gr.Blocks(title="Japanese Image Captioning") as demo:
79
+ gr.Markdown("## 🏞️ Japanese Image Captioning / 日本語画像キャプション生成")
80
+
81
+ gr.Markdown("""
82
+ **Upload an image and generate a natural Japanese caption.**
83
+ 画像をアップロードして、自然な日本語の説明文を生成します。
84
+ """)
85
+
86
  with gr.Row():
87
+ image_input = gr.Image(label="Upload Image / 画像をアップロード", type="pil")
88
+ detail_choice = gr.Radio(
89
+ ["Simple / シンプル", "Detailed / 詳細"],
90
+ label="Choose Caption Style / キャプションのスタイルを選択",
91
+ value="Simple / シンプル"
92
+ )
93
+
94
+ output_text = gr.Textbox(
95
+ label="Generated Caption / 生成されたキャプション",
96
+ lines=6,
97
+ max_lines=8,
98
+ interactive=False
99
+ )
100
+
101
+ generate_btn = gr.Button("Generate Caption / キャプションを生成")
102
 
103
+ generate_btn.click(
104
+ caption_image,
105
+ inputs=[image_input, detail_choice],
106
+ outputs=output_text
107
+ )
108
 
109
+ demo.launch()