Spaces:

ginigen
/

Every-Text

Runtime error

App Files Files Community

ginipick committed on Mar 18

Commit

1e367e3

verified ·

1 Parent(s): 0399de8

Update app.py

Browse files

Files changed (1) hide show

app.py +182 -134

app.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import os
 import time
 from os import path
 import tempfile
@@ -13,19 +14,20 @@ import string
 import torch
 from PIL import Image
 from safetensors.torch import load_file
 from huggingface_hub import hf_hub_download
-# Diffusers 관련 라이브러리
 import gradio as gr
 from diffusers import FluxPipeline
-# Google GenAI 라이브러리
 from google import genai
 from google.genai import types
 #######################################
-# 0. 환경설정
 #######################################
 BASE_DIR = path.dirname(path.abspath(__file__)) if "__file__" in globals() else os.getcwd()
@@ -35,6 +37,25 @@ os.environ["TRANSFORMERS_CACHE"] = CACHE_PATH
 os.environ["HF_HUB_CACHE"] = CACHE_PATH
 os.environ["HF_HOME"] = CACHE_PATH
 class timer:
     def __init__(self, method_name="timed process"):
         self.method = method_name
@@ -46,7 +67,7 @@ class timer:
         print(f"[TIMER] {self.method} took {round(end - self.start, 2)}s")
 #######################################
-# 1. FLUX 파이프라인 로드
 #######################################
 if not path.exists(CACHE_PATH):
@@ -63,7 +84,7 @@ pipe.fuse_lora(lora_scale=0.125)
 pipe.to(device="cuda", dtype=torch.bfloat16)
 #######################################
-# 2. Google GenAI (Gemini) - 이미지 변환 함수
 #######################################
 def save_binary_file(file_name, data):
@@ -71,12 +92,14 @@ def save_binary_file(file_name, data):
         f.write(data)
 def generate_by_google_genai(text, file_name, model="gemini-2.0-flash-exp"):
-    """Gemini 모델을 통해 이미지 내부 텍스트를 변경."""
     api_key = os.getenv("GAPI_TOKEN", None)
     if not api_key:
         raise ValueError(
-            "GAPI_TOKEN 환경 변수가 설정되지 않았습니다. "
-            "Google GenAI API를 사용하기 위해서는 GAPI_TOKEN이 필요합니다."
         )
     client = genai.Client(api_key=api_key)
@@ -120,7 +143,7 @@ def generate_by_google_genai(text, file_name, model="gemini-2.0-flash-exp"):
             candidate = chunk.candidates[0].content.parts[0]
             if candidate.inline_data:
                 save_binary_file(temp_path, candidate.inline_data.data)
-                print(f"[DEBUG] Gemini returned image -> {temp_path}")
                 image_path = temp_path
                 break
             else:
@@ -129,30 +152,49 @@ def generate_by_google_genai(text, file_name, model="gemini-2.0-flash-exp"):
     del files
     return image_path, text_response
 #######################################
-# 3. Diffusion (Flux)용 함수
 #######################################
 def generate_random_letters(length: int) -> str:
-    """length 길이만큼 대소문자 알파벳을 무작위로 생성."""
     letters = string.ascii_lowercase + string.ascii_uppercase
     return "".join(random.choice(letters) for _ in range(length))
 def fill_prompt_with_random_texts(prompt: str, r1: str, r2: str, r3: str) -> str:
     """
-    프롬프트 내 <text1>, <text2>, <text3>를
-    각각 r1, r2, r3로 치환.
-    - <text1>은 필수 (없으면 자동으로 뒤에 붙임).
-    - <text2>, <text3>는 있으면 치환, 없으면 무시.
     """
-    # 1) <text1>은 필수
     if "<text1>" in prompt:
         prompt = prompt.replace("<text1>", r1)
     else:
-        # 자동 덧붙임
         prompt = f"{prompt} with clear readable text that says '{r1}'"
-    # 2) <text2>, <text3>는 선택
     if "<text2>" in prompt:
         prompt = prompt.replace("<text2>", r2)
     if "<text3>" in prompt:
@@ -160,9 +202,9 @@ def fill_prompt_with_random_texts(prompt: str, r1: str, r2: str, r3: str) -> str
     return prompt
-def generate_initial_image(prompt, random1, random2, random3, height, width, steps, scale, seed):
     """
-    Flux 파이프라인을 이용해 (r1, r2, r3)가 들어간 이미지를 생성.
     """
     with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16), timer("Flux Generation"):
         result = pipe(
@@ -177,50 +219,56 @@ def generate_initial_image(prompt, random1, random2, random3, height, width, ste
     return result
-def change_multi_text_in_image(original_image, random1, final1, random2, final2, random3, final3):
     """
-    Gemini를 통해, 이미지 안의 r1->final1, r2->final2, r3->final3 식으로 텍스트 교체.
-    - r2, final2 (또는 r3, final3)가 빈 문자열이면 해당 교체는 건너뜀.
     """
-    # 교체 지시문 만들기
     instructions = []
-    if random1 and final1:
-        instructions.append(f"Change any text reading '{random1}' in this image to '{final1}'.")
-    if random2 and final2:
-        instructions.append(f"Change any text reading '{random2}' in this image to '{final2}'.")
-    if random3 and final3:
-        instructions.append(f"Change any text reading '{random3}' in this image to '{final3}'.")
-    # 만약 교체 지시문이 없다면 그냥 return original_image
-    if not instructions:
-        print("[WARN] No text changes requested!")
-        return original_image
-    full_instruction = " ".join(instructions)
-    try:
-        # 임시 파일에 original_image 저장
-        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
-            original_path = tmp.name
-            original_image.save(original_path)
-        image_path, text_response = generate_by_google_genai(
-            text=full_instruction,
-            file_name=original_path
-        )
-        if image_path:
-            with open(image_path, "rb") as f:
-                image_data = f.read()
-            new_img = Image.open(io.BytesIO(image_data))
-            return new_img
-        else:
-            # 이미지 없이 텍스트만 온 경우
-            print("[WARN] Gemini returned only text:", text_response)
-            return original_image
-    except Exception as e:
-        raise gr.Error(f"Error: {e}")
 #######################################
-# 4. 메인 프로세스 함수
 #######################################
 def run_process(
@@ -235,121 +283,121 @@ def run_process(
     seed
 ):
     """
-    1) final_text1(필수), final_text2, final_text3(옵션) 각각 길이에 맞춰 무작위 알파벳 만들기.
-    2) prompt 내 <text1>, <text2>, <text3> 치환 -> Flux로 1차(랜덤) 이미지.
-    3) Gemini 호출 -> r1->final_text1, r2->final_text2, r3->final_text3 교체 -> 최종 이미지.
     """
-    # (A) 무작위 알파벳
-    r1 = generate_random_letters(len(final_text1)) if final_text1 else ""
-    r2 = generate_random_letters(len(final_text2)) if final_text2 else ""
-    r3 = generate_random_letters(len(final_text3)) if final_text3 else ""
-    # (B) 프롬프트 치환
-    final_prompt = fill_prompt_with_random_texts(prompt, r1, r2, r3)
-    print(f"[DEBUG] final_prompt = {final_prompt}")
-    # (C) 1차 이미지 (랜덤 텍스트)
-    random_image = generate_initial_image(final_prompt, r1, r2, r3, height, width, steps, scale, seed)
-    # (D) 2차 이미지 (실제 텍스트)
-    final_image = change_multi_text_in_image(
-        random_image,
-        r1, final_text1,
-        r2, final_text2,
-        r3, final_text3
-    )
-    return [random_image, final_image]
 #######################################
-# 5. Gradio UI
 #######################################
-with gr.Blocks(title="Flux + Google GenAI (Up to 3 Text placeholders)") as demo:
     gr.Markdown(
         """
-        # Flux + Google GenAI: 최대 3개의 `<text>` 교체
-        ## 사용 방법
-        1. 아래 Prompt에 `<text1>`, `<text2>`, `<text3>`를 최대 3개까지 배치 가능.
-           - 예) "A poster with <text1> in large letters, also <text2> in the corner"
-           - **<text1>은 필수**(없으면 자동으로 문구가 뒤에 붙음)
-           - <text2>, <text3>는 넣어도 되고, 안 넣어도 됨.
-        2. "New Text #1" (필수), "New Text #2", "New Text #3"를 입력.
-           - #2, #3는 비워 두면 해당 자리 교체 없음.
-        3. "Generate Images" 버튼 →
-           (1) `<text1>`, `<text2>`, `<text3>` 자리에 (또는 자동으로) **무작위 알파벳** 넣은 1차 이미지 생성
-           (2) 이어 Gemini 모델을 통해 무작위 알파벳 → 실제 "New Text #1/2/3" 변경한 2차 이미지
-           - **두 이미지**(랜덤 텍스트 → 최종 텍스트)가 순서대로 출력됩니다.
-        ---
         """
     )
-    # 예시 5개
     examples = [
         [
-            "A futuristic billboard shows <text1> and a small sign <text2> on the left side. <text3> is a hidden watermark.",
-            "HELLO", "WELCOME", "2025"
         ],
         [
-            "A fantasy poster with <text1> and <text2> in stylized letters, plus a tiny note <text3> at the bottom.",
-            "Dragons", "MagicRealm", "Beware!"
         ],
         [
-            "A neon sign reading <text1>, with a secondary text <text2> below. <text3> might appear in the corner.",
-            "OPEN", "24HOUR", "NoSmoking"
         ],
         [
-            "A big invitation card with main text <text1>, subtitle <text2>, signature <text3> in cursive.",
-            "Birthday Party", "Today Only", "From Your Friend"
         ],
         [
-            "A large graffiti wall with <text1> in bold letters, plus <text2> and <text3> near the edges.",
-            "FREEDOM", "HOPE", "LOVE"
-        ]
     ]
     with gr.Row():
         with gr.Column():
-            prompt_input = gr.Textbox(
-                lines=3,
-                label="Prompt (use `<text1>`, `<text2>`, `<text3>` as needed)",
-                placeholder="Ex) A poster with <text1>, plus a line <text2>, etc."
-            )
-            final_text1 = gr.Textbox(
-                label="New Text #1 (Required)",
-                placeholder="Ex) HELLO"
-            )
-            final_text2 = gr.Textbox(
-                label="New Text #2 (Optional)",
-                placeholder="Ex) WELCOME"
-            )
-            final_text3 = gr.Textbox(
-                label="New Text #3 (Optional)",
-                placeholder="Ex) 2025 or anything"
-            )
-            with gr.Accordion("Advanced Settings", open=False):
                 height = gr.Slider(label="Height", minimum=256, maximum=1152, step=64, value=512)
                 width = gr.Slider(label="Width", minimum=256, maximum=1152, step=64, value=512)
                 steps = gr.Slider(label="Inference Steps", minimum=6, maximum=25, step=1, value=8)
                 scale = gr.Slider(label="Guidance Scale", minimum=0.0, maximum=10.0, step=0.5, value=3.5)
-                seed = gr.Number(label="Seed (reproducibility)", value=1234, precision=0)
-            run_btn = gr.Button("Generate Images", variant="primary")
             gr.Examples(
                 examples=examples,
                 inputs=[prompt_input, final_text1, final_text2, final_text3],
-                label="Click to load example"
             )
         with gr.Column():
-            random_image_output = gr.Image(label="1) Random Text Image", type="pil")
-            final_image_output = gr.Image(label="2) Final Text Image", type="pil")
-    # 버튼 액션
     run_btn.click(
         fn=run_process,
         inputs=[
@@ -363,7 +411,7 @@ with gr.Blocks(title="Flux + Google GenAI (Up to 3 Text placeholders)") as demo:
             scale,
             seed
         ],
-        outputs=[random_image_output, final_image_output]
     )
 demo.launch(max_threads=20)

 import os
+import re
 import time
 from os import path
 import tempfile
 import torch
 from PIL import Image
+from transformers import pipeline
 from safetensors.torch import load_file
 from huggingface_hub import hf_hub_download
+# Diffusers
 import gradio as gr
 from diffusers import FluxPipeline
+# (Internal) text-modification library
 from google import genai
 from google.genai import types
 #######################################
+# 0. Environment & Translation Pipeline
 #######################################
 BASE_DIR = path.dirname(path.abspath(__file__)) if "__file__" in globals() else os.getcwd()
 os.environ["HF_HUB_CACHE"] = CACHE_PATH
 os.environ["HF_HOME"] = CACHE_PATH
+# Translation (Korean -> English), CPU only
+translator = pipeline(
+    task="translation",
+    model="Helsinki-NLP/opus-mt-ko-en",
+    device=-1  # force CPU
+)
# Hangul Jamo (U+1100-U+11FF), Hangul Compatibility Jamo (U+3130-U+318F) and
# precomposed Hangul syllables (U+AC00-U+D7A3). Including the Jamo blocks means
# input typed as bare consonants/vowels (e.g. "ㅋㅋ") is also detected as Korean.
_HANGUL_PATTERN = re.compile(r"[\u1100-\u11ff\u3130-\u318f\uac00-\ud7a3]")


def maybe_translate_to_english(text: str) -> str:
    """
    Translate the prompt to English when it contains Korean characters.

    Args:
        text: The user prompt. May be empty or None.

    Returns:
        The translated string when any Hangul character is found; otherwise
        the input is returned unchanged (including empty/None input).

    NOTE(review): the whole prompt, including placeholders such as <text1>,
    is handed to the translation model as-is. Confirm the placeholders
    survive translation.
    """
    # Guard first so empty/None input never reaches the regex or the model.
    if not text or not _HANGUL_PATTERN.search(text):
        return text
    translated = translator(text)[0]["translation_text"]
    print(f"[TRANSLATE] Detected Korean -> '{text}' -> '{translated}'")
    return translated
+# Simple Timer Class
 class timer:
     def __init__(self, method_name="timed process"):
         self.method = method_name
         print(f"[TIMER] {self.method} took {round(end - self.start, 2)}s")
 #######################################
+# 1. Load FLUX Pipeline
 #######################################
 if not path.exists(CACHE_PATH):
 pipe.to(device="cuda", dtype=torch.bfloat16)
 #######################################
+# 2. Internal Text Modification Functions
 #######################################
 def save_binary_file(file_name, data):
         f.write(data)
 def generate_by_google_genai(text, file_name, model="gemini-2.0-flash-exp"):
+    """
+    Internally modifies text within an image, returning a new image path.
+    (Screen instructions do not mention 'Google'.)
+    """
     api_key = os.getenv("GAPI_TOKEN", None)
     if not api_key:
         raise ValueError(
+            "GAPI_TOKEN is missing. Please set an API key."
         )
     client = genai.Client(api_key=api_key)
             candidate = chunk.candidates[0].content.parts[0]
             if candidate.inline_data:
                 save_binary_file(temp_path, candidate.inline_data.data)
+                print(f"[DEBUG] Returned new image -> {temp_path}")
                 image_path = temp_path
                 break
             else:
     del files
     return image_path, text_response
 #######################################
+# 3. Diffusion Utility
 #######################################
 def generate_random_letters(length: int) -> str:
+    """
+    Create a random sequence of uppercase/lowercase letters of given length.
+    """
     letters = string.ascii_lowercase + string.ascii_uppercase
     return "".join(random.choice(letters) for _ in range(length))
def is_all_english(text: str) -> bool:
    """
    Report whether text contains only plain-ASCII "English" characters.

    Allowed characters are a-z, A-Z, 0-9, ASCII whitespace and the
    punctuation marks . , ! ? and '. The empty string counts as English.

    Args:
        text: The string to check.

    Returns:
        True when every character is in the allowed set, False otherwise
        (Korean, other scripts, other punctuation, non-ASCII whitespace).
    """
    # re.ASCII limits \s to ASCII whitespace; without it a str pattern also
    # accepts characters such as U+3000 (ideographic space) or U+00A0.
    return re.fullmatch(r"[a-zA-Z0-9\s.,!?']*", text, flags=re.ASCII) is not None
def maybe_use_random_or_original(final_text: str) -> str:
    """
    Pick the text to put into the initial image prompt.

    Empty input yields an empty string. Text accepted by is_all_english()
    is returned unchanged. Anything else is swapped for random letters of
    the same length, produced by generate_random_letters().
    """
    if not final_text:
        return ""
    keep_as_is = is_all_english(final_text)
    return final_text if keep_as_is else generate_random_letters(len(final_text))
 def fill_prompt_with_random_texts(prompt: str, r1: str, r2: str, r3: str) -> str:
     """
+    Replace <text1>, <text2>, <text3> with r1, r2, r3 respectively.
+    <text1> is required; if missing, we append something.
     """
     if "<text1>" in prompt:
         prompt = prompt.replace("<text1>", r1)
     else:
         prompt = f"{prompt} with clear readable text that says '{r1}'"
     if "<text2>" in prompt:
         prompt = prompt.replace("<text2>", r2)
     if "<text3>" in prompt:
     return prompt
+def generate_initial_image(prompt, height, width, steps, scale, seed):
     """
+    Use Flux Pipeline to generate the initial image from the prompt.
     """
     with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16), timer("Flux Generation"):
         result = pipe(
     return result
+#######################################
+# 4. Creating 2 Final Images
+#######################################
def build_multi_change_instruction(r1, f1, r2, f2, r3, f3):
    """
    Build one instruction string asking to replace r1->f1, r2->f2, r3->f3.

    A pair contributes a sentence only when both its current text and its
    target text are non-empty. Sentences are joined with single spaces.
    When no pair qualifies, "No text changes needed." is returned.
    """
    sentences = [
        f"Change any text reading '{current}' in this image to '{target}'."
        for current, target in ((r1, f1), (r2, f2), (r3, f3))
        if current and target
    ]
    if not sentences:
        return "No text changes needed."
    return " ".join(sentences)
def change_text_in_image_two_times(original_image, instruction):
    """
    Call the text modification function twice and return 2 final variations.

    Args:
        original_image: Image object exposing ``save(path)``; it is written
            to a temporary PNG that is passed to generate_by_google_genai().
        instruction: Instruction text. "(A)" and "(B)" are appended in turn
            so the two requests differ.

    Returns:
        A list with two images. When a call returns no image path, the
        original image is used for that slot.

    Raises:
        gr.Error: wraps any exception raised while saving, calling the
            modification function, or loading the result.
    """
    results = []
    original_path = None
    try:
        # The source image is the same for both calls, so write it once.
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
            original_path = tmp.name
        # Save only after the handle is closed: writing to a path that is
        # still open elsewhere fails on some platforms.
        original_image.save(original_path)

        for version_tag in ["(A)", "(B)"]:
            mod_instruction = f"{instruction} {version_tag}"
            image_path, text_response = generate_by_google_genai(
                text=mod_instruction,
                file_name=original_path
            )
            if image_path:
                # Read fully into memory so the image does not depend on the
                # file staying on disk.
                # NOTE(review): image_path looks like a temp file created by
                # generate_by_google_genai and is never removed - confirm
                # ownership before deleting it here.
                with open(image_path, "rb") as f:
                    image_data = f.read()
                results.append(Image.open(io.BytesIO(image_data)))
            else:
                # Make the fallback visible in the logs instead of silent.
                print(f"[WARN] No image returned {version_tag}; using original. Response text: {text_response}")
                results.append(original_image)
    except Exception as e:
        raise gr.Error(f"Error: {e}") from e
    finally:
        # delete=False means the temp file is ours to clean up.
        if original_path is not None:
            try:
                os.remove(original_path)
            except OSError:
                # Best-effort cleanup; a leftover temp file must not mask
                # the real result or error.
                pass
    return results
 #######################################
+# 5. Main Process
 #######################################
 def run_process(
     seed
 ):
     """
+    1) If prompt has Korean, translate to English
+    2) For each <textX>, if it's purely English, use as-is,
+       else generate random letters of the same length.
+    3) Generate initial image with these placeholders
+    4) Then produce 2 final images by replacing placeholders with real texts
     """
+    prompt_en = maybe_translate_to_english(prompt)
+    # Decide random vs original for each text
+    r1 = maybe_use_random_or_original(final_text1)
+    r2 = maybe_use_random_or_original(final_text2)
+    r3 = maybe_use_random_or_original(final_text3)
+    print(f"[DEBUG] Using placeholders: r1='{r1}', r2='{r2}', r3='{r3}'")
+    # Fill prompt
+    final_prompt = fill_prompt_with_random_texts(prompt_en, r1, r2, r3)
+    print(f"[DEBUG] final_prompt = {final_prompt}")
+    # Generate initial "random/original" image
+    _random_image = generate_initial_image(final_prompt, height, width, steps, scale, seed)
+    # Build final instructions & call twice -> 2 final images
+    instruction = build_multi_change_instruction(r1, final_text1, r2, final_text2, r3, final_text3)
+    final_imgs = change_text_in_image_two_times(_random_image, instruction)
+    # Return only the 2 final images (don't show the random image)
+    return [final_imgs[0], final_imgs[1]]
 #######################################
+# 6. Gradio UI
 #######################################
+with gr.Blocks(title="Eevery Text Imaginator: FLUX") as demo:
     gr.Markdown(
         """
+        <h2 style="text-align:center; margin-bottom: 15px;">
+            <strong>Eevery Text Imaginator: FLUX</strong>
+        </h2>
+        <p style="text-align:center;">
+            This tool generates two final images from a prompt
+            containing placeholders <code>&lt;text1&gt;</code>, <code>&lt;text2&gt;</code>, <code>&lt;text3&gt;</code>.
+            If your chosen text is purely English, it will appear directly;
+            otherwise it becomes random letters in the initial phase.
+        </p>
+        <hr style="margin: 15px 0;">
         """
     )
+    # 5 example prompts (focusing on <text1>, <text2>)
     examples = [
         [
+            "On a grand stage, <text1> in big letters and <text2> on the left side",
+            "HELLO", "WORLD", ""
         ],
         [
+            "Futuristic neon sign with <text1>, plus <text2> near the bottom",
+            "WELCOME", "SALE", ""
         ],
         [
+            "A classical poster reading <text1> in bold, <text2> as a subtitle",
+            "MUSICFEST", "2025", ""
         ],
         [
+            "In a cartoon style, a speech bubble with <text1> and another text <text2>",
+            "HI!", "OhYes", ""
         ],
         [
+            "Large billboard featuring <text1>, smaller text <text2> in the corner",
+            "ANNOUNCEMENT", "OPENNOW", ""
+        ],
     ]
     with gr.Row():
         with gr.Column():
+            with gr.Box():
+                prompt_input = gr.Textbox(
+                    lines=3,
+                    label="Prompt (Korean or English)",
+                    placeholder="On a grand stage, <text1> in big letters..."
+                )
+                final_text1 = gr.Textbox(
+                    label="New Text #1 (Required)",
+                    placeholder="Example: HELLO or 안녕하세요"
+                )
+                final_text2 = gr.Textbox(
+                    label="New Text #2 (Optional)",
+                    placeholder="Example: WORLD or 반갑습니다"
+                )
+                final_text3 = gr.Textbox(
+                    label="New Text #3 (Optional)",
+                    placeholder="(Leave blank if not used)"
+                )
+            with gr.Accordion("Advanced Settings (optional)", open=False):
                 height = gr.Slider(label="Height", minimum=256, maximum=1152, step=64, value=512)
                 width = gr.Slider(label="Width", minimum=256, maximum=1152, step=64, value=512)
                 steps = gr.Slider(label="Inference Steps", minimum=6, maximum=25, step=1, value=8)
                 scale = gr.Slider(label="Guidance Scale", minimum=0.0, maximum=10.0, step=0.5, value=3.5)
+                seed = gr.Number(label="Seed", value=1234, precision=0)
+            run_btn = gr.Button("Generate 2 Final Images", variant="primary")
             gr.Examples(
                 examples=examples,
                 inputs=[prompt_input, final_text1, final_text2, final_text3],
+                label="Example Prompts"
             )
         with gr.Column():
+            final_image_output1 = gr.Image(label="Final Image #1", type="pil")
+            final_image_output2 = gr.Image(label="Final Image #2", type="pil")
+    # We only display the 2 final images, not the initial random image
     run_btn.click(
         fn=run_process,
         inputs=[
             scale,
             seed
         ],
+        outputs=[final_image_output1, final_image_output2]
     )
 demo.launch(max_threads=20)