Spaces:

ginigen
/

Every-Text

Runtime error

App Files Files Community

Every-Text / app.py

ginipick

Update app.py

1e367e3 verified 9 months ago

raw

history blame

13.7 kB

	import os
	import re
	import time
	from os import path
	import tempfile
	import uuid
	import base64
	import mimetypes
	import json
	import io
	import random
	import string

	import torch
	from PIL import Image

	from transformers import pipeline
	from safetensors.torch import load_file
	from huggingface_hub import hf_hub_download

	# Diffusers
	import gradio as gr
	from diffusers import FluxPipeline

	# (Internal) text-modification library
	from google import genai
	from google.genai import types

	#######################################
	# 0. Environment & Translation Pipeline
	#######################################

	# Directory that holds this script; fall back to the working directory when
	# __file__ is not defined (e.g. when the code is executed interactively).
	if "__file__" in globals():
	    BASE_DIR = path.dirname(path.abspath(__file__))
	else:
	    BASE_DIR = os.getcwd()
	CACHE_PATH = path.join(BASE_DIR, "models")

	# Point every Hugging Face cache location at the local "models" folder.
	# NOTE(review): these variables are assigned after transformers and
	# huggingface_hub were imported at the top of the file -- confirm the
	# libraries still honor them when set this late.
	os.environ.update(
	    {
	        "TRANSFORMERS_CACHE": CACHE_PATH,
	        "HF_HUB_CACHE": CACHE_PATH,
	        "HF_HOME": CACHE_PATH,
	    }
	)

	# Translation (Korean -> English); device=-1 keeps the model on the CPU.
	translator = pipeline(task="translation", model="Helsinki-NLP/opus-mt-ko-en", device=-1)

	def maybe_translate_to_english(text: str) -> str:
	    """
	    Return an English version of the prompt.

	    When the text contains at least one character in the "가-힣" range it
	    is passed to the module-level `translator` and the translation is
	    logged and returned. Any other text is returned unchanged.
	    """
	    has_korean = re.search("[가-힣]", text) is not None
	    if not has_korean:
	        return text
	    english = translator(text)[0]["translation_text"]
	    print(f"[TRANSLATE] Detected Korean -> '{text}' -> '{english}'")
	    return english

	# Simple Timer Class
	class timer:
	    """
	    Context manager that prints how long the wrapped block took.

	    On entry it prints "[TIMER] <name> starts"; on exit it prints the
	    elapsed time in seconds rounded to two decimals. Exceptions raised
	    inside the block are not suppressed.
	    """

	    def __init__(self, method_name="timed process"):
	        # Label used in both log lines.
	        self.method = method_name
	        self.start = None

	    def __enter__(self):
	        # perf_counter is monotonic, so the measured duration cannot be
	        # skewed by system clock adjustments the way time.time() can.
	        self.start = time.perf_counter()
	        print(f"[TIMER] {self.method} starts")
	        # Return the instance so "with timer(...) as t" is usable.
	        return self

	    def __exit__(self, exc_type, exc_val, exc_tb):
	        elapsed = time.perf_counter() - self.start
	        print(f"[TIMER] {self.method} took {round(elapsed, 2)}s")

	#######################################
	# 1. Load FLUX Pipeline
	#######################################

	# Make sure the local cache folder exists before any weights are fetched.
	if not path.exists(CACHE_PATH):
	    os.makedirs(CACHE_PATH, exist_ok=True)

	# Base FLUX.1-dev pipeline, loaded in bfloat16.
	pipe = FluxPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16)

	# Download the Hyper-SD 8-step LoRA, load it and fuse it into the pipeline
	# weights at scale 0.125, then move the pipeline to the GPU.
	lora_path = hf_hub_download(
	    repo_id="ByteDance/Hyper-SD",
	    filename="Hyper-FLUX.1-dev-8steps-lora.safetensors",
	)
	pipe.load_lora_weights(lora_path)
	pipe.fuse_lora(lora_scale=0.125)
	pipe.to(device="cuda", dtype=torch.bfloat16)

	#######################################
	# 2. Internal Text Modification Functions
	#######################################

	def save_binary_file(file_name, data):
	    """Write the bytes in `data` to `file_name`, replacing any existing content."""
	    with open(file_name, "wb") as out_file:
	        out_file.write(data)

	def generate_by_google_genai(text, file_name, model="gemini-2.0-flash-exp"):
	    """
	    Internally modifies text within an image, returning a new image path.
	    (Screen instructions do not mention 'Google'.)

	    The image at `file_name` is uploaded, sent together with the
	    instruction `text`, and the streamed response is read until the first
	    chunk that carries inline image data.

	    Returns:
	        (image_path, text_response). `image_path` is the path of a new
	        temporary ".png" file holding the returned image bytes, or None
	        when the response contained no image. The file is created with
	        delete=False, so the caller is responsible for removing it.
	        `text_response` is the text of the chunks seen before the image,
	        each followed by a newline.

	    Raises:
	        ValueError: if the GAPI_TOKEN environment variable is unset or empty.
	    """
	    api_key = os.getenv("GAPI_TOKEN", None)
	    if not api_key:
	        raise ValueError(
	            "GAPI_TOKEN is missing. Please set an API key."
	        )

	    client = genai.Client(api_key=api_key)
	    uploaded = client.files.upload(file=file_name)

	    contents = [
	        types.Content(
	            role="user",
	            parts=[
	                types.Part.from_uri(
	                    file_uri=uploaded.uri,
	                    mime_type=uploaded.mime_type,
	                ),
	                types.Part.from_text(text=text),
	            ],
	        ),
	    ]

	    generate_content_config = types.GenerateContentConfig(
	        temperature=1,
	        top_p=0.95,
	        top_k=40,
	        max_output_tokens=8192,
	        response_modalities=["image", "text"],
	        response_mime_type="text/plain",
	    )

	    text_chunks = []
	    image_path = None

	    for chunk in client.models.generate_content_stream(
	        model=model,
	        contents=contents,
	        config=generate_content_config,
	    ):
	        if not chunk.candidates or not chunk.candidates[0].content:
	            continue

	        # The image is not guaranteed to be the first part of a chunk, and
	        # `parts` itself may be missing, so scan whatever parts are present.
	        parts = chunk.candidates[0].content.parts or []
	        image_part = next(
	            (p for p in parts if p.inline_data and p.inline_data.data),
	            None,
	        )
	        if image_part is not None:
	            # Create the temp file only once there is something to store,
	            # so a text-only response leaves no stray file behind.
	            with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
	                tmp.write(image_part.inline_data.data)
	                image_path = tmp.name
	            print(f"[DEBUG] Returned new image -> {image_path}")
	            break

	        # chunk.text can be None for chunks without text; skip those
	        # instead of failing on str + None.
	        if chunk.text:
	            text_chunks.append(chunk.text)

	    text_response = "".join(piece + "\n" for piece in text_chunks)
	    return image_path, text_response


	#######################################
	# 3. Diffusion Utility
	#######################################

	def generate_random_letters(length: int) -> str:
	    """
	    Build a string of `length` characters, each drawn at random from the
	    ASCII lowercase and uppercase letters.
	    """
	    alphabet = string.ascii_lowercase + string.ascii_uppercase
	    picked = [random.choice(alphabet) for _ in range(length)]
	    return "".join(picked)

	def is_all_english(text: str) -> bool:
	    """
	    Tell whether `text` is made up solely of allowed characters.

	    Allowed: ASCII letters, digits, whitespace and the punctuation marks
	    . , ! ? and the apostrophe. The empty string counts as allowed. Any
	    other character (Korean, hyphens, symbols, ...) yields False.
	    """
	    allowed_only = re.match(r'^[a-zA-Z0-9\s\.,!\?\']*$', text)
	    return allowed_only is not None

	def maybe_use_random_or_original(final_text: str) -> str:
	    """
	    Choose the placeholder text that goes into the initial image prompt.

	    Empty or missing input gives "". Text accepted by `is_all_english` is
	    returned unchanged; anything else is swapped for random letters of
	    the same length.
	    """
	    if not final_text:
	        return ""
	    keep_original = is_all_english(final_text)
	    return final_text if keep_original else generate_random_letters(len(final_text))

	def fill_prompt_with_random_texts(prompt: str, r1: str, r2: str, r3: str) -> str:
	    """
	    Replace <text1>, <text2>, <text3> with r1, r2, r3 respectively.

	    <text1> is the main placeholder: when the prompt does not contain it
	    and r1 is non-empty, a sentence asking for r1 as readable text is
	    appended instead. With an empty r1 nothing is appended, so the prompt
	    never ends in "... that says ''".

	    Returns the prompt with every placeholder occurrence substituted.
	    """
	    if "<text1>" in prompt:
	        prompt = prompt.replace("<text1>", r1)
	    elif r1:
	        prompt = f"{prompt} with clear readable text that says '{r1}'"

	    # str.replace is already a no-op when the placeholder is absent.
	    return prompt.replace("<text2>", r2).replace("<text3>", r3)

	def generate_initial_image(prompt, height, width, steps, scale, seed):
	    """
	    Render the initial image for `prompt` with the module-level Flux `pipe`.

	    The numeric arguments are coerced before use (int for seed, steps,
	    height and width; float for scale). Generation runs under
	    torch.inference_mode and bfloat16 CUDA autocast, and is timed by
	    `timer`. Returns the first image of the pipeline result.
	    """
	    with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16), timer("Flux Generation"):
	        seeded_generator = torch.Generator().manual_seed(int(seed))
	        output = pipe(
	            prompt=[prompt],
	            generator=seeded_generator,
	            num_inference_steps=int(steps),
	            guidance_scale=float(scale),
	            height=int(height),
	            width=int(width),
	            max_sequence_length=256
	        )
	        first_image = output.images[0]
	    return first_image


	#######################################
	# 4. Creating 2 Final Images
	#######################################

	def build_multi_change_instruction(r1, f1, r2, f2, r3, f3):
	    """
	    Compose the edit instruction for the pairs (r1->f1), (r2->f2), (r3->f3).

	    A sentence is produced for each pair in which both the placeholder
	    and the final text are non-empty; the sentences are joined with a
	    single space. When no pair qualifies, "No text changes needed." is
	    returned.
	    """
	    pairs = ((r1, f1), (r2, f2), (r3, f3))
	    sentences = [
	        f"Change any text reading '{old}' in this image to '{new}'."
	        for old, new in pairs
	        if old and new
	    ]
	    return " ".join(sentences) if sentences else "No text changes needed."

	def change_text_in_image_two_times(original_image, instruction):
	    """
	    Call the text modification function twice,
	    returning 2 final variations.

	    `original_image` is saved once to a temporary PNG that both calls
	    upload. Each call gets `instruction` followed by a version tag, "(A)"
	    then "(B)". When a call returns no image, `original_image` itself is
	    used for that slot. All temporary files created or received here are
	    removed before returning.

	    Returns a list with two images.

	    Raises:
	        gr.Error: wrapping any exception raised while saving, calling the
	            modification function, or reading its result.
	    """
	    results = []
	    original_path = None
	    try:
	        # Reserve a temp path, close the handle, then let PIL write to it.
	        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
	            original_path = tmp.name
	        original_image.save(original_path)

	        for version_tag in ("(A)", "(B)"):
	            mod_instruction = f"{instruction} {version_tag}"
	            image_path, _text_response = generate_by_google_genai(
	                text=mod_instruction,
	                file_name=original_path
	            )
	            if not image_path:
	                results.append(original_image)
	                continue

	            try:
	                with open(image_path, "rb") as f:
	                    image_data = f.read()
	            finally:
	                # The bytes are in memory now (or the read failed), so the
	                # returned temp file is no longer needed.
	                try:
	                    os.remove(image_path)
	                except OSError:
	                    pass
	            results.append(Image.open(io.BytesIO(image_data)))
	    except Exception as e:
	        # Chain the cause so the original traceback stays in the logs.
	        raise gr.Error(f"Error: {e}") from e
	    finally:
	        if original_path is not None:
	            try:
	                os.remove(original_path)
	            except OSError:
	                pass
	    return results


	#######################################
	# 5. Main Process
	#######################################

	def run_process(
	    prompt,
	    final_text1,
	    final_text2,
	    final_text3,
	    height,
	    width,
	    steps,
	    scale,
	    seed
	):
	    """
	    Full pipeline behind the "Generate" button.

	    1) Translate the prompt to English when it contains Korean.
	    2) For each requested text pick a placeholder: the text itself when it
	       passes the English-only check, otherwise random letters of the
	       same length.
	    3) Generate the initial image from the prompt filled with the
	       placeholders.
	    4) Ask the text-modification step twice to swap the placeholders for
	       the requested texts, and return those two images as a list.
	    """
	    prompt_en = maybe_translate_to_english(prompt)

	    # Placeholder for each requested text, in order.
	    requested_texts = (final_text1, final_text2, final_text3)
	    r1, r2, r3 = [maybe_use_random_or_original(wanted) for wanted in requested_texts]
	    print(f"[DEBUG] Using placeholders: r1='{r1}', r2='{r2}', r3='{r3}'")

	    # Prompt with the placeholders substituted in.
	    final_prompt = fill_prompt_with_random_texts(prompt_en, r1, r2, r3)
	    print(f"[DEBUG] final_prompt = {final_prompt}")

	    # Initial image containing the placeholder text (never shown to the user).
	    base_image = generate_initial_image(final_prompt, height, width, steps, scale, seed)

	    # One instruction covering all replacements, applied twice.
	    instruction = build_multi_change_instruction(
	        r1, final_text1,
	        r2, final_text2,
	        r3, final_text3,
	    )
	    final_imgs = change_text_in_image_two_times(base_image, instruction)

	    first_variant, second_variant = final_imgs[0], final_imgs[1]
	    return [first_variant, second_variant]

	#######################################
	# 6. Gradio UI
	#######################################

	# Gradio front end: prompt and text inputs on the left, the two final
	# images on the right. The placeholders in the intro text are HTML-escaped
	# so they are displayed literally instead of being parsed as unknown tags.
	with gr.Blocks(title="Eevery Text Imaginator: FLUX") as demo:
	    gr.Markdown(
	        """
	<h2 style="text-align:center; margin-bottom: 15px;">
	<strong>Eevery Text Imaginator: FLUX</strong>
	</h2>

	<p style="text-align:center;">
	This tool generates two final images from a prompt
	containing placeholders <code>&lt;text1&gt;</code>, <code>&lt;text2&gt;</code>, <code>&lt;text3&gt;</code>.
	If your chosen text is purely English, it will appear directly;
	otherwise it becomes random letters in the initial phase.
	</p>

	<hr style="margin: 15px 0;">
	"""
	    )

	    # 5 example prompts (focusing on <text1>, <text2>)
	    examples = [
	        [
	            "On a grand stage, <text1> in big letters and <text2> on the left side",
	            "HELLO", "WORLD", ""
	        ],
	        [
	            "Futuristic neon sign with <text1>, plus <text2> near the bottom",
	            "WELCOME", "SALE", ""
	        ],
	        [
	            "A classical poster reading <text1> in bold, <text2> as a subtitle",
	            "MUSICFEST", "2025", ""
	        ],
	        [
	            "In a cartoon style, a speech bubble with <text1> and another text <text2>",
	            "HI!", "OhYes", ""
	        ],
	        [
	            "Large billboard featuring <text1>, smaller text <text2> in the corner",
	            "ANNOUNCEMENT", "OPENNOW", ""
	        ],
	    ]

	    with gr.Row():
	        with gr.Column():
	            # gr.Group replaces gr.Box, which newer Gradio releases no
	            # longer provide.
	            with gr.Group():
	                prompt_input = gr.Textbox(
	                    lines=3,
	                    label="Prompt (Korean or English)",
	                    placeholder="On a grand stage, <text1> in big letters..."
	                )
	                final_text1 = gr.Textbox(
	                    label="New Text #1 (Required)",
	                    placeholder="Example: HELLO or 안녕하세요"
	                )
	                final_text2 = gr.Textbox(
	                    label="New Text #2 (Optional)",
	                    placeholder="Example: WORLD or 반갑습니다"
	                )
	                final_text3 = gr.Textbox(
	                    label="New Text #3 (Optional)",
	                    placeholder="(Leave blank if not used)"
	                )

	            with gr.Accordion("Advanced Settings (optional)", open=False):
	                height = gr.Slider(label="Height", minimum=256, maximum=1152, step=64, value=512)
	                width = gr.Slider(label="Width", minimum=256, maximum=1152, step=64, value=512)
	                steps = gr.Slider(label="Inference Steps", minimum=6, maximum=25, step=1, value=8)
	                scale = gr.Slider(label="Guidance Scale", minimum=0.0, maximum=10.0, step=0.5, value=3.5)
	                seed = gr.Number(label="Seed", value=1234, precision=0)

	            run_btn = gr.Button("Generate 2 Final Images", variant="primary")

	            gr.Examples(
	                examples=examples,
	                inputs=[prompt_input, final_text1, final_text2, final_text3],
	                label="Example Prompts"
	            )

	        with gr.Column():
	            final_image_output1 = gr.Image(label="Final Image #1", type="pil")
	            final_image_output2 = gr.Image(label="Final Image #2", type="pil")

	    # We only display the 2 final images, not the initial random image
	    run_btn.click(
	        fn=run_process,
	        inputs=[
	            prompt_input,
	            final_text1,
	            final_text2,
	            final_text3,
	            height,
	            width,
	            steps,
	            scale,
	            seed
	        ],
	        outputs=[final_image_output1, final_image_output2]
	    )

	demo.launch(max_threads=20)