akhaliq HF Staff committed on
Commit
79d7f22
·
1 Parent(s): e5c52d7

use HunyuanImage-2.1 for text to image

Browse files
Files changed (1) hide show
  1. app.py +47 -85
app.py CHANGED
@@ -2833,7 +2833,7 @@ def create_temp_media_url(media_bytes: bytes, filename: str, media_type: str = "
2833
  """
2834
  try:
2835
  # Create unique filename with timestamp and UUID
2836
- timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
2837
  unique_id = str(uuid.uuid4())[:8]
2838
  base_name, ext = os.path.splitext(filename)
2839
  unique_filename = f"{media_type}_{timestamp}_{unique_id}_{base_name}{ext}"
@@ -2925,7 +2925,7 @@ def upload_media_to_hf(media_bytes: bytes, filename: str, media_type: str = "ima
2925
  # Continue anyway, repo might already exist
2926
 
2927
  # Create unique filename with timestamp and UUID
2928
- timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
2929
  unique_id = str(uuid.uuid4())[:8]
2930
  base_name, ext = os.path.splitext(filename)
2931
  unique_filename = f"{media_type}/{timestamp}_{unique_id}_{base_name}{ext}"
@@ -3029,93 +3029,54 @@ def cleanup_temp_media_files():
3029
  except Exception as e:
3030
  print(f"[TempCleanup] Error during cleanup: {str(e)}")
3031
 
3032
- def generate_image_with_gemini(prompt: str, image_index: int = 0, token: gr.OAuthToken | None = None) -> str:
3033
- """Generate image using Google Gemini 2.5 Flash Image Preview via OpenRouter.
3034
 
3035
- Uses google/gemini-2.5-flash-image-preview:free via OpenRouter chat completions API.
3036
 
3037
  Returns an HTML <img> tag whose src is an uploaded temporary URL.
3038
  """
3039
  try:
3040
- print(f"[Text2Image] Starting generation with prompt: {prompt[:100]}...")
3041
- # Check for OpenRouter API key
3042
- openrouter_key = os.getenv('OPENROUTER_API_KEY')
3043
- if not openrouter_key:
3044
- print("[Text2Image] Missing OPENROUTER_API_KEY")
3045
- return "Error: OPENROUTER_API_KEY environment variable is not set. Please set it to your OpenRouter API key."
 
3046
 
3047
- import requests
3048
- import json as _json
3049
- import base64
3050
- import io as _io
3051
  from PIL import Image
 
3052
 
3053
- # Create the chat completion request for text-to-image
3054
- headers = {
3055
- "Authorization": f"Bearer {openrouter_key}",
3056
- "Content-Type": "application/json"
3057
- }
 
3058
 
3059
- data = {
3060
- "model": "google/gemini-2.5-flash-image-preview:free",
3061
- "messages": [
3062
- {
3063
- "role": "user",
3064
- "content": f"Generate an image based on this description: {prompt}"
3065
- }
3066
- ],
3067
- "temperature": 0.7,
3068
- "max_tokens": 1000
3069
- }
 
 
 
 
 
 
 
 
 
3070
 
3071
- try:
3072
- print("[Text2Image] Making API request to OpenRouter...")
3073
- response = requests.post(
3074
- "https://openrouter.ai/api/v1/chat/completions",
3075
- headers=headers,
3076
- json=data,
3077
- timeout=60
3078
- )
3079
- response.raise_for_status()
3080
- result_data = response.json()
3081
- print(f"[Text2Image] Received API response: {_json.dumps(result_data, indent=2)}")
3082
-
3083
- # Extract the generated image from the response (using same pattern as image-to-image)
3084
- message = result_data.get('choices', [{}])[0].get('message', {})
3085
-
3086
- if message and 'images' in message and message['images']:
3087
- # Get the first image from the 'images' list
3088
- image_data = message['images'][0]
3089
- base64_string = image_data.get('image_url', {}).get('url', '')
3090
-
3091
- if base64_string and ',' in base64_string:
3092
- # Remove the "data:image/png;base64," prefix
3093
- base64_content = base64_string.split(',')[1]
3094
-
3095
- # Decode the base64 string and create a PIL image
3096
- img_bytes = base64.b64decode(base64_content)
3097
- generated_image = Image.open(_io.BytesIO(img_bytes))
3098
-
3099
- # Convert PIL image to JPEG bytes for upload
3100
- out_buf = _io.BytesIO()
3101
- generated_image.convert('RGB').save(out_buf, format='JPEG', quality=90, optimize=True)
3102
- image_bytes = out_buf.getvalue()
3103
- else:
3104
- raise RuntimeError(f"API returned an invalid image format. Response: {_json.dumps(result_data, indent=2)}")
3105
- else:
3106
- raise RuntimeError(f"API did not return an image. Full Response: {_json.dumps(result_data, indent=2)}")
3107
-
3108
- except requests.exceptions.HTTPError as err:
3109
- error_body = err.response.text
3110
- if err.response.status_code == 401:
3111
- return "Error: Authentication failed. Check your OpenRouter API key."
3112
- elif err.response.status_code == 429:
3113
- return "Error: Rate limit exceeded or insufficient credits. Check your OpenRouter account."
3114
- else:
3115
- return f"Error: An API error occurred: {error_body}"
3116
- except Exception as e:
3117
- return f"Error: An unexpected error occurred: {str(e)}"
3118
-
3119
  # Upload and return HTML tag
3120
  print("[Text2Image] Uploading image to HF...")
3121
  filename = f"generated_image_{image_index}.jpg"
@@ -3125,8 +3086,9 @@ def generate_image_with_gemini(prompt: str, image_index: int = 0, token: gr.OAut
3125
  return temp_url
3126
  print(f"[Text2Image] Successfully generated image: {temp_url}")
3127
  return f"<img src=\"{temp_url}\" alt=\"{prompt}\" style=\"max-width: 100%; height: auto; border-radius: 8px; margin: 10px 0;\" loading=\"lazy\" />"
 
3128
  except Exception as e:
3129
- print(f"Text-to-image generation error: {str(e)}")
3130
  return f"Error generating image (text-to-image): {str(e)}"
3131
 
3132
  def generate_image_with_qwen(prompt: str, image_index: int = 0, token: gr.OAuthToken | None = None) -> str:
@@ -3651,7 +3613,7 @@ def create_image_replacement_blocks(html_content: str, user_prompt: str) -> str:
3651
  # Generate images for each prompt
3652
  generated_images = []
3653
  for i, prompt in enumerate(image_prompts):
3654
- image_html = generate_image_with_gemini(prompt, i, token=None) # TODO: Pass token from parent context
3655
  if not image_html.startswith("Error"):
3656
  generated_images.append((i, image_html))
3657
 
@@ -3741,7 +3703,7 @@ def create_image_replacement_blocks_text_to_image_single(html_content: str, prom
3741
  placeholder_images = re.findall(img_pattern, html_content)
3742
 
3743
  # Generate a single image
3744
- image_html = generate_image_with_gemini(prompt, 0, token=None) # TODO: Pass token from parent context
3745
  if image_html.startswith("Error"):
3746
  return ""
3747
 
@@ -4272,8 +4234,8 @@ def apply_generated_media_to_html(html_content: str, user_prompt: str, enable_te
4272
  print(f"[MediaApply] Running text-to-image with prompt len={len(t2i_prompt)}")
4273
  # Single-image flow for text-to-image (LLM placement first, fallback deterministic)
4274
  try:
4275
- print(f"[MediaApply] Calling generate_image_with_gemini with prompt: {t2i_prompt[:50]}...")
4276
- image_html_tag = generate_image_with_gemini(t2i_prompt, 0, token=token)
4277
  print(f"[MediaApply] Image generation result: {image_html_tag[:200]}...")
4278
  if not (image_html_tag or "").startswith("Error"):
4279
  print("[MediaApply] Attempting LLM placement of image...")
@@ -7233,7 +7195,7 @@ with gr.Blocks(
7233
  label="🎨 Generate Images (text → image)",
7234
  value=False,
7235
  visible=True,
7236
- info="Include generated images in your outputs using Nano Banana"
7237
  )
7238
  text_to_image_prompt = gr.Textbox(
7239
  label="Text-to-Image Prompt",
 
2833
  """
2834
  try:
2835
  # Create unique filename with timestamp and UUID
2836
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
2837
  unique_id = str(uuid.uuid4())[:8]
2838
  base_name, ext = os.path.splitext(filename)
2839
  unique_filename = f"{media_type}_{timestamp}_{unique_id}_{base_name}{ext}"
 
2925
  # Continue anyway, repo might already exist
2926
 
2927
  # Create unique filename with timestamp and UUID
2928
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
2929
  unique_id = str(uuid.uuid4())[:8]
2930
  base_name, ext = os.path.splitext(filename)
2931
  unique_filename = f"{media_type}/{timestamp}_{unique_id}_{base_name}{ext}"
 
3029
  except Exception as e:
3030
  print(f"[TempCleanup] Error during cleanup: {str(e)}")
3031
 
3032
+ def generate_image_with_hunyuan(prompt: str, image_index: int = 0, token: gr.OAuthToken | None = None) -> str:
3033
+ """Generate image using Tencent HunyuanImage-2.1 via Hugging Face InferenceClient.
3034
 
3035
+ Uses tencent/HunyuanImage-2.1 via HuggingFace InferenceClient with fal-ai provider.
3036
 
3037
  Returns an HTML <img> tag whose src is an uploaded temporary URL.
3038
  """
3039
  try:
3040
+ print(f"[Text2Image] Starting HunyuanImage generation with prompt: {prompt[:100]}...")
3041
+
3042
+ # Check for HF_TOKEN
3043
+ hf_token = os.getenv('HF_TOKEN')
3044
+ if not hf_token:
3045
+ print("[Text2Image] Missing HF_TOKEN")
3046
+ return "Error: HF_TOKEN environment variable is not set. Please set it to your Hugging Face API token."
3047
 
3048
+ from huggingface_hub import InferenceClient
 
 
 
3049
  from PIL import Image
3050
+ import io as _io
3051
 
3052
+ # Create InferenceClient with fal-ai provider
3053
+ client = InferenceClient(
3054
+ provider="fal-ai",
3055
+ api_key=hf_token,
3056
+ bill_to="huggingface",
3057
+ )
3058
 
3059
+ print("[Text2Image] Making API request to HuggingFace InferenceClient...")
3060
+
3061
+ # Generate image using HunyuanImage-2.1 model
3062
+ image = client.text_to_image(
3063
+ prompt,
3064
+ model="tencent/HunyuanImage-2.1",
3065
+ )
3066
+
3067
+ print(f"[Text2Image] Successfully generated image with size: {image.size}")
3068
+
3069
+ # Resize image to reduce size while maintaining quality
3070
+ max_size = 1024
3071
+ if image.width > max_size or image.height > max_size:
3072
+ image.thumbnail((max_size, max_size), Image.Resampling.LANCZOS)
3073
+
3074
+ # Convert PIL Image to bytes for upload
3075
+ buffer = _io.BytesIO()
3076
+ # Save as JPEG with good quality
3077
+ image.convert('RGB').save(buffer, format='JPEG', quality=90, optimize=True)
3078
+ image_bytes = buffer.getvalue()
3079
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3080
  # Upload and return HTML tag
3081
  print("[Text2Image] Uploading image to HF...")
3082
  filename = f"generated_image_{image_index}.jpg"
 
3086
  return temp_url
3087
  print(f"[Text2Image] Successfully generated image: {temp_url}")
3088
  return f"<img src=\"{temp_url}\" alt=\"{prompt}\" style=\"max-width: 100%; height: auto; border-radius: 8px; margin: 10px 0;\" loading=\"lazy\" />"
3089
+
3090
  except Exception as e:
3091
+ print(f"[Text2Image] Error generating image with HunyuanImage: {str(e)}")
3092
  return f"Error generating image (text-to-image): {str(e)}"
3093
 
3094
  def generate_image_with_qwen(prompt: str, image_index: int = 0, token: gr.OAuthToken | None = None) -> str:
 
3613
  # Generate images for each prompt
3614
  generated_images = []
3615
  for i, prompt in enumerate(image_prompts):
3616
+ image_html = generate_image_with_hunyuan(prompt, i, token=None) # TODO: Pass token from parent context
3617
  if not image_html.startswith("Error"):
3618
  generated_images.append((i, image_html))
3619
 
 
3703
  placeholder_images = re.findall(img_pattern, html_content)
3704
 
3705
  # Generate a single image
3706
+ image_html = generate_image_with_hunyuan(prompt, 0, token=None) # TODO: Pass token from parent context
3707
  if image_html.startswith("Error"):
3708
  return ""
3709
 
 
4234
  print(f"[MediaApply] Running text-to-image with prompt len={len(t2i_prompt)}")
4235
  # Single-image flow for text-to-image (LLM placement first, fallback deterministic)
4236
  try:
4237
+ print(f"[MediaApply] Calling generate_image_with_hunyuan with prompt: {t2i_prompt[:50]}...")
4238
+ image_html_tag = generate_image_with_hunyuan(t2i_prompt, 0, token=token)
4239
  print(f"[MediaApply] Image generation result: {image_html_tag[:200]}...")
4240
  if not (image_html_tag or "").startswith("Error"):
4241
  print("[MediaApply] Attempting LLM placement of image...")
 
7195
  label="🎨 Generate Images (text → image)",
7196
  value=False,
7197
  visible=True,
7198
+ info="Include generated images in your outputs using HunyuanImage-2.1"
7199
  )
7200
  text_to_image_prompt = gr.Textbox(
7201
  label="Text-to-Image Prompt",