Spaces:
Sleeping
Sleeping
# logic_image.py
#
# Helpers for building scene/thumbnail image prompts, rendering images via a
# GenAI client, and extracting raw image bytes from model responses.

import json
import logging
import re

# Module-level logger; handler/level configuration is left to the application.
logger = logging.getLogger(__name__)
class ImageExtractionError(RuntimeError):
    """Raised when image bytes cannot be extracted from a model response."""
| def _summarize_response_structure(response): | |
| if response is None: | |
| return "response=None" | |
| summary = { | |
| "type": type(response).__name__, | |
| "has_candidates": hasattr(response, "candidates"), | |
| "has_content": hasattr(response, "content"), | |
| "has_image": hasattr(response, "image"), | |
| "has_images": hasattr(response, "images"), | |
| "has_data": hasattr(response, "data"), | |
| } | |
| return ", ".join(f"{k}={v}" for k, v in summary.items()) | |
| def _get_style_prompt(selected_style, custom_style_input, style_definitions): | |
| if selected_style == "์ง์ ์ ๋ ฅ": | |
| return (custom_style_input or "").strip() | |
| v = style_definitions.get(selected_style, "") | |
| if isinstance(v, dict): | |
| return (v.get("prompt") or "").strip() | |
| return str(v).strip() | |
def _safe_extract_image_bytes(response):
    """Extract raw image bytes from a GenAI response, or raise.

    Tries, in order: inline_data on candidate content parts, then bytes-like
    top-level attributes (image/images/data). Raises ImageExtractionError —
    with a text snippet when the model answered with text instead of an
    image — if no bytes can be found. Callers match on the message prefix
    to detect the text-only case, so these strings must stay stable.
    """
    cands = getattr(response, "candidates", None) or []
    if cands:
        for cand in cands:
            content = getattr(cand, "content", None)
            parts = getattr(content, "parts", None) or []
            for p in parts:
                inline = getattr(p, "inline_data", None)
                data = getattr(inline, "data", None) if inline else None
                if data:
                    return data
            # No inline image in this candidate. If it carried text instead,
            # surface a compact snippet so callers can detect the text-only
            # failure mode and retry with a different prompt.
            text_parts = [getattr(p, "text", None) for p in parts if getattr(p, "text", None)]
            if text_parts:
                snippet = re.sub(r"\s+", " ", " ".join(text_parts)).strip()[:200]
                raise ImageExtractionError(f"ํ ์คํธ๋ง ๋ฐํ๋์ด ์ด๋ฏธ์ง๊ฐ ์์ต๋๋ค. ํ ์คํธ ์ผ๋ถ: {snippet}")
        raise ImageExtractionError("์ด๋ฏธ์ง ํํธ๊ฐ ์์ด bytes๋ฅผ ์ถ์ถํ ์ ์์ต๋๋ค.")
    # No candidates at all (e.g. generate_images-style responses): fall back
    # to bytes-like attributes directly on the response object.
    for key in ["image", "images", "data"]:
        v = getattr(response, key, None)
        if isinstance(v, (bytes, bytearray)):
            return bytes(v)
    summary = _summarize_response_structure(response)
    raise ImageExtractionError(f"์ด๋ฏธ์ง bytes ์ถ์ถ ์คํจ: {summary}")
| def _generate_thumbnail_texts(strategy_key, script_text, title_hint, client, text_model_id): | |
| default_payload = {"top": "", "layout": "single", "left": "", "right": ""} | |
| if strategy_key.startswith("A."): | |
| instruction = """ | |
| ๋๋ณธ์ ์ฐธ๊ณ ํด์ ์ธ๋ค์ผ์ฉ ์งง์ ๋ฌธ๊ตฌ 2๊ฐ๋ฅผ ์์ฑํด. | |
| - ์ข์ธก ์๋จ: ๋ถ์ ์ /๊ณผ๊ฑฐ/์ฝ์ ๋ถ์๊ธฐ์ ์งง์ ๋ฌธ๊ตฌ (6์ ์ด๋ด) | |
| - ์ฐ์ธก ์๋จ: ๊ธ์ ์ /๋ฏธ๋/๊ฐ์ ๋ถ์๊ธฐ์ ์งง์ ๋ฌธ๊ตฌ (6์ ์ด๋ด) | |
| ์ถ๋ ฅ ํ์(JSON ONLY): | |
| {"left": "...", "right": "..."} | |
| """.strip() | |
| elif strategy_key.startswith("B."): | |
| instruction = """ | |
| ๋๋ณธ์ ์ฐธ๊ณ ํด์ ์ธ๋ค์ผ ์๋จ์ ๋ค์ด๊ฐ ์๋ธ ๋ฌธ๊ตฌ๋ฅผ ์์ฑํด. | |
| - "์๋ณด/๊ธด๊ธ/๋จ๋ " ๊ฐ์ ํค | |
| - 6์ ์ด๋ด | |
| ์ถ๋ ฅ ํ์(JSON ONLY): | |
| {"top": "..."} | |
| """.strip() | |
| else: | |
| instruction = """ | |
| ๋๋ณธ์ ์ฐธ๊ณ ํด์ ์ธ๋ค์ผ ์๋จ์ ๋ค์ด๊ฐ ์๋ธ ๋ฌธ๊ตฌ๋ฅผ ์์ฑํด. | |
| - 6์ ์ด๋ด (์์ผ๋ฉด ๋น ๋ฌธ์์ด) | |
| ๊ทธ๋ฆฌ๊ณ ๊ตฌ์ฑ ์ ํ์ ์ ํํด. | |
| - ๋น๊ต/๋์กฐ๋ฉด split | |
| - ์คํ ๋ฆฌ ํ๋ฆ์ด๋ฉด single | |
| ์ถ๋ ฅ ํ์(JSON ONLY): | |
| {"top": "...", "layout": "split|single"} | |
| """.strip() | |
| prompt = f""" | |
| ๋๋ ์ ํ๋ธ ์ธ๋ค์ผ ์นดํผ๋ผ์ดํฐ์ผ. | |
| [๋๋ณธ] | |
| {script_text[:8000]} | |
| [์ฐธ๊ณ ์ ๋ชฉ] | |
| {title_hint} | |
| [์๊ตฌ์ฌํญ] | |
| {instruction} | |
| """.strip() | |
| try: | |
| response = client.models.generate_content(model=text_model_id, contents=prompt) | |
| text = (getattr(response, "text", "") or "").strip() | |
| except Exception as exc: | |
| logger.exception("์ธ๋ค์ผ ์๋ธ ๋ฌธ๊ตฌ ์์ฑ ์คํจ") | |
| raise RuntimeError("์ธ๋ค์ผ ์๋ธ ๋ฌธ๊ตฌ ์์ฑ์ ์คํจํ์ต๋๋ค.") from exc | |
| payload = None | |
| try: | |
| payload = json.loads(text) | |
| except json.JSONDecodeError: | |
| payload = None | |
| if not isinstance(payload, dict): | |
| payload = {} | |
| merged = {**default_payload, **payload} | |
| layout = str(merged.get("layout", "single")).lower().strip() | |
| if layout not in {"split", "single"}: | |
| layout = "single" | |
| merged["layout"] = layout | |
| if strategy_key.startswith("A."): | |
| left_text = str(merged.get("left") or "").strip() | |
| right_text = str(merged.get("right") or "").strip() | |
| if not left_text: | |
| left_text = "์๊ธฐ" | |
| if not right_text: | |
| right_text = "๊ธฐํ" | |
| return {"left_text": left_text, "right_text": right_text} | |
| if strategy_key.startswith("B."): | |
| top_text = str(merged.get("top") or "").strip() | |
| if not top_text: | |
| top_text = "์๋ณด" | |
| return {"top_text": top_text} | |
| top_text = str(merged.get("top") or "").strip() | |
| return {"top_text": top_text, "layout": merged["layout"]} | |
| def _generate_video_prompt(scene_text, image_prompt, client, text_model_id): | |
| prompt = f""" | |
| ๋๋ Google Flow(Veo3)์ฉ ๋น๋์ค ํ๋กฌํํธ๋ฅผ ์์ฑํ๋ ์ ๋ฌธ๊ฐ๋ค. | |
| ์๋ ์ฅ๋ฉด ์ค๋ช ๊ณผ ์ด๋ฏธ์ง ํ๋กฌํํธ๋ฅผ ์ฐธ๊ณ ํด์, 1๊ฐ ์ฅ๋ฉด์ผ๋ก ์งง์ ๋น๋์ค๋ฅผ ๋ง๋ค๊ธฐ ์ํ ํ๋กฌํํธ๋ฅผ ์์ฑํด๋ผ. | |
| [์ฅ๋ฉด ํ ์คํธ] | |
| {scene_text} | |
| [์ด๋ฏธ์ง ํ๋กฌํํธ] | |
| {image_prompt} | |
| [๊ท์น] | |
| - ์ถ๋ ฅ์ ํ๊ตญ์ด๋ง. | |
| - ์์์ ํ๋์ ์ฅ๋ฉด์ผ๋ก ๊ตฌ์ฑ. | |
| - ์นด๋ฉ๋ผ ๋์(ํจ๋/์ค/ํธ๋ํน)๊ณผ ํผ์ฌ์ฒด ์์ง์์ ํฌํจ. | |
| - ํ ์คํธ ์ค๋ฒ๋ ์ด/์๋ง/๋ก๊ณ ๊ธ์ง. | |
| - ์ต์ข ์ถ๋ ฅ์ ํ๋กฌํํธ ํ ์คํธ 1๊ฐ๋ง. JSON/๋งํฌ๋ค์ด/๋ฒํธ ๊ธ์ง. | |
| """.strip() | |
| try: | |
| response = client.models.generate_content(model=text_model_id, contents=prompt) | |
| return (getattr(response, "text", "") or "").strip() | |
| except Exception as exc: | |
| logger.exception("๋น๋์ค ํ๋กฌํํธ ์์ฑ ์คํจ") | |
| return (scene_text or "").strip() | |
def process_scene_task(
    index,
    scene,
    selected_style,
    custom_style_input,
    client,
    text_model_id,
    image_model_id,
    aspect_ratio,
    reference_image=None,
):
    """Render one scene: build an image prompt, generate the image, then a
    matching video prompt.

    Returns ``(index, final_prompt, img_bytes, video_prompt)``.
    Raises RuntimeError when prompt or image generation fails, and re-raises
    ImageExtractionError when bytes cannot be extracted even after the
    text-only fallback retry.
    """
    # assumes scene is a dict with "text" and "full_script" keys — TODO confirm
    scene_text = (scene.get("text") or "").strip()
    full_script = (scene.get("full_script") or "").strip()
    # Imported lazily; presumably avoids a module-level import cycle — verify.
    from config_style import STYLE_DEFINITIONS
    style_prompt = _get_style_prompt(selected_style, custom_style_input, STYLE_DEFINITIONS)
    # Prompt rules 3/4/9/10/11 explicitly forbid panels, borders, embedded
    # text and letterboxing in the generated image.
    brain_prompt = f"""
๋๋ ์ ํ๋ธ ์์์ฉ '์ฅ๋ฉด ์ด๋ฏธ์ง ํ๋กฌํํธ'๋ฅผ ๋ง๋๋ ์ ๋ฌธ๊ฐ์ผ.
์๋ ๋๋ณธ์ ๋ณด๊ณ , ๋ฑ "ํ ์ฅ๋ฉด"์ ๊ทธ๋ฆฌ๊ธฐ ์ํ ํ๋กฌํํธ๋ฅผ ๋ง๋ค์ด.
[์คํ์ผ ์ง์]
{style_prompt}
[ํ๋ฉด๋น]
{aspect_ratio}
[๋๋ณธ]
{scene_text}
[์ ์ฒด ํ๋ฆ ์์ฝ ์ฐธ๊ณ ]
{full_script[:1200]}
[์ ๋ ๊ท์น]
1) ์ถ๋ ฅ์ ๋ฌด์กฐ๊ฑด ํ๊ตญ์ด๋ก๋ง ์์ฑ.
2) ๋๋ณธ์ ์๋ฏธ๋ฅผ ๋ฐ๊พธ๊ฑฐ๋ ๋ด์ฉ์ ์ถ๊ฐํ์ง ๋ง๋ผ.
3) ํ๋ฉด ๋ถํ (Split Screen), ๋งํ ์นธ(Panel) ๋๋๊ธฐ ๊ธ์ง.
4) ์ด๋ฏธ์ง์ ๊ธ์, ์๋ง, ๋งํ์ ์ ๋ ๋ฃ์ง ๋ง๋ผ. (No Text, No Speech Bubble)
5) ์ต์ข ์ถ๋ ฅ์ "ํ๋กฌํํธ ํ ๋ฉ์ด๋ฆฌ ํ ์คํธ"๋ง.
6) ์ค๋ช ์ด๋ ๋ถ์ ํ ์คํธ ์ถ๋ ฅ ๊ธ์ง.
7) 16:9 (1280x720) ๋น์จ๋ก ์์ฑ.
8) ๋๋ณธ์ ๊ตฌ์ฒด์ ๋์์ด ์์ผ๋ฉด ์๊ฐ์ ๋ฌ์ฌ๋ก ํฌํจ.
9) ์ด๋ฏธ์ง์ ์ํ์ข์ฐ ๋ชจ๋ ๊ฐ์ฅ์๋ฆฌ์ ํ ๋๋ฆฌ(Border), ํ๋ ์, ์ฌ๋ฐฑ์ ์ ๋ ๋ง๋ค์ง ๋ง๋ผ. (Borderless)
10) ๋งํ์ฑ ์ด๋ ์นํฐ ํ์์ฒ๋ผ ๋ค๋ชจ๋ ์นธ ์์ ๊ทธ๋ฆผ์ ๊ฐ๋์ง ๋ง๊ณ , ์บ๋ฒ์ค ์ ์ฒด๋ฅผ ํ๋์ ๊ทธ๋ฆผ์ผ๋ก ๊ฝ ์ฑ์๋ผ (Full Shot).
11) ํ๋จ๋ถ๋ ์๋จ์ ๊ฒ์ ๋ (Letterbox)๋ ํฐ์ ๊ณต๋ฐฑ์ ์ ๋ ๋ง๋ค์ง ๋ง๋ผ.
์ด์ ํ๋กฌํํธ๋ฅผ ์ถ๋ ฅํด.
""".strip()
    try:
        brain_res = client.models.generate_content(
            model=text_model_id,
            contents=brain_prompt
        )
    except Exception as exc:
        logger.exception("์ฅ๋ฉด ํ๋กฌํํธ ์์ฑ ์คํจ")
        raise RuntimeError("์ฅ๋ฉด ํ๋กฌํํธ ์์ฑ์ ์คํจํ์ต๋๋ค.") from exc
    # Fall back to the raw scene text when the model returned no text;
    # then collapse all whitespace into single spaces.
    final_prompt = getattr(brain_res, "text", None) or scene_text
    final_prompt = re.sub(r"\s+", " ", final_prompt).strip()
    # Gemini image models go through generate_content(); other models fall
    # back to generate_images() below.
    uses_gemini_image = "gemini" in (image_model_id or "").lower()
    # Mutable cell so the closure can expose the last raw model response.
    last_scene_response = {"value": None}
    def _render_scene_image(prompt_text):
        # Render one image for prompt_text, optionally attaching a
        # reference image as an inline PNG part.
        contents = prompt_text
        if reference_image is not None:
            try:
                from google.genai import types
                import io
                buf = io.BytesIO()
                # assumes reference_image is a PIL.Image (has .save) — TODO confirm
                reference_image.save(buf, format="PNG")
                ref_part = types.Part.from_bytes(data=buf.getvalue(), mime_type="image/png")
                contents = [prompt_text, ref_part]
            except Exception as exc:
                logger.exception("์ฐธ์กฐ ์ด๋ฏธ์ง ์ฒ๋ฆฌ ์คํจ")
                raise RuntimeError("์ฐธ์กฐ ์ด๋ฏธ์ง ์ฒ๋ฆฌ์ ์คํจํ์ต๋๋ค.") from exc
        if uses_gemini_image:
            try:
                from google.genai import types
            except Exception as exc:
                logger.exception("GenerateContentConfig ๋ก๋ ์คํจ")
                raise RuntimeError("์ด๋ฏธ์ง ์์ฑ ์ค์ ์ ๋ถ๋ฌ์ค์ง ๋ชปํ์ต๋๋ค.") from exc
            img_res = client.models.generate_content(
                model=image_model_id,
                contents=contents,
                config=types.GenerateContentConfig(
                    response_modalities=["IMAGE"],
                    image_config=types.ImageConfig(image_size="1K", aspect_ratio=aspect_ratio)
                )
            )
            last_scene_response["value"] = img_res
            return _safe_extract_image_bytes(img_res)
        if hasattr(client.models, "generate_images"):
            img_res = client.models.generate_images(
                model=image_model_id,
                prompt=prompt_text
            )
            last_scene_response["value"] = img_res
            return _safe_extract_image_bytes(img_res)
        raise RuntimeError("์ฅ๋ฉด ์ด๋ฏธ์ง ์์ฑ ์คํจ: ๋ชจ๋ธ ํธํ์ฑ ํ์ธ ํ์")
    try:
        img_bytes = _render_scene_image(final_prompt)
        video_prompt = _generate_video_prompt(scene_text, final_prompt, client, text_model_id)
        return index, final_prompt, img_bytes, video_prompt
    except ImageExtractionError as exc:
        # Kept for debugging visibility; not otherwise used in this branch.
        response = last_scene_response["value"]
        logger.exception("์ฅ๋ฉด ์ด๋ฏธ์ง ์ถ์ถ ์คํจ (1์ฐจ)")
        # Retry once with a stripped-down prompt, but only when the failure
        # was the model answering with text instead of an image (matches the
        # message prefix raised by _safe_extract_image_bytes).
        if "ํ ์คํธ๋ง ๋ฐํ" in str(exc):
            fallback_prompt = f"""
ํต์ฌ ์ฅ๋ฉด: {scene_text}
16:9 ํ ๋๋ฆฌ ์๋ ์ ์ฒด ํ๋ฉด(Borderless Full Shot). ์ฌ๋ฐฑ ์์. ์ด๋ฏธ์ง ์์ฑ๋ง ์ถ๋ ฅ.
""".strip()
            try:
                img_bytes = _render_scene_image(fallback_prompt)
                video_prompt = _generate_video_prompt(scene_text, fallback_prompt, client, text_model_id)
                return index, fallback_prompt, img_bytes, video_prompt
            except ImageExtractionError:
                # Fallback also failed: propagate the extraction error as-is.
                raise
        raise
    except Exception as exc:
        logger.exception("์ฅ๋ฉด ์ด๋ฏธ์ง ์์ฑ ์คํจ")
        raise RuntimeError("์ฅ๋ฉด ์ด๋ฏธ์ง ์์ฑ ์คํจ") from exc
def process_thumbnail_task(
    index,
    strategy_key,
    strategy_text,
    script_text,
    title_hint,
    client,
    text_model_id,
    image_model_id,
    aspect_ratio,
    reference_image=None,
):
    """Generate one thumbnail: scene prompt, overlay texts, then the image.

    Returns ``(index, final_prompt, img_bytes)``.
    Raises RuntimeError on generation failure; re-raises ImageExtractionError
    when bytes cannot be extracted (with one fallback retry for "B." keys).

    NOTE(review): reference_image is accepted (signature parity with
    process_scene_task) but never used here — confirm whether that is
    intentional.
    """
    # The brain prompt asks a text model to write the image prompt; the
    # rules forbid borders, frames, speech bubbles and margins.
    brain_prompt = f"""
๋๋ ์ ํ๋ธ ์ธ๋ค์ผ ํ๋กฌํํธ๋ฅผ ๋ง๋๋ ์ ๋ฌธ๊ฐ๋ค.
์๋ ๋๋ณธ๊ณผ ์ธ๋ค์ผ ์ ๋ต์ ๋ฐํ์ผ๋ก, ์ด๋ฏธ์ง ์์ฑ ๋ชจ๋ธ์ ๋ฃ์ 'ํ ์ฅ์ ์ธ๋ค์ผ ํ๋กฌํํธ'๋ฅผ ์์ฑํ๋ผ.
[์ ๋ต ํค]
{strategy_key}
[์ ๋ต ๋ด์ฉ]
{strategy_text}
[๋๋ณธ]
{script_text[:8000]}
[์ฐธ๊ณ ์ ๋ชฉ(๊ทธ๋ฆฌ์ง ๋ง ๊ฒ)]
{title_hint}
[ํ์ ์กฐ๊ฑด]
- ์ถ๋ ฅ์ ๋ฌด์กฐ๊ฑด ํ๊ตญ์ด๋ง. ์์ด/๋ก๋ง์ ๊ธ์ง.
- ํน์ ๊ตญ๊ฐ ์์ง ์๋์์ฑ ๊ธ์ง: ํ๊ทน๊ธฐ, ์ฒญ์๋ ๋ฑ.
- ์ ์น์ธ ๋ฑ ์ค์กด ์ธ๋ฌผ์ ์ต๋ช ์บ๋ฆญํฐ/์ค๋ฃจ์ฃ์ผ๋ก ์ฒ๋ฆฌ.
- ํ๋ฉด๋น: 16:9 (1280x720)
- ์ด๋ฏธ์ง๋ ์บ๋ฒ์ค ์ ์ฒด๋ฅผ ๊ฝ ์ฑ์์ผ ํจ.
- ๋งํ์ฑ ํ๋ ์, ํ ๋๋ฆฌ, ๋งํ์ , ์ฌ๋ฐฑ์ ์ ๋ ๊ทธ๋ฆฌ์ง ๋ง๋ผ (Borderless).
- ์ต์ข ์ถ๋ ฅ์ ํ๋กฌํํธ ํ ์คํธ 1๊ฐ๋ง.
- ๋๋ณธ์ ๊ตฌ์ฒด์ ๋์์ด ์์ผ๋ฉด ์๊ฐ์ ์ผ๋ก ํฌํจ.
""".strip()
    try:
        brain_res = client.models.generate_content(model=text_model_id, contents=brain_prompt)
        scene_prompt = (getattr(brain_res, "text", "") or "").strip()
    except Exception as exc:
        logger.exception("์ธ๋ค์ผ ํ๋กฌํํธ ์์ฑ ์คํจ")
        raise RuntimeError("์ธ๋ค์ผ ํ๋กฌํํธ ์์ฑ์ ์คํจํ์ต๋๋ค.") from exc
    # Short overlay phrases whose shape depends on strategy_key ("A."/"B."/other).
    overlay_texts = _generate_thumbnail_texts(strategy_key, script_text, title_hint, client, text_model_id)
    # Layout rules shared by every strategy (full-bleed, bold typography).
    common_rules = f"""
- ํฌ๋งท: 16:9 (1280x720).
- ํ ๋๋ฆฌ(Border)๋ ํ๋ ์ ์์ด ์ด๋ฏธ์ง๊ฐ ์บ๋ฒ์ค ๋๊น์ง ๊ฝ ์ฐจ์ผ ํ๋ค (Full Bleed).
- ์ํ์ข์ฐ์ ํฐ์ ์ฌ๋ฐฑ์ด๋ ๊ฒ์ ๋ ํฐ๋ฐ์ค๋ฅผ ์ ๋ ๋ง๋ค์ง ๋ง๋ผ.
- ํ ์คํธ ์ค๋ฒ๋ ์ด ๋ฐ๋์ ํฌํจ.
- ํฐํธ: ๊ตต์ ๊ณ ๋, ํฐ ๊ธ์จ + ๊ฒ์ ์คํธ๋กํฌ, ๋์ ๋๋น.
- ๋ฉ์ธ ํ์ดํ \"{title_hint}\"๋ ์ค๋จ ์ด์ ์์น์ ๋ฐฐ์น.
""".strip()
    # Per-strategy text placement: A = left/right contrast phrases,
    # B = single breaking-news top phrase, else = top phrase + layout choice.
    if strategy_key.startswith("A."):
        overlay_rules = f"""
- ์ข์ธก ์๋จ ๋ฌธ๊ตฌ: \"{overlay_texts['left_text']}\".
- ์ฐ์ธก ์๋จ ๋ฌธ๊ตฌ: \"{overlay_texts['right_text']}\".
- ๋ฉ์ธ ํ์ดํ: \"{title_hint}\"๋ ์ค๋จ ๋๋ ์๋จ์ ๋ฐฐ์น.
""".strip()
    elif strategy_key.startswith("B."):
        overlay_rules = f"""
- ์๋จ ์๋ธ ๋ฌธ๊ตฌ: \"{overlay_texts['top_text']}\".
- ๋ฉ์ธ ํ์ดํ: \"{title_hint}\"๋ ์ค๋จ ๋๋ ์๋จ์ ๋ฐฐ์น.
""".strip()
    else:
        layout_text = "Split Screen" if overlay_texts["layout"] == "split" else "Single Scene"
        overlay_rules = f"""
- ๊ตฌ์ฑ: {layout_text}.
- ์๋จ ์๋ธ ๋ฌธ๊ตฌ: \"{overlay_texts['top_text']}\".
- ๋ฉ์ธ ํ์ดํ: \"{title_hint}\"๋ ์ค๋จ ๋๋ ์๋จ์ ๋ฐฐ์น.
""".strip()
    final_prompt = f"""
[์ ๋ต ์ค๋ช ]
{strategy_text}
[์ฅ๋ฉด ๊ตฌ์ฑ]
{scene_prompt}
[๊ณตํต ๊ท์น]
{common_rules}
[ํ ์คํธ ๋ฐฐ์น]
{overlay_rules}
""".strip()
    # Gemini image models use generate_content(); others use generate_images().
    uses_gemini_image = "gemini" in (image_model_id or "").lower()
    # Mutable cell so the closure can expose the last raw model response.
    last_response = {"value": None}
    def _render_image(prompt_text):
        # Render one thumbnail image for prompt_text.
        if uses_gemini_image:
            try:
                from google.genai import types
            except Exception as exc:
                logger.exception("GenerateContentConfig ๋ก๋ ์คํจ")
                raise RuntimeError("์ด๋ฏธ์ง ์์ฑ ์ค์ ์ ๋ถ๋ฌ์ค์ง ๋ชปํ์ต๋๋ค.") from exc
            img_res = client.models.generate_content(
                model=image_model_id,
                contents=prompt_text,
                config=types.GenerateContentConfig(
                    response_modalities=["IMAGE"],
                    image_config=types.ImageConfig(image_size="1K", aspect_ratio=aspect_ratio)
                )
            )
            last_response["value"] = img_res
            return _safe_extract_image_bytes(img_res)
        if hasattr(client.models, "generate_images"):
            img_res = client.models.generate_images(
                model=image_model_id,
                prompt=prompt_text
            )
            last_response["value"] = img_res
            return _safe_extract_image_bytes(img_res)
        raise RuntimeError("์ธ๋ค์ผ ์ด๋ฏธ์ง ์์ฑ ์คํจ: ๋ชจ๋ธ ํธํ์ฑ ํ์ธ ํ์")
    try:
        img_bytes = _render_image(final_prompt)
        return index, final_prompt, img_bytes
    except ImageExtractionError as exc:
        # Kept for debugging visibility; not otherwise used in this branch.
        response = last_response["value"]
        logger.exception("์ธ๋ค์ผ ์ด๋ฏธ์ง ์ถ์ถ ์คํจ (1์ฐจ)")
        # Only "B." (breaking-news) strategies get a simplified retry; other
        # strategies propagate the extraction error immediately.
        if not strategy_key.startswith("B."):
            raise
        fallback_prompt = f"""
16:9 ์ ํ๋ธ ์ธ๋ค์ผ. ํ ๋๋ฆฌ ์๋ ๊ฝ ์ฐฌ ํ๋ฉด(Borderless Full Bleed).
์ ๊ฒฝ์ 2D ์คํฑ๋งจ ๋ฆฌ์ก์ .
ํ ์คํธ ์ค๋ฒ๋ ์ด: ์๋จ \"{overlay_texts.get('top_text', '')}\", ํ๋จ ์ค์ \"{title_hint}\".
""".strip()
        try:
            img_bytes = _render_image(fallback_prompt)
            return index, fallback_prompt, img_bytes
        except ImageExtractionError as retry_exc:
            logger.exception("์ธ๋ค์ผ ์ด๋ฏธ์ง ์ถ์ถ ์คํจ (ํด๋ฐฑ)")
            # Chain the retry failure onto the original extraction error.
            raise retry_exc from exc
    except Exception as exc:
        logger.exception("์ธ๋ค์ผ ์ด๋ฏธ์ง ์์ฑ ์คํจ")
        raise RuntimeError("์ธ๋ค์ผ ์ด๋ฏธ์ง ์์ฑ ์คํจ") from exc