# ewet / app.py
# whispersound — Update app.py (commit dab0a85, verified)
import os
import json
import logging
import re
import gradio as gr
# requires google.generativeai v0.3.1 or later
import google.generativeai as genai
from dotenv import load_dotenv
import random
from typing import List, Dict, Any, Optional, Tuple
load_dotenv()
# ------------------- λ‘œκΉ… μ„€μ • -------------------
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
# ------------------- Gemini API μ„€μ • -------------------
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "")
if not GEMINI_API_KEY:
logger.error("Gemini API ν‚€κ°€ .env νŒŒμΌμ— μ„€μ •λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€. λ²ˆμ—­ 및 ν”„λ‘¬ν”„νŠΈ 생성이 μž‘λ™ν•˜μ§€ μ•Šμ„ 수 μžˆμŠ΅λ‹ˆλ‹€.")
else:
try:
genai.configure(api_key=GEMINI_API_KEY)
logger.info("Gemini API ν‚€κ°€ μ„±κ³΅μ μœΌλ‘œ μ„€μ •λ˜μ—ˆμŠ΅λ‹ˆλ‹€.")
except Exception as e:
logger.error(f"Gemini API ν‚€ μ„€μ • 쀑 였λ₯˜ λ°œμƒ: {e}")
GEMINI_API_KEY = "" # 였λ₯˜ λ°œμƒ μ‹œ ν‚€ λΉ„ν™œμ„±ν™”
# ------------------- 레퍼런슀 데이터 λ‘œλ“œ -------------------
CLOTHING_REFERENCES_PATH = 'clothing_references.json' # μ‹€μ œ 파일 경둜둜 μˆ˜μ •ν•˜μ„Έμš”
clothing_references: List[Dict[str, Any]] = []
try:
# UTF-8 인코딩 λͺ…μ‹œμ  μ§€μ •
with open(CLOTHING_REFERENCES_PATH, 'r', encoding='utf-8') as f:
clothing_references = json.load(f).get("clothing_references", [])
if not clothing_references:
logger.warning(f"{CLOTHING_REFERENCES_PATH} νŒŒμΌμ—μ„œ 'clothing_references' ν‚€λ₯Ό 찾을 수 μ—†κ±°λ‚˜ λΉ„μ–΄ μžˆμŠ΅λ‹ˆλ‹€.")
else:
logger.info(f"{len(clothing_references)}개의 의λ₯˜ 레퍼런슀λ₯Ό μ„±κ³΅μ μœΌλ‘œ λ‘œλ“œν–ˆμŠ΅λ‹ˆλ‹€.")
except FileNotFoundError:
logger.error(f"레퍼런슀 파일({CLOTHING_REFERENCES_PATH})을 찾을 수 μ—†μŠ΅λ‹ˆλ‹€. 레퍼런슀 κΈ°λŠ₯ 없이 μž‘λ™ν•©λ‹ˆλ‹€.")
clothing_references = []
except json.JSONDecodeError:
logger.error(f"레퍼런슀 파일({CLOTHING_REFERENCES_PATH})의 ν˜•μ‹μ΄ 잘λͺ»λ˜μ—ˆμŠ΅λ‹ˆλ‹€. JSON νŒŒμ‹±μ— μ‹€νŒ¨ν–ˆμŠ΅λ‹ˆλ‹€.")
clothing_references = []
except Exception as e:
logger.error(f"레퍼런슀 파일 λ‘œλ“œ 쀑 μ˜ˆμƒμΉ˜ λͺ»ν•œ 였λ₯˜ λ°œμƒ: {e}")
clothing_references = []
# ------------------- Geminiλ₯Ό μ‚¬μš©ν•œ λ²ˆμ—­ ν•¨μˆ˜ (μ‹ κ·œ μΆ”κ°€) -------------------
def translate_with_gemini(text_to_translate: str) -> str:
"""Gemini APIλ₯Ό μ‚¬μš©ν•˜μ—¬ ν•œκ΅­μ–΄ ν…μŠ€νŠΈλ₯Ό μ˜μ–΄λ‘œ λ²ˆμ—­ν•©λ‹ˆλ‹€."""
if not text_to_translate:
return ""
if not GEMINI_API_KEY:
logger.warning("Gemini API ν‚€κ°€ μ—†μ–΄ λ²ˆμ—­μ„ κ±΄λ„ˆ<0xEB><0x9B><0x84>λ‹ˆλ‹€. 원본 ν…μŠ€νŠΈλ₯Ό λ°˜ν™˜ν•©λ‹ˆλ‹€.")
return text_to_translate
try:
# --- λͺ¨λΈλͺ… λ³€κ²½ ---
model = genai.GenerativeModel('gemini-2.0-flash')
# -------------------
prompt = f"""Translate the following Korean text into natural-sounding English.
Respond ONLY with the translated English text and nothing else.
Korean Text:
{text_to_translate}
English Translation:"""
response = model.generate_content(
prompt,
generation_config=genai.types.GenerationConfig(
temperature=0.2,
max_output_tokens=len(text_to_translate) * 3
)
)
translated_text = response.text.strip()
translated_text = translated_text.strip('"')
logger.info(f"λ²ˆμ—­ 성곡: '{text_to_translate}' -> '{translated_text}' (Model: gemini-2.0-flash)")
return translated_text
except Exception as e:
logger.error(f"Gemini λ²ˆμ—­ 쀑 였λ₯˜ λ°œμƒ ('{text_to_translate}') with gemini-2.0-flash: {e}. 원본 ν…μŠ€νŠΈλ₯Ό λ°˜ν™˜ν•©λ‹ˆλ‹€.")
return text_to_translate
# ------------------- Geminiλ₯Ό μ‚¬μš©ν•œ ν”„λ‘¬ν”„νŠΈ 생성 ν•¨μˆ˜ (레퍼런슀 ν™œμš©) -------------------
def generate_prompt_with_gemini(person_description_en, item_description_2_en, item_description_3_en, custom_prompt_en, reference: Optional[Dict[str, Any]]):
"""Gemini APIλ₯Ό μ‚¬μš©ν•΄ κ°œμ„ λœ ν”„λ‘¬ν”„νŠΈ 생성 (랜덀 레퍼런슀 정보 ν™œμš©)"""
if not GEMINI_API_KEY:
logger.error("Gemini API ν‚€κ°€ μ„€μ •λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€.")
return "였λ₯˜: Gemini API ν‚€κ°€ μ—†μ–΄ ν”„λ‘¬ν”„νŠΈ 생성을 μ§„ν–‰ν•  수 μ—†μŠ΅λ‹ˆλ‹€."
try:
system_instruction = f"""
You are an expert prompt generator for virtual fashion try-on using an AI image generator like Midjourney.
Your goal is to create ONE highly detailed and realistic prompt in natural English that visually describes a complete scene, incorporating elements from a provided reference scenario.
Your response MUST include the exact input texts for the person and items in the following format:
- The person description MUST appear as: "({person_description_en} from image #1)"
- The first item description MUST appear as: "({item_description_2_en} from image #2)"
- The second item description MUST appear as: "({item_description_3_en} from image #3)"
The prompt should describe a specific lifestyle scene inspired by the reference details (setting, background, pose, camera angle, style).
Critically, ensure the model's face from (image #1) is **exactly preserved, clear, sharp, and realistically integrated** into the scene.
The final image should look like a high-quality photograph.
Return ONLY the prompt as one complete English paragraph.
Always end the prompt with the following parameters, maintaining the specified seed: --ar 9:16 --face #1 --seed 123456 --q 3 --v 5.2 --style raw
"""
prompt_person = f"({person_description_en} from image #1)"
prompt_items = f"({item_description_2_en} from image #2) and ({item_description_3_en} from image #3)"
reference_details = ""
if reference:
reference_details = f"""
Reference Scenario Details to inspire the scene:
- Style: {reference.get('style', 'N/A')}
- Setting: {reference.get('setting', 'N/A')}
- Background: {reference.get('background', 'N/A')}
- Pose: {reference.get('pose', 'N/A')}
- Camera Angle: {reference.get('camera_angle', 'standard view')}
- Lens Type: {reference.get('lens_type', 'standard lens')}
- Focus/Light: {reference.get('focus_style', 'standard focus and light')}
"""
prompt_request = f"""
Create a detailed Midjourney prompt for a virtual fitting:
- The person is: {prompt_person}.
- They are wearing: {prompt_items}.
- Additional user request: "{custom_prompt_en if custom_prompt_en else 'None'}"
{reference_details}
IMPORTANT INSTRUCTIONS:
- Generate ONE single paragraph prompt in natural, descriptive English.
- Describe a specific, realistic lifestyle scene incorporating the reference details.
- Emphasize **exact preservation, clarity, and sharpness of the face** from image #1.
- The output should resemble a high-quality photograph.
- Do NOT explain anything. Do NOT include bullet points or extra text.
- Ensure the prompt ends ONLY with: --ar 9:16 --face #1 --seed 123456 --q 3 --v 5.2 --style raw
"""
# --- λͺ¨λΈλͺ… λ³€κ²½ ---
model = genai.GenerativeModel(
'gemini-2.0-flash', # λͺ¨λΈλͺ… λ³€κ²½
system_instruction=system_instruction
)
# -------------------
logger.info(f"Geminiμ—κ²Œ ν”„λ‘¬ν”„νŠΈ 생성 μš”μ²­ μ‹œμž‘ (Model: gemini-2.0-flash)")
response = model.generate_content(
prompt_request,
generation_config=genai.types.GenerationConfig(
temperature=0.7,
top_p=0.95,
top_k=40,
max_output_tokens=8192 # λͺ¨λΈ μ΅œλŒ€μΉ˜ 확인 ν•„μš”
)
)
logger.info(f"Gemini 응닡 μˆ˜μ‹  (ν”„λ‘¬ν”„νŠΈ 생성)")
try:
enhanced_prompt = response.text.strip()
except AttributeError as ae:
logger.warning(f"Gemini μ‘λ‹΅μ—μ„œ .text 속성을 찾을 수 μ—†μŒ: {ae}. 응닡 객체 ꡬ쑰 확인 ν•„μš”.")
try:
if response.candidates:
enhanced_prompt = response.candidates[0].content.parts[0].text.strip()
else:
enhanced_prompt = "⚠️ Gemini 응닡 νŒŒμ‹± μ‹€νŒ¨ (candidates μ—†μŒ)"
except Exception as e:
logger.warning(f"Gemini 응닡 λŒ€μ²΄ νŒŒμ‹± μ‹€νŒ¨: {str(e)}. 응닡: {response}")
enhanced_prompt = "⚠️ Gemini 응닡 νŒŒμ‹± μ™„μ „ μ‹€νŒ¨"
except Exception as e:
logger.error(f"Gemini 응닡 νŒŒμ‹± 쀑 μ˜ˆμƒμΉ˜ λͺ»ν•œ 였λ₯˜: {str(e)}. 응닡: {response}")
enhanced_prompt = "⚠️ Gemini 응닡 νŒŒμ‹± 쀑 였λ₯˜ λ°œμƒ"
required_params = "--ar 9:16 --face #1 --seed 123456 --q 3 --v 5.2 --style raw"
if not enhanced_prompt.endswith(required_params):
prompt_base = re.sub(r'--ar\s+\S+\s+--face\s+\S+\s+--seed\s+\d+\s+--q\s+\d+(\.\d+)?\s+--v\s+\S+\s+--style\s+\S+$', '', enhanced_prompt).strip()
enhanced_prompt = f"{prompt_base} {required_params}"
enhanced_prompt = filter_prompt_only(enhanced_prompt)
logger.info(f"Gemini 생성 ν”„λ‘¬ν”„νŠΈ (필터링 ν›„): {enhanced_prompt}")
return enhanced_prompt
except Exception as e:
logger.exception("Gemini ν”„λ‘¬ν”„νŠΈ 생성 쀑 μ‹¬κ°ν•œ 였λ₯˜ λ°œμƒ:")
return f"였λ₯˜: Gemini ν”„λ‘¬ν”„νŠΈ 생성 μ‹€νŒ¨ ({str(e)}). κΈ°λ³Έ ν”„λ‘¬ν”„νŠΈλ₯Ό μ‚¬μš©ν•©λ‹ˆλ‹€."
def filter_prompt_only(prompt: str) -> str:
    """Strip Gemini chatter so only the actual prompt remains.

    Removes code fences, known lead-in phrases ("Here's the prompt:" etc.)
    and closing pleasantries, then normalizes the output so it ends with
    exactly one copy of the required Midjourney parameter string.
    """
    cleaned = prompt.strip()
    # Unwrap a fenced code block, if the model returned one.
    cleaned = re.sub(r"```[a-zA-Z]*\n(.*?)\n```", r"\1", cleaned, flags=re.DOTALL)
    cleaned = cleaned.strip('`')

    lead_ins = [
        "Here's the generated prompt:", "Here is the prompt:", "Okay, here's the prompt:",
        "Enhanced prompt:", "Generated prompt:", "Prompt:", "Here's an enhanced prompt:",
        "Here is the improved prompt:", "I've refined the prompt:", "Below is the prompt:",
        "The enhanced prompt is:"
    ]
    closings = [
        "I hope this helps!", "Let me know if you need adjustments.", "Enjoy generating!",
        "This prompt aims to fulfill all requirements."
    ]

    # Drop at most one case-insensitive lead-in phrase from the front.
    lowered = cleaned.lower()
    for lead in lead_ins:
        if lowered.startswith(lead.lower()):
            cleaned = cleaned[len(lead):].lstrip(':').strip()
            break

    # Drop the final line when it is a known closing pleasantry.
    lines = cleaned.split('\n')
    if len(lines) > 1:
        tail = lines[-1].strip()
        if any(tail.startswith(phrase) for phrase in closings):
            cleaned = '\n'.join(lines[:-1]).strip()

    required_params = "--ar 9:16 --face #1 --seed 123456 --q 3 --v 5.2 --style raw"
    if required_params in cleaned:
        # Cut anything trailing the first occurrence, then re-append once.
        cleaned = f"{cleaned.split(required_params)[0].strip()} {required_params}"
    elif not cleaned.endswith(required_params):
        logger.warning("Gemini κ²°κ³Όμ—μ„œ ν•„μˆ˜ νŒŒλΌλ―Έν„° λˆ„λ½ 확인, κ°•μ œ μΆ”κ°€")
        cleaned = f"{cleaned.strip()} {required_params}"
    return cleaned.strip()
# ------------------- κΈ°λ³Έ ν”„λ‘¬ν”„νŠΈ 생성 ν•¨μˆ˜ (Gemini μ‹€νŒ¨ μ‹œ Fallback) -------------------
def generate_basic_prompt(person_description_ko, item_description_2_ko, item_description_3_ko, custom_prompt_ko):
    """Fallback prompt builder used when Gemini prompt generation fails.

    Translates the Korean inputs (best effort — translate_with_gemini returns
    the original text on failure) and assembles a fixed-template Midjourney
    prompt ending with the required parameter string.
    """
    person_en, item2_en, item3_en, custom_en = (
        translate_with_gemini(text)
        for text in (person_description_ko, item_description_2_ko, item_description_3_ko, custom_prompt_ko)
    )
    items_part = f"({item2_en} from image #2) and ({item3_en} from image #3)"
    prompt = (
        f"Hyperrealistic lifestyle portrait of a ({person_en} from image #1) wearing "
        f"{items_part}. "
        f"Her face is exactly preserved from (image #1)"
    )
    extra = custom_en.strip()
    if extra:
        prompt += f", {extra}"
    prompt += " --ar 9:16 --face #1 --seed 123456 --q 3 --v 5.2 --style raw"
    logger.info(f"κΈ°λ³Έ ν”„λ‘¬ν”„νŠΈ 생성됨 (λ²ˆμ—­ μ‹œλ„λ¨): {prompt}")
    return prompt
# ------------------- μ΅œμ’… ν”„λ‘¬ν”„νŠΈ 생성 ν•¨μˆ˜ (μˆ˜μ •λ¨) -------------------
def generate_final_prompt(model_image, item_image_2, item_image_3, person_description_ko, item_description_2_ko, item_description_3_ko, custom_prompt_ko):
if not model_image or not item_image_2 or not item_image_3:
return "였λ₯˜: λͺ¨λΈ 이미지(#1)와 μ•„μ΄ν…œ 이미지(#2, #3)λ₯Ό λͺ¨λ‘ μ—…λ‘œλ“œν•΄μ£Όμ„Έμš”."
if not person_description_ko or not item_description_2_ko or not item_description_3_ko:
return "였λ₯˜: 인물 μ„€λͺ…κ³Ό 두 μ•„μ΄ν…œ μ„€λͺ…을 λͺ¨λ‘ μž…λ ₯ν•΄μ£Όμ„Έμš”."
if not GEMINI_API_KEY:
logger.error("Gemini API ν‚€κ°€ μ—†μ–΄ μ§„ν–‰ν•  수 μ—†μŠ΅λ‹ˆλ‹€. .env νŒŒμΌμ„ ν™•μΈν•˜μ„Έμš”.")
return "였λ₯˜: Gemini API ν‚€κ°€ μ„€μ •λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€. ν”„λ‘¬ν”„νŠΈλ₯Ό 생성할 수 μ—†μŠ΅λ‹ˆλ‹€."
logger.info("μž…λ ₯된 ν•œκ΅­μ–΄ μ„€λͺ…을 μ˜μ–΄λ‘œ λ²ˆμ—­ μ‹œμž‘ (Model: gemini-2.0-flash)...")
translated_person = translate_with_gemini(person_description_ko)
translated_item_2 = translate_with_gemini(item_description_2_ko)
translated_item_3 = translate_with_gemini(item_description_3_ko)
translated_custom = translate_with_gemini(custom_prompt_ko)
logger.info("λ²ˆμ—­ μ™„λ£Œ (였λ₯˜ μ‹œ 원본 ν…μŠ€νŠΈ μ‚¬μš©λ¨).")
if translated_person == person_description_ko and person_description_ko:
logger.warning(f"인물 μ„€λͺ…({person_description_ko}) λ²ˆμ—­ μ‹€νŒ¨ λ˜λŠ” 이미 μ˜μ–΄μΌ 수 μžˆμŠ΅λ‹ˆλ‹€.")
selected_reference = None
if clothing_references:
selected_reference = random.choice(clothing_references)
logger.info(f"μ„ νƒλœ 레퍼런슀 ID: {selected_reference.get('id', 'N/A')}, μŠ€νƒ€μΌ: {selected_reference.get('style', 'N/A')}")
else:
logger.warning("μ‚¬μš© κ°€λŠ₯ν•œ λ ˆνΌλŸ°μŠ€κ°€ μ—†μ–΄ 레퍼런슀 없이 μ§„ν–‰ν•©λ‹ˆλ‹€.")
try:
# --- λͺ¨λΈλͺ… λ³€κ²½ 반영됨 ---
generated_prompt = generate_prompt_with_gemini(
translated_person,
translated_item_2,
translated_item_3,
translated_custom,
selected_reference
)
# -------------------------
if "였λ₯˜:" in generated_prompt or "⚠️" in generated_prompt:
logger.warning(f"Gemini ν”„λ‘¬ν”„νŠΈ 생성 μ‹€νŒ¨ λ˜λŠ” 였λ₯˜ 포함: {generated_prompt}. κΈ°λ³Έ ν”„λ‘¬ν”„νŠΈλ₯Ό μ‚¬μš©ν•©λ‹ˆλ‹€.")
return generate_basic_prompt(person_description_ko, item_description_2_ko, item_description_3_ko, custom_prompt_ko)
else:
return generated_prompt
except Exception as e:
logger.exception("μ΅œμ’… ν”„λ‘¬ν”„νŠΈ 생성 κ³Όμ •μ—μ„œ μ˜ˆμ™Έ λ°œμƒ:")
return generate_basic_prompt(person_description_ko, item_description_2_ko, item_description_3_ko, custom_prompt_ko)
# ------------------- Gradio μΈν„°νŽ˜μ΄μŠ€ ꡬ성 (λ³€κ²½ μ—†μŒ) -------------------
def create_app():
with gr.Blocks(title="가상 ν”ΌνŒ… μŠ€νŠœλ””μ˜€") as demo:
gr.Markdown("# 가상 ν”ΌνŒ… μŠ€νŠœλ””μ˜€")
gr.Markdown("""
μ’ŒμΈ‘μ€ μž…λ ₯ μ„Ήμ…˜, μš°μΈ‘μ€ 좜λ ₯ μ„Ήμ…˜μž…λ‹ˆλ‹€.
- **이미지 μ—…λ‘œλ“œ:** #1(인물), #2(μ•„μ΄ν…œ), #3(μ•„μ΄ν…œ) 이미지λ₯Ό μ—…λ‘œλ“œν•˜μ„Έμš”.
- **μ„€λͺ… μž…λ ₯:** 각 이미지에 λŒ€ν•œ μ„€λͺ…을 **ν•œκ΅­μ–΄**둜 μž…λ ₯ν•˜μ„Έμš” (예: λ―Έμ†Œμ§“λŠ” μ Šμ€ μ—¬μ„±, 베이지색 울 μ½”νŠΈ, μ²­λ°”μ§€). Geminiκ°€ μ˜μ–΄λ‘œ λ²ˆμ—­ν•©λ‹ˆλ‹€.
- **μ»€μŠ€ν…€ μ„€λͺ…:** μΆ”κ°€ν•˜κ³  싢은 λ°°κ²½, 포즈, λΆ„μœ„κΈ° 등을 **ν•œκ΅­μ–΄**둜 μž…λ ₯ν•˜μ„Έμš” (선택 사항).
- **ν”„λ‘¬ν”„νŠΈ 생성:** λ²„νŠΌμ„ λˆ„λ₯΄λ©΄ μž…λ ₯ μ„€λͺ…을 μ˜μ–΄λ‘œ λ²ˆμ—­ν•˜κ³ , 랜덀 λ ˆνΌλŸ°μŠ€μ™€ μ‘°ν•©ν•˜μ—¬ Midjourney용 ν”„λ‘¬ν”„νŠΈλ₯Ό μƒμ„±ν•©λ‹ˆλ‹€.
- **μ‹œλ“œ κ³ μ •:** ν”„λ‘¬ν”„νŠΈμ—λŠ” 항상 `--seed 123456`이 ν¬ν•¨λ˜μ–΄ 일관성을 μœ μ§€ν•©λ‹ˆλ‹€.
- **레퍼런슀:** `clothing_references.json` νŒŒμΌμ—μ„œ λžœλ€ν•˜κ²Œ λ°°κ²½, 카메라 μƒ· 등을 μ°Έμ‘°ν•©λ‹ˆλ‹€.
- **주의:** λ²ˆμ—­ 및 ν”„λ‘¬ν”„νŠΈ 생성에 Gemini API(gemini-2.0-flash)κ°€ μ‚¬μš©λ˜λ―€λ‘œ API ν‚€κ°€ ν•„μš”ν•˜λ©°, μ•½κ°„μ˜ 처리 μ‹œκ°„μ΄ μ†Œμš”λ  수 μžˆμŠ΅λ‹ˆλ‹€.
""")
with gr.Row():
with gr.Column(scale=1):
gr.Markdown("## μž…λ ₯ μ„Ήμ…˜")
with gr.Row():
model_image = gr.Image(label="λͺ¨λΈ 이미지 (#1)", type="pil", sources=["upload"])
item_image_2 = gr.Image(label="μ•„μ΄ν…œ 이미지 (#2)", type="pil", sources=["upload"])
item_image_3 = gr.Image(label="μ•„μ΄ν…œ 이미지 (#3)", type="pil", sources=["upload"])
with gr.Row():
person_description_ko = gr.Textbox(
label="인물섀λͺ…(#1) (ν•œκ΅­μ–΄)",
placeholder="예: λ―Έμ†Œμ§“λŠ” μ Šμ€ μ—¬μ„±",
lines=1, interactive=True
)
item_description_2_ko = gr.Textbox(
label="μ•„μ΄ν…œμ„€λͺ…(#2) (ν•œκ΅­μ–΄)",
placeholder="예: 베이지색 울 μ½”νŠΈ",
lines=1, interactive=True
)
item_description_3_ko = gr.Textbox(
label="μ•„μ΄ν…œμ„€λͺ…(#3) (ν•œκ΅­μ–΄)",
placeholder="예: 밝은 μ›Œμ‹±μ˜ μ²­λ°”μ§€",
lines=1, interactive=True
)
custom_prompt_ko = gr.Textbox(
label="μ»€μŠ€ν…€ 상황 μ„€λͺ… (ν•œκ΅­μ–΄, 선택 사항)",
placeholder="예: 파리의 가을 κ±°λ¦¬μ—μ„œ 컀피λ₯Ό λ“€κ³  있음, λ”°λœ»ν•œ μ˜€ν›„ ν–‡μ‚΄",
lines=2, interactive=True
)
prompt_btn = gr.Button("ν”„λ‘¬ν”„νŠΈ 생성 (λ²ˆμ—­ 포함)", variant="primary")
with gr.Column(scale=1):
gr.Markdown("## 좜λ ₯ μ„Ήμ…˜")
prompt_output = gr.Textbox(
label="μƒμ„±λœ Midjourney ν”„λ‘¬ν”„νŠΈ (영문)",
lines=15,
interactive=False
)
prompt_btn.click(
fn=generate_final_prompt,
inputs=[model_image, item_image_2, item_image_3, person_description_ko, item_description_2_ko, item_description_3_ko, custom_prompt_ko],
outputs=[prompt_output]
)
return demo
if __name__ == "__main__":
if not GEMINI_API_KEY:
print("κ²½κ³ : Gemini API ν‚€κ°€ μ„€μ •λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€. λ²ˆμ—­ 및 ν”„λ‘¬ν”„νŠΈ 생성이 μ œν•œλ  수 μžˆμŠ΅λ‹ˆλ‹€.")
# μ‚¬μš©ν•˜λ €λŠ” λͺ¨λΈλͺ… 확인 (Google AI Studio λ˜λŠ” API λ¬Έμ„œ μ°Έκ³ )
print("μ‚¬μš©ν•  Gemini λͺ¨λΈ: gemini-2.0-flash (API ν‚€ ν™˜κ²½μ—μ„œ μ‚¬μš© κ°€λŠ₯ν•œμ§€ 확인 ν•„μš”)")
if not clothing_references:
print("κ²½κ³ : clothing_references.json νŒŒμΌμ„ λ‘œλ“œν•˜μ§€ λͺ»ν–ˆκ±°λ‚˜ λΉ„μ–΄μžˆμŠ΅λ‹ˆλ‹€. 레퍼런슀 κΈ°λŠ₯ 없이 μ‹€ν–‰λ©λ‹ˆλ‹€.")
else:
print(f"{len(clothing_references)}개의 레퍼런슀λ₯Ό μ‚¬μš©ν•˜μ—¬ 앱을 μ‹œμž‘ν•©λ‹ˆλ‹€.")
app = create_app()
app.queue()
app.launch()