Spaces:

AIRider
/

llllkkkkkk

Paused

App Files Files Community

llllkkkkkk / app.py

AIRider

Create app.py

5625f85 verified over 1 year ago

raw

history blame contribute delete

5.29 kB

	import os
	import openai
	import gradio as gr
	from transformers import BlipProcessor, BlipForConditionalGeneration
	from dotenv import load_dotenv
	import torch
	from PIL import Image # PIL을 사용하여 이미지를 열기 위해 추가

	# Load environment variables from a local .env file, if one is present.
	load_dotenv()

	# Read the OpenAI API key and fail fast at import time when it is unusable.
	# An empty or whitespace-only value cannot be a usable key, so it is
	# rejected together with a missing variable.
	API_KEY = os.getenv("OPENAI_API_KEY")
	if API_KEY is None or not API_KEY.strip():
	    raise ValueError("OPENAI_API_KEY 환경 변수가 설정되지 않았습니다.")

	openai.api_key = API_KEY  # Register the key on the openai module.

	# Load the BLIP image-captioning processor and model. Both are created from
	# the same checkpoint id, so it is named once to keep them in sync.
	BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-base"
	blip_processor = BlipProcessor.from_pretrained(BLIP_CHECKPOINT)
	blip_model = BlipForConditionalGeneration.from_pretrained(BLIP_CHECKPOINT)
	# Use CUDA when torch reports it as available, otherwise fall back to CPU.
	device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
	blip_model.to(device)  # Move the model to the selected device.

	# Chat-completion helper with adjustable max_tokens, temperature and top_p.
	def call_api(content, system_message, max_tokens=500, temperature=0.6, top_p=1.0):
	    """Send one system/user message pair to the chat model and return the reply.

	    Args:
	        content: Text sent as the user message.
	        system_message: Text sent as the system message.
	        max_tokens: Forwarded to the request as ``max_tokens``.
	        temperature: Forwarded to the request as ``temperature``.
	        top_p: Forwarded to the request as ``top_p``.

	    Returns:
	        The content of the first returned choice with surrounding whitespace
	        stripped. If the request raises ``openai.OpenAIError``, a string of
	        the form ``"OpenAI API Error: <details>"`` is returned instead.
	    """
	    conversation = [
	        {"role": "system", "content": system_message},
	        {"role": "user", "content": content},
	    ]
	    try:
	        completion = openai.ChatCompletion.create(
	            model="gpt-4o-mini",
	            messages=conversation,
	            max_tokens=max_tokens,
	            temperature=temperature,
	            top_p=top_p,
	        )
	        first_choice = completion.choices[0]
	        reply_text = first_choice.message['content']
	        return reply_text.strip()
	    except openai.OpenAIError as e:
	        # API failures are reported in-band as text, not re-raised, so the
	        # caller always receives a string.
	        return f"OpenAI API Error: {str(e)}"

	# Per-style request settings: style label -> (instructions, temperature, top_p).
	# The instructions are appended to the caption/user-input header built in
	# generate_blog_post_in_korean.
	_STYLE_SETTINGS = {
	    # Factual style: low temperature and narrower top_p to limit variation.
	    "사실적인": (
	        "이 두 설명을 기반으로 있는 그대로의 사실만 간결하고 정확하게 묘사해 주세요. "
	        "불필요한 배경 설명이나 추론은 피하고, 장면에 대한 정확한 정보만 제공해 주세요.\n\n"
	        "예시: '테이블 위에 여러 그릇의 된장찌개와 다양한 음식들이 놓여져 있다. "
	        "중앙에 뚝배기에 담긴 된장찌개가 있고, 그 옆에는 각종 반찬들이 놓여 있습니다.'",
	        0.2,
	        0.7,
	    ),
	    # Emotional style: higher temperature and top_p to allow richer wording.
	    "감성적인": (
	        "이 두 설명을 참고해서 일상적이고 따뜻한 분위기의 글로 표현해 주세요. "
	        "추가적인 설명이나 배경보다는 장면과 감정을 자연스럽게 전달하는 글을 써 주세요.\n\n"
	        "예시: '된장찌개가 놓인 테이블에는 다양한 음식들이 정갈하게 차려져 있습니다. "
	        "뜨끈한 된장찌개에서는 구수한 향이 풍기고, 그 옆에는 고기와 채소가 듬뿍 담긴 반찬들이 놓여 있어요. "
	        "밥과 함께 먹기 좋은 음식들이 준비되어 있고, 집에서 정성스럽게 만든 따뜻한 느낌이 듭니다.'",
	        0.7,
	        0.9,
	    ),
	}


	def generate_blog_post_in_korean(image_path, user_input, style):
	    """Caption an image with BLIP and turn it into a styled description.

	    Args:
	        image_path: Value passed to ``Image.open`` to load the picture.
	        user_input: The user's own description of the picture; it is embedded
	            in the prompt next to the generated caption.
	        style: Either "사실적인" (factual) or "감성적인" (emotional). Selects the
	            prompt instructions and the temperature/top_p pair.

	    Returns:
	        The string returned by ``call_api`` for the combined prompt.

	    Raises:
	        ValueError: If ``style`` is not one of the supported labels.
	    """
	    # Validate the style before doing any work. Previously an unknown style
	    # left the prompt and sampling variables unassigned and failed with
	    # UnboundLocalError only after the image had been captioned.
	    if style not in _STYLE_SETTINGS:
	        supported = ", ".join(repr(name) for name in _STYLE_SETTINGS)
	        raise ValueError(f"Unsupported style {style!r}; expected one of: {supported}")
	    instructions, temperature, top_p = _STYLE_SETTINGS[style]

	    # 1. Load the picture. convert() reads the pixel data and returns a new
	    #    image, so the underlying file can be closed when the with-block ends.
	    #    Converting to RGB normalizes palette, alpha and grayscale inputs to
	    #    three channels before captioning.
	    with Image.open(image_path) as source_image:
	        image = source_image.convert("RGB")

	    # 2. Generate an image caption with BLIP.
	    inputs = blip_processor(image, return_tensors="pt").to(device)
	    out = blip_model.generate(**inputs)
	    image_caption = blip_processor.decode(out[0], skip_special_tokens=True)

	    # 3. Combine caption and user text with the style-specific instructions.
	    combined_prompt = (
	        f"이미지 설명: {image_caption}\n"
	        f"사용자 입력: {user_input}\n\n"
	    ) + instructions

	    # 4. Ask the chat model for the final description.
	    system_message = "You are an AI assistant that generates either factual or emotional descriptions based on image descriptions and user input."
	    description = call_api(combined_prompt, system_message, temperature=temperature, top_p=top_p)

	    return description

	# Entry point used by the UI: handles exactly one image/description pair.
	def generate_blog_post_single(image, desc, style):
	    """Generate a description for one image, or "" when input is incomplete.

	    Args:
	        image: The uploaded image value; ``None`` when nothing was uploaded.
	        desc: The user's description text. ``None`` is treated like an empty
	            string instead of raising ``AttributeError`` on ``.strip()``.
	        style: Style label forwarded to ``generate_blog_post_in_korean``.

	    Returns:
	        The result of ``generate_blog_post_in_korean``, or an empty string
	        when the image is missing or the description is missing or blank.
	    """
	    # Guard clause: both an image and non-blank text are required.
	    if image is None or desc is None or not desc.strip():
	        return ""
	    return generate_blog_post_in_korean(image, desc, style)

	# Gradio interface setup: accepts exactly one image and one description.
	def _build_interface():
	    """Create the Gradio interface wired to ``generate_blog_post_single``."""
	    # gr.File is used for the upload (instead of gr.Image).
	    image_input = gr.File(label="이미지 업로드")
	    description_input = gr.Textbox(
	        label="사진에 대한 설명 입력",
	        placeholder="사진 설명을 입력하세요",
	    )
	    # The preselected option is given through `value` (not `default`).
	    style_input = gr.Radio(
	        ["사실적인", "감성적인"],
	        label="설명 스타일 선택",
	        value="사실적인",
	    )
	    result_output = gr.Textbox(label="이미지 설명 결과")
	    return gr.Interface(
	        fn=generate_blog_post_single,
	        inputs=[image_input, description_input, style_input],
	        outputs=result_output,
	        title="이미지 설명 생성기",
	        description="하나의 이미지와 텍스트를 바탕으로 최상의 한국어로 표현합니다.",
	        allow_flagging="never",
	    )


	iface = _build_interface()

	if __name__ == "__main__":
	    iface.launch(share=True)