Spaces:

aliceblue11
/

image_text_01

Sleeping

App Files Files Community

image_text_01 / app.py

aliceblue11

Create app.py

48ed049 verified 7 months ago

raw

history blame contribute delete

10.1 kB

	import gradio as gr
	import google.generativeai as genai
	from PIL import Image
	import io
	import base64
	import json
	from typing import Optional, Tuple

	class KoreanOCRApp:
	    """Extract Korean text from images through the Google Gemini API.

	    The instance remembers the model it built and the API key used to build
	    it, so repeated extractions with the same key skip reconfiguration.

	    NOTE(review): ``configure_api`` calls ``genai.configure``; if that setting
	    is process-wide, one instance serving several users with different keys
	    may mix them up under concurrent requests -- confirm against the caller.
	    """

	    def __init__(self):
	        # Both stay None until configure_api() succeeds.
	        self.model = None
	        self.api_key = None

	    def configure_api(self, api_key: str) -> str:
	        """Configure the API key and initialize the model.

	        Args:
	            api_key: Gemini API key; surrounding whitespace is ignored.

	        Returns:
	            A user-facing status message that contains "✅" on success and
	            "❌" when the key is empty or configuration raised.
	        """
	        try:
	            key = (api_key or "").strip()
	            if not key:
	                return "❌ API 키를 입력해주세요."

	            genai.configure(api_key=key)
	            self.model = genai.GenerativeModel('gemini-2.5-flash')
	            self.api_key = key
	            return "✅ API 키가 성공적으로 설정되었습니다."
	        except Exception as e:
	            return f"❌ API 키 설정 중 오류가 발생했습니다: {str(e)}"

	    def extract_korean_text(self, image: Image.Image, api_key: str) -> Tuple[str, Optional[Image.Image]]:
	        """Extract Korean text from an image.

	        Args:
	            image: Uploaded picture, or None when nothing was uploaded.
	            api_key: Gemini API key; the model is (re)configured when it is
	                not set yet or the key differs from the cached one.

	        Returns:
	            ``(message, image)``: the message holds the extracted text or an
	            error description; the image is the (possibly converted and
	            downscaled) picture that was sent, or None if none was uploaded.
	        """
	        try:
	            # Normalise first so a missing key yields the "enter a key"
	            # message instead of an AttributeError from None.strip().
	            key = (api_key or "").strip()

	            # First use, or the key changed since the last call.
	            if not self.model or self.api_key != key:
	                config_result = self.configure_api(key)
	                if "❌" in config_result:
	                    return config_result, image

	            if image is None:
	                return "❌ 이미지를 업로드해주세요.", None

	            # Preprocessing: the request is always made with an RGB image.
	            if image.mode != 'RGB':
	                image = image.convert('RGB')

	            # Downscale when the longest side exceeds max_size.
	            max_size = 1024
	            longest_side = max(image.size)
	            if longest_side > max_size:
	                ratio = max_size / longest_side
	                # Clamp to 1 px: a very elongated image would otherwise get
	                # a 0-pixel side, which resize() rejects.
	                new_size = tuple(max(1, int(dim * ratio)) for dim in image.size)
	                image = image.resize(new_size, Image.Resampling.LANCZOS)

	            # Prompt for Korean text extraction (sent verbatim).
	            prompt = """
	이 이미지에서 모든 한국어 텍스트를 추출해주세요.
	다음 규칙을 따라주세요:
	1. 이미지에 있는 모든 한국어 텍스트를 정확하게 읽어주세요
	2. 텍스트의 위치나 순서를 고려하여 자연스럽게 배열해주세요
	3. 줄바꿈이나 문단 구분이 있다면 그대로 유지해주세요
	4. 영어나 숫자가 함께 있다면 그것도 포함해주세요
	5. 읽을 수 없거나 불분명한 부분이 있다면 [불분명]으로 표시해주세요

	추출된 텍스트만 출력해주세요:
	"""

	            # Gemini API call.
	            response = self.model.generate_content([prompt, image])

	            if response.text:
	                extracted_text = response.text.strip()
	                success_message = f"✅ 텍스트 추출 완료:\n\n{extracted_text}"
	                return success_message, image
	            else:
	                return "❌ 텍스트를 추출할 수 없습니다.", image

	        except Exception as e:
	            # UI boundary: report any failure as a message instead of raising.
	            error_message = f"❌ 오류가 발생했습니다: {str(e)}"
	            return error_message, image

	    def verify_image_display(self, image: Image.Image) -> Tuple[str, Optional[Image.Image]]:
	        """Report basic facts about the uploaded image and echo it back.

	        Args:
	            image: Uploaded picture, or None when nothing was uploaded.

	        Returns:
	            ``(report, image)``: a text report with size, format, colour mode
	            and an approximate byte count, plus the unchanged image (None if
	            none was uploaded).
	        """
	        if image is None:
	            return "❌ 이미지를 먼저 업로드해주세요.", None

	        try:
	            # Collect image information.
	            width, height = image.size
	            # The format attribute can exist but be None (images not loaded
	            # from a file); show "Unknown" rather than the text "None".
	            format_info = getattr(image, 'format', None) or "Unknown"
	            mode = image.mode
	            # One byte per band is an approximation, but unlike a fixed
	            # "3 for RGB, else 1" it also covers RGBA, CMYK and LA.
	            channels = len(image.getbands())

	            verification_text = f"""
	✅ 이미지 검증 완료

	📊 이미지 정보:
	- 크기: {width} x {height} 픽셀
	- 포맷: {format_info}
	- 색상 모드: {mode}
	- 파일 크기: 약 {width * height * channels} 바이트

	🔍 이미지가 올바르게 표시되고 있습니다.
	"""

	            return verification_text, image

	        except Exception as e:
	            return f"❌ 이미지 검증 중 오류가 발생했습니다: {str(e)}", image

	def create_app():
	    """Build the Gradio Blocks interface and wire it to a KoreanOCRApp.

	    Returns:
	        The assembled ``gr.Blocks`` object (not yet launched).
	    """
	    engine = KoreanOCRApp()

	    # Static page assets, kept out of the layout code for readability.
	    page_css = """
	.container {
	max-width: 1200px;
	margin: auto;
	}
	.header {
	text-align: center;
	padding: 20px;
	background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
	color: white;
	border-radius: 10px;
	margin-bottom: 20px;
	}
	"""
	    header_html = """
	<div class="header">
	<h1>🔤 한국어 OCR 텍스트 추출기</h1>
	<p>Google Gemini를 사용한 고성능 한국어 텍스트 인식</p>
	</div>
	"""
	    usage_markdown = """
	### 🔧 설정 방법
	1. API 키 발급: [Google AI Studio](https://makersuite.google.com)에서 무료 API 키를 발급받으세요
	2. API 키 입력: 위의 입력창에 발급받은 API 키를 입력하고 '설정' 버튼을 클릭하세요

	### 📖 텍스트 추출 방법
	1. 이미지 업로드: 한국어 텍스트가 포함된 이미지를 업로드하세요
	2. 텍스트 추출: '텍스트 추출' 버튼을 클릭하여 OCR을 실행하세요
	3. 결과 확인: 오른쪽 결과창에서 추출된 텍스트를 확인하세요

	### 📋 지원 형식
	- 이미지 형식: PNG, JPEG, WEBP, HEIC, HEIF
	- 최대 크기: 20MB (자동으로 최적화됩니다)
	- 언어: 한국어 중심 (영어, 숫자도 인식 가능)

	### 💡 팁
	- 선명하고 해상도가 높은 이미지를 사용하세요
	- 텍스트가 잘 보이도록 조명이 충분한 사진을 촬영하세요
	- 기울어진 이미지는 자동으로 보정을 시도합니다
	"""

	    def mirror_upload(img):
	        """Copy the uploaded image to the preview and set a short hint."""
	        hint = "이미지가 업로드되었습니다. 텍스트 추출을 실행하세요." if img else ""
	        return img, hint

	    with gr.Blocks(
	        title="한국어 OCR 텍스트 추출기",
	        theme=gr.themes.Soft(),
	        css=page_css,
	    ) as demo:

	        # Header banner
	        gr.HTML(header_html)

	        with gr.Row():
	            with gr.Column(scale=1):
	                # API key section
	                gr.Markdown("## 🔑 API 키 설정")
	                key_box = gr.Textbox(
	                    label="Google Gemini API 키",
	                    placeholder="여기에 API 키를 입력하세요...",
	                    type="password",
	                    info="https://makersuite.google.com에서 API 키를 발급받으세요",
	                )

	                key_status_box = gr.Textbox(
	                    label="API 상태",
	                    value="API 키를 입력하고 설정하세요",
	                    interactive=False,
	                )

	                # Button that applies the API key
	                apply_key_button = gr.Button("🔧 API 키 설정", variant="primary")

	                gr.Markdown("---")

	                # Image upload section
	                gr.Markdown("## 📤 이미지 업로드")
	                upload_box = gr.Image(
	                    label="한국어 텍스트가 포함된 이미지를 업로드하세요",
	                    type="pil",
	                    height=300,
	                )

	                # Action buttons
	                with gr.Row():
	                    run_ocr_button = gr.Button("📖 텍스트 추출", variant="primary")
	                    check_image_button = gr.Button("🔍 이미지 검증", variant="secondary")

	            with gr.Column(scale=1):
	                # Result section
	                gr.Markdown("## 📄 추출 결과")

	                result_box = gr.Textbox(
	                    label="추출된 텍스트",
	                    lines=10,
	                    placeholder="여기에 추출된 한국어 텍스트가 표시됩니다...",
	                    interactive=False,
	                )

	                gr.Markdown("## 🖼️ 이미지 확인")
	                preview_box = gr.Image(
	                    label="업로드된 이미지 (검증용)",
	                    height=300,
	                )

	                # Image details
	                details_box = gr.Textbox(
	                    label="이미지 정보",
	                    lines=5,
	                    interactive=False,
	                )

	        # Usage guide
	        with gr.Accordion("📋 사용법 안내", open=False):
	            gr.Markdown(usage_markdown)

	        # Event wiring
	        apply_key_button.click(
	            fn=engine.configure_api,
	            inputs=[key_box],
	            outputs=[key_status_box],
	        )

	        run_ocr_button.click(
	            fn=engine.extract_korean_text,
	            inputs=[upload_box, key_box],
	            outputs=[result_box, preview_box],
	        )

	        check_image_button.click(
	            fn=engine.verify_image_display,
	            inputs=[upload_box],
	            outputs=[details_box, preview_box],
	        )

	        # Show a newly uploaded image in the preview right away.
	        upload_box.change(
	            fn=mirror_upload,
	            inputs=[upload_box],
	            outputs=[preview_box, details_box],
	        )

	    return demo

	if __name__ == "__main__":
	    # Script entry point: build the UI, then start the Gradio server with
	    # the settings below (all interfaces, port 7860, share link requested,
	    # debug mode and error display enabled).
	    launch_options = {
	        "server_name": "0.0.0.0",
	        "server_port": 7860,
	        "share": True,
	        "debug": True,
	        "show_error": True,
	    }
	    app = create_app()
	    app.launch(**launch_options)