# kairo / app.py
# Hyounggoo's picture
# Upload 3 files
# d27fca0 verified
import os
import shutil

import easyocr
import numpy as np
import pytesseract
import streamlit as st
from PIL import Image
# Optional dependency: PaddleOCR is only offered as an engine when the
# package can be imported; `paddle_available` records the outcome.
try:
    from paddleocr import PaddleOCR
except ImportError:
    paddle_available = False
else:
    paddle_available = True
# Page-level configuration and title.
st.set_page_config(page_title="KAIRO.ai - Hugging Face Demo", layout="wide")
st.title("๐Ÿง  KAIRO.ai - AI ๊ธฐ๋ฐ˜ ์–ธ์–ด ๊ฒ€์ˆ˜ ํ”Œ๋žซํผ (HF Demo)")

# Sidebar: OCR engine selection. PaddleOCR is listed only when its optional
# import succeeded; EasyOCR is pre-selected.
st.sidebar.header("๐Ÿ“‚ OCR ์—”์ง„ ์„ ํƒ")
available_engines = ["EasyOCR", "Tesseract"] + (
    ["PaddleOCR"] if paddle_available else []
)
ocr_engines = st.sidebar.multiselect(
    label="์‚ฌ์šฉํ•  OCR ์—”์ง„์„ ์„ ํƒํ•˜์„ธ์š”",
    options=available_engines,
    default=["EasyOCR"],
)
# Sidebar: review checklists. Two option lists (one for Korean text, one for
# foreign-language text) feed two sidebar multiselects; nothing is selected
# by default.
st.sidebar.markdown("### ๐Ÿง  ๊ฒ€์ˆ˜ ํ•ญ๋ชฉ ์„ ํƒ")

checklist_korean = [
    "๋งž์ถค๋ฒ• (Orthography)",
    "๋„์–ด์“ฐ๊ธฐ (Spacing Rules)",
    "๋ฌธ๋ฒ• ์˜ค๋ฅ˜ (Grammatical Errors)",
    "ํ‘œ์ค€์–ด ๊ทœ์ • ์œ„๋ฐ˜ (Standard Language Regulation)",
    "์™ธ๋ž˜์–ด ํ‘œ๊ธฐ๋ฒ• ์œ„๋ฐ˜ (Loanword Orthography)",
    "๋ฌธ์žฅ ๊ตฌ์กฐ ์˜ค๋ฅ˜ (Sentence Structure Error)",
    "์–ดํœ˜ ์ ์ ˆ์„ฑ (Lexical Appropriateness)",
    "์ค‘๋ณต์–ด/๊ตฐ๋”๋”๊ธฐ ์ œ๊ฑฐ (Redundancy Elimination)",
    "์˜คํƒˆ์ž (Typographical Errors)",
    "๋ฌธ์žฅ ๋ถ€ํ˜ธ ์‚ฌ์šฉ (Punctuation Usage)",
    "์–ด๋ฒ• ์˜ค๋ฅ˜ (Usage Error)",
    "๋…ผ๋ฆฌ์  ์ผ๊ด€์„ฑ (Logical Coherence)",
    "ํ˜•์‹์  ์˜ค๋ฅ˜ (Formatting Consistency)",
]

checklist_foreign = [
    "Spelling (์ฒ ์ž ์˜ค๋ฅ˜)",
    "Grammar (๋ฌธ๋ฒ• ์˜ค๋ฅ˜)",
    "Capitalization (๋Œ€์†Œ๋ฌธ์ž ์˜ค๋ฅ˜)",
    "Subject-Verb Agreement (์ฃผ์–ด-๋™์‚ฌ ์ผ์น˜)",
    "Article Usage (๊ด€์‚ฌ์˜ ์ ์ ˆ์„ฑ)",
    "Tense Consistency (์‹œ์ œ ์ผ๊ด€์„ฑ)",
    "Word Choice (์–ดํœ˜ ์„ ํƒ)",
    "Redundancy (์ค‘๋ณต ํ‘œํ˜„)",
    "Tone (๋ฌธ์ฒด ๋ฐ ์ผ๊ด€์„ฑ)",
]

selected_korean_checks = st.sidebar.multiselect(
    label="โœ… ํ•œ๊ธ€ ๊ฒ€์ˆ˜ ํ•ญ๋ชฉ ์„ ํƒ",
    options=checklist_korean,
)
selected_foreign_checks = st.sidebar.multiselect(
    label="โœ… ์™ธ๊ตญ์–ด ๊ฒ€์ˆ˜ ํ•ญ๋ชฉ ์„ ํƒ",
    options=checklist_foreign,
)
# Cache the EasyOCR reader so it is not rebuilt on every call.
@st.cache_resource
def load_easyocr():
    """Return an EasyOCR reader configured for Korean and English.

    The function is wrapped in ``st.cache_resource``, so the reader is
    constructed once and the cached instance is handed back afterwards.
    """
    languages = ["ko", "en"]
    return easyocr.Reader(languages)
# Main Section
# Accepts a PNG upload, runs every selected OCR engine over it, shows the
# combined text and echoes the selected review checks. Each engine runs in its
# own try/except so one failing engine does not stop the others; every outcome
# is appended to `log_messages` for the footer log.
# NOTE(review): the nesting below was reconstructed from syntax because the
# source text carried no indentation; confirm that the final "checks applied"
# log entry belongs inside the `if selected_...` branch.
log_messages = []
progress = st.empty()
uploaded_file = st.file_uploader("PNG ์ด๋ฏธ์ง€ ์—…๋กœ๋“œ", type=["png"])
if uploaded_file is not None:
    with st.spinner("๐Ÿ“ค ์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ ์ค‘..."):
        try:
            progress.progress(10, text="์ด๋ฏธ์ง€ ์—ด๊ธฐ")
            image = Image.open(uploaded_file)
            st.image(image, caption="์—…๋กœ๋“œ๋œ ์ด๋ฏธ์ง€", use_column_width=True)
            log_messages.append("โœ… ์ด๋ฏธ์ง€ ์—…๋กœ๋“œ ์„ฑ๊ณต")

            # EasyOCR and PaddleOCR receive a pixel array. Convert to RGB once
            # so palette, grayscale, 1-bit and RGBA PNGs all reach the engines
            # as a 3-channel array instead of index/bool/4-channel data.
            ocr_pixels = np.array(image.convert("RGB"))

            # One entry per engine that produced output, in run order.
            text_chunks = []

            if "EasyOCR" in ocr_engines:
                try:
                    progress.progress(30, text="EasyOCR ์ธ์‹ ์ค‘...")
                    reader = load_easyocr()
                    # detail=0 makes readtext return plain strings.
                    result = reader.readtext(ocr_pixels, detail=0)
                    text_chunks.append("\n".join(result))
                    log_messages.append("โœ… EasyOCR ์ฒ˜๋ฆฌ ์™„๋ฃŒ")
                except Exception as exc:
                    log_messages.append(f"โŒ EasyOCR ์˜ค๋ฅ˜: {exc}")

            if "Tesseract" in ocr_engines:
                try:
                    progress.progress(60, text="Tesseract ์ธ์‹ ์ค‘...")
                    # Check for the binary up front so a missing install is
                    # reported with an explicit message.
                    if shutil.which("tesseract") is None:
                        raise OSError("Tesseract is not installed or not in PATH.")
                    tess_result = pytesseract.image_to_string(image, lang="kor+eng")
                    text_chunks.append(tess_result)
                    log_messages.append("โœ… Tesseract ์ฒ˜๋ฆฌ ์™„๋ฃŒ")
                except Exception as exc:
                    log_messages.append(f"โŒ Tesseract ์˜ค๋ฅ˜: {exc}")

            if "PaddleOCR" in ocr_engines and paddle_available:
                try:
                    progress.progress(90, text="PaddleOCR ์ธ์‹ ์ค‘...")
                    paddle_ocr = PaddleOCR(use_angle_cls=True, lang='korean')
                    result = paddle_ocr.ocr(ocr_pixels, cls=True)
                    # The per-image entry can be None when no text is found
                    # (see PaddleOCR docs); treat that as "no lines" instead
                    # of failing on iteration.
                    detections = (result[0] if result else None) or []
                    paddle_text = "\n".join(line[1][0] for line in detections)
                    text_chunks.append(paddle_text)
                    log_messages.append("โœ… PaddleOCR ์ฒ˜๋ฆฌ ์™„๋ฃŒ")
                except Exception as exc:
                    log_messages.append(f"โŒ PaddleOCR ์˜ค๋ฅ˜: {exc}")

            # Each engine's text is followed by a newline, as before.
            text_output = "".join(chunk + "\n" for chunk in text_chunks)

            st.markdown("### ๐Ÿ” ์ถ”์ถœ๋œ ํ…์ŠคํŠธ")
            st.text_area("OCR ๊ฒฐ๊ณผ", text_output, height=300)
            progress.progress(100, text="โœ… OCR ์™„๋ฃŒ")

            if selected_korean_checks or selected_foreign_checks:
                st.markdown("### ๐Ÿ› ๏ธ ์„ ํƒ๋œ ๊ฒ€์ˆ˜ ํ•ญ๋ชฉ")
                if selected_korean_checks:
                    st.markdown("**๐Ÿ“Œ ํ•œ๊ธ€ ๊ฒ€์ˆ˜ ํ•ญ๋ชฉ:**")
                    for check in selected_korean_checks:
                        st.write(f"โ€ข {check}")
                if selected_foreign_checks:
                    st.markdown("**๐ŸŒ ์™ธ๊ตญ์–ด ๊ฒ€์ˆ˜ ํ•ญ๋ชฉ:**")
                    for check in selected_foreign_checks:
                        st.write(f"โ€ข {check}")
                log_messages.append("โœ… ๊ฒ€์ˆ˜ ํ•ญ๋ชฉ ์ ์šฉ ์™„๋ฃŒ")
        except Exception as exc:
            # Top-level boundary: anything not handled per engine (e.g. an
            # unreadable image) is logged rather than crashing the page.
            log_messages.append(f"โŒ ์ „์ฒด ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {exc}")
# Footer Log
# Render every collected log entry under a divider. The marker found in the
# message picks the widget: success marker -> st.success, error marker ->
# st.error, anything else -> st.info.
st.markdown("---")
st.markdown("### ๐Ÿ“‹ ์‹œ์Šคํ…œ ๋กœ๊ทธ")
for entry in log_messages:
    if "โœ…" in entry:
        render = st.success
    elif "โŒ" in entry:
        render = st.error
    else:
        render = st.info
    render(entry)