"""Streamlit demo: upload a PNG, run the selected OCR engines, show the text.

The sidebar lets the user choose OCR engines (EasyOCR, Tesseract and, when
the package is importable, PaddleOCR) and a set of review checklist items.
The checklist items are only echoed back in the page; no review logic is
applied to the extracted text in this script.
"""
import os
import shutil

import easyocr
import numpy as np
import pytesseract
import streamlit as st
from PIL import Image

# Optional: use PaddleOCR only if the package is installed.
try:
    from paddleocr import PaddleOCR
    paddle_available = True
except ImportError:
    paddle_available = False

# Set page configuration
st.set_page_config(page_title="KAIRO.ai - Hugging Face Demo", layout="wide")
st.title("🧠 KAIRO.ai - AI 기반 μ–Έμ–΄ κ²€μˆ˜ ν”Œλž«νΌ (HF Demo)")

# Sidebar: OCR engine selection
st.sidebar.header("πŸ“‚ OCR μ—”μ§„ 선택")
available_engines = ["EasyOCR", "Tesseract"]
if paddle_available:
    available_engines.append("PaddleOCR")

ocr_engines = st.sidebar.multiselect(
    "μ‚¬μš©ν•  OCR 엔진을 μ„ νƒν•˜μ„Έμš”",
    available_engines,
    default=["EasyOCR"],
)

# Sidebar: review checklist selection
st.sidebar.markdown("### 🧠 κ²€μˆ˜ ν•­λͺ© 선택")

checklist_korean = [
    "λ§žμΆ€λ²• (Orthography)",
    "띄어쓰기 (Spacing Rules)",
    "문법 였λ₯˜ (Grammatical Errors)",
    "ν‘œμ€€μ–΄ κ·œμ • μœ„λ°˜ (Standard Language Regulation)",
    "μ™Έλž˜μ–΄ ν‘œκΈ°λ²• μœ„λ°˜ (Loanword Orthography)",
    "λ¬Έμž₯ ꡬ쑰 였λ₯˜ (Sentence Structure Error)",
    "μ–΄νœ˜ μ μ ˆμ„± (Lexical Appropriateness)",
    "쀑볡어/ꡰ더더기 제거 (Redundancy Elimination)",
    "μ˜€νƒˆμž (Typographical Errors)",
    "λ¬Έμž₯ λΆ€ν˜Έ μ‚¬μš© (Punctuation Usage)",
    "어법 였λ₯˜ (Usage Error)",
    "논리적 일관성 (Logical Coherence)",
    "ν˜•μ‹μ  였λ₯˜ (Formatting Consistency)",
]

checklist_foreign = [
    "Spelling (철자 였λ₯˜)",
    "Grammar (문법 였λ₯˜)",
    "Capitalization (λŒ€μ†Œλ¬Έμž 였λ₯˜)",
    "Subject-Verb Agreement (μ£Όμ–΄-동사 일치)",
    "Article Usage (κ΄€μ‚¬μ˜ μ μ ˆμ„±)",
    "Tense Consistency (μ‹œμ œ 일관성)",
    "Word Choice (μ–΄νœ˜ 선택)",
    "Redundancy (쀑볡 ν‘œν˜„)",
    "Tone (문체 및 일관성)",
]

selected_korean_checks = st.sidebar.multiselect("βœ… ν•œκΈ€ κ²€μˆ˜ ν•­λͺ© 선택", checklist_korean)
selected_foreign_checks = st.sidebar.multiselect("βœ… μ™Έκ΅­μ–΄ κ²€μˆ˜ ν•­λͺ© 선택", checklist_foreign)


# Cache EasyOCR for speed
@st.cache_resource
def load_easyocr():
    """Return an EasyOCR reader for Korean and English, cached by Streamlit."""
    return easyocr.Reader(["ko", "en"])


# Cache PaddleOCR the same way so it is not rebuilt on every rerun.
@st.cache_resource
def load_paddleocr():
    """Return a PaddleOCR instance for Korean, cached by Streamlit.

    Only call this when ``paddle_available`` is true; otherwise the
    ``PaddleOCR`` name is undefined.
    """
    return PaddleOCR(use_angle_cls=True, lang='korean')


# Main Section
log_messages = []
progress = st.empty()

uploaded_file = st.file_uploader("PNG 이미지 μ—…λ‘œλ“œ", type=["png"])

if uploaded_file is not None:
    with st.spinner("πŸ“€ 이미지 처리 쀑..."):
        try:
            progress.progress(10, text="이미지 μ—΄κΈ°")
            image = Image.open(uploaded_file)
            st.image(image, caption="μ—…λ‘œλ“œλœ 이미지", use_column_width=True)
            log_messages.append("βœ… 이미지 μ—…λ‘œλ“œ 성곡")

            # Convert once and share between the array-based engines.
            # Going through RGB matters for palette-mode PNGs, where
            # np.array(image) would hold palette indices, not colours.
            ocr_array = np.array(image.convert("RGB"))

            text_output = ""

            # Each engine has its own try/except so that one failing
            # engine is logged without aborting the others.
            if "EasyOCR" in ocr_engines:
                try:
                    progress.progress(30, text="EasyOCR 인식 쀑...")
                    reader = load_easyocr()
                    result = reader.readtext(ocr_array, detail=0)
                    text_output += "\n".join(result) + "\n"
                    log_messages.append("βœ… EasyOCR 처리 μ™„λ£Œ")
                except Exception as e:
                    log_messages.append(f"❌ EasyOCR 였λ₯˜: {e}")

            if "Tesseract" in ocr_engines:
                try:
                    progress.progress(60, text="Tesseract 인식 쀑...")
                    # Check for the binary up front to give a clear message
                    # (requires the `shutil` import added above).
                    tess_path = shutil.which("tesseract")
                    if tess_path is None:
                        raise EnvironmentError("Tesseract is not installed or not in PATH.")
                    tess_result = pytesseract.image_to_string(image, lang="kor+eng")
                    text_output += tess_result + "\n"
                    log_messages.append("βœ… Tesseract 처리 μ™„λ£Œ")
                except Exception as e:
                    log_messages.append(f"❌ Tesseract 였λ₯˜: {e}")

            if "PaddleOCR" in ocr_engines and paddle_available:
                try:
                    progress.progress(90, text="PaddleOCR 인식 쀑...")
                    paddle_ocr = load_paddleocr()
                    result = paddle_ocr.ocr(ocr_array, cls=True)
                    # The first entry may be None/empty when nothing was
                    # detected; treat that as "no text", not as an error.
                    detections = result[0] if result and result[0] else []
                    paddle_text = "\n".join(line[1][0] for line in detections)
                    text_output += paddle_text + "\n"
                    log_messages.append("βœ… PaddleOCR 처리 μ™„λ£Œ")
                except Exception as e:
                    log_messages.append(f"❌ PaddleOCR 였λ₯˜: {e}")

            st.markdown("### πŸ” μΆ”μΆœλœ ν…μŠ€νŠΈ")
            st.text_area("OCR κ²°κ³Ό", text_output, height=300)
            progress.progress(100, text="βœ… OCR μ™„λ£Œ")

            # Echo the selected checklist items; no checks are executed here.
            if selected_korean_checks or selected_foreign_checks:
                st.markdown("### πŸ› οΈ μ„ νƒλœ κ²€μˆ˜ ν•­λͺ©")
                if selected_korean_checks:
                    st.markdown("**πŸ“Œ ν•œκΈ€ κ²€μˆ˜ ν•­λͺ©:**")
                    for check in selected_korean_checks:
                        st.write(f"β€’ {check}")
                if selected_foreign_checks:
                    st.markdown("**🌐 μ™Έκ΅­μ–΄ κ²€μˆ˜ ν•­λͺ©:**")
                    for check in selected_foreign_checks:
                        st.write(f"β€’ {check}")
                log_messages.append("βœ… κ²€μˆ˜ ν•­λͺ© 적용 μ™„λ£Œ")

        except Exception as e:
            # Top-level boundary: surface anything unexpected in the log
            # panel instead of crashing the page.
            log_messages.append(f"❌ 전체 였λ₯˜ λ°œμƒ: {e}")

# Footer Log
st.markdown("---")
st.markdown("### πŸ“‹ μ‹œμŠ€ν…œ 둜그")
for msg in log_messages:
    # The marker embedded in each message decides how it is rendered.
    if "βœ…" in msg:
        st.success(msg)
    elif "❌" in msg:
        st.error(msg)
    else:
        st.info(msg)