import streamlit as st
import os
import pytesseract
import easyocr
from PIL import Image
import numpy as np
# PaddleOCR is an optional dependency: record whether its import succeeded so
# later code can decide whether to offer it as an engine.
try:
    from paddleocr import PaddleOCR
except ImportError:
    paddle_available = False
else:
    paddle_available = True
# Set page configuration
st.set_page_config(page_title="KAIRO.ai - Hugging Face Demo", layout="wide")
st.title("๐ง KAIRO.ai - AI ๊ธฐ๋ฐ ์ธ์ด ๊ฒ์ ํ๋ซํผ (HF Demo)")
# Sidebar: OCR Engine
st.sidebar.header("๐ OCR ์์ง ์ ํ")
available_engines = ["EasyOCR", "Tesseract"]
if paddle_available:
available_engines.append("PaddleOCR")
ocr_engines = st.sidebar.multiselect(
"์ฌ์ฉํ OCR ์์ง์ ์ ํํ์ธ์",
available_engines,
default=["EasyOCR"]
)
# Sidebar: ๊ฒ์ ํญ๋ชฉ
st.sidebar.markdown("### ๐ง ๊ฒ์ ํญ๋ชฉ ์ ํ")
checklist_korean = [
"๋ง์ถค๋ฒ (Orthography)", "๋์ด์ฐ๊ธฐ (Spacing Rules)", "๋ฌธ๋ฒ ์ค๋ฅ (Grammatical Errors)",
"ํ์ค์ด ๊ท์ ์๋ฐ (Standard Language Regulation)", "์ธ๋์ด ํ๊ธฐ๋ฒ ์๋ฐ (Loanword Orthography)",
"๋ฌธ์ฅ ๊ตฌ์กฐ ์ค๋ฅ (Sentence Structure Error)", "์ดํ ์ ์ ์ฑ (Lexical Appropriateness)",
"์ค๋ณต์ด/๊ตฐ๋๋๊ธฐ ์ ๊ฑฐ (Redundancy Elimination)", "์คํ์ (Typographical Errors)",
"๋ฌธ์ฅ ๋ถํธ ์ฌ์ฉ (Punctuation Usage)", "์ด๋ฒ ์ค๋ฅ (Usage Error)",
"๋
ผ๋ฆฌ์ ์ผ๊ด์ฑ (Logical Coherence)", "ํ์์ ์ค๋ฅ (Formatting Consistency)"
]
checklist_foreign = [
"Spelling (์ฒ ์ ์ค๋ฅ)", "Grammar (๋ฌธ๋ฒ ์ค๋ฅ)", "Capitalization (๋์๋ฌธ์ ์ค๋ฅ)",
"Subject-Verb Agreement (์ฃผ์ด-๋์ฌ ์ผ์น)", "Article Usage (๊ด์ฌ์ ์ ์ ์ฑ)",
"Tense Consistency (์์ ์ผ๊ด์ฑ)", "Word Choice (์ดํ ์ ํ)",
"Redundancy (์ค๋ณต ํํ)", "Tone (๋ฌธ์ฒด ๋ฐ ์ผ๊ด์ฑ)"
]
selected_korean_checks = st.sidebar.multiselect("โ
ํ๊ธ ๊ฒ์ ํญ๋ชฉ ์ ํ", checklist_korean)
selected_foreign_checks = st.sidebar.multiselect("โ
์ธ๊ตญ์ด ๊ฒ์ ํญ๋ชฉ ์ ํ", checklist_foreign)
# The reader is wrapped in st.cache_resource so it is not rebuilt on each call
@st.cache_resource
def load_easyocr():
    """Return an EasyOCR reader configured for Korean and English."""
    languages = ["ko", "en"]
    reader = easyocr.Reader(languages)
    return reader
# Main Section
log_messages = []
progress = st.empty()
uploaded_file = st.file_uploader("PNG ์ด๋ฏธ์ง ์
๋ก๋", type=["png"])
if uploaded_file is not None:
with st.spinner("๐ค ์ด๋ฏธ์ง ์ฒ๋ฆฌ ์ค..."):
try:
progress.progress(10, text="์ด๋ฏธ์ง ์ด๊ธฐ")
image = Image.open(uploaded_file)
st.image(image, caption="์
๋ก๋๋ ์ด๋ฏธ์ง", use_column_width=True)
log_messages.append("โ
์ด๋ฏธ์ง ์
๋ก๋ ์ฑ๊ณต")
text_output = ""
if "EasyOCR" in ocr_engines:
try:
progress.progress(30, text="EasyOCR ์ธ์ ์ค...")
reader = load_easyocr()
result = reader.readtext(np.array(image), detail=0)
text_output += "\n".join(result) + "\n"
log_messages.append("โ
EasyOCR ์ฒ๋ฆฌ ์๋ฃ")
except Exception as e:
log_messages.append(f"โ EasyOCR ์ค๋ฅ: {str(e)}")
if "Tesseract" in ocr_engines:
try:
progress.progress(60, text="Tesseract ์ธ์ ์ค...")
tess_path = shutil.which("tesseract")
if tess_path is None:
raise EnvironmentError("Tesseract is not installed or not in PATH.")
tess_result = pytesseract.image_to_string(image, lang="kor+eng")
text_output += tess_result + "\n"
log_messages.append("โ
Tesseract ์ฒ๋ฆฌ ์๋ฃ")
except Exception as e:
log_messages.append(f"โ Tesseract ์ค๋ฅ: {str(e)}")
if "PaddleOCR" in ocr_engines and paddle_available:
try:
progress.progress(90, text="PaddleOCR ์ธ์ ์ค...")
paddle_ocr = PaddleOCR(use_angle_cls=True, lang='korean')
result = paddle_ocr.ocr(np.array(image), cls=True)
paddle_text = "\n".join([line[1][0] for line in result[0]])
text_output += paddle_text + "\n"
log_messages.append("โ
PaddleOCR ์ฒ๋ฆฌ ์๋ฃ")
except Exception as e:
log_messages.append(f"โ PaddleOCR ์ค๋ฅ: {str(e)}")
st.markdown("### ๐ ์ถ์ถ๋ ํ
์คํธ")
st.text_area("OCR ๊ฒฐ๊ณผ", text_output, height=300)
progress.progress(100, text="โ
OCR ์๋ฃ")
if selected_korean_checks or selected_foreign_checks:
st.markdown("### ๐ ๏ธ ์ ํ๋ ๊ฒ์ ํญ๋ชฉ")
if selected_korean_checks:
st.markdown("**๐ ํ๊ธ ๊ฒ์ ํญ๋ชฉ:**")
for check in selected_korean_checks:
st.write(f"โข {check}")
if selected_foreign_checks:
st.markdown("**๐ ์ธ๊ตญ์ด ๊ฒ์ ํญ๋ชฉ:**")
for check in selected_foreign_checks:
st.write(f"โข {check}")
log_messages.append("โ
๊ฒ์ ํญ๋ชฉ ์ ์ฉ ์๋ฃ")
except Exception as e:
log_messages.append(f"โ ์ ์ฒด ์ค๋ฅ ๋ฐ์: {str(e)}")
# Footer Log
st.markdown("---")
st.markdown("### ๐ ์์คํ
๋ก๊ทธ")
for msg in log_messages:
if "โ
" in msg:
st.success(msg)
elif "โ" in msg:
st.error(msg)
else:
st.info(msg)
|