import os
import shutil

import easyocr
import numpy as np
import pytesseract
import streamlit as st
from PIL import Image

# PaddleOCR is optional: the engine is only offered when the import succeeds.
try:
    from paddleocr import PaddleOCR

    paddle_available = True
except ImportError:
    paddle_available = False

# Configure the browser tab title and wide layout, then render the page title.
# The title literal is stored as real UTF-8 text (it had been mis-decoded into
# Thai code points, which rendered as garbage in the UI).
st.set_page_config(page_title="KAIRO.ai - Hugging Face Demo", layout="wide")
st.title("🧠 KAIRO.ai - AI 기반 언어 검수 플랫폼 (HF Demo)")

# Sidebar: pick which OCR engines run on the uploaded image.
# NOTE(review): the header emoji could not be recovered unambiguously from the
# mis-encoded literal; the magnifier is a best guess — confirm.
st.sidebar.header("🔍 OCR 엔진 선택")

# PaddleOCR is listed only when its optional import succeeded.
available_engines = ["EasyOCR", "Tesseract"]
if paddle_available:
    available_engines.append("PaddleOCR")

ocr_engines = st.sidebar.multiselect(
    "사용할 OCR 엔진을 선택하세요",
    available_engines,
    default=["EasyOCR"],
)

# Sidebar: review-criteria checklists. Each label pairs the Korean term with
# its English gloss; the selections are only echoed back further down the page.
# The Korean text is stored as real UTF-8 (it had been mis-decoded into Thai
# code points). NOTE(review): wording was reconstructed from the garbled
# literals — confirm against the product copy.
st.sidebar.markdown("### 🧠 검수 항목 선택")

checklist_korean = [
    "맞춤법 (Orthography)",
    "띄어쓰기 (Spacing Rules)",
    "문법 오류 (Grammatical Errors)",
    "표준어 규정 위반 (Standard Language Regulation)",
    "외래어 표기법 위반 (Loanword Orthography)",
    "문장 구조 오류 (Sentence Structure Error)",
    "어휘 적절성 (Lexical Appropriateness)",
    "중복어/군더더기 제거 (Redundancy Elimination)",
    "오탈자 (Typographical Errors)",
    "문장 부호 사용 (Punctuation Usage)",
    "어법 오류 (Usage Error)",
    "논리적 일관성 (Logical Coherence)",
    "형식적 오류 (Formatting Consistency)",
]

checklist_foreign = [
    "Spelling (철자 오류)",
    "Grammar (문법 오류)",
    "Capitalization (대소문자 오류)",
    "Subject-Verb Agreement (주어-동사 일치)",
    "Article Usage (관사적절성)",
    "Tense Consistency (시제 일관성)",
    "Word Choice (어휘 선택)",
    "Redundancy (중복 표현)",
    "Tone (문체 및 일관성)",
]

selected_korean_checks = st.sidebar.multiselect("✅ 한글 검수 항목 선택", checklist_korean)
selected_foreign_checks = st.sidebar.multiselect("✅ 외국어 검수 항목 선택", checklist_foreign)

@st.cache_resource
def load_easyocr():
    """Create the EasyOCR reader for Korean and English text.

    Decorated with ``st.cache_resource`` so the reader object is built once
    and handed back on later calls instead of being constructed again.

    Returns:
        The ``easyocr.Reader`` configured for the ``ko`` and ``en`` languages.
    """
    languages = ["ko", "en"]
    return easyocr.Reader(languages)

# Upload -> OCR -> log display.
#
# User-facing strings are stored as real UTF-8 text. The success/failure
# markers written into ``log_messages`` and matched in the log section at the
# bottom must stay identical, which is why both ends live in this one block.
# NOTE(review): several emoji (robot, memo, pin, globe, clipboard, cross mark)
# were reconstructed from mis-encoded literals and are best guesses — confirm.
log_messages = []
progress = st.empty()
uploaded_file = st.file_uploader("PNG 이미지 업로드", type=["png"])


@st.cache_resource
def _load_paddleocr():
    """Build the PaddleOCR model once so Streamlit reruns reuse it."""
    return PaddleOCR(use_angle_cls=True, lang="korean")


def _run_easyocr(image):
    """Return the text EasyOCR reads from *image*, one detection per line."""
    reader = load_easyocr()
    return "\n".join(reader.readtext(np.array(image), detail=0))


def _run_tesseract(image):
    """Return the text Tesseract reads from *image* (Korean + English).

    Raises:
        EnvironmentError: if no ``tesseract`` executable is found on PATH.
    """
    if shutil.which("tesseract") is None:
        raise EnvironmentError("Tesseract is not installed or not in PATH.")
    return pytesseract.image_to_string(image, lang="kor+eng")


def _run_paddleocr(image):
    """Return the text PaddleOCR reads from *image*, one detection per line."""
    result = _load_paddleocr().ocr(np.array(image), cls=True)
    # The per-image entry can be None (or the list empty) when nothing is
    # detected; treat that as "no text" instead of failing on iteration.
    detections = result[0] if result and result[0] else []
    return "\n".join(entry[1][0] for entry in detections)


# (engine name, progress percentage, runner) in the order engines execute.
_OCR_STEPS = [
    ("EasyOCR", 30, _run_easyocr),
    ("Tesseract", 60, _run_tesseract),
]
if paddle_available:
    _OCR_STEPS.append(("PaddleOCR", 90, _run_paddleocr))


if uploaded_file is not None:
    with st.spinner("🤖 이미지 처리 중..."):
        try:
            progress.progress(10, text="이미지 열기")
            image = Image.open(uploaded_file)
            st.image(image, caption="업로드된 이미지", use_column_width=True)
            log_messages.append("✅ 이미지 업로드 성공")

            text_sections = []
            for engine, percent, run_engine in _OCR_STEPS:
                if engine not in ocr_engines:
                    continue
                # Each engine is isolated: a failure is logged and the
                # remaining engines still run.
                try:
                    progress.progress(percent, text=f"{engine} 인식 중...")
                    text_sections.append(run_engine(image))
                    log_messages.append(f"✅ {engine} 처리 완료")
                except Exception as e:
                    log_messages.append(f"❌ {engine} 오류: {e}")

            # Every engine's output is followed by a newline, in run order.
            text_output = "".join(f"{section}\n" for section in text_sections)

            st.markdown("### 📝 추출된 텍스트")
            st.text_area("OCR 결과", text_output, height=300)
            progress.progress(100, text="✅ OCR 완료")

            if selected_korean_checks or selected_foreign_checks:
                st.markdown("### 🛠️ 선택된 검수 항목")
                if selected_korean_checks:
                    st.markdown("**📌 한글 검수 항목:**")
                    for check in selected_korean_checks:
                        st.write(f"• {check}")
                if selected_foreign_checks:
                    st.markdown("**🌐 외국어 검수 항목:**")
                    for check in selected_foreign_checks:
                        st.write(f"• {check}")
                # NOTE(review): assumed to be logged only when at least one
                # check is selected — confirm the intended nesting.
                log_messages.append("✅ 검수 항목 적용 완료")
        except Exception as e:
            # Top-level boundary: anything unexpected ends up in the log panel.
            log_messages.append(f"❌ 전체 오류 발생: {e}")


# System log: colour each entry by the marker it carries.
st.markdown("---")
st.markdown("### 📋 시스템 로그")
for msg in log_messages:
    if "✅" in msg:
        st.success(msg)
    elif "❌" in msg:
        st.error(msg)
    else:
        st.info(msg)