File size: 5,538 Bytes
d8335a8
d27fca0
5b11c67
 
77de371
5b11c67
209475f
d27fca0
 
 
 
 
 
0dff0c6
d27fca0
 
 
 
 
5b11c67
d27fca0
 
 
 
5b11c67
 
d27fca0
5b11c67
0dff0c6
 
d27fca0
5b11c67
 
 
 
 
 
 
 
 
 
 
fa7c6d4
5b11c67
 
 
 
 
0dff0c6
fa7c6d4
 
0dff0c6
d27fca0
4cae06e
 
 
 
d27fca0
 
 
 
 
5b11c67
fccf22c
 
d27fca0
fccf22c
 
 
 
 
4cae06e
fccf22c
4cae06e
 
 
 
 
 
 
 
1c026be
fccf22c
 
4cae06e
d27fca0
 
 
fccf22c
 
 
 
 
f38f577
d27fca0
fccf22c
4cae06e
fccf22c
 
 
 
 
 
 
f38f577
fccf22c
 
 
f38f577
fccf22c
 
 
 
 
 
 
 
 
 
 
 
 
1c026be
d27fca0
76a6008
d27fca0
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
import os
import shutil

import easyocr
import numpy as np
import pytesseract
import streamlit as st
from PIL import Image

# PaddleOCR is an optional dependency. Assume it is usable, then flip the flag
# off if the import fails so later code can skip the engine entirely.
paddle_available = True
try:
    from paddleocr import PaddleOCR
except ImportError:
    paddle_available = False

# Configure the page (title + wide layout), then render the app heading.
st.set_page_config(
    page_title="KAIRO.ai - Hugging Face Demo",
    layout="wide",
)
st.title("๐Ÿง  KAIRO.ai - AI ๊ธฐ๋ฐ˜ ์–ธ์–ด ๊ฒ€์ˆ˜ ํ”Œ๋žซํผ (HF Demo)")

# Sidebar: let the user pick which OCR engines to run.
# PaddleOCR is only offered when its import succeeded.
st.sidebar.header("๐Ÿ“‚ OCR ์—”์ง„ ์„ ํƒ")
available_engines = ["EasyOCR", "Tesseract"] + (
    ["PaddleOCR"] if paddle_available else []
)

ocr_engines = st.sidebar.multiselect(
    label="์‚ฌ์šฉํ•  OCR ์—”์ง„์„ ์„ ํƒํ•˜์„ธ์š”",
    options=available_engines,
    default=["EasyOCR"],
)

# Sidebar: proofreading checklist items.
st.sidebar.markdown("### ๐Ÿง  ๊ฒ€์ˆ˜ ํ•ญ๋ชฉ ์„ ํƒ")

# Checks offered for Korean text (one label per line; order is the display order).
checklist_korean = [
    "๋งž์ถค๋ฒ• (Orthography)",
    "๋„์–ด์“ฐ๊ธฐ (Spacing Rules)",
    "๋ฌธ๋ฒ• ์˜ค๋ฅ˜ (Grammatical Errors)",
    "ํ‘œ์ค€์–ด ๊ทœ์ • ์œ„๋ฐ˜ (Standard Language Regulation)",
    "์™ธ๋ž˜์–ด ํ‘œ๊ธฐ๋ฒ• ์œ„๋ฐ˜ (Loanword Orthography)",
    "๋ฌธ์žฅ ๊ตฌ์กฐ ์˜ค๋ฅ˜ (Sentence Structure Error)",
    "์–ดํœ˜ ์ ์ ˆ์„ฑ (Lexical Appropriateness)",
    "์ค‘๋ณต์–ด/๊ตฐ๋”๋”๊ธฐ ์ œ๊ฑฐ (Redundancy Elimination)",
    "์˜คํƒˆ์ž (Typographical Errors)",
    "๋ฌธ์žฅ ๋ถ€ํ˜ธ ์‚ฌ์šฉ (Punctuation Usage)",
    "์–ด๋ฒ• ์˜ค๋ฅ˜ (Usage Error)",
    "๋…ผ๋ฆฌ์  ์ผ๊ด€์„ฑ (Logical Coherence)",
    "ํ˜•์‹์  ์˜ค๋ฅ˜ (Formatting Consistency)",
]

# Checks offered for foreign-language text.
checklist_foreign = [
    "Spelling (์ฒ ์ž ์˜ค๋ฅ˜)",
    "Grammar (๋ฌธ๋ฒ• ์˜ค๋ฅ˜)",
    "Capitalization (๋Œ€์†Œ๋ฌธ์ž ์˜ค๋ฅ˜)",
    "Subject-Verb Agreement (์ฃผ์–ด-๋™์‚ฌ ์ผ์น˜)",
    "Article Usage (๊ด€์‚ฌ์˜ ์ ์ ˆ์„ฑ)",
    "Tense Consistency (์‹œ์ œ ์ผ๊ด€์„ฑ)",
    "Word Choice (์–ดํœ˜ ์„ ํƒ)",
    "Redundancy (์ค‘๋ณต ํ‘œํ˜„)",
    "Tone (๋ฌธ์ฒด ๋ฐ ์ผ๊ด€์„ฑ)",
]

# Both multiselects start empty; the chosen labels are echoed after OCR runs.
selected_korean_checks = st.sidebar.multiselect(
    "โœ… ํ•œ๊ธ€ ๊ฒ€์ˆ˜ ํ•ญ๋ชฉ ์„ ํƒ",
    checklist_korean,
)
selected_foreign_checks = st.sidebar.multiselect(
    "โœ… ์™ธ๊ตญ์–ด ๊ฒ€์ˆ˜ ํ•ญ๋ชฉ ์„ ํƒ",
    checklist_foreign,
)

# The reader is wrapped in st.cache_resource so it is built once and reused.
@st.cache_resource
def load_easyocr():
    """Return an EasyOCR reader configured for Korean and English."""
    languages = ["ko", "en"]
    return easyocr.Reader(languages)

# Cache PaddleOCR for speed, the same way the EasyOCR reader is cached.
@st.cache_resource
def _load_paddleocr():
    """Build the PaddleOCR pipeline once and reuse it across Streamlit reruns.

    Only call this when ``paddle_available`` is true; otherwise the
    ``PaddleOCR`` name was never imported and this raises ``NameError``.
    """
    return PaddleOCR(use_angle_cls=True, lang='korean')


# Main Section
# Flow: upload a PNG -> run each OCR engine selected in the sidebar -> show the
# concatenated text -> echo the selected checklist items. Every step appends a
# status line to ``log_messages``, which is rendered at the bottom of the page.
log_messages = []
progress = st.empty()
uploaded_file = st.file_uploader("PNG ์ด๋ฏธ์ง€ ์—…๋กœ๋“œ", type=["png"])

if uploaded_file is not None:
    with st.spinner("๐Ÿ“ค ์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ ์ค‘..."):
        # Outer try: UI boundary. Any unexpected failure is logged instead of
        # crashing the page.
        try:
            progress.progress(10, text="์ด๋ฏธ์ง€ ์—ด๊ธฐ")
            image = Image.open(uploaded_file)
            # NOTE(review): newer Streamlit releases deprecate use_column_width
            # in favour of use_container_width — confirm the pinned version
            # before switching.
            st.image(image, caption="์—…๋กœ๋“œ๋œ ์ด๋ฏธ์ง€", use_column_width=True)
            log_messages.append("โœ… ์ด๋ฏธ์ง€ ์—…๋กœ๋“œ ์„ฑ๊ณต")

            # Output of every selected engine is appended here, in run order.
            text_output = ""

            # Each engine has its own try/except so one failing engine does
            # not prevent the others from running.
            if "EasyOCR" in ocr_engines:
                try:
                    progress.progress(30, text="EasyOCR ์ธ์‹ ์ค‘...")
                    reader = load_easyocr()
                    # detail=0 makes EasyOCR return plain strings only.
                    easyocr_lines = reader.readtext(np.array(image), detail=0)
                    text_output += "\n".join(easyocr_lines) + "\n"
                    log_messages.append("โœ… EasyOCR ์ฒ˜๋ฆฌ ์™„๋ฃŒ")
                except Exception as e:
                    log_messages.append(f"โŒ EasyOCR ์˜ค๋ฅ˜: {e}")

            if "Tesseract" in ocr_engines:
                try:
                    progress.progress(60, text="Tesseract ์ธ์‹ ์ค‘...")
                    # Fail early with a clear message when the tesseract
                    # binary is missing from PATH.
                    tess_path = shutil.which("tesseract")
                    if tess_path is None:
                        raise EnvironmentError("Tesseract is not installed or not in PATH.")
                    tess_result = pytesseract.image_to_string(image, lang="kor+eng")
                    text_output += tess_result + "\n"
                    log_messages.append("โœ… Tesseract ์ฒ˜๋ฆฌ ์™„๋ฃŒ")
                except Exception as e:
                    log_messages.append(f"โŒ Tesseract ์˜ค๋ฅ˜: {e}")

            if "PaddleOCR" in ocr_engines and paddle_available:
                try:
                    progress.progress(90, text="PaddleOCR ์ธ์‹ ์ค‘...")
                    paddle_ocr = _load_paddleocr()
                    paddle_result = paddle_ocr.ocr(np.array(image), cls=True)
                    # The result holds one entry per input image. PaddleOCR 2.x
                    # uses None for that entry when no text is detected, so
                    # treat it as "no lines" rather than iterating over None.
                    detected_lines = paddle_result[0] if paddle_result else None
                    # Each detected line is (box, (text, confidence)).
                    paddle_text = "\n".join(
                        line[1][0] for line in detected_lines or []
                    )
                    text_output += paddle_text + "\n"
                    log_messages.append("โœ… PaddleOCR ์ฒ˜๋ฆฌ ์™„๋ฃŒ")
                except Exception as e:
                    log_messages.append(f"โŒ PaddleOCR ์˜ค๋ฅ˜: {e}")

            st.markdown("### ๐Ÿ” ์ถ”์ถœ๋œ ํ…์ŠคํŠธ")
            st.text_area("OCR ๊ฒฐ๊ณผ", text_output, height=300)
            progress.progress(100, text="โœ… OCR ์™„๋ฃŒ")

            # The checklist selections are only echoed back here; no actual
            # proofreading is performed on the extracted text in this block.
            if selected_korean_checks or selected_foreign_checks:
                st.markdown("### ๐Ÿ› ๏ธ ์„ ํƒ๋œ ๊ฒ€์ˆ˜ ํ•ญ๋ชฉ")
                if selected_korean_checks:
                    st.markdown("**๐Ÿ“Œ ํ•œ๊ธ€ ๊ฒ€์ˆ˜ ํ•ญ๋ชฉ:**")
                    for check in selected_korean_checks:
                        st.write(f"โ€ข {check}")
                if selected_foreign_checks:
                    st.markdown("**๐ŸŒ ์™ธ๊ตญ์–ด ๊ฒ€์ˆ˜ ํ•ญ๋ชฉ:**")
                    for check in selected_foreign_checks:
                        st.write(f"โ€ข {check}")
                log_messages.append("โœ… ๊ฒ€์ˆ˜ ํ•ญ๋ชฉ ์ ์šฉ ์™„๋ฃŒ")
        except Exception as e:
            log_messages.append(f"โŒ ์ „์ฒด ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}")

# Footer log: replay every collected status message under a divider, choosing
# the Streamlit callout from the marker embedded in the message text.
st.markdown("---")
st.markdown("### ๐Ÿ“‹ ์‹œ์Šคํ…œ ๋กœ๊ทธ")
for entry in log_messages:
    if "โœ…" in entry:
        # Success marker present.
        render = st.success
    elif "โŒ" in entry:
        # Failure marker present.
        render = st.error
    else:
        # No marker: show as neutral information.
        render = st.info
    render(entry)