Hyounggoo committed on
Commit
d27fca0
·
verified ·
1 Parent(s): 4cae06e

Upload 3 files

Browse files
Files changed (3) hide show
  1. README.md +17 -14
  2. app.py +38 -21
  3. requirements.txt +4 -4
README.md CHANGED
@@ -1,15 +1,18 @@
1
- ---
2
- title: "KAIRO.ai"
3
- emoji: "🧠"
4
- colorFrom: indigo
5
- colorTo: gray
6
- sdk: streamlit
7
- sdk_version: "1.32.0"
8
- app_file: app.py
9
- pinned: false
10
- ---
11
 
12
- # KAIRO.ai - 다중 OCR 정확도 비교 및 병합
13
- - 여러 OCR 엔진 결과 비교
14
- - 유사도 점수 표시
15
- - 최적 후보 자동 선택
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # KAIRO.ai - Hugging Face Demo
 
 
 
 
 
 
 
 
 
2
 
3
+ 🧠 Korean/English Text Proofreading OCR App
4
+
5
+ ## How to Use
6
+ 1. Upload a PNG image.
7
+ 2. Choose OCR engines (EasyOCR recommended for HF).
8
+ 3. Select language proofreading checklists.
9
+ 4. View OCR result and logs.
10
+
11
+ ## Requirements
12
+ - Python 3.8+
13
+ - streamlit
14
+ - easyocr
15
+ - pytesseract
16
+ - pillow
17
+ - numpy
18
+ - (Optional) paddleocr + paddlepaddle
app.py CHANGED
@@ -1,20 +1,34 @@
1
  import streamlit as st
 
2
  import pytesseract
3
  import easyocr
4
- from paddleocr import PaddleOCR
5
  from PIL import Image
6
  import numpy as np
7
 
8
- st.set_page_config(page_title="KAIRO.ai", layout="wide")
9
- st.title("🧠 KAIRO.ai - AI 기반 μ–Έμ–΄ κ²€μˆ˜ ν”Œλž«νΌ")
 
 
 
 
10
 
 
 
 
 
 
11
  st.sidebar.header("πŸ“‚ OCR μ—”μ§„ 선택")
 
 
 
 
12
  ocr_engines = st.sidebar.multiselect(
13
  "μ‚¬μš©ν•  OCR 엔진을 μ„ νƒν•˜μ„Έμš”",
14
- ["EasyOCR", "Tesseract", "PaddleOCR"],
15
  default=["EasyOCR"]
16
  )
17
 
 
18
  st.sidebar.markdown("### 🧠 κ²€μˆ˜ ν•­λͺ© 선택")
19
 
20
  checklist_korean = [
@@ -36,19 +50,20 @@ checklist_foreign = [
36
  selected_korean_checks = st.sidebar.multiselect("βœ… ν•œκΈ€ κ²€μˆ˜ ν•­λͺ© 선택", checklist_korean)
37
  selected_foreign_checks = st.sidebar.multiselect("βœ… μ™Έκ΅­μ–΄ κ²€μˆ˜ ν•­λͺ© 선택", checklist_foreign)
38
 
39
- log_messages = []
40
- progress_text = "처리 μ€‘μž…λ‹ˆλ‹€..."
41
- progress = st.empty()
42
- uploaded_file = st.file_uploader("PNG 이미지 μ—…λ‘œλ“œ", type=["png"])
43
-
44
  @st.cache_resource
45
  def load_easyocr():
46
  return easyocr.Reader(["ko", "en"])
47
 
 
 
 
 
 
48
  if uploaded_file is not None:
49
  with st.spinner("πŸ“€ 이미지 처리 쀑..."):
50
  try:
51
- progress.progress(10, text=progress_text)
52
  image = Image.open(uploaded_file)
53
  st.image(image, caption="μ—…λ‘œλ“œλœ 이미지", use_column_width=True)
54
  log_messages.append("βœ… 이미지 μ—…λ‘œλ“œ 성곡")
@@ -68,13 +83,16 @@ if uploaded_file is not None:
68
  if "Tesseract" in ocr_engines:
69
  try:
70
  progress.progress(60, text="Tesseract 인식 쀑...")
 
 
 
71
  tess_result = pytesseract.image_to_string(image, lang="kor+eng")
72
  text_output += tess_result + "\n"
73
  log_messages.append("βœ… Tesseract 처리 μ™„λ£Œ")
74
  except Exception as e:
75
  log_messages.append(f"❌ Tesseract 였λ₯˜: {str(e)}")
76
 
77
- if "PaddleOCR" in ocr_engines:
78
  try:
79
  progress.progress(90, text="PaddleOCR 인식 쀑...")
80
  paddle_ocr = PaddleOCR(use_angle_cls=True, lang='korean')
@@ -103,14 +121,13 @@ if uploaded_file is not None:
103
  except Exception as e:
104
  log_messages.append(f"❌ 전체 였λ₯˜ λ°œμƒ: {str(e)}")
105
 
106
- # Footer Log Frame
107
  st.markdown("---")
108
- with st.container():
109
- st.markdown("### πŸ“‹ μ‹œμŠ€ν…œ 둜그")
110
- for msg in log_messages:
111
- if "βœ…" in msg:
112
- st.success(msg)
113
- elif "❌" in msg:
114
- st.error(msg)
115
- else:
116
- st.info(msg)
 
1
  import streamlit as st
2
+ import os
3
  import pytesseract
4
  import easyocr
 
5
  from PIL import Image
6
  import numpy as np
7
 
8
+ # Optional: Try PaddleOCR if installed
9
+ try:
10
+ from paddleocr import PaddleOCR
11
+ paddle_available = True
12
+ except ImportError:
13
+ paddle_available = False
14
 
15
+ # Set page configuration
16
+ st.set_page_config(page_title="KAIRO.ai - Hugging Face Demo", layout="wide")
17
+ st.title("🧠 KAIRO.ai - AI 기반 μ–Έμ–΄ κ²€μˆ˜ ν”Œλž«νΌ (HF Demo)")
18
+
19
+ # Sidebar: OCR Engine
20
  st.sidebar.header("πŸ“‚ OCR μ—”μ§„ 선택")
21
+ available_engines = ["EasyOCR", "Tesseract"]
22
+ if paddle_available:
23
+ available_engines.append("PaddleOCR")
24
+
25
  ocr_engines = st.sidebar.multiselect(
26
  "μ‚¬μš©ν•  OCR 엔진을 μ„ νƒν•˜μ„Έμš”",
27
+ available_engines,
28
  default=["EasyOCR"]
29
  )
30
 
31
+ # Sidebar: κ²€μˆ˜ ν•­λͺ©
32
  st.sidebar.markdown("### 🧠 κ²€μˆ˜ ν•­λͺ© 선택")
33
 
34
  checklist_korean = [
 
50
  selected_korean_checks = st.sidebar.multiselect("βœ… ν•œκΈ€ κ²€μˆ˜ ν•­λͺ© 선택", checklist_korean)
51
  selected_foreign_checks = st.sidebar.multiselect("βœ… μ™Έκ΅­μ–΄ κ²€μˆ˜ ν•­λͺ© 선택", checklist_foreign)
52
 
53
+ # Cache EasyOCR for speed
 
 
 
 
54
  @st.cache_resource
55
  def load_easyocr():
56
  return easyocr.Reader(["ko", "en"])
57
 
58
+ # Main Section
59
+ log_messages = []
60
+ progress = st.empty()
61
+ uploaded_file = st.file_uploader("PNG 이미지 μ—…λ‘œλ“œ", type=["png"])
62
+
63
  if uploaded_file is not None:
64
  with st.spinner("πŸ“€ 이미지 처리 쀑..."):
65
  try:
66
+ progress.progress(10, text="이미지 μ—΄κΈ°")
67
  image = Image.open(uploaded_file)
68
  st.image(image, caption="μ—…λ‘œλ“œλœ 이미지", use_column_width=True)
69
  log_messages.append("βœ… 이미지 μ—…λ‘œλ“œ 성곡")
 
83
  if "Tesseract" in ocr_engines:
84
  try:
85
  progress.progress(60, text="Tesseract 인식 쀑...")
86
+ tess_path = shutil.which("tesseract")
87
+ if tess_path is None:
88
+ raise EnvironmentError("Tesseract is not installed or not in PATH.")
89
  tess_result = pytesseract.image_to_string(image, lang="kor+eng")
90
  text_output += tess_result + "\n"
91
  log_messages.append("βœ… Tesseract 처리 μ™„λ£Œ")
92
  except Exception as e:
93
  log_messages.append(f"❌ Tesseract 였λ₯˜: {str(e)}")
94
 
95
+ if "PaddleOCR" in ocr_engines and paddle_available:
96
  try:
97
  progress.progress(90, text="PaddleOCR 인식 쀑...")
98
  paddle_ocr = PaddleOCR(use_angle_cls=True, lang='korean')
 
121
  except Exception as e:
122
  log_messages.append(f"❌ 전체 였λ₯˜ λ°œμƒ: {str(e)}")
123
 
124
+ # Footer Log
125
  st.markdown("---")
126
+ st.markdown("### πŸ“‹ μ‹œμŠ€ν…œ 둜그")
127
+ for msg in log_messages:
128
+ if "βœ…" in msg:
129
+ st.success(msg)
130
+ elif "❌" in msg:
131
+ st.error(msg)
132
+ else:
133
+ st.info(msg)
 
requirements.txt CHANGED
@@ -1,7 +1,7 @@
1
  streamlit
2
- pytesseract
3
  easyocr
4
- paddleocr
5
- python-pptx
6
- Pillow
7
  numpy
 
 
 
1
  streamlit
 
2
  easyocr
3
+ pytesseract
4
+ pillow
 
5
  numpy
6
+ # Optional:
7
+ # paddleocr