Upload 3 files
Browse files- README.md +1 -1
- app_final_style.py +135 -0
- requirements.txt +2 -3
README.md
CHANGED
|
@@ -12,4 +12,4 @@ pinned: false
|
|
| 12 |
|
| 13 |
# KAIRO.ai
|
| 14 |
|
| 15 |
-
AI 기반 PNG 이미지
|
|
|
|
| 12 |
|
| 13 |
# KAIRO.ai
|
| 14 |
|
| 15 |
+
AI 기반 PNG 이미지 교정 (OCR + GPT-4 Turbo + PPT 생성) 시스템입니다.
|
app_final_style.py
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import streamlit as st
|
| 3 |
+
import os
|
| 4 |
+
import zipfile
|
| 5 |
+
import io
|
| 6 |
+
from PIL import Image
|
| 7 |
+
import easyocr
|
| 8 |
+
from paddleocr import PaddleOCR
|
| 9 |
+
import numpy as np
|
| 10 |
+
import openai
|
| 11 |
+
from pptx import Presentation
|
| 12 |
+
from pptx.util import Inches, Pt
|
| 13 |
+
from pptx.dml.color import RGBColor
|
| 14 |
+
|
| 15 |
+
st.set_page_config(page_title="KAIRO.ai", layout="wide")

# Header: centred product title with the version / last-update line below it.
# NOTE(review): the Korean UI strings in this section were restored from
# mis-decoded UTF-8 (the decoding had also split two literals across lines,
# which made the script unparsable) -- confirm the exact wording.
st.markdown(
    "<h1 style='text-align:center; font-size:4em;'>🧠 KAIRO.ai</h1>"
    "<p style='text-align:center; color:gray;'>v0.9 | 마지막 업데이트: 2025-06-02</p>",
    unsafe_allow_html=True
)

# User inputs: OCR engines to run, proofreading criteria, files and API key.
ocr_engines = st.multiselect("OCR 엔진 선택 (중복 가능)", ["EasyOCR", "PaddleOCR"], default=["EasyOCR"])
checklist = st.multiselect("검수 항목 선택", ["오탈자", "띄어쓰기", "문법", "어휘"], default=["오탈자", "띄어쓰기"])
uploaded_files = st.file_uploader("PNG 또는 ZIP 파일 업로드", type=["png", "zip"], accept_multiple_files=True)
api_key = st.text_input("OpenAI API Key", type="password")
|
| 28 |
+
|
| 29 |
+
def extract_png_from_zip(zip_file):
    """Return every PNG image stored in a ZIP archive.

    Args:
        zip_file: A path or binary file-like object accepted by
            ``zipfile.ZipFile``.

    Returns:
        A list of ``(entry_name, image)`` tuples in archive order, where
        ``image`` is an in-memory copy that stays usable after the archive
        has been closed.

    Raises:
        zipfile.BadZipFile: If ``zip_file`` is not a valid ZIP archive.
    """
    png_images = []
    with zipfile.ZipFile(zip_file) as archive:
        for entry in archive.infolist():
            name = entry.filename
            if entry.is_dir() or not name.lower().endswith(".png"):
                continue
            # Archives created on macOS carry "__MACOSX/._name.png" resource
            # forks: they end in .png but are not images, so opening them
            # would abort the whole upload.
            if name.startswith("__MACOSX/") or name.rsplit("/", 1)[-1].startswith("._"):
                continue
            with archive.open(entry) as file, Image.open(file) as image:
                # copy() detaches the pixels from the archive member, which
                # is closed as soon as this ``with`` block exits.
                png_images.append((name, image.copy()))
    return png_images
|
| 38 |
+
|
| 39 |
+
def _align_words(original, corrected):
    """Pair the whitespace-separated words of both texts by position.

    The shorter side is padded with ``None`` so that trailing words of the
    longer text are kept instead of being silently dropped.
    """
    # Local import keeps this block self-contained.
    from itertools import zip_longest

    return list(zip_longest(original.split(), corrected.split()))


def highlight_diff(original, corrected):
    """Mark the words that differ between two texts.

    Words are compared position by position. A differing word is wrapped in
    ``[빨]...[/빨]`` (red) on the original side and ``[파]...[/파]`` (blue)
    on the corrected side. Words that exist on one side only are marked as
    changed on that side.

    Args:
        original: Text as recognised by OCR.
        corrected: Proofread text.

    Returns:
        A ``(marked_original, marked_corrected)`` tuple of strings with the
        words joined by single spaces.
    """
    # NOTE(review): marker text restored from mis-decoded UTF-8 -- confirm.
    highlighted_original = []
    highlighted_corrected = []

    for o, c in _align_words(original, corrected):
        changed = o != c
        if o is not None:
            highlighted_original.append(f"[빨]{o}[/빨]" if changed else o)
        if c is not None:
            highlighted_corrected.append(f"[파]{c}[/파]" if changed else c)
    return " ".join(highlighted_original), " ".join(highlighted_corrected)


def _write_words(cell, flagged_words, color):
    """Fill a table cell with one run per word, colouring the changed words.

    Args:
        cell: Table cell whose first paragraph receives the runs.
        flagged_words: Iterable of ``(word, changed)`` pairs.
        color: ``RGBColor`` applied to the words flagged as changed.
    """
    paragraph = cell.text_frame.paragraphs[0]
    for index, (word, changed) in enumerate(flagged_words):
        run = paragraph.add_run()
        # Separate runs are not spaced automatically, so every word after
        # the first carries its own leading space.
        run.text = word if index == 0 else " " + word
        if changed:
            run.font.color.rgb = color


def add_slide(prs, title_text, image, original, corrected):
    """Append a slide showing the image next to an original/corrected table.

    Args:
        prs: Presentation the slide is added to.
        title_text: Text placed in the title box (the caller passes the
            image file name).
        image: PIL image shown on the left side of the slide.
        original: Text recognised by OCR.
        corrected: Proofread text.

    Only the words that differ are coloured: red in the original column and
    blue in the corrected column.
    """
    # Layout index 6 -- presumably the blank layout of the default template;
    # confirm if a custom template is ever used.
    slide = prs.slides.add_slide(prs.slide_layouts[6])
    left = Inches(0.5)
    top = Inches(0.5)
    width = Inches(2.5)

    # Insert the image from an in-memory buffer: a fixed temp-file path
    # would be shared (and overwritten) by concurrent sessions.
    img_buffer = io.BytesIO()
    image.save(img_buffer, format="PNG")
    img_buffer.seek(0)
    slide.shapes.add_picture(img_buffer, left, top, width=width)

    # Title
    title_box = slide.shapes.add_textbox(Inches(3.2), Inches(0.5), Inches(6), Inches(0.5))
    title_frame = title_box.text_frame
    title_frame.text = title_text

    # Table: header row plus one row holding both versions of the text.
    rows, cols = 2, 2
    table = slide.shapes.add_table(rows, cols, Inches(3.2), Inches(1.2), Inches(6), Inches(2)).table
    table.columns[0].width = Inches(3)
    table.columns[1].width = Inches(3)
    table.cell(0, 0).text = "인식된 내용"
    table.cell(0, 1).text = "교정된 내용"

    # Word-level colour highlighting of the differences.
    pairs = _align_words(original, corrected)
    _write_words(
        table.cell(1, 0),
        [(o, o != c) for o, c in pairs if o is not None],
        RGBColor(255, 0, 0),
    )
    _write_words(
        table.cell(1, 1),
        [(c, o != c) for o, c in pairs if c is not None],
        RGBColor(0, 176, 240),
    )
|
| 93 |
+
|
| 94 |
+
# Main pipeline: OCR every uploaded image, ask the model to proofread the
# recognised text, and offer the result as a PowerPoint download.
# NOTE(review): Korean strings below were restored from mis-decoded UTF-8.
if uploaded_files and api_key:
    # Collect every PNG, whether uploaded directly or inside a ZIP archive.
    all_images = []
    for file in uploaded_files:
        lowered_name = file.name.lower()
        if lowered_name.endswith(".png"):
            all_images.append((file.name, Image.open(file)))
        elif lowered_name.endswith(".zip"):
            all_images.extend(extract_png_from_zip(file))

    st.success(f"{len(all_images)}개 이미지 분석 시작")

    # Build each selected OCR engine once instead of once per image.
    easy_reader = easyocr.Reader(['ko', 'en']) if "EasyOCR" in ocr_engines else None
    paddle = PaddleOCR(use_angle_cls=True, lang='korean') if "PaddleOCR" in ocr_engines else None

    # openai>=1.0 removed openai.ChatCompletion in favour of a client
    # object; keep the legacy call as a fallback for older installs.
    client = openai.OpenAI(api_key=api_key) if hasattr(openai, "OpenAI") else None
    if client is None:
        openai.api_key = api_key

    prs = Presentation()

    for fname, image in all_images:
        # Normalise palette / alpha PNGs to a 3-channel array for both engines.
        pixels = np.array(image.convert("RGB"))

        text_parts = []
        if easy_reader is not None:
            text_parts.append("\n".join(easy_reader.readtext(pixels, detail=0)))
        if paddle is not None:
            result = paddle.ocr(pixels, cls=True)
            # Guard against an empty or None first page (no text detected).
            detected = result[0] if result and result[0] else []
            text_parts.append("\n".join(line[1][0] for line in detected))
        full_text = "\n".join(text_parts)

        prompt = f"다음 문장을 교정해줘. 기준: {', '.join(checklist)}\n\n{full_text}"
        messages = [{"role": "user", "content": prompt}]
        if client is not None:
            response = client.chat.completions.create(model="gpt-4-turbo", messages=messages)
        else:
            response = openai.ChatCompletion.create(model="gpt-4-turbo", messages=messages)
        # The message content can be None; add_slide expects a string.
        corrected = response.choices[0].message.content or ""
        add_slide(prs, fname, image, full_text, corrected)

    # Serialise the deck in memory: no dependency on a writable fixed
    # directory and no file handle left open.
    pptx_buffer = io.BytesIO()
    prs.save(pptx_buffer)
    st.download_button("📥 교정 결과 다운로드", data=pptx_buffer.getvalue(), file_name="KAIRO_FINAL_STYLE_v1.pptx")
|
requirements.txt
CHANGED
|
@@ -1,7 +1,6 @@
|
|
| 1 |
|
| 2 |
streamlit
|
|
|
|
| 3 |
Pillow
|
| 4 |
-
openai
|
| 5 |
python-pptx
|
| 6 |
-
|
| 7 |
-
paddleocr
|
|
|
|
| 1 |
|
| 2 |
streamlit
|
| 3 |
+
pytesseract
|
| 4 |
Pillow
|
|
|
|
| 5 |
python-pptx
|
| 6 |
+
openai
|
|
|