feat: use easyocr and enhance llm prompts
- app.py  +41 -8
- requirements.txt  +3 -0
app.py
CHANGED
@@ -2,23 +2,25 @@ import json
 import re
 from typing import Any, Dict, List, Optional, Sequence
 
+import easyocr
 import gradio as gr
+import numpy as np
 import torch
 from PIL import Image, ImageDraw
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 
 # --- OCR pipeline ---------------------------------------------------------
 # Use a high-capacity OCR model for better accuracy on prescription labels.
-
+OCR_LANGS = ["ko", "en"]
 LLM_MODEL_ID = "Qwen/Qwen2.5-0.5B-Instruct"
 
 
 def _load_ocr():
-
-    return
+    use_gpu = torch.cuda.is_available()
+    return easyocr.Reader(OCR_LANGS, gpu=use_gpu)
 
 
-
+ocr_reader = _load_ocr()
 
 
 def _load_llm():
@@ -153,7 +155,23 @@ def parse_fields(raw: str) -> Dict[str, Any]:
 
 
 def ocr_and_parse(image: Image.Image) -> Dict[str, Any]:
-
+    np_img = np.array(image.convert("RGB"))
+    results = ocr_reader.readtext(np_img, detail=1, paragraph=False)
+
+    segments: List[Dict[str, Any]] = []
+    lines: List[str] = []
+    for bbox, text, confidence in results:
+        cleaned = text.strip()
+        if not cleaned:
+            continue
+        lines.append(cleaned)
+        segments.append({
+            "text": cleaned,
+            "confidence": float(confidence),
+            "bbox": bbox,
+        })
+
+    raw_text = "\n".join(lines)
     fields = parse_fields(raw_text)
 
     warnings: List[str] = []
@@ -162,7 +180,12 @@ def ocr_and_parse(image: Image.Image) -> Dict[str, Any]:
     if not fields["times_per_day"]:
         warnings.append("1일 횟수를 찾지 못했습니다 (예: 1일 3회).")
 
-    return {
+    return {
+        "raw_text": raw_text,
+        "fields": fields,
+        "warnings": warnings,
+        "segments": segments,
+    }
 
 
 def render_card(fields: Dict[str, Any]) -> Image.Image:
@@ -270,8 +293,18 @@ def generate_llm_explanations(output: Dict[str, Any]) -> str:
         "당신은 약사 선생님입니다. 어려운 의학 용어를 쓰지 말고, 중학생도 이해할 수 있는 말투로 친절하게 설명하세요."
     )
     user_prompt = (
-        "다음은
-
+        "다음은 약봉투에서 OCR로 추출한 전체 텍스트입니다. 약 이름과 복용 지시를 기반으로 각 약의 정보를 아주 쉽게 정리해 주세요.\n"
+        "요구 사항:\n"
+        "1. 각 약마다 아래 항목을 bullet 형식으로 작성합니다.\n"
+        " - 약 이름: (가능하면 한글/영문 병기)\n"
+        " - 어떤 약인지 한 줄 설명\n"
+        " - 복용 예시: 언제, 어떤 상황에서 복용하면 좋은지 예시\n"
+        " - 복용 방법 예시: 1회 용량/하루 횟수가 있다면 언급\n"
+        " - 부작용 또는 주의사항: 흔한 부작용, 피해야 할 행동\n"
+        "2. 어려운 의학 용어는 피하고, 중학생도 이해할 수 있는 말투로 작성합니다.\n"
+        "3. 약 이름을 확실히 모르면 “이름 미확인”이라고 쓰고, 약사에게 확인하라고 안내합니다.\n"
+        "4. 마지막 문단에 반드시 “실제 복약은 의사·약사의 지시를 따르세요” 문장을 포함하세요.\n"
+        f"\n약 목록(추출 요약):\n{context}\n\nOCR 원문 전체:\n{raw_text}\n"
     )
 
     messages = [
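The new ocr_and_parse path relies on the triples easyocr returns when detail=1: each entry is (bounding box, recognized text, confidence). A minimal standalone sketch of that contract, handy for checking the dependency locally; the language list mirrors OCR_LANGS above, and "label.jpg" is only a placeholder path:

import easyocr
import numpy as np
from PIL import Image

# Korean + English reader, CPU-only for this sketch (the app enables GPU when available).
reader = easyocr.Reader(["ko", "en"], gpu=False)

# "label.jpg" is a placeholder image path, not a file in this repo.
np_img = np.array(Image.open("label.jpg").convert("RGB"))

# With detail=1 and paragraph=False each entry is (bbox, text, confidence),
# which is the shape the loop in ocr_and_parse unpacks.
for bbox, text, confidence in reader.readtext(np_img, detail=1, paragraph=False):
    print(f"{confidence:.2f}\t{text.strip()}")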
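The expanded user prompt (written in Korean) asks the model to summarize each medicine from the OCR text as bullets: the name (Korean and English where possible), a one-line description of what it is, when and in what situations to take it, the per-dose amount and daily frequency if present, and common side effects or cautions. It also tells the model to avoid medical jargon, to write "이름 미확인" (name unconfirmed) and refer the user to a pharmacist when a name is unclear, and to always close with a sentence that the doctor's and pharmacist's instructions come first. How generate_llm_explanations actually runs these messages is outside this diff, so the block below is only a sketch of feeding equivalent system/user messages to Qwen2.5-0.5B-Instruct with the transformers classes app.py already imports; the English strings are stand-ins for the Korean prompts.

from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "Qwen/Qwen2.5-0.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

# English stand-ins for the Korean system/user prompts built in generate_llm_explanations.
messages = [
    {"role": "system", "content": "You are a friendly pharmacist. Avoid medical jargon."},
    {"role": "user", "content": "OCR text from a medicine envelope:\nAcetaminophen 500mg, 3 times daily after meals."},
]

# Standard chat-template flow for Qwen instruct models.
input_ids = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt")
output_ids = model.generate(input_ids, max_new_tokens=256)
print(tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True))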
requirements.txt
CHANGED
@@ -3,3 +3,6 @@ torch
 gradio
 Pillow
 sentencepiece
+easyocr
+opencv-python-headless
+numpy