LLDDWW committed on
Commit
c103d7c
Β·
1 Parent(s): 0e757ea

feat: use easyocr and enhance llm prompts

Browse files
Files changed (2) hide show
  1. app.py +41 -8
  2. requirements.txt +3 -0
app.py CHANGED
@@ -2,23 +2,25 @@ import json
2
  import re
3
  from typing import Any, Dict, List, Optional, Sequence
4
 
 
5
  import gradio as gr
 
6
  import torch
7
  from PIL import Image, ImageDraw
8
  from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
9
 
10
  # --- OCR pipeline ---------------------------------------------------------
11
  # Use a high-capacity OCR model for better accuracy on prescription labels.
12
- OCR_MODEL_ID = "microsoft/trocr-large-printed"
13
  LLM_MODEL_ID = "Qwen/Qwen2.5-0.5B-Instruct"
14
 
15
 
16
  def _load_ocr():
17
- device = 0 if torch.cuda.is_available() else -1
18
- return pipeline("image-to-text", model=OCR_MODEL_ID, device=device)
19
 
20
 
21
- ocr = _load_ocr()
22
 
23
 
24
  def _load_llm():
@@ -153,7 +155,23 @@ def parse_fields(raw: str) -> Dict[str, Any]:
153
 
154
 
155
  def ocr_and_parse(image: Image.Image) -> Dict[str, Any]:
156
- raw_text = ocr(image)[0]["generated_text"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
  fields = parse_fields(raw_text)
158
 
159
  warnings: List[str] = []
@@ -162,7 +180,12 @@ def ocr_and_parse(image: Image.Image) -> Dict[str, Any]:
162
  if not fields["times_per_day"]:
163
  warnings.append("1일 횟수λ₯Ό μ°Ύμ§€ λͺ»ν–ˆμŠ΅λ‹ˆλ‹€ (예: 1일 3회).")
164
 
165
- return {"raw_text": raw_text, "fields": fields, "warnings": warnings}
 
 
 
 
 
166
 
167
 
168
  def render_card(fields: Dict[str, Any]) -> Image.Image:
@@ -270,8 +293,18 @@ def generate_llm_explanations(output: Dict[str, Any]) -> str:
270
  "당신은 약사 μ„ μƒλ‹˜μž…λ‹ˆλ‹€. μ–΄λ €μš΄ μ˜ν•™ μš©μ–΄λ₯Ό μ“°μ§€ 말고, 쀑학생도 이해할 수 μžˆλŠ” 말투둜 μΉœμ ˆν•˜κ²Œ μ„€λͺ…ν•˜μ„Έμš”."
271
  )
272
  user_prompt = (
273
- "λ‹€μŒμ€ μ•½λ΄‰νˆ¬ OCR κ²°κ³Όμž…λ‹ˆλ‹€. μ•½ 이름과 μš©λŸ‰ 정보λ₯Ό μ°Έκ³ ν•΄ 각 μ•½μ˜ 역할을 μ‰½κ²Œ μ„€λͺ…ν•˜κ³ , μ–Έμ œ λ³΅μš©ν•˜λ©΄ 쒋은지 μ˜ˆμ‹œ, μ£Όμ˜μ‚¬ν•­μ„ bullet둜 정리해 μ£Όμ„Έμš”.\n"
274
- f"μ•½ λͺ©λ‘:\n{context}\n\nOCR 원문:\n{raw_text}\n\n좜λ ₯ ν˜•μ‹:\n- μ•½ 이름: ...\n - ν•œ 쀄 μ„€λͺ…\n - μ˜ˆμ‹œ 상황\n - μ£Όμ˜ν•  점\nλ§ˆμ§€λ§‰μ—λŠ” μ˜λ£Œμ§„ 볡약 μ§€μ‹œλ₯Ό λ°˜λ“œμ‹œ 따라야 ν•œλ‹€λŠ” λ¬Έμž₯을 덧뢙여 μ£Όμ„Έμš”."
 
 
 
 
 
 
 
 
 
 
275
  )
276
 
277
  messages = [
 
2
  import re
3
  from typing import Any, Dict, List, Optional, Sequence
4
 
5
+ import easyocr
6
  import gradio as gr
7
+ import numpy as np
8
  import torch
9
  from PIL import Image, ImageDraw
10
  from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
11
 
12
  # --- OCR pipeline ---------------------------------------------------------
13
  # Use a high-capacity OCR model for better accuracy on prescription labels.
14
+ OCR_LANGS = ["ko", "en"]
15
  LLM_MODEL_ID = "Qwen/Qwen2.5-0.5B-Instruct"
16
 
17
 
18
  def _load_ocr():
19
+ use_gpu = torch.cuda.is_available()
20
+ return easyocr.Reader(OCR_LANGS, gpu=use_gpu)
21
 
22
 
23
+ ocr_reader = _load_ocr()
24
 
25
 
26
  def _load_llm():
 
155
 
156
 
157
  def ocr_and_parse(image: Image.Image) -> Dict[str, Any]:
158
+ np_img = np.array(image.convert("RGB"))
159
+ results = ocr_reader.readtext(np_img, detail=1, paragraph=False)
160
+
161
+ segments: List[Dict[str, Any]] = []
162
+ lines: List[str] = []
163
+ for bbox, text, confidence in results:
164
+ cleaned = text.strip()
165
+ if not cleaned:
166
+ continue
167
+ lines.append(cleaned)
168
+ segments.append({
169
+ "text": cleaned,
170
+ "confidence": float(confidence),
171
+ "bbox": bbox,
172
+ })
173
+
174
+ raw_text = "\n".join(lines)
175
  fields = parse_fields(raw_text)
176
 
177
  warnings: List[str] = []
 
180
  if not fields["times_per_day"]:
181
  warnings.append("1일 횟수λ₯Ό μ°Ύμ§€ λͺ»ν–ˆμŠ΅λ‹ˆλ‹€ (예: 1일 3회).")
182
 
183
+ return {
184
+ "raw_text": raw_text,
185
+ "fields": fields,
186
+ "warnings": warnings,
187
+ "segments": segments,
188
+ }
189
 
190
 
191
  def render_card(fields: Dict[str, Any]) -> Image.Image:
 
293
  "당신은 약사 μ„ μƒλ‹˜μž…λ‹ˆλ‹€. μ–΄λ €μš΄ μ˜ν•™ μš©μ–΄λ₯Ό μ“°μ§€ 말고, 쀑학생도 이해할 수 μžˆλŠ” 말투둜 μΉœμ ˆν•˜κ²Œ μ„€λͺ…ν•˜μ„Έμš”."
294
  )
295
  user_prompt = (
296
+ "λ‹€μŒμ€ μ•½λ΄‰νˆ¬μ—μ„œ OCR둜 μΆ”μΆœν•œ 전체 ν…μŠ€νŠΈμž…λ‹ˆλ‹€. μ•½ 이름과 볡용 μ§€μ‹œλ₯Ό 기반으둜 각 μ•½μ˜ 정보λ₯Ό μ•„μ£Ό μ‰½κ²Œ 정리해 μ£Όμ„Έμš”.\n"
297
+ "μš”κ΅¬ 사항:\n"
298
+ "1. 각 μ•½λ§ˆλ‹€ μ•„λž˜ ν•­λͺ©μ„ bullet ν˜•μ‹μœΌλ‘œ μž‘μ„±ν•©λ‹ˆλ‹€.\n"
299
+ " - μ•½ 이름: (κ°€λŠ₯ν•˜λ©΄ ν•œκΈ€/영문 병기)\n"
300
+ " - μ–΄λ–€ 약인지 ν•œ 쀄 μ„€λͺ…\n"
301
+ " - 볡용 μ˜ˆμ‹œ: μ–Έμ œ, μ–΄λ–€ μƒν™©μ—μ„œ λ³΅μš©ν•˜λ©΄ 쒋은지 μ˜ˆμ‹œ\n"
302
+ " - 볡용 방법 μ˜ˆμ‹œ: 1회 μš©λŸ‰/ν•˜λ£¨ νšŸμˆ˜κ°€ μžˆλ‹€λ©΄ μ–ΈκΈ‰\n"
303
+ " - λΆ€μž‘μš© λ˜λŠ” μ£Όμ˜μ‚¬ν•­: ν”ν•œ λΆ€μž‘μš©, ν”Όν•΄μ•Ό ν•  행동\n"
304
+ "2. μ–΄λ €μš΄ μ˜ν•™ μš©μ–΄λŠ” ν”Όν•˜κ³ , 쀑학생도 이해할 수 μžˆλŠ” 말투둜 μž‘μ„±ν•©λ‹ˆλ‹€.\n"
305
+ "3. μ•½ 이름을 ν™•μ‹€νžˆ λͺ¨λ₯΄λ©΄ β€˜μ΄λ¦„ 미확인’이라고 μ“°κ³ , μ•½μ‚¬μ—κ²Œ ν™•μΈν•˜λΌκ³  μ•ˆλ‚΄ν•©λ‹ˆλ‹€.\n"
306
+ "4. λ§ˆμ§€λ§‰ 문단에 λ°˜λ“œμ‹œ β€˜μ‹€μ œ 볡약은 μ˜μ‚¬Β·μ•½μ‚¬μ˜ μ§€μ‹œλ₯Ό λ”°λ₯΄μ„Έμš”’ λ¬Έμž₯을 ν¬ν•¨ν•˜μ„Έμš”.\n"
307
+ f"\nμ•½ λͺ©λ‘(μΆ”μΆœ μš”μ•½):\n{context}\n\nOCR 원문 전체:\n{raw_text}\n"
308
  )
309
 
310
  messages = [
requirements.txt CHANGED
@@ -3,3 +3,6 @@ torch
3
  gradio
4
  Pillow
5
  sentencepiece
 
 
 
 
3
  gradio
4
  Pillow
5
  sentencepiece
6
+ easyocr
7
+ opencv-python-headless
8
+ numpy