blueradiance committed on
Commit
b8dff18
·
verified ·
1 Parent(s): e54ebf5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -11
app.py CHANGED
@@ -1,4 +1,22 @@
1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  # 📦 PART 1: 이름 추출기 + 태그 치환기
3
 
4
  from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
@@ -39,17 +57,21 @@ def apply_name_tags(text: str, names: list, start_index: int = 100) -> tuple[str
39
  tagged_text = text
40
  counter = start_index
41
 
42
- # ✅ 긴 이름 우선 정렬
43
- names = sorted(set(names), key=len, reverse=True)
44
-
45
- for name in names:
46
- tag = f"{TAG_PREFIX}{counter:03d}"
47
- pattern = re.compile(rf'([\s\(\["\']*){re.escape(name)}([가-힣\s.,;:!?()\[\]"\'"]*)', re.IGNORECASE)
48
- tagged_text, n = pattern.subn(tag, tagged_text)
49
- if n > 0:
50
- mapping[tag] = name
51
- counter += 1
52
- return tagged_text, mapping
 
 
 
 
53
 
54
 
55
 
 
1
 
2
+ # 🚫 스마트 따옴표 제거기 - 런타임에서 1회 실행됨
3
def clean_smart_quotes_inplace(path=None):
    """Replace typographic (curly) quotes in a source file with ASCII quotes.

    The file is read as UTF-8, the code points U+2018/U+2019 are mapped to
    an apostrophe and U+201C/U+201D to a double quote, and the result is
    written back to the same path.

    Args:
        path: File to clean. Defaults to the module file that defines this
            function (``__file__``), which is the original behaviour.

    Returns:
        None. The file is rewritten only when at least one character
        actually changed.

    Raises:
        OSError: If the file cannot be read, or cannot be written when a
            change is required.
    """
    import os

    # The curly quotes are spelled as escapes on purpose. This function
    # rewrites its own source file; with literal curly quotes in the table,
    # the first run would turn the U+201C key into three consecutive double
    # quotes, i.e. the start of a triple-quoted string, and the module
    # would no longer parse on the next start.
    replacements = str.maketrans({
        "\u2018": "'",
        "\u2019": "'",
        "\u201c": '"',
        "\u201d": '"',
    })

    if path is None:
        path = __file__
    path = os.path.abspath(path)

    with open(path, "r", encoding="utf-8") as f:
        content = f.read()

    cleaned = content.translate(replacements)

    # Skip the write when nothing changed: avoids touching the mtime (which
    # can trigger auto-reload) and avoids failing on read-only deployments.
    if cleaned == content:
        return

    with open(path, "w", encoding="utf-8") as f:
        f.write(cleaned)
14
+
15
+ # 🔥 단 한 번 실행 (다시 실행되면 의미 없음)
16
+ clean_smart_quotes_inplace()
17
+
18
+
19
+
20
  # 📦 PART 1: 이름 추출기 + 태그 치환기
21
 
22
  from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
 
57
  tagged_text = text
58
  counter = start_index
59
 
60
+ # ✅ 긴 이름 우선 정렬
61
+ names = sorted(set(names), key=len, reverse=True)
62
+
63
+ for name in names:
64
+ tag = f"{TAG_PREFIX}{counter:03d}"
65
+ pattern = re.compile(
66
+ rf'([\s\(\["\']*){re.escape(name)}([가-힣\s.,;:!?()\[\]"\'"]*)',
67
+ re.IGNORECASE
68
+ )
69
+ tagged_text, n = pattern.subn(tag, tagged_text)
70
+ if n > 0:
71
+ mapping[tag] = name
72
+ counter += 1
73
+ return tagged_text, mapping
74
+
75
 
76
 
77