blueradiance committed on
Commit
7ba2b0d
·
verified ·
1 Parent(s): b8dff18

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -29
app.py CHANGED
@@ -24,30 +24,6 @@ import re
24
 
25
  TAG_PREFIX = "N"
26
 
27
- # 모델 설정
28
- model_name = "Leo97/KoELECTRA-small-v3-modu-ner"
29
- tokenizer = AutoTokenizer.from_pretrained(model_name)
30
- model = AutoModelForTokenClassification.from_pretrained(model_name)
31
- ner_pipeline = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple")
32
-
33
- # 예외 단어 (태깅 제외)
34
- NAME_ENTITY_EXCEPTIONS = set([
35
- '법적', '사회적', '행정적', '심리적', '의료적', '법률적', '해당', '본인', '소속', '상담'
36
- ])
37
-
38
- def extract_names(text: str) -> list:
39
- """
40
- 🤖 KoELECTRA 기반 NER로 이름 후보 추출 (2글자 이상, PS만)
41
- """
42
- results = ner_pipeline(text)
43
- names = []
44
- for entity in results:
45
- if entity.get("entity_group") == "PS":
46
- name = entity["word"].replace("##", "").strip()
47
- if len(name) >= 2 and name not in NAME_ENTITY_EXCEPTIONS:
48
- names.append(name)
49
- return list(set(names))
50
-
51
  def apply_name_tags(text: str, names: list, start_index: int = 100) -> tuple[str, dict]:
52
  """
53
  🏷 이름 리스트를 태그로 치환: 김철수 → N100
@@ -63,7 +39,7 @@ def apply_name_tags(text: str, names: list, start_index: int = 100) -> tuple[str
63
  for name in names:
64
  tag = f"{TAG_PREFIX}{counter:03d}"
65
  pattern = re.compile(
66
- rf'([\s\(\["\']*){re.escape(name)}([가-힣\s.,;:!?()\[\]"\'"]*)',
67
  re.IGNORECASE
68
  )
69
  tagged_text, n = pattern.subn(tag, tagged_text)
@@ -73,11 +49,19 @@ def apply_name_tags(text: str, names: list, start_index: int = 100) -> tuple[str
73
  return tagged_text, mapping
74
 
75
 
 
 
 
 
 
 
 
 
 
 
 
76
 
77
-
78
-
79
-
80
-
81
 
82
  # 📦 PART 2 (Extended & Fixed): 호칭/조사 확장기 + 태그 매핑 보정기 - 특수문자 오류 수정판
83
 
 
24
 
25
  TAG_PREFIX = "N"
26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  def apply_name_tags(text: str, names: list, start_index: int = 100) -> tuple[str, dict]:
28
  """
29
  🏷 이름 리스트를 태그로 치환: 김철수 → N100
 
39
  for name in names:
40
  tag = f"{TAG_PREFIX}{counter:03d}"
41
  pattern = re.compile(
42
+ rf'([\s\(\["']*){re.escape(name)}([가-힣\s.,;:!?()\[\]"'"]*)',
43
  re.IGNORECASE
44
  )
45
  tagged_text, n = pattern.subn(tag, tagged_text)
 
49
  return tagged_text, mapping
50
 
51
 
52
def replace_institution_keywords(text: str, keywords: list, replace_word: str) -> str:
    """
    🏢 Replace every occurrence of each institution keyword with ``replace_word``.

    Keywords are matched literally (regex metacharacters are escaped) and
    case-insensitively. Text surrounding a match is left untouched.

    Args:
        text: Input text to scan.
        keywords: Institution keywords to replace. Empty entries are ignored.
        replace_word: Text inserted verbatim in place of each match.

    Returns:
        The text with all keyword occurrences replaced.
    """
    for kw in keywords or ():
        if not kw:
            # An empty keyword would match at every position in the text.
            continue
        # Match only the keyword itself: also consuming the neighbouring
        # Hangul/whitespace/punctuation would swallow later occurrences of
        # the keyword that sit in the same run of text.
        pattern = re.compile(re.escape(kw), re.IGNORECASE)
        # A callable replacement keeps backslashes in replace_word literal
        # instead of being interpreted as group references.
        text = pattern.sub(lambda _match: replace_word, text)
    return text
63
 
64
+
 
 
 
65
 
66
  # 📦 PART 2 (Extended & Fixed): 호칭/조사 확장기 + 태그 매핑 보정기 - 특수문자 오류 수정판
67