Spaces:

blueradiance
/

Masking2

Runtime error

App Files Files Community

blueradiance committed on Apr 17, 2025

Commit

7ba2b0d

verified ·

1 Parent(s): b8dff18

Update app.py

Browse files

Files changed (1) hide show

app.py +13 -29

app.py CHANGED Viewed

@@ -24,30 +24,6 @@ import re
 TAG_PREFIX = "N"
-# 모델 설정
-model_name = "Leo97/KoELECTRA-small-v3-modu-ner"
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForTokenClassification.from_pretrained(model_name)
-ner_pipeline = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple")
-# 예외 단어 (태깅 제외)
-NAME_ENTITY_EXCEPTIONS = set([
-    '법적', '사회적', '행정적', '심리적', '의료적', '법률적', '해당', '본인', '소속', '상담'
-])
-def extract_names(text: str) -> list:
-    """
-    🤖 KoELECTRA 기반 NER로 이름 후보 추출 (2글자 이상, PS만)
-    """
-    results = ner_pipeline(text)
-    names = []
-    for entity in results:
-        if entity.get("entity_group") == "PS":
-            name = entity["word"].replace("##", "").strip()
-            if len(name) >= 2 and name not in NAME_ENTITY_EXCEPTIONS:
-                names.append(name)
-    return list(set(names))
 def apply_name_tags(text: str, names: list, start_index: int = 100) -> tuple[str, dict]:
     """
     🏷 이름 리스트를 태그로 치환: 김철수 → N100
@@ -63,7 +39,7 @@ def apply_name_tags(text: str, names: list, start_index: int = 100) -> tuple[str
     for name in names:
         tag = f"{TAG_PREFIX}{counter:03d}"
         pattern = re.compile(
-            rf'([\s\(\["\']*){re.escape(name)}([가-힣\s.,;:!?()\[\]"\'"]*)',
             re.IGNORECASE
         )
         tagged_text, n = pattern.subn(tag, tagged_text)
@@ -73,11 +49,19 @@ def apply_name_tags(text: str, names: list, start_index: int = 100) -> tuple[str
     return tagged_text, mapping
 # 📦 PART 2 (Extended & Fixed): 호칭/조사 확장기 + 태그 매핑 보정기 - 특수문자 오류 수정판

 TAG_PREFIX = "N"
 def apply_name_tags(text: str, names: list, start_index: int = 100) -> tuple[str, dict]:
     """
     🏷 이름 리스트를 태그로 치환: 김철수 → N100
     for name in names:
         tag = f"{TAG_PREFIX}{counter:03d}"
         pattern = re.compile(
+            rf'([\s\(\["']*){re.escape(name)}([가-힣\s.,;:!?()\[\]"'"]*)',
             re.IGNORECASE
         )
         tagged_text, n = pattern.subn(tag, tagged_text)
     return tagged_text, mapping
def replace_institution_keywords(text: str, keywords: list, replace_word: str) -> str:
    """
    Replace every occurrence of each institution keyword with ``replace_word``.

    Matching is literal and case-insensitive: each keyword is passed through
    ``re.escape``, so regex metacharacters inside a keyword carry no special
    meaning. Keywords are applied one after another in the order given, so a
    later keyword can also match text produced by an earlier replacement.

    Args:
        text: Input text to scan.
        keywords: Keyword strings to replace; empty entries are ignored.
        replace_word: Replacement text, inserted verbatim.

    Returns:
        The text with all keyword occurrences substituted.
    """
    for kw in keywords:
        if not kw:
            # An empty pattern matches at every position and would scatter
            # replace_word between all characters of the text.
            continue
        # Match only the keyword itself. Wrapping it in optional, greedy
        # prefix/suffix character classes that are written back unchanged adds
        # nothing, and a greedy Hangul/whitespace suffix consumes later
        # occurrences of the keyword so that they are left unreplaced. It also
        # avoids quote characters inside the pattern literal, which are easy
        # to leave unescaped and then terminate the string early.
        pattern = re.compile(re.escape(kw), re.IGNORECASE)
        # A callable replacement keeps backslashes and group references in
        # replace_word literal instead of being interpreted by re.
        text = pattern.sub(lambda _match: replace_word, text)
    return text
 # 📦 PART 2 (Extended & Fixed): 호칭/조사 확장기 + 태그 매핑 보정기 - 특수문자 오류 수정판