File size: 2,752 Bytes
119f7b3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
import re

# Module-level NER setup: these statements run once, when the module is imported.
# The tokenizer and the token-classification model are both loaded from the
# same checkpoint name.
# NOTE(review): presumably fetched from the Hugging Face Hub on first use and
# cached afterwards - confirm for offline deployments.
model_name = "ckiplab/bert-base-chinese-ner"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForTokenClassification.from_pretrained(model_name)
# `ner` is the callable used by extract_conditions() below, which reads the
# "word", "entity_group", "start" and "end" keys of every returned entity;
# aggregation_strategy="simple" is what requests that grouped-entity output.
ner = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple")


# Keyword that may precede a percentage -> (result key, label used in the log line).
_PERCENT_KEYWORDS = {
    "建蔽率": ("BCR", "🏗️ 建蔽率"),
    "容積率": ("FAR", "🏗️ 容積率"),
    "容積獎勵": ("bonus_far", "🎁 容積獎勵"),
}
# Area units that already denote square metres (compared against lower-cased text).
_METRIC_AREA_UNITS = ("平方公尺", "㎡", "m2", "m²")
_PING_UNIT = "坪"
_SQM_PER_PING = 3.3058
# First number in a string: optional thousands separators and decimal part.
_NUMBER_RE = re.compile(r"\d+(?:,\d{3})*(?:\.\d+)?")


def _entity_surface(text, entity):
    """Return the entity exactly as written in *text*, or its de-spaced ``word``."""
    start, end = entity.get("start"), entity.get("end")
    if start is not None and end is not None:
        span = text[start:end]
        if span:
            return span
    # No usable offsets: fall back to the pipeline's ``word`` and drop the
    # spaces that separate the tokens it was joined from.
    return entity["word"].replace(" ", "")


def _to_number(raw):
    """Convert a matched numeric string to ``int`` (or ``float`` if it has decimals)."""
    cleaned = raw.replace(",", "")
    return float(cleaned) if "." in cleaned else int(cleaned)


def _nearest_percent_keyword(context):
    """Return the ``_PERCENT_KEYWORDS`` entry whose keyword occurs last in *context*."""
    best_entry, best_pos = None, -1
    for keyword, entry in _PERCENT_KEYWORDS.items():
        pos = context.rfind(keyword)
        if pos > best_pos:
            best_entry, best_pos = entry, pos
    return best_entry


def _adjacent_surface(text, entities, index):
    """Return the next entity's text if it directly follows ``entities[index]``."""
    if index + 1 >= len(entities):
        return ""
    following = entities[index + 1]
    end = entities[index].get("end")
    next_start = following.get("start")
    if end is not None and next_start is not None:
        # Only whitespace may separate a number from a unit that was split off
        # into its own entity; anything else means the entities are unrelated.
        if next_start < end or text[end:next_start].strip():
            return ""
    return _entity_surface(text, following)


def extract_conditions(text, ner_pipeline=None):
    """Extract zoning figures (coverage, floor-area ratio, bonus, site area) from text.

    Runs named-entity recognition over *text* and interprets the numeric
    entities:

    * a percentage is assigned to whichever of 建蔽率 / 容積率 / 容積獎勵
      appears closest before it and stored under ``"BCR"`` / ``"FAR"`` /
      ``"bonus_far"``;
    * a ``QUANTITY`` followed by an area unit is stored under ``"site_area"``
      in square metres (坪 values are converted with 1 坪 = 3.3058 m² and
      rounded to an integer).

    When the same key is found more than once, the last occurrence wins.
    Progress is reported with ``print``.

    Args:
        text: The sentence(s) to analyse.
        ner_pipeline: Optional callable ``text -> list[dict]`` whose dicts carry
            ``word``, ``entity_group``, ``start`` and ``end``. Defaults to the
            module-level ``ner`` pipeline.

    Returns:
        A dict containing any of ``"BCR"``, ``"FAR"``, ``"bonus_far"`` and
        ``"site_area"``. Values are ``int``, or ``float`` when the source
        number has a fractional part. Empty when nothing was recognised.
    """
    print("🧠 開始分析文字內容...")
    result = {}
    if not text:
        # Nothing to analyse; skip the model call entirely.
        print("🧠 NLP 萃取結果:", result)
        return result

    recognizer = ner if ner_pipeline is None else ner_pipeline
    entities = recognizer(text)
    print("🧠 模型辨識到:", entities)

    for index, entity in enumerate(entities):
        label = entity["entity_group"]
        surface = _entity_surface(text, entity)

        number_match = _NUMBER_RE.search(surface)
        if number_match is None:
            continue
        num = _to_number(number_match.group())

        # Percentages: building coverage ratio / floor-area ratio / bonus.
        if "%" in surface or "%" in surface or label == "PERCENT":
            start = entity.get("start")
            # ``start`` indexes the ORIGINAL text, so slice first and only then
            # drop whitespace (keeps keywords matchable when typed with spaces).
            context = "".join(text[:start].split()) if start is not None else ""
            hit = _nearest_percent_keyword(context)
            if hit is not None:
                key, log_label = hit
                print(f"{log_label}:{num}%")
                result[key] = num

        # Areas.
        elif label == "QUANTITY":
            # The unit must come AFTER the number (so the "2" of a bare "m2"
            # is never read as a quantity); it may sit in the same entity or
            # in the entity immediately following it.
            tail = surface[number_match.end():] + _adjacent_surface(text, entities, index)
            tail = "".join(tail.split()).lower()

            if _PING_UNIT in tail:
                sqm = round(num * _SQM_PER_PING)
                print(f"📏 偵測到 {num} 坪 → {sqm} m²")
                result["site_area"] = sqm
            elif any(unit in tail for unit in _METRIC_AREA_UNITS):
                print(f"📏 偵測到 {num} 平方公尺")
                result["site_area"] = num

    print("🧠 NLP 萃取結果:", result)
    return result