deneve07 committed on
Commit
208e33a
·
verified ·
1 Parent(s): 7c8bbb7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +225 -199
app.py CHANGED
@@ -1,254 +1,278 @@
 
1
  import re
2
- import requests
3
  import datetime
 
4
  from urllib.parse import quote
5
  from bs4 import BeautifulSoup
6
  from playwright.sync_api import sync_playwright
7
  from curl_cffi import requests as curl_req
8
  import gradio as gr
9
- import os
10
 
11
  os.system("playwright install chromium")
12
 
13
def translate_en_to_ja(text):
    """Translate English text to Japanese via the Google Translate web endpoint.

    Args:
        text: English text to translate.

    Returns:
        The stripped Japanese translation, or ``text`` unchanged when it is
        empty/blank, the request fails, or the response cannot be parsed.
    """
    # Nothing to translate: skip the network round trip entirely.
    if not text or not text.strip():
        return text
    try:
        url = f"https://translate.googleapis.com/translate_a/single?client=gtx&sl=en&tl=ja&dt=t&q={quote(text)}"
        res = requests.get(url, timeout=5)
        if res.status_code == 200:
            return res.json()[0][0][0].strip()
    except Exception:
        # Best effort: the return value is used as a search term by the
        # caller, so an error message must never be returned in its place.
        pass
    return text
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  # ==========================================
24
- # 🇺🇸 美國 FDA Orange Book (改用 curl_cffi 突破防火牆)
25
  # ==========================================
26
def get_usa_originator(ingredient_query):
    """Search the FDA Orange Book product form and collect rows flagged RLD.

    Args:
        ingredient_query: Drug/ingredient name posted as ``drugname``.

    Returns:
        A ``(brands, companies, log)`` tuple of strings. ``brands`` and
        ``companies`` are comma-joined; ``log`` is the newline-joined trace
        of the steps performed (including the error text on failure).
    """
    trace = []
    rld_brands = set()
    rld_companies = set()

    try:
        trace.append("1. 使用 curl_cffi 偽裝成 Chrome 120 發送請求...")
        session = curl_req.Session(impersonate="chrome120")
        # Form fields mirror the search form of the Orange Book page.
        form = {
            "drugname": ingredient_query,
            "discontinued": "RX,OTC,DISCN",
            "submit": "Search",
        }
        res = session.post(
            "https://www.accessdata.fda.gov/scripts/cder/ob/search_product.cfm",
            data=form,
            timeout=30,
            verify=False,
        )

        trace.append("2. 成功取得 FDA 伺服器回應,解析表格...")
        table = BeautifulSoup(res.text, 'html.parser').find('table', id='example')

        if table:
            titles = [th.get_text(strip=True).lower() for th in table.find_all('th')]

            def column_of(keyword, fallback):
                # First header containing the keyword, else the fixed position.
                for position, title in enumerate(titles):
                    if keyword in title:
                        return position
                return fallback

            brand_idx = column_of('proprietary name', 2)
            rld_idx = column_of('rld', 8)
            mfg_idx = column_of('applicant holder', 10)

            body = table.find('tbody')
            rows = body.find_all('tr') if body else table.find_all('tr')
            for row in rows:
                cells = row.find_all('td')
                if len(cells) <= max(rld_idx, brand_idx):
                    continue
                if "RLD" not in cells[rld_idx].get_text(strip=True).upper():
                    continue
                rld_brands.add(cells[brand_idx].get_text(strip=True))
                if len(cells) > mfg_idx:
                    rld_companies.add(cells[mfg_idx].get_text(strip=True))

        if not rld_brands:
            trace.append("❌ 表格中未發現 RLD,或該藥品尚未核准。")
            return "查無原廠", "-", "\n".join(trace)

        trace.append("✅ 成功找到 RLD 原廠藥!")
        return ", ".join(rld_brands), ", ".join(rld_companies), "\n".join(trace)

    except Exception as e:
        trace.append(f"❌ 發生錯誤: {str(e)}")
        return "執行失敗", "-", "\n".join(trace)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
 
74
  # ==========================================
75
- # 🇬🇧 英國 eMC
76
  # ==========================================
77
def get_uk_originator(ingredient_query, page):
    """Search eMC for products whose title does not start with the ingredient.

    Args:
        ingredient_query: Ingredient name used as the search term.
        page: Playwright page used for navigation; the caller owns it.

    Returns:
        A ``(brands, companies, status)`` tuple of strings.
    """
    brands = set()
    companies = set()

    try:
        # URL-encode the term so spaces and reserved characters survive.
        page.goto(f"https://www.medicines.org.uk/emc/search?q={quote(ingredient_query)}", timeout=30000)
        page.wait_for_selector('.search-results-product-info-title-link', timeout=15000)

        soup = BeautifulSoup(page.content(), 'html.parser')
        prefix = ingredient_query.lower()
        for link in soup.find_all('a', class_='search-results-product-info-title-link'):
            raw_title = link.get_text(strip=True)
            # Titles starting with the ingredient name are treated as generics.
            if raw_title.lower().startswith(prefix):
                continue
            # Cut the title at the first whitespace-then-digit (the strength).
            clean_brand = re.split(r'\s+\d', raw_title)[0].strip()
            brands.add(clean_brand)
            parent_div = link.find_parent(class_='search-results-product-info')
            comp_tag = parent_div.find(class_='search-results-product-info-company') if parent_div else None
            if comp_tag:
                companies.add(comp_tag.get_text(strip=True))

        if brands:
            return ", ".join(brands), ", ".join(companies), "✅ 成功找到原廠藥!"
        return "查無原廠", "-", "❌ 皆以成分名開頭"
    except Exception as e:
        return "執行失敗", "-", str(e)
102
 
103
- # ==========================================
104
- # 🇨🇦 加拿大 DPD (修正 domcontentloaded 避免超時)
105
- # ==========================================
106
def get_canada_originator(ingredient_query, page):
    """Search Canada's DPD and pick the company holding the lowest DIN.

    Rows whose company name contains a known generic-manufacturer keyword
    are skipped; the remaining rows are sorted by DIN and the company of the
    first row is reported together with all of its product names.

    Args:
        ingredient_query: Active-ingredient name typed into the search form.
        page: Playwright page used for navigation; the caller owns it.

    Returns:
        A ``(brands, company, log)`` tuple of strings.
    """
    log = []
    generic_companies = ['apotex', 'teva', 'sandoz', 'jamp', 'mint', 'pharmascience', 'sanis', 'sivem', 'auro', 'glenmark', 'taro', 'marcan', 'nora', 'mantra', 'reddy']

    try:
        log.append("1. 前往 Canada DPD (放寬載入條件)...")
        # Wait only for DOMContentLoaded, with a 45 s budget, to avoid timeouts.
        page.goto("https://health-products.canada.ca/dpd-bdpp/index-eng.jsp", timeout=45000, wait_until="domcontentloaded")
        page.locator('input[id="activeIngredient"]').fill(ingredient_query)
        page.keyboard.press("Enter")

        log.append("2. 等待結果表格...")
        page.wait_for_selector('table#results', timeout=15000)
        soup = BeautifulSoup(page.content(), 'html.parser')
        table = soup.find('table', id='results')
        tbody = table.find('tbody') if table else None
        if not tbody:
            return "查無資料", "-", "\n".join(log)

        all_candidates = []
        for tr in tbody.find_all('tr'):
            tds = tr.find_all('td')
            if len(tds) < 4:
                continue
            comp_name = tds[2].get_text(strip=True)
            comp_lower = comp_name.lower()
            if any(gc in comp_lower for gc in generic_companies):
                continue

            din_match = re.search(r'\d+', tds[1].get_text(strip=True))
            link_tag = tds[1].find('a')
            if din_match and link_tag:
                all_candidates.append({
                    "company": comp_name,
                    "product": tds[3].get_text(strip=True),
                    "din": int(din_match.group()),
                    "url": "https://health-products.canada.ca" + link_tag['href'],
                })

        if not all_candidates:
            return "查無原廠", "-", "❌ 剩下的全為學名藥廠。"

        all_candidates.sort(key=lambda x: x['din'])
        originator_company = all_candidates[0]['company']
        log.append(f"3. 依 DIN 排序後,鎖定最古老藥廠: {originator_company}")

        # The market date is optional extra detail: a failure here must not
        # discard the result, but it is logged instead of silently swallowed.
        try:
            page.goto(all_candidates[0]['url'], timeout=15000)
            detail_soup = BeautifulSoup(page.content(), 'html.parser')
            strong_tag = detail_soup.find(lambda tag: tag.name == "strong" and "Original market date" in tag.get_text(strip=True))
            parent_row = strong_tag.find_parent('div', class_='row') if strong_tag else None
            date_tag = parent_row.find('p', class_='col-sm-8') if parent_row else None
            if date_tag:
                date_str = date_tag.get_text(strip=True)
                log.append(f"✅ 上市日: {date_str}")
        except Exception as e:
            log.append(f"⚠️ 無法取得上市日: {e}")

        final_brands = {c['product'] for c in all_candidates if c['company'] == originator_company}
        return ", ".join(final_brands), originator_company, "\n".join(log)

    except Exception as e:
        return "執行失敗", "-", str(e)
167
 
168
- # ==========================================
169
- # 🇯🇵 日本 PMDA (依據真實 HTML 精準定位)
170
- # ==========================================
171
def get_japan_originator(ingredient_query_ja, page):
    """Search the PMDA drug database and collect brand-like product names.

    A row is kept when its product name contains neither ``「`` nor ``(`` and
    does not contain the searched ingredient name; the name is then cut at the
    first dosage-form token or digit.

    Args:
        ingredient_query_ja: Japanese ingredient name typed into the form.
        page: Playwright page used for navigation; the caller owns it.

    Returns:
        A ``(brands, companies, log)`` tuple of strings; ``log`` is the
        newline-joined trace of the steps performed.
    """
    log = [f"使用日文名: {ingredient_query_ja} 進行搜尋"]
    brands = set()
    companies = set()

    try:
        page.goto("https://www.pmda.go.jp/PmdaSearch/iyakuSearch/", timeout=30000, wait_until="domcontentloaded")

        # Optional consent screen. A selector starting with `text=` cannot be
        # combined with a CSS comma list, so use CSS-only alternatives, and
        # wait explicitly because is_visible() returns immediately.
        try:
            agree_btn = page.locator('input[value="同意する"], a:has-text("同意する")').first
            agree_btn.wait_for(state="visible", timeout=3000)
            agree_btn.click()
            page.wait_for_load_state('networkidle')
        except Exception:
            pass  # No consent prompt appeared within the wait; continue.

        log.append("1. 尋找輸入框並送出...")
        page.locator('input#txtName, input[name="nameWord"]').first.fill(ingredient_query_ja)
        page.locator('input[name="btnA"], input[type="image"][src*="SearchBtn"]').first.click()

        log.append("2. 等待表格 id=ResultList...")
        page.wait_for_selector('table#ResultList', timeout=15000)

        soup = BeautifulSoup(page.content(), 'html.parser')
        table = soup.find('table', id='ResultList')

        for tr in (table.find_all('tr') if table else []):
            tds = tr.find_all('td')
            if len(tds) < 3:
                continue
            raw_brand = tds[1].get_text(strip=True)
            if '「' in raw_brand or '(' in raw_brand or ingredient_query_ja in raw_brand:
                continue
            clean_brand = re.split(r'(皮下注|錠|カプセル|顆粒|シロップ|OD|細粒|液|\d+)', raw_brand)[0].strip()
            if clean_brand:
                brands.add(clean_brand)
                comp = tds[2].get_text(separator=" ", strip=True).replace('製造販売元/', '')
                companies.add(comp)

        # Return the collected trace so the steps are visible to the caller.
        if brands:
            log.append("✅ 成功找到原廠!")
            return ", ".join(brands), ", ".join(companies), "\n".join(log)
        log.append("❌ 皆為學名藥括號")
        return "查無原廠", "-", "\n".join(log)

    except Exception as e:
        log.append(f"❌ 發生錯誤: {str(e)}")
        return "執行失敗", "-", "\n".join(log)
214
 
215
  # ==========================================
216
- # 🚀 主執行函數 (美國使用 curl_cffi,其他使用 Playwright 分頁隔離)
217
  # ==========================================
218
def run_diagnostic_search(ingredient_en, ingredient_ja_manual):
    """Run the US, UK, Canada and Japan lookups and return one row per country.

    Args:
        ingredient_en: English ingredient name (required).
        ingredient_ja_manual: Optional Japanese name; when empty the English
            name is passed through ``translate_en_to_ja``.

    Returns:
        A list of ``[country, brands, companies, log]`` rows.
    """
    if not ingredient_en:
        return [["錯誤", "請輸入英文成分名", "-", ""]]

    ingredient_ja = ingredient_ja_manual if ingredient_ja_manual else translate_en_to_ja(ingredient_en)

    results = []

    # US: plain HTTP via curl_cffi, no browser involved.
    us_b, us_c, us_log = get_usa_originator(ingredient_en)
    results.append(["🇺🇸 美國 (FDA)", us_b, us_c, us_log])

    # UK / Canada / Japan: one browser, one fresh page per site.
    browser_jobs = (
        ("🇬🇧 英國 (eMC)", get_uk_originator, ingredient_en),
        ("🇨🇦 加拿大 (DPD)", get_canada_originator, ingredient_en),
        ("🇯🇵 日本 (PMDA)", get_japan_originator, ingredient_ja),
    )
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True, args=['--no-sandbox', '--disable-dev-shm-usage'])
        try:
            context = browser.new_context(
                user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
            )
            for label, scraper, query in browser_jobs:
                page = context.new_page()
                try:
                    brand, company, log = scraper(query, page)
                finally:
                    # Close the tab even if the scraper raised.
                    page.close()
                results.append([label, brand, company, log])
        finally:
            # Always release the browser process, including on errors.
            browser.close()

    return results
@@ -256,22 +280,24 @@ def run_diagnostic_search(ingredient_en, ingredient_ja_manual):
256
  # ==========================================
257
  # 🎨 UI 介面
258
  # ==========================================
259
# Gradio front end: two text inputs, one button, one read-only result table.
with gr.Blocks(title="國原廠智能檢索 (抗防護終極版)") as demo:
    gr.Markdown("## 🌐 跨國原廠商品名檢索器 (支援多重商品名與防爬蟲突破)")

    with gr.Row():
        ing_input = gr.Textbox(
            label="🧪 英文成分名 (必填)",
            placeholder="例如: Semaglutide",
        )
        ja_input = gr.Textbox(
            label="🇯🇵 日文成分名 (選填)",
            placeholder="例如: セマグルチド (若空白則自動翻譯)",
        )

    search_btn = gr.Button("🚀 啟動查詢", variant="primary")

    result_table = gr.Dataframe(
        headers=["國家", "🌟 判定為原廠的商品名", "🏭 藥廠名稱", "🛠️ 系統執行診斷日誌"],
        datatype=["str", "str", "str", "str"],
        wrap=True,
        interactive=False,
    )

    # Each click runs the full search and replaces the table contents.
    search_btn.click(
        fn=run_diagnostic_search,
        inputs=[ing_input, ja_input],
        outputs=[result_table],
    )

if __name__ == "__main__":
    demo.launch()
 
1
+ import os
2
  import re
 
3
  import datetime
4
+ import requests
5
  from urllib.parse import quote
6
  from bs4 import BeautifulSoup
7
  from playwright.sync_api import sync_playwright
8
  from curl_cffi import requests as curl_req
9
  import gradio as gr
 
10
 
11
  os.system("playwright install chromium")
12
 
13
+ # ==========================================
14
+ # 🛠️ 共用工具:翻譯與學名藥濾網
15
+ # ==========================================
16
def translate_lang(text, target_lang):
    """Translate English text via the Google Translate web endpoint.

    Args:
        text: English source text.
        target_lang: Target language code placed in the ``tl`` query
            parameter (the callers in this file pass ``'ja'`` and ``'de'``).

    Returns:
        The stripped translation, or ``text`` unchanged when it is
        empty/blank, the request fails, or the response cannot be parsed.
    """
    # Nothing to translate: skip the network round trip entirely.
    if not text or not text.strip():
        return text
    try:
        url = f"https://translate.googleapis.com/translate_a/single?client=gtx&sl=en&tl={target_lang}&dt=t&q={quote(text)}"
        res = requests.get(url, timeout=5)
        if res.status_code == 200:
            return res.json()[0][0][0].strip()
    except Exception:
        # Best effort: fall back to the untranslated text. `Exception` rather
        # than a bare except, so KeyboardInterrupt/SystemExit still propagate.
        pass
    return text
23
 
24
def is_generic(brand_name, company_name, ingredient):
    """Heuristically decide whether a product looks like a generic.

    A product is flagged when any of the following holds:
      * the brand name contains the ingredient name (case-insensitive);
      * the brand or company name contains a known generic-maker keyword;
      * the brand name contains ``「`` or ``(``.

    Args:
        brand_name: Product name as scraped; ``None`` is treated as empty.
        company_name: Company name, or ``""``/``None`` when unavailable.
        ingredient: Ingredient name that was searched for.

    Returns:
        ``True`` when the product is considered generic, else ``False``.
    """
    generic_keywords = (
        'sandoz', 'teva', 'apotex', 'ratiopharm', 'jamp', 'mint', 'pharmascience', 'sanis', 'sivem',
        'auro', 'glenmark', 'taro', 'marcan', 'nora', 'mantra', 'reddy', 'mepha', 'axapharm',
        'helvepharm', 'zentiva', 'spirig', 'aliud', 'puren', 'stada', 'eg ', '- gé',
    )

    brand_name = brand_name or ""
    b_lower = brand_name.lower()
    c_lower = (company_name or "").lower()
    i_lower = (ingredient or "").strip().lower()

    # An empty ingredient is a substring of every string; without this guard
    # every product would be flagged as generic.
    if i_lower and i_lower in b_lower:
        return True
    if any(gk in b_lower or gk in c_lower for gk in generic_keywords):
        return True
    # Bracketed names are treated as the Japanese generic naming pattern.
    return '「' in brand_name or '(' in brand_name
38
+
39
def clean_brand_name(raw_name):
    """Strip strength/dosage-form suffixes and ``®`` from a product name.

    The name is cut at the first dosage-form token or digit run. When that
    would leave nothing (the name itself starts with such a token, e.g. a
    leading digit or "OD"), the full name is returned instead of ``""``.

    Args:
        raw_name: Product name as scraped.

    Returns:
        The cleaned product name with surrounding whitespace removed.
    """
    head = re.split(r'皮下注|錠|カプセル|顆粒|シロップ|OD|細粒|液|\d+', raw_name, maxsplit=1)[0]
    head = head.replace('®', '').strip()
    # Fall back to the whole name rather than reporting an empty brand.
    return head or raw_name.replace('®', '').strip()
42
+
43
  # ==========================================
44
+ # 🚀 模組 A:使用 curl_cffi 抓取 (美、比、法)
45
  # ==========================================
46
def get_usa_originator(ingredient):
    """Search the FDA Orange Book product form and collect rows flagged RLD.

    Column positions are taken from the table headers when a matching header
    exists, and fall back to fixed positions (2, 8, 10) otherwise.

    Args:
        ingredient: Drug/ingredient name posted as ``drugname``.

    Returns:
        A ``(brands, companies, status)`` tuple of strings.
    """
    brands, companies = set(), set()
    try:
        session = curl_req.Session(impersonate="chrome120")
        payload = {"drugname": ingredient, "discontinued": "RX,OTC,DISCN", "submit": "Search"}
        # NOTE(review): verify=False disables TLS certificate verification;
        # confirm this is still required for this host.
        res = session.post("https://www.accessdata.fda.gov/scripts/cder/ob/search_product.cfm", data=payload, timeout=30, verify=False)
        soup = BeautifulSoup(res.text, 'html.parser')
        table = soup.find('table', id='example')
        if table:
            headers = [th.get_text(strip=True).lower() for th in table.find_all('th')]

            def column(keyword, default):
                # Index of the first header containing `keyword`, else `default`.
                return next((i for i, h in enumerate(headers) if keyword in h), default)

            brand_idx = column('proprietary name', 2)
            rld_idx = column('rld', 8)
            mfg_idx = column('applicant holder', 10)

            for tr in (table.find('tbody') or table).find_all('tr'):
                tds = tr.find_all('td')
                if len(tds) <= max(rld_idx, brand_idx):
                    continue
                if "RLD" not in tds[rld_idx].get_text(strip=True).upper():
                    continue
                brands.add(tds[brand_idx].get_text(strip=True))
                if len(tds) > mfg_idx:
                    companies.add(tds[mfg_idx].get_text(strip=True))
        if brands:
            return ", ".join(brands), ", ".join(companies), "✅ 成功找到 RLD"
        return "查無原廠", "-", "❌ 查無 RLD 或未核准"
    except Exception as e:
        return "執行失敗", "-", str(e)
65
+
66
def get_belgium_originator(ingredient):
    """Query the Test-Achats medicine search API and keep non-generic labels.

    Args:
        ingredient: Ingredient name used as the ``term`` query parameter.

    Returns:
        A ``(brands, companies, status)`` tuple of strings. The endpoint's
        company data is not read here, so the company field is a fixed
        placeholder on success.
    """
    brands = set()
    try:
        session = curl_req.Session(impersonate="chrome120")
        calc_id, ds = "5b55a713-dc8e-46bd-a82a-cea4641fb7b1", "c9852b22-1d58-4471-b8cc-a821763e821e"
        url = f"https://www.test-achats.be/CalculatorsAPI/Medicine/GetSearchResults/{calc_id}?datasource={ds}&term={quote(ingredient)}"
        # NOTE(review): verify=False disables TLS certificate verification.
        res = session.get(url, headers={"X-Requested-With": "XMLHttpRequest"}, timeout=30, verify=False)
        # A failed request is an execution failure, not "everything is generic".
        if res.status_code != 200:
            return "執行失敗", "-", f"HTTP {res.status_code}"
        for item in res.json():
            label = (item.get('label') or '').strip()
            if not label or is_generic(label, "", ingredient):
                continue
            name = clean_brand_name(label)
            if name:  # never report an empty brand
                brands.add(name)
        if brands:
            return ", ".join(brands), "Test-achats 無藥廠資料", "✅ API 抓取成功"
        return "查無原廠", "-", "❌ 皆為學名藥"
    except Exception as e:
        return "執行失敗", "-", str(e)
80
+
81
def get_france_originator(ingredient):
    """Search the French public medicines database and keep non-generic titles.

    Result titles are read from ``a.standart_titre`` links, falling back to
    ``h3`` elements when no such link exists.

    Args:
        ingredient: Ingredient name used as the ``contains`` query parameter.

    Returns:
        A ``(brands, companies, status)`` tuple of strings; company names are
        not parsed here, so the company field is a fixed placeholder.
    """
    brands = set()
    try:
        session = curl_req.Session(impersonate="chrome120")
        # NOTE(review): verify=False disables TLS certificate verification.
        res = session.get(f"https://base-donnees-publique.medicaments.gouv.fr/medicament/recherche/resultat?contains={quote(ingredient)}", timeout=30, verify=False)
        soup = BeautifulSoup(res.text, 'html.parser')
        for a in soup.find_all('a', class_='standart_titre') or soup.find_all('h3'):
            title = a.get_text(strip=True)
            if not title:
                continue
            if is_generic(title, "", ingredient) or '- gé' in title.lower():
                continue
            name = clean_brand_name(title)
            if name:  # never report an empty brand
                brands.add(name)
        if brands:
            return ", ".join(brands), "需進階解析", "✅ 成功找到非 Gé 藥品"
        return "查無原廠", "-", "❌ 查無資料或皆為 Gé"
    except Exception as e:
        return "執行失敗", "-", str(e)
94
 
95
  # ==========================================
96
+ # 🚀 模組 B:使用 Playwright 抓取 (英、加、日、澳、瑞、德、瑞典)
97
  # ==========================================
98
def get_uk_originator(ingredient, page):
    """Search eMC for products whose title does not start with the ingredient.

    Args:
        ingredient: Ingredient name used as the search term.
        page: Playwright page used for navigation; the caller owns it.

    Returns:
        A ``(brands, companies, status)`` tuple of strings.
    """
    brands, companies = set(), set()
    try:
        # URL-encode the term, as the other scrapers in this file do.
        page.goto(f"https://www.medicines.org.uk/emc/search?q={quote(ingredient)}", timeout=30000)
        page.wait_for_selector('.search-results-product-info-title-link', timeout=15000)
        soup = BeautifulSoup(page.content(), 'html.parser')
        prefix = ingredient.lower()
        for link in soup.find_all('a', class_='search-results-product-info-title-link'):
            title = link.get_text(strip=True)
            # Titles starting with the ingredient name are treated as generics.
            if title.lower().startswith(prefix):
                continue
            brands.add(clean_brand_name(title))
            p_div = link.find_parent(class_='search-results-product-info')
            comp_tag = p_div.find(class_='search-results-product-info-company') if p_div else None
            if comp_tag:
                companies.add(comp_tag.get_text(strip=True))
        if brands:
            return ", ".join(brands), ", ".join(companies), "✅ 成功"
        return "查無原廠", "-", "❌ 皆以成分名開頭"
    except Exception as e:
        return "執行失敗", "-", str(e)
114
 
115
def get_canada_originator(ingredient, page):
    """Search Canada's DPD and report the company holding the lowest DIN.

    Rows rejected by ``is_generic`` are dropped; the rest are ordered by DIN
    and every product of the first row's company is returned.

    Args:
        ingredient: Active-ingredient name typed into the search form.
        page: Playwright page used for navigation; the caller owns it.

    Returns:
        A ``(brands, company, status)`` tuple of strings.
    """
    try:
        page.goto("https://health-products.canada.ca/dpd-bdpp/index-eng.jsp", timeout=45000, wait_until="domcontentloaded")
        page.locator('input[id="activeIngredient"]').fill(ingredient)
        page.keyboard.press("Enter")
        page.wait_for_selector('table#results', timeout=15000)

        results_table = BeautifulSoup(page.content(), 'html.parser').find('table', id='results')
        body = results_table.find('tbody') if results_table else None
        if not body:
            return "查無資料", "-", "❌ 表格未生成"

        candidates = []
        for row in body.find_all('tr'):
            cells = row.find_all('td')
            if len(cells) < 4:
                continue
            company = cells[2].get_text(strip=True)
            product = cells[3].get_text(strip=True)
            if is_generic(product, company, ingredient):
                continue
            din = re.search(r'\d+', cells[1].get_text(strip=True))
            if not din:
                continue
            anchor = cells[1].find('a')
            if anchor:
                candidates.append({
                    "company": company,
                    "product": product,
                    "din": int(din.group()),
                    "url": "https://health-products.canada.ca" + anchor['href'],
                })

        if not candidates:
            return "查無原廠", "-", "❌ 剩餘皆為學名藥"

        # Lowest DIN first; its company is taken as the originator.
        candidates.sort(key=lambda entry: entry['din'])
        oldest_company = candidates[0]['company']
        products = {entry['product'] for entry in candidates if entry['company'] == oldest_company}
        return ", ".join(products), oldest_company, " DIN 鎖定最古老藥廠"
    except Exception as e:
        return "執行失敗", "-", str(e)
142
 
143
def get_japan_originator(ing_ja, page):
    """Search the PMDA drug database and collect non-generic product names.

    Args:
        ing_ja: Japanese ingredient name typed into the search form.
        page: Playwright page used for navigation; the caller owns it.

    Returns:
        A ``(brands, companies, status)`` tuple of strings.
    """
    brands, companies = set(), set()
    try:
        page.goto("https://www.pmda.go.jp/PmdaSearch/iyakuSearch/", timeout=30000, wait_until="domcontentloaded")

        # Optional consent screen. A selector starting with `text=` cannot be
        # combined with a CSS comma list, so use CSS-only alternatives, and
        # wait explicitly because is_visible() returns immediately.
        try:
            btn = page.locator('input[value="同意する"], a:has-text("同意する")').first
            btn.wait_for(state="visible", timeout=3000)
            btn.click()
            page.wait_for_load_state('networkidle')
        except Exception:
            pass  # No consent prompt appeared within the wait; continue.

        page.locator('input#txtName, input[name="nameWord"]').first.fill(ing_ja)
        page.locator('input[name="btnA"], input[type="image"][src*="SearchBtn"]').first.click()
        page.wait_for_selector('table#ResultList', timeout=15000)

        table = BeautifulSoup(page.content(), 'html.parser').find('table', id='ResultList')
        for tr in (table.find_all('tr') if table else []):
            tds = tr.find_all('td')
            if len(tds) < 3:
                continue
            title = tds[1].get_text(strip=True)
            if is_generic(title, "", ing_ja):
                continue
            brands.add(clean_brand_name(title))
            # Drop the "marketing authorisation holder/" prefix from the cell.
            companies.add(tds[2].get_text(separator=" ", strip=True).replace('製造販売元/', ''))
        if brands:
            return ", ".join(brands), ", ".join(companies), "✅ 成功"
        return "查無原廠", "-", "❌ 皆為學名藥"
    except Exception as e:
        return "執行失敗", "-", str(e)
166
+
167
def get_australia_originator(ingredient, page):
    """Search the TGA ARTG listing and return the earliest-dated brand.

    For each listing whose title contains the ingredient name, the text in
    front of the ingredient is taken as the brand; the entry with the
    smallest ``datetime`` attribute wins.

    Args:
        ingredient: Ingredient name used as the search keyword.
        page: Playwright page used for navigation; the caller owns it.

    Returns:
        A ``(brand, source, status)`` tuple of strings.
    """
    try:
        page.goto(f"https://www.tga.gov.au/resources/artg?keywords={quote(ingredient)}", timeout=30000)
        page.wait_for_selector('.health-listing', timeout=15000)
        # Match the ingredient literally: names may contain regex
        # metacharacters such as "(" or "+".
        splitter = re.compile(re.escape(ingredient), flags=re.IGNORECASE)
        cands = []
        for article in BeautifulSoup(page.content(), 'html.parser').find_all('article', class_='node--artg'):
            title, time_tag = article.find('h3'), article.find('time')
            if not (title and time_tag):
                continue
            registered = time_tag.get('datetime')
            if not registered:
                # Entries without a date cannot be ordered; skip them.
                continue
            parts = splitter.split(title.get_text(strip=True))
            if len(parts) > 1 and parts[0].strip() and not is_generic(parts[0], "", ingredient):
                cands.append({"brand": parts[0].strip(), "date": registered})
        if cands:
            earliest = min(cands, key=lambda x: x['date'])
            return earliest['brand'], "TGA資料庫", f"✅ 最早註冊: {earliest['date'][:10]}"
        return "查無原廠", "-", "❌ 查無資料"
    except Exception as e:
        return "執行失敗", "-", str(e)
184
+
185
def get_switzerland_originator(ing_de, page):
    """Search compendium.ch and collect non-generic product cards.

    Args:
        ing_de: Ingredient name used as the search term (the caller passes
            the German name).
        page: Playwright page used for navigation; the caller owns it.

    Returns:
        A ``(brands, companies, status)`` tuple of strings.
    """
    brands, companies = set(), set()
    try:
        # URL-encode the term so spaces and umlauts survive the query string.
        page.goto(f"https://compendium.ch/search?q={quote(ing_de)}", timeout=30000)
        page.wait_for_selector('.medicament-card', timeout=15000)
        soup = BeautifulSoup(page.content(), 'html.parser')
        for card in soup.find_all('div', class_=re.compile('medicament-card')):
            h3, strong = card.find('h3'), card.find('strong', class_='info')
            if not (h3 and strong):
                continue
            title, comp = h3.get_text(strip=True), strong.get_text(strip=True)
            if is_generic(title, comp, ing_de):
                continue
            brands.add(clean_brand_name(title))
            companies.add(comp)
        if brands:
            return ", ".join(brands), ", ".join(companies), "✅ 成功"
        return "查無原廠", "-", "❌ 查無資料"
    except Exception as e:
        return "執行失敗", "-", str(e)
200
+
201
def get_germany_originator(ing_de, page):
    """Search Gelbe Liste by substance and collect non-generic products.

    Args:
        ing_de: Substance name used as the ``substance`` query parameter.
        page: Playwright page used for navigation; the caller owns it.

    Returns:
        A ``(brands, companies, status)`` tuple of strings. A product with no
        company paragraph contributes ``"-"`` as its company.
    """
    found_brands, found_companies = set(), set()
    try:
        page.goto(f"https://www.gelbe-liste.de/profi-suche/results?substance={quote(ing_de)}", timeout=30000)
        page.wait_for_selector('.product-list', timeout=15000)
        listing = BeautifulSoup(page.content(), 'html.parser').find('ul', class_='product-list')
        entries = listing.find_all('li') if listing else []
        for entry in entries:
            heading = entry.find('h5')
            maker_tag = entry.find('p', class_='small')
            if not heading:
                continue
            name = heading.get_text(strip=True)
            maker = maker_tag.get_text(strip=True) if maker_tag else "-"
            if is_generic(name, maker, ing_de):
                continue
            found_brands.add(clean_brand_name(name))
            found_companies.add(maker)
        if not found_brands:
            return "查無原廠", "-", "❌ 查無資料"
        return ", ".join(found_brands), ", ".join(found_companies), "✅ 成功"
    except Exception as e:
        return "執行失敗", "-", str(e)
218
+
219
def get_sweden_originator(ingredient, page):
    """Search the TLV price/decision database and collect non-generic rows.

    The product name is read from the first cell and the company from the
    fourth cell of each row of the first table on the page.

    Args:
        ingredient: Name used as the ``product`` query parameter.
        page: Playwright page used for navigation; the caller owns it.

    Returns:
        A ``(brands, companies, status)`` tuple of strings.
    """
    brands, companies = set(), set()
    try:
        # URL-encode the term; a raw "&" or space would corrupt the query.
        page.goto(f"https://www.tlv.se/beslut/sok-priser-och-beslut-i-databasen.html?product={quote(ingredient)}&tab=1", timeout=30000)
        page.wait_for_selector('table', timeout=15000)
        table = BeautifulSoup(page.content(), 'html.parser').find('table')
        tbody = table.find('tbody') if table else None
        for tr in (tbody.find_all('tr') if tbody else []):
            tds = tr.find_all('td')
            if len(tds) < 4:
                continue
            title, comp = tds[0].get_text(strip=True), tds[3].get_text(strip=True)
            if is_generic(title, comp, ingredient):
                continue
            brands.add(clean_brand_name(title))
            companies.add(comp)
        if brands:
            return ", ".join(brands), ", ".join(companies), "✅ 成功 (TLV)"
        return "查無原廠", "-", "❌ 查無資料"
    except Exception as e:
        return "執行失敗", "-", str(e)
236
 
237
  # ==========================================
238
+ # 🚀 主執行中樞:併發與隔離
239
  # ==========================================
240
def run_all_ten_countries(ing_en, ing_ja_manual, ing_de_manual):
    """Run every country lookup in sequence and return one row per country.

    The HTTP-only lookups (US, Belgium, France) run first; the remaining
    lookups share one Chromium instance, each in its own page.

    Args:
        ing_en: English ingredient name (required).
        ing_ja_manual: Optional Japanese name; auto-translated when empty.
        ing_de_manual: Optional German name; auto-translated when empty.

    Returns:
        A list of ``[country, brands, companies, status]`` rows.
    """
    if not ing_en or not ing_en.strip():
        return [["錯誤", "請輸入英文成分名", "-", ""]]
    ing_en = ing_en.strip()

    ing_ja = (ing_ja_manual or "").strip() or translate_lang(ing_en, 'ja')
    ing_de = (ing_de_manual or "").strip() or translate_lang(ing_en, 'de')

    results = []

    # 1. HTTP-only lookups (no browser needed).
    api_jobs = (
        ("🇺🇸 美國 (FDA)", get_usa_originator),
        ("🇧🇪 比利時 (Test-Achats)", get_belgium_originator),
        ("🇫🇷 法國 (Medicaments)", get_france_originator),
    )
    for name, func in api_jobs:
        b, c, log = func(ing_en)
        results.append([name, b, c, log])

    # 2. Browser lookups: one shared context, a fresh page per site.
    browser_jobs = (
        (get_uk_originator, ing_en, "🇬🇧 英國 (eMC)"),
        (get_canada_originator, ing_en, "🇨🇦 加拿大 (DPD)"),
        (get_japan_originator, ing_ja, "🇯🇵 日本 (PMDA)"),
        (get_australia_originator, ing_en, "🇦🇺 澳洲 (TGA)"),
        (get_switzerland_originator, ing_de, "🇨🇭 瑞士 (Compendium)"),
        (get_germany_originator, ing_de, "🇩🇪 德國 (Gelbe Liste)"),
        (get_sweden_originator, ing_en, "🇸🇪 瑞典 (TLV)"),
    )
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True, args=['--no-sandbox', '--disable-dev-shm-usage'])
        try:
            context = browser.new_context(user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/122.0.0.0 Safari/537.36")

            def run_pw(func, arg, name):
                page = context.new_page()
                try:
                    b, c, log = func(arg, page)
                finally:
                    # Close the tab even if the scraper raised.
                    page.close()
                results.append([name, b, c, log])

            for func, arg, name in browser_jobs:
                run_pw(func, arg, name)
        finally:
            # Always release the browser process, including on errors.
            browser.close()

    return results
 
280
  # ==========================================
281
  # 🎨 UI 介面
282
  # ==========================================
283
# Gradio front end: one required input, two optional overrides inside a
# collapsed accordion, one button, one read-only result table.
with gr.Blocks(title="國原廠商品名智能檢索") as demo:
    gr.Markdown("## 🌐 跨國原廠商品名檢索器 (十國完整版)")

    with gr.Row():
        ing_en = gr.Textbox(
            label="🧪 英文成分名 (必填)",
            placeholder="例如: Semaglutide",
        )
    with gr.Row():
        with gr.Accordion("⚙️ 手動覆寫翻譯 (進階)", open=False):
            ing_ja = gr.Textbox(
                label="🇯🇵 日文成分名",
                placeholder="若空白則自動翻譯",
            )
            ing_de = gr.Textbox(
                label="🇩🇪 德文成分名",
                placeholder="若空白則自動翻譯",
            )

    search_btn = gr.Button("🚀 啟動十國查詢", variant="primary")

    result_table = gr.Dataframe(
        headers=["國家", "🌟 判定為原廠的商品名", "🏭 藥廠名稱", "🛠️ 系統狀態"],
        datatype=["str", "str", "str", "str"],
        wrap=True,
        interactive=False,
    )

    # Each click runs the full search and replaces the table contents.
    search_btn.click(
        fn=run_all_ten_countries,
        inputs=[ing_en, ing_ja, ing_de],
        outputs=[result_table],
    )

if __name__ == "__main__":
    demo.launch()