# NOTE: web file-viewer residue (status lines, per-line commit hashes and the
# line-number gutter) was removed from the top of this paste.
# Reported file size: 27,184 bytes.
import os
import re
import datetime
import requests
from urllib.parse import quote
from bs4 import BeautifulSoup
from playwright.sync_api import sync_playwright
import gradio as gr
from curl_cffi import requests as curl_req
import torch
from transformers import pipeline
# Install the Chromium build used by Playwright before any scraper runs.
os.system("playwright install chromium")

# ==========================================
# 🧠 Load the native ElanMT medical translation model
# ==========================================
print("⏳ 系統啟動中:正在載入 ElanMT 醫療翻譯模型...")

# Use the GPU when torch reports one, otherwise run on CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

try:
    translator_en_ja = pipeline(
        "translation",
        model="Mitsua/elan-mt-bt-en-ja",
        device=device,
    )
    print(f"✅ ElanMT 模型載入完成!(執行環境: {device})")
except Exception as e:
    # A missing model is not fatal: translate_lang() checks for None and
    # falls back to its other translation sources.
    print(f"⚠️ ElanMT 模型載入失敗,將退回備用機制。錯誤: {e}")
    translator_en_ja = None
# ==========================================
# 🛠️ 共用工具:翻譯與進階學名藥濾網
# ==========================================
def get_official_japanese_name(ingredient_en):
    """Return the Japanese Wikipedia title linked from the top English hit.

    Searches English Wikipedia for ``ingredient_en``, takes the first search
    result and requests its Japanese language link.  The returned value is the
    part of that title before the first space or ``(``.

    Returns ``None`` when the search has no hits, when no page carries a
    Japanese link, or when any step raises.
    """
    try:
        search_hits = requests.get(
            f"https://en.wikipedia.org/w/api.php?action=query&list=search&srsearch={quote(ingredient_en)}&utf8=&format=json",
            timeout=5,
        ).json()['query']['search']
        if not search_hits:
            return None

        top_title = search_hits[0]['title']
        link_payload = requests.get(
            f"https://en.wikipedia.org/w/api.php?action=query&titles={quote(top_title)}&prop=langlinks&lllang=ja&format=json",
            timeout=5,
        ).json()

        for page_info in link_payload['query']['pages'].values():
            if 'langlinks' in page_info:
                ja_title = page_info['langlinks'][0]['*']
                # Drop anything after a space or an opening parenthesis.
                return ja_title.partition(' ')[0].partition('(')[0]
    except Exception:
        # Best-effort lookup: callers treat None as "no official name found".
        return None
    return None
def translate_lang(text, target_lang):
    """Translate an English ingredient name into ``target_lang``.

    For ``target_lang == 'ja'`` the sources are tried in this order: a small
    table of hard-coded overrides, the ElanMT pipeline (when it loaded), and
    the Japanese Wikipedia title.  For every language the final fallback is
    the Google translate endpoint.

    Args:
        text: Ingredient name to translate.
        target_lang: Target language code (e.g. ``'ja'`` or ``'de'``).

    Returns:
        The translated string, or ``text`` unchanged when the input is blank
        or every source fails.
    """
    # Every back-end receives the trimmed text, so stray whitespace from the
    # UI cannot change the query.  Blank input never reaches the network.
    query = text.strip()
    if not query:
        return text

    if target_lang == 'ja':
        ja_overrides = {"bilastine": "ビラスチン", "semaglutide": "セマグルチド"}
        override = ja_overrides.get(query.lower())
        if override:
            return override

        if translator_en_ja is not None:
            try:
                return translator_en_ja(query)[0]['translation_text']
            except Exception:
                # Deliberate best-effort: fall through to the next source.
                pass

        official_ja = get_official_japanese_name(query)
        if official_ja:
            return official_ja

    try:
        url = f"https://translate.googleapis.com/translate_a/single?client=gtx&sl=en&tl={target_lang}&dt=t&q={quote(query)}"
        res = requests.get(url, timeout=10)
        if res.status_code == 200:
            return res.json()[0][0][0].strip()
    except Exception:
        # Deliberate best-effort: an untranslated name is still usable.
        pass
    return text
# Name fragments of generic manufacturers and parallel importers, plus generic
# markers such as the French "- Gé" suffix.
_GENERIC_KEYWORDS = (
    'sandoz', 'teva', 'apotex', 'ratiopharm', 'jamp', 'mint', 'pharmascience', 'sanis', 'sivem',
    'auro', 'glenmark', 'taro', 'marcan', 'nora', 'mantra', 'reddy', 'mepha', 'axapharm',
    'helvepharm', 'zentiva', 'spirig', 'aliud', 'puren', 'stada', 'eg ', '- gé', 'biogaran',
    'arrow', 'viatris', 'zydus', 'kohlpharma', 'eurim', 'abacus', 'emra', 'cc pharma',
    'orifarm', 'bb farma', 'fd pharma', 'mpa pharma', '1 4 u', '2care4', 'axicorp',
    'nattermann', '1 0 1 carefarm', 'haemato', 'eql', 'krka', 'actavis', 'accord', 'aristo',
    'mylan', 'sun pharma', 'hexal', 'beximco', 'bionorica', '1a pharma',
    'tiefenbacher', 'cinfa', 'polpharma', 'pharmaceutical innovation services', 'eurogenerics', 'eg',
)


def _build_generic_keyword_pattern(keywords):
    """Compile one regex that matches any keyword at the start of a word."""
    alternatives = []
    for kw in dict.fromkeys(k.strip() for k in keywords):
        # Keywords must begin a word, so 'mint' no longer matches inside an
        # unrelated name.  Markers that start with punctuation ('- gé') are
        # left unanchored.
        left = r"(?<!\w)" if kw[0].isalnum() else ""
        # Very short tokens ('eg', 'eql') must be whole words; otherwise
        # brands such as "Egaten" would still match.
        right = r"(?!\w)" if len(kw) <= 3 else ""
        alternatives.append(f"{left}{re.escape(kw)}{right}")
    return re.compile("|".join(alternatives))


_GENERIC_KEYWORD_RE = _build_generic_keyword_pattern(_GENERIC_KEYWORDS)


def is_generic(brand_name, company_name, ingredient):
    """Heuristically decide whether a product looks like a generic.

    A product is treated as generic when any of these hold:

    * the brand name contains the (non-empty) ingredient name;
    * the brand or company name contains a known generic-manufacturer keyword
      at the start of a word;
    * the brand name contains ``「`` or ``(``.

    Args:
        brand_name: Product name as scraped.
        company_name: Company name, or ``""`` when unknown.
        ingredient: Active ingredient name in the language of the listing.

    Returns:
        ``True`` if the product looks generic, otherwise ``False``.
    """
    b_lower = brand_name.lower()
    c_lower = company_name.lower()
    i_lower = ingredient.lower().strip()

    # An empty ingredient is a substring of every string; without this guard
    # every brand would be classified as generic.
    if i_lower and i_lower in b_lower:
        return True
    if _GENERIC_KEYWORD_RE.search(b_lower) or _GENERIC_KEYWORD_RE.search(c_lower):
        return True
    return '「' in brand_name or '(' in brand_name
# Dosage-form / strength markers that end the brand part of a product name.
# Japanese markers and digits are matched anywhere because they are written
# directly against the brand ("ビラノア錠20mg").  Latin markers only count when
# the preceding character is not an ASCII letter, so they cannot fire at the
# start of a name or in the middle of a word ("Solian", "Imodium", "Pentasa").
_DOSAGE_TOKEN_RE = re.compile(
    r'皮下注|錠|カプセル|顆粒|シロップ|細粒|液|\d+'
    r'|(?<=[^A-Za-z])(?:OD|Augentropfen|Schmelztabletten|Tabletten|kids|Lingual'
    r'|Sol|cp|inj|pen|prefilled|flex)',
    re.IGNORECASE,
)


def clean_brand_name(raw_name):
    """Reduce a scraped product name to its bare brand name.

    Everything from the first dosage-form or strength marker onwards is cut
    off, ``®``/``™`` are removed, and surrounding spaces, hyphens and
    underscores are stripped.

    Args:
        raw_name: Product name as scraped.

    Returns:
        The cleaned brand name.  If trimming would leave nothing (for example
        a name that starts with a digit), the tidied full name is returned
        instead of an empty string.
    """
    def _tidy(value):
        return value.replace('®', '').replace('™', '').strip(' -_')

    marker = _DOSAGE_TOKEN_RE.search(raw_name)
    head = raw_name[:marker.start()] if marker else raw_name
    return _tidy(head) or _tidy(raw_name)
# ==========================================
# 🚀 模組 A:澳洲 TGA
# ==========================================
def get_australia_originator(ingredient):
    """Find the earliest non-generic ARTG entry for ``ingredient`` on the TGA site.

    Fetches the ARTG search page, keeps entries whose title has text before
    the ingredient name and is not flagged by ``is_generic``, and picks the
    one with the smallest ``datetime`` attribute.  The sponsor is read from
    that entry's detail page when a link is available.

    Args:
        ingredient: English ingredient name.

    Returns:
        A ``(brand, company, log)`` tuple of strings.  ``brand`` is
        ``"查無原廠"`` when nothing qualifies and ``"執行失敗"`` when an
        exception occurred; in the latter case the error text is appended to
        ``log``.
    """
    log = []
    try:
        log.append("1. 發送 GET 至澳洲 TGA 搜尋頁面 (啟用 curl_cffi 偽裝)...")
        session = curl_req.Session(impersonate="chrome120")
        # NOTE(review): verify=False turns off TLS certificate validation for
        # these requests.  Kept as in the original; confirm it is still needed.
        search_url = f"https://www.tga.gov.au/resources/artg?keywords={quote(ingredient)}"
        res = session.get(search_url, timeout=45, verify=False)
        soup = BeautifulSoup(res.text, 'html.parser')
        articles = soup.find_all('article', class_='node--artg')
        log.append(f"2. 找到 {len(articles)} 筆 ARTG 紀錄。")

        # The ingredient is user input: escape it so characters such as
        # "(" or "+" are matched literally instead of as regex syntax.
        ingredient_re = re.compile(re.escape(ingredient), re.IGNORECASE)

        cands = []
        for article in articles:
            title_tag, time_tag = article.find('h3'), article.find('time')
            if not (title_tag and time_tag):
                continue
            full_t = title_tag.get_text(strip=True)
            # Only the text before the first occurrence is needed.
            parts = ingredient_re.split(full_t, maxsplit=1)
            brand = parts[0].strip()
            if len(parts) < 2 or not brand:
                continue
            if is_generic(brand, "", ingredient):
                continue
            a_tag = title_tag.find('a')
            cands.append({
                "brand": brand,
                "date": time_tag.get('datetime'),
                "href": a_tag.get('href') if a_tag else None,
            })

        if not cands:
            return "查無原廠", "-", "\n".join(log)

        # Earliest entry wins.  Entries without a datetime sort last rather
        # than raising a None-vs-str comparison error.
        # NOTE(review): assumes the datetime attribute sorts chronologically
        # as text (ISO-8601) — confirm against the live page.
        target = min(cands, key=lambda c: (c['date'] is None, c['date'] or ''))
        brand = target['brand']
        company = "TGA資料庫"
        if target['href']:
            href = target['href']
            detail_url = f"https://www.tga.gov.au{href}" if href.startswith('/') else href
            res_detail = session.get(detail_url, timeout=45, verify=False)
            detail_soup = BeautifulSoup(res_detail.text, 'html.parser')
            sponsor_div = detail_soup.find('div', class_=re.compile(r'field--name-field-sponsor'))
            sponsor_link = sponsor_div.find('a') if sponsor_div else None
            if sponsor_link:
                company = sponsor_link.get_text(strip=True)
        return brand, company, "\n".join(log)
    except Exception as e:
        return "執行失敗", "-", "\n".join(log) + f"\n錯誤: {str(e)}"
# ==========================================
# 🚀 模組 B:Playwright 抓取 (加長超時、修復德國邏輯)
# ==========================================
def get_usa_originator(ingredient, page):
    """Collect reference-listed brands for ``ingredient`` from the FDA Orange Book.

    Loads the product results page in ``page``, reads table ``#example`` and
    keeps rows whose RLD column text contains ``RLD`` or ``RS`` and that are
    not flagged by ``is_generic``.

    Args:
        ingredient: English ingredient name.
        page: Playwright page used for navigation.

    Returns:
        A ``(brands, companies, log)`` tuple of strings; ``brands`` is
        ``"查無原廠"`` when nothing qualifies and ``"執行失敗"`` on error.
    """
    log, brands, companies = [], set(), set()
    try:
        log.append(f"1. 前往 FDA Orange Book...")
        url = f"https://www.accessdata.fda.gov/scripts/cder/ob/results_product.cfm?Generic_Name={quote(ingredient)}&rx_otc=All"
        # Generous navigation timeout for the US site.
        page.goto(url, timeout=60000, wait_until="domcontentloaded")
        try:
            page.wait_for_selector('table#example, .alert-warning', state="attached", timeout=15000)
        except Exception:
            pass
        page.wait_for_timeout(2000)

        soup = BeautifulSoup(page.content(), 'html.parser')
        table = soup.find('table', id='example')
        if table and table.find('tbody'):
            header_texts = [th.get_text(strip=True).lower() for th in table.find_all('th')]

            def column_of(label, fallback):
                # Index of the first header containing ``label``; otherwise
                # the hard-coded fallback position.
                for pos, text in enumerate(header_texts):
                    if label in text:
                        return pos
                return fallback

            brand_idx = column_of('proprietary name', 2)
            rld_idx = column_of('rld', 8)
            mfg_idx = column_of('applicant holder', 10)

            for row in table.find('tbody').find_all('tr'):
                cells = row.find_all('td')
                if len(cells) <= max(rld_idx, brand_idx):
                    continue
                rld_text = cells[rld_idx].get_text(strip=True).upper()
                if not any(flag in rld_text for flag in ("RLD", "RS")):
                    continue
                title = cells[brand_idx].get_text(strip=True)
                if len(cells) > mfg_idx:
                    comp = cells[mfg_idx].get_text(strip=True)
                else:
                    comp = "-"
                if is_generic(title, comp, ingredient):
                    continue
                brands.add(clean_brand_name(title))
                if comp != "-":
                    companies.add(comp)

        if brands:
            return ", ".join(brands), ", ".join(companies), "\n".join(log)
        return "查無原廠", "-", "\n".join(log) + "\n❌ 查無資料"
    except Exception as e:
        return "執行失敗", "-", "\n".join(log) + f"\n錯誤: {str(e)}"
def get_belgium_originator(ingredient, page):
    """Collect non-generic product names for ``ingredient`` from CBIP (Belgium).

    Types the ingredient into the site's Select2 search box, clicks the first
    autocomplete option, then reads the second cell of every table row on the
    resulting page.

    Args:
        ingredient: English ingredient name.
        page: Playwright page used for navigation.

    Returns:
        A ``(brands, company, log)`` tuple of strings.  The company is the
        fixed label ``"CBIP 資料庫"`` on success; ``brands`` is ``"查無原廠"``
        when nothing qualifies and ``"執行失敗"`` on error.
    """
    log, brands = [], set()
    try:
        log.append(f"1. 前往 CBIP (比利時) 並準備定位 Select2 搜尋框...")
        page.goto("https://www.cbip.be/fr/", timeout=60000, wait_until="domcontentloaded")
        page.wait_for_timeout(2000)
        # Dismiss the cookie banner when it is shown.
        try:
            page.locator('text="Tout accepter"').click(timeout=3000)
        except Exception:
            pass

        # Target the Select2 input by its id/class selectors.
        log.append("2. 定位隱藏的 Select2 輸入框...")
        search_input = page.locator('#s2id_autogen1, input.select2-focusser, input[type="search"]').first
        search_input.wait_for(state="attached", timeout=15000)
        # Select2 usually needs a click before the real input is exposed.
        try:
            page.locator('#s2id_search-dummy-input').click(timeout=5000)
        except Exception:
            pass
        search_input.fill(ingredient)

        log.append("3. 等待下拉選單 (Autocomplete) 出現...")
        dropdown = page.locator('.select2-results, .tt-menu, .autocomplete-suggestions')
        dropdown.first.wait_for(state='visible', timeout=15000)
        page.wait_for_timeout(1500)

        log.append("4. 點擊下拉選單中的目標連結...")
        options = page.locator('.select2-results__option, .tt-menu a, .autocomplete-suggestions a')
        if not options.count() > 0:
            return "查無原廠", "-", "\n".join(log) + "\n❌ 下拉選單未出現可點擊選項"
        options.first.click(force=True)
        page.wait_for_load_state('domcontentloaded', timeout=30000)
        page.wait_for_timeout(2000)

        log.append("5. 解析藥品表格內商品名...")
        soup = BeautifulSoup(page.content(), 'html.parser')
        for row in soup.find_all('tr'):
            cells = row.find_all('td')
            if len(cells) < 2:
                continue
            raw_name = cells[1].get_text(separator=" ", strip=True)
            # Cut the name at the first dosage-form abbreviation.
            name = re.sub(r'(compr\.|gél\.|flac\.|amp\.|sol\.).*', '', raw_name, flags=re.IGNORECASE).strip()
            if len(name) > 3 and not is_generic(name, "", ingredient):
                brands.add(clean_brand_name(name))

        if brands:
            return ", ".join(brands), "CBIP 資料庫", "\n".join(log)
        return "查無原廠", "-", "\n".join(log) + "\n❌ 查無資料或全為學名藥"
    except Exception as e:
        return "執行失敗", "-", "\n".join(log) + f"\n錯誤: {str(e)}"
def get_france_originator(ingredient, page):
    """Collect non-generic product names for ``ingredient`` from Vidal (France).

    Parses the search result page, and for every result not flagged by
    ``is_generic`` opens its detail page to read the laboratory name from
    ``div.nomlab``.

    Args:
        ingredient: English ingredient name.
        page: Playwright page used for navigation.

    Returns:
        A ``(brands, companies, log)`` tuple of strings; ``brands`` is
        ``"查無原廠"`` when nothing qualifies and ``"執行失敗"`` on error.
    """
    log, brands, companies = [], set(), set()
    try:
        page.goto(
            f"https://www.vidal.fr/recherche.html?query={quote(ingredient)}",
            timeout=45000,
            wait_until="domcontentloaded",
        )
        page.wait_for_selector('.results, .searchbar', timeout=15000)

        # The result list is parsed once up front, so navigating ``page`` to
        # detail pages inside the loop does not disturb the iteration.
        soup = BeautifulSoup(page.content(), 'html.parser')
        for result in soup.find_all('div', class_=re.compile(r'result drug')):
            info_div = result.find('div', class_='infos')
            if not (info_div and info_div.find('a')):
                continue
            anchor = info_div.find('a')
            title = anchor.get_text(strip=True)
            href = anchor.get('href', '')
            if is_generic(title, "", ingredient):
                continue

            if href.startswith('http'):
                detail_url = href
            elif href.startswith('/'):
                detail_url = f"https://www.vidal.fr{href}"
            else:
                detail_url = f"https://www.vidal.fr/{href}"
            page.goto(detail_url, timeout=30000, wait_until="domcontentloaded")

            comp = "-"
            try:
                page.wait_for_selector('div.nomlab', timeout=10000)
                detail_soup = BeautifulSoup(page.content(), 'html.parser')
                comp = detail_soup.find('div', class_='nomlab').get_text(strip=True)
            except Exception:
                # Laboratory name is optional; keep the brand regardless.
                pass

            brands.add(clean_brand_name(title))
            if comp != "-":
                companies.add(comp)

        if brands:
            return ", ".join(brands), ", ".join(companies), "\n".join(log)
        return "查無原廠", "-", "\n".join(log)
    except Exception as e:
        return "執行失敗", "-", "\n".join(log) + f"\n錯誤: {str(e)}"
def get_uk_originator(ingredient, page):
    """Collect non-generic product names for ``ingredient`` from the UK eMC site.

    Reads every search-result title link, skips titles that start with the
    ingredient name or are flagged by ``is_generic``, and records the company
    shown in the same result card when present.

    Args:
        ingredient: English ingredient name.
        page: Playwright page used for navigation.

    Returns:
        A ``(brands, companies, log)`` tuple of strings; ``brands`` is
        ``"查無原廠"`` when nothing qualifies and ``"執行失敗"`` on error.
    """
    log, brands, companies = [], set(), set()
    try:
        # URL-encode the query like the sibling scrapers do, so spaces and
        # reserved characters in the ingredient cannot corrupt the URL.
        page.goto(f"https://www.medicines.org.uk/emc/search?q={quote(ingredient)}", timeout=45000)
        page.wait_for_selector('.search-results-product-info-title-link', timeout=15000)
        soup = BeautifulSoup(page.content(), 'html.parser')

        ingredient_lower = ingredient.lower()
        for link in soup.find_all('a', class_='search-results-product-info-title-link'):
            title = link.get_text(strip=True)
            if title.lower().startswith(ingredient_lower) or is_generic(title, "", ingredient):
                continue
            brands.add(clean_brand_name(title))
            card = link.find_parent(class_='search-results-product-info')
            company_tag = card.find(class_='search-results-product-info-company') if card else None
            if company_tag:
                companies.add(company_tag.get_text(strip=True))

        if brands:
            return ", ".join(brands), ", ".join(companies), "\n".join(log)
        return "查無原廠", "-", "\n".join(log)
    except Exception as e:
        return "執行失敗", "-", "\n".join(log) + f"\n錯誤: {str(e)}"
def get_canada_originator(ingredient, page):
    """Find the originator company and its brands in Health Canada's DPD.

    Submits the active-ingredient search, keeps result rows not flagged by
    ``is_generic``, and treats the company of the row with the smallest
    number in the second column (stored under the key ``din``) as the
    originator.  All of that company's products are returned.

    Args:
        ingredient: English ingredient name.
        page: Playwright page used for navigation.

    Returns:
        A ``(brands, company, log)`` tuple of strings.  ``brands`` is
        ``"查無資料"`` when there is no result table, ``"查無原廠"`` when no
        row qualifies and ``"執行失敗"`` on error.
    """
    log = []
    try:
        page.goto("https://health-products.canada.ca/dpd-bdpp/index-eng.jsp", timeout=45000, wait_until="domcontentloaded")
        page.locator('input[id="activeIngredient"]').fill(ingredient)
        page.keyboard.press("Enter")
        page.wait_for_selector('table#results, .alert-info, .alert-warning', timeout=15000)

        soup = BeautifulSoup(page.content(), 'html.parser')
        table = soup.find('table', id='results')
        if not table or not table.find('tbody'):
            return "查無資料", "-", "\n".join(log)

        all_cands = []
        for tr in table.find('tbody').find_all('tr'):
            tds = tr.find_all('td')
            if len(tds) < 4:
                continue
            comp, prod = tds[2].get_text(strip=True), tds[3].get_text(strip=True)
            if is_generic(prod, comp, ingredient):
                continue
            m = re.search(r'\d+', tds[1].get_text(strip=True))
            if m:
                all_cands.append({"company": comp, "product": prod, "din": int(m.group())})

        if not all_cands:
            return "查無原廠", "-", "\n".join(log)

        orig_comp = min(all_cands, key=lambda c: c['din'])['company']
        # De-duplicate after cleaning: different strengths of one brand clean
        # to the same name and would otherwise be listed repeatedly.
        # dict.fromkeys keeps table order, so the output is deterministic.
        cleaned = (clean_brand_name(c['product']) for c in all_cands if c['company'] == orig_comp)
        brands = [name for name in dict.fromkeys(cleaned) if name]
        return ", ".join(brands), orig_comp, "\n".join(log)
    except Exception as e:
        return "執行失敗", "-", "\n".join(log) + f"\n錯誤: {str(e)}"
def get_japan_originator(ing_ja, page):
    """Collect non-generic product names for ``ing_ja`` from the PMDA search.

    Submits the search form, which opens the results in a popup, then reads
    table ``#ResultList`` on up to five result pages.

    Args:
        ing_ja: Japanese ingredient name.
        page: Playwright page used for navigation.

    Returns:
        A ``(brands, companies, log)`` tuple of strings; ``brands`` is
        ``"查無原廠"`` when nothing qualifies and ``"執行失敗"`` on error.
    """
    log, brands, companies = [], set(), set()
    popup = None
    try:
        page.goto("https://www.pmda.go.jp/PmdaSearch/iyakuSearch/", timeout=45000, wait_until="domcontentloaded")
        search_input = page.locator('input#txtName')
        search_input.wait_for(state="attached", timeout=15000)
        search_input.fill(ing_ja, force=True)
        with page.expect_popup() as popup_info:
            search_input.press("Enter")
        popup = popup_info.value

        page_limit = 5
        current_page = 1
        while current_page <= page_limit:
            try:
                popup.wait_for_selector('table#ResultList, .errormsg, .non-result', timeout=15000)
            except Exception:
                break
            table = BeautifulSoup(popup.content(), 'html.parser').find('table', id='ResultList')
            if not table:
                break

            for tr in table.find_all('tr'):
                tds = tr.find_all('td')
                if len(tds) < 3:
                    continue
                title = tds[1].get_text(strip=True)
                if is_generic(title, "", ing_ja):
                    continue
                brands.add(clean_brand_name(title))
                raw_comp = tds[2].get_text(separator=" ", strip=True)
                # Remove the role prefixes, then keep the first remaining
                # whitespace-separated token as the company name.
                raw_comp = re.sub(r'(製造販売元/|販売元/|提携先/)', ' ', raw_comp).strip()
                clean_comp = re.split(r'\s+', raw_comp)[0]
                # An empty cell must not add "" to the company list.
                if clean_comp:
                    companies.add(clean_comp)

            next_link = popup.locator('a[href*="changePg"]')
            if next_link.count() > 0:
                next_link.last.click(force=True)
                popup.wait_for_timeout(2000)
                current_page += 1
            else:
                break

        if brands:
            return ", ".join(brands), ", ".join(companies), "\n".join(log)
        return "查無原廠", "-", "\n".join(log)
    except Exception as e:
        return "執行失敗", "-", "\n".join(log) + f"\n錯誤: {str(e)}"
    finally:
        # Close the popup on every path, so a failure mid-scrape does not
        # leave it open while the remaining scrapers run.
        if popup is not None:
            try:
                popup.close()
            except Exception:
                # A failed close must not replace the result computed above.
                pass
def get_switzerland_originator(ing_de, page):
    """Collect non-generic product names for ``ing_de`` from swissmedicinfo.

    Fills the substance search field, submits it, and reads the clickable
    rows of the ``GVMonographies`` table: first cell is the product title,
    fourth cell the company.

    Args:
        ing_de: German ingredient name.
        page: Playwright page used for navigation.

    Returns:
        A ``(brands, companies, log)`` tuple of strings; ``brands`` is
        ``"查無原廠"`` when nothing qualifies and ``"執行失敗"`` on error.
    """
    log, brands, companies = [], set(), set()
    try:
        page.goto("https://swissmedicinfo-pro.ch/?Lang=EN", timeout=45000, wait_until="domcontentloaded")
        substance_box = page.locator('input#MainContent_ucSearch1_txtSubstance')
        substance_box.wait_for(state="attached", timeout=15000)
        substance_box.fill(ing_de, force=True)
        substance_box.press("Enter")
        try:
            page.wait_for_selector('table[id*="GVMonographies"], #MainContent_LabelNoResult', timeout=15000)
        except Exception:
            pass

        soup = BeautifulSoup(page.content(), 'html.parser')
        table = soup.find('table', id=re.compile(r'GVMonographies'))
        if table and table.find('tbody'):
            rows = table.find('tbody').find_all('tr', class_=re.compile(r'clickable-row'))
            for row in rows:
                cells = row.find_all('td')
                if len(cells) < 4:
                    continue
                title = cells[0].get_text(strip=True)
                comp = cells[3].get_text(strip=True)
                if is_generic(title, comp, ing_de):
                    continue
                brands.add(clean_brand_name(title))
                if comp != "-":
                    companies.add(comp)

        if brands:
            return ", ".join(brands), ", ".join(companies), "\n".join(log)
        return "查無原廠", "-", "\n".join(log)
    except Exception as e:
        return "執行失敗", "-", "\n".join(log) + f"\n錯誤: {str(e)}"
def get_sweden_originator(ingredient, page):
    """Collect non-generic product names for ``ingredient`` from FASS (Sweden).

    Loads the search page and reads every ``<details>`` result: the title
    comes from the ``font-semibold`` span in its summary, the company from
    the ``text-label-md`` span in its ordered list.

    Args:
        ingredient: English ingredient name.
        page: Playwright page used for navigation.

    Returns:
        A ``(brands, companies, log)`` tuple of strings; ``brands`` is
        ``"查無原廠"`` when nothing qualifies and ``"執行失敗"`` on error.
    """
    log, brands, companies = [], set(), set()
    try:
        page.goto(f"https://fass.se/search?query={quote(ingredient)}", timeout=45000, wait_until="domcontentloaded")
        try:
            page.wait_for_selector('details.app-toggle-details-icon, .no-results', state="attached", timeout=15000)
        except Exception:
            pass
        page.wait_for_timeout(2000)

        soup = BeautifulSoup(page.content(), 'html.parser')
        entries = soup.find_all('details', class_=re.compile(r'app-toggle-details-icon'))
        for entry in entries:
            summary = entry.find('summary')
            if not summary:
                continue

            title_span = summary.find('span', class_=re.compile(r'font-semibold'))
            if title_span:
                title = title_span.get_text(strip=True)
            else:
                title = ""

            comp = "-"
            listing = entry.find('ol')
            if listing:
                comp_span = listing.find('span', class_='text-label-md')
                if comp_span:
                    comp = comp_span.get_text(strip=True)

            if not title or is_generic(title, comp, ingredient):
                continue
            brands.add(clean_brand_name(title))
            if comp != "-":
                companies.add(comp)

        if brands:
            return ", ".join(brands), ", ".join(companies), "\n".join(log)
        return "查無原廠", "-", "\n".join(log)
    except Exception as e:
        return "執行失敗", "-", "\n".join(log) + f"\n錯誤: {str(e)}"
def get_germany_originator(ing_de, page):
    """Collect non-generic product names for ``ing_de`` from PharmNet.Bund.

    Opens the search form (accepting the terms-of-use page when redirected to
    it), submits the ingredient, and reads the result table on up to five
    pages: second cell is the product title, fourth cell the company.

    Args:
        ing_de: German ingredient name.
        page: Playwright page used for navigation.

    Returns:
        A ``(brands, companies, log)`` tuple of strings; ``brands`` is
        ``"查無原廠"`` when nothing qualifies and ``"執行失敗"`` on error.
    """
    log, brands, companies = [], set(), set()
    results_table_id = 'searchResultsForm:searchResultsComponent:titles'
    try:
        log.append("1. 前往 PharmNet.Bund...")
        page.goto("https://portal.bfarm.de/amguifree/am/search.xhtml", timeout=60000)
        page.wait_for_timeout(3000)  # allow time for a possible redirect

        # Accept the terms of use when the site redirected to that page.
        if "termsofuse" in page.url:
            log.append(" -> 偵測到條款頁面,點擊超連結同意並等待跳轉...")
            try:
                # Preferred: the accept link identified by id / class.
                accept_link = page.locator('a#docOutputPromptForm\\:acceptLink, a.button.next')
                if accept_link.count() > 0:
                    accept_link.first.click()
                else:
                    # Fallback: any link whose href carries accept=true.
                    page.locator('a[href*="accept=true"]').first.click()
                # Wait for the redirect back to the search page.
                page.wait_for_url("**/amguifree/am/search.xhtml**", timeout=15000)
                page.wait_for_timeout(2000)
            except Exception as e:
                log.append(f" ⚠️ 點擊同意條款失敗: {e}")

        log.append("2. 定位搜尋框...")
        search_input = page.locator('input[id="searchForm:searchInputsComponent:searchRows:1:firstSearchTerm"]')
        search_input.wait_for(state="attached", timeout=15000)
        search_input.fill(ing_de)
        page.locator('input[name="searchForm:searchInputsComponent:suchestarten"]').click()

        page_limit = 5
        current_page = 1
        while current_page <= page_limit:
            try:
                page.wait_for_selector(f'table[id="{results_table_id}"]', timeout=15000)
            except Exception:
                break

            soup = BeautifulSoup(page.content(), 'html.parser')
            table = soup.find('table', id=results_table_id)
            if not table or not table.find('tbody'):
                break

            for row in table.find('tbody').find_all('tr'):
                cells = row.find_all('td')
                if len(cells) < 4:
                    continue
                title = cells[1].get_text(strip=True)
                comp = cells[3].get_text(strip=True)
                if is_generic(title, comp, ing_de):
                    continue
                brands.add(clean_brand_name(title))
                if comp != "-":
                    companies.add(comp)

            # Pager: the first button carrying the 'disabled' class that still
            # has a successor is taken as the current page, and the button
            # right after it is clicked.
            has_next = False
            paging_div = soup.find('div', class_='browse')
            if paging_div:
                buttons = paging_div.find_all('input', class_='submit')
                for pos, btn in enumerate(buttons):
                    is_current = 'disabled' in btn.get('class', [])
                    if not (is_current and pos + 1 < len(buttons)):
                        continue
                    next_btn_id = buttons[pos + 1].get('id')
                    page.locator(f'input[id="{next_btn_id}"]').click()
                    page.wait_for_timeout(2500)
                    has_next = True
                    current_page += 1
                    break
            if not has_next:
                break

        if brands:
            return ", ".join(brands), ", ".join(companies), "\n".join(log)
        return "查無原廠", "-", "\n".join(log)
    except Exception as e:
        return "執行失敗", "-", "\n".join(log) + f"\n錯誤: {str(e)}"
# ==========================================
# 🚀 主執行中樞
# ==========================================
def run_all_ten_countries(ing_en, ing_ja_manual, ing_de_manual):
if not ing_en: return [["錯誤", "請輸入英文成分名", "-", ""]], ""
ing_ja = ing_ja_manual if ing_ja_manual else translate_lang(ing_en, 'ja')
ing_de = ing_de_manual if ing_de_manual else translate_lang(ing_en, 'de')
results = []
au_b, au_c, au_log = get_australia_originator(ing_en)
results.append(["🇦🇺 澳洲 (TGA)", au_b, au_c, au_log])
with sync_playwright() as p:
browser = p.chromium.launch(headless=True, args=['--no-sandbox', '--disable-dev-shm-usage'])
context = browser.new_context(user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/122.0")
def run_pw(func, arg, name):
page = context.new_page()
b, c, log = func(arg, page)
page.close()
results.append([name, b, c, log])
run_pw(get_usa_originator, ing_en, "🇺🇸 美國 (FDA)")
run_pw(get_belgium_originator, ing_en, "🇧🇪 比利時 (CBIP)")
run_pw(get_france_originator, ing_en, "🇫🇷 法國 (Vidal)")
run_pw(get_uk_originator, ing_en, "🇬🇧 英國 (eMC)")
run_pw(get_canada_originator, ing_en, "🇨🇦 加拿大 (DPD)")
run_pw(get_japan_originator, ing_ja, "🇯🇵 日本 (PMDA)")
run_pw(get_switzerland_originator, ing_de, "🇨🇭 瑞士 (Swissmedicinfo)")
run_pw(get_sweden_originator, ing_en, "🇸🇪 瑞典 (FASS)")
run_pw(get_germany_originator, ing_de, "🇩🇪 德國 (PharmNet.Bund)")
browser.close()
# 💡 修正:將日誌加入複製區域,並把換行替換為直線,防止貼上 Excel 跑版
copy_text = "國家\t🌟 判定為原廠的商品名\t🏭 藥廠名稱\t🛠️ 詳細日誌\n"
for r in results:
clean_log = str(r[3]).replace("\n", " | ")
copy_text += f"{r[0]}\t{r[1]}\t{r[2]}\t{clean_log}\n"
return results, copy_text
# ==========================================
# 🎨 UI 介面
# ==========================================
# Gradio UI: one required ingredient field, optional manual translations, a
# copyable TSV box and a results table, all wired to run_all_ten_countries().
with gr.Blocks(title="十國原廠商品名智能檢索器") as demo:
    gr.Markdown("## 🌐 跨國原廠商品名檢索器 (搭載原生 ElanMT 醫療翻譯)")

    with gr.Row():
        ing_en = gr.Textbox(label="🧪 英文成分名 (必填)", placeholder="例如: bilastine")

    with gr.Row():
        # Optional overrides; blank fields are translated automatically.
        with gr.Accordion("⚙️ 手動覆寫翻譯 (進階)", open=False):
            ing_ja = gr.Textbox(label="🇯🇵 日文成分名", placeholder="若空白則自動啟動 ElanMT 翻譯")
            ing_de = gr.Textbox(label="🇩🇪 德文成分名", placeholder="若空白則自動翻譯")

    search_btn = gr.Button("🚀 啟動十國查詢", variant="primary")

    copy_output = gr.Textbox(
        label="📋 一鍵複製用文字 (包含日誌,點擊右上角圖示,直接貼上 Excel)",
        show_copy_button=True,
        interactive=False,
        lines=11,
    )
    result_table = gr.Dataframe(
        headers=["國家", "🌟 判定為原廠的商品名", "🏭 藥廠名稱", "🛠️ 系統狀態與詳細日誌"],
        datatype=["str", "str", "str", "str"],
        wrap=True,
        interactive=False,
    )

    search_btn.click(
        fn=run_all_ten_countries,
        inputs=[ing_en, ing_ja, ing_de],
        outputs=[result_table, copy_output],
    )

# The stray trailing "|" after launch() was paste residue and a syntax error.
if __name__ == "__main__":
    demo.launch()