Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -462,22 +462,77 @@ def get_switzerland_originator(ing_de, page):
|
|
| 462 |
def get_germany_originator(ing_de, page):
|
| 463 |
log, brands, companies = [], set(), set()
|
| 464 |
try:
|
| 465 |
-
|
| 466 |
-
page.
|
| 467 |
-
|
| 468 |
-
|
| 469 |
-
|
| 470 |
-
|
| 471 |
-
|
| 472 |
-
|
| 473 |
-
|
| 474 |
-
|
| 475 |
-
|
| 476 |
-
|
| 477 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 478 |
if brands: return ", ".join(brands), ", ".join(companies), "\n".join(log)
|
| 479 |
-
return "查無原廠", "-", "\n".join(log)
|
| 480 |
-
except Exception as e:
|
|
|
|
| 481 |
|
| 482 |
# ==========================================
|
| 483 |
# 🚀 主執行中樞
|
|
|
|
| 462 |
def get_germany_originator(ing_de, page):
    """Look up originator brands for an ingredient on the BfArM portal.

    Drives ``page`` through the search form at
    ``portal.bfarm.de/amguifree``, types ``ing_de`` into the second search
    row, then walks the paginated result table.  Every row that
    ``is_generic(title, comp, ing_de)`` does not flag contributes its
    cleaned title to the brand set and its company cell to the company set.

    Args:
        ing_de: Ingredient search term typed into the form (presumably the
            German substance name -- confirm against the caller).
        page: Browser page object exposing ``goto``, ``locator``,
            ``wait_for_selector``, ``content`` and ``wait_for_timeout``
            (looks like a Playwright sync ``Page`` -- TODO confirm).

    Returns:
        A ``(brands, companies, log)`` tuple of strings:
        * on success, comma-joined brand names and company names in sorted
          order, plus the step log;
        * ``("查無原廠", "-", log)`` when no row survived the filter;
        * ``("執行失敗", "-", log)`` when any step raised; the exception text
          is appended to the log.
    """
    # Upper bound on result pages.  Without it, a pagination control whose
    # "current page" marker never advances would keep the loop running forever.
    max_pages = 200

    log, brands, companies = [], set(), set()
    try:
        log.append(f"1. 前往 PharmNet.Bund (搜尋: {ing_de})...")
        page.goto("https://portal.bfarm.de/amguifree/am/search.xhtml", timeout=45000, wait_until="domcontentloaded")

        log.append("2. 定位並輸入 Stoffname (第二格有效成分)...")
        # The second search row (index 1 in the JSF id) is targeted by exact id.
        search_input = page.locator('input[id="searchForm:searchInputsComponent:searchRows:1:firstSearchTerm"]')
        search_input.wait_for(state="attached", timeout=15000)
        search_input.fill(ing_de)

        # Submit the search form.
        page.locator('input[name="searchForm:searchInputsComponent:suchestarten"]').click()

        current_page = 1
        while True:
            log.append(f" -> 掃描第 {current_page} 頁資料...")
            try:
                # Wait for the result table to be rendered.
                page.wait_for_selector('table[id="searchResultsForm:searchResultsComponent:titles"]', timeout=15000)
            except Exception:
                # Best effort: a missing table is reported in the log, not raised.
                log.append("❌ 查無資料或等待表格超時。")
                break

            soup = BeautifulSoup(page.content(), 'html.parser')
            table = soup.find('table', id='searchResultsForm:searchResultsComponent:titles')
            tbody = table.find('tbody') if table else None
            if not tbody:
                break

            for tr in tbody.find_all('tr'):
                tds = tr.find_all('td')
                # Column 1 holds the product title, column 3 the company.
                if len(tds) < 4:
                    continue
                title = tds[1].get_text(strip=True)
                comp = tds[3].get_text(strip=True)

                # Keep only rows the generic filter does not reject.
                if not is_generic(title, comp, ing_de):
                    brands.add(clean_brand_name(title))
                    if comp != "-":
                        companies.add(comp)

            # ------------------------------------------------------------
            # Pagination (each page change is a JSF postback).
            # ------------------------------------------------------------
            paging_div = soup.find('div', class_='browse')
            has_next = False

            if paging_div:
                buttons = paging_div.find_all('input', class_='submit')
                for i, btn in enumerate(buttons):
                    # The button carrying the "disabled" class marks the current page.
                    if 'disabled' in btn.get('class', []):
                        # A button after it means another page exists.
                        if i + 1 < len(buttons):
                            next_btn_id = buttons[i + 1].get('id')
                            # Without an id the selector would match nothing and
                            # the click would time out, losing collected results.
                            if next_btn_id:
                                page.locator(f'input[id="{next_btn_id}"]').click()
                                # Give the JSF postback time to refresh the table.
                                page.wait_for_timeout(2500)
                                current_page += 1
                                has_next = True
                        break

            # No further page button found: scanning is complete.
            if not has_next:
                log.append("✅ 已掃描至最後一頁。")
                break

            if current_page > max_pages:
                log.append(f"⚠️ 已達頁數上限 ({max_pages}),停止翻頁。")
                break

        # Sets have no stable iteration order; sort so that the same input
        # always yields the same output string.
        if brands:
            return ", ".join(sorted(brands)), ", ".join(sorted(companies)), "\n".join(log)
        return "查無原廠", "-", "\n".join(log) + "\n❌ 查無資料或全為學名藥"
    except Exception as e:
        # Top-level boundary: report the failure in the returned log.
        return "執行失敗", "-", "\n".join(log) + f"\n錯誤: {e}"
|
| 536 |
|
| 537 |
# ==========================================
|
| 538 |
# 🚀 主執行中樞
|