deneve07 committed on
Commit
bc6a450
·
verified ·
1 Parent(s): b51e8d0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -15
app.py CHANGED
@@ -462,22 +462,77 @@ def get_switzerland_originator(ing_de, page):
462
  def get_germany_originator(ing_de, page):
463
  log, brands, companies = [], set(), set()
464
  try:
465
- page.goto(f"https://www.gelbe-liste.de/profi-suche/results?substance={quote(ing_de)}", timeout=30000)
466
- page.wait_for_selector('.product-list', timeout=15000)
467
- soup = BeautifulSoup(page.content(), 'html.parser')
468
- ul = soup.find('ul', class_='product-list')
469
- if ul:
470
- lis = ul.find_all('li')
471
- for li in lis:
472
- h5, p_tag = li.find('h5'), li.find('p', class_='small')
473
- if h5:
474
- title, comp = h5.get_text(strip=True), (p_tag.get_text(strip=True) if p_tag else "-")
475
- if not is_generic(title, comp, ing_de):
476
- brands.add(clean_brand_name(title))
477
- companies.add(comp)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
478
  if brands: return ", ".join(brands), ", ".join(companies), "\n".join(log)
479
- return "查無原廠", "-", "\n".join(log)
480
- except Exception as e: return "執行失敗", "-", "\n".join(log) + f"\n錯誤: {str(e)}"
 
481
 
482
  # ==========================================
483
  # 🚀 主執行中樞
 
462
  def get_germany_originator(ing_de, page):
  # Searches the BfArM portal (portal.bfarm.de) for `ing_de` and collects product names / companies
  # from the result table rows that is_generic() does not flag, following result pages via `page`.
  # Returns a 3-tuple of strings: (brands joined by ", ", companies joined by ", ", log joined by newlines);
  # ("查無原廠", "-", log) when no brand was collected, ("執行失敗", "-", log + error text) on any exception.
  # `page` is driven through goto / locator / wait_for_selector / content / wait_for_timeout (Playwright, per the comments below).
463
  log, brands, companies = [], set(), set()
464
  try:
465
+ log.append(f"1. 前往 PharmNet.Bund (搜尋: {ing_de})...")
466
+ page.goto("https://portal.bfarm.de/amguifree/am/search.xhtml", timeout=45000, wait_until="domcontentloaded")
467
+
468
+ log.append("2. 定位並輸入 Stoffname (第二格有效成分)...")
469
+ # 💡 Target the second search row's input (row index 1 in the element id) by its exact id, as supplied by the requester
470
+ search_input = page.locator('input[id="searchForm:searchInputsComponent:searchRows:1:firstSearchTerm"]')
471
+ search_input.wait_for(state="attached", timeout=15000)
472
+ search_input.fill(ing_de)
473
+
474
+ # Click the search button
475
+ page.locator('input[name="searchForm:searchInputsComponent:suchestarten"]').click()
476
+
477
+ current_page = 1
478
+ while True:
479
+ log.append(f" -> 掃描第 {current_page} 頁資料...")
480
+ try:
481
+ # Wait for the search results table to load (up to 15 s); any failure here ends the scan
482
+ page.wait_for_selector('table[id="searchResultsForm:searchResultsComponent:titles"]', timeout=15000)
483
+ except Exception:
484
+ log.append("❌ 查無資料或等待表格超時。")
485
+ break
486
+
487
+ soup = BeautifulSoup(page.content(), 'html.parser')
488
+ table = soup.find('table', id='searchResultsForm:searchResultsComponent:titles')
489
+
490
+ if table and table.find('tbody'):
491
+ rows = table.find('tbody').find_all('tr')
492
+ for tr in rows:
493
+ tds = tr.find_all('td')
494
+ # Per the HTML sample supplied by the requester (not visible here): cell index 1 is the product name, index 3 is the company
495
+ if len(tds) >= 4:
496
+ title = tds[1].get_text(strip=True)
497
+ comp = tds[3].get_text(strip=True)
498
+
499
+ # Filter through is_generic, then normalise with clean_brand_name (presumably blacklist check and dosage-form stripping; helpers are defined elsewhere)
500
+ if not is_generic(title, comp, ing_de):
501
+ brands.add(clean_brand_name(title))
502
+ if comp != "-": companies.add(comp)
503
+ else:
504
+ break
505
+
506
+ # ==========================
507
+ # 💡 Automatic pagination (handles the JSF postback)
508
+ # ==========================
509
+ paging_div = soup.find('div', class_='browse')
510
+ has_next = False
511
+
512
+ if paging_div:
513
+ buttons = paging_div.find_all('input', class_='submit')
514
+ for i, btn in enumerate(buttons):
515
+ # Look for a button carrying the 'disabled' class; it is assumed to be the current page's button (NOTE(review): confirm no other pager button is disabled)
516
+ if 'disabled' in btn.get('class', []):
517
+ # If another button follows it, treat that as the next page
518
+ if i + 1 < len(buttons):
519
+ next_btn_id = buttons[i+1].get('id')
520
+ # Have Playwright click the next-page button (NOTE(review): next_btn_id is None when that button has no id attribute)
521
+ page.locator(f'input[id="{next_btn_id}"]').click()
522
+ page.wait_for_timeout(2500) # fixed 2.5 s pause to let JSF refresh the table (NOTE(review): not a real wait condition; the old table may still be present)
523
+ current_page += 1
524
+ has_next = True
525
+ break
526
+
527
+ # No next-page button was found: end the loop (NOTE(review): the loop has no upper bound on the page count)
528
+ if not has_next:
529
+ log.append("✅ 已掃描至最後一頁。")
530
+ break
531
+
532
  if brands: return ", ".join(brands), ", ".join(companies), "\n".join(log)
533
+ return "查無原廠", "-", "\n".join(log) + "\n❌ 查無資料或全為學名藥"
534
+ except Exception as e:
535
+ return "執行失敗", "-", "\n".join(log) + f"\n錯誤: {str(e)}"
536
 
537
  # ==========================================
538
  # 🚀 主執行中樞