deneve07 committed on
Commit
916f2f1
·
verified ·
1 Parent(s): 1152cee

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +172 -75
app.py CHANGED
@@ -11,7 +11,7 @@ import gradio as gr
11
  os.system("playwright install chromium")
12
 
13
  # ==========================================
14
- # 🛠️ 共用工具:翻譯與學名藥濾網
15
  # ==========================================
16
  def translate_lang(text, target_lang):
17
  try:
@@ -22,14 +22,20 @@ def translate_lang(text, target_lang):
22
  return text
23
 
24
  def is_generic(brand_name, company_name, ingredient):
25
- """通用學名藥濾網:排除成分名開頭常見學名藥廠"""
26
  b_lower = brand_name.lower()
27
  c_lower = company_name.lower()
28
  i_lower = ingredient.lower()
29
 
30
- generic_keywords = ['sandoz', 'teva', 'apotex', 'ratiopharm', 'jamp', 'mint', 'pharmascience', 'sanis', 'sivem',
31
- 'auro', 'glenmark', 'taro', 'marcan', 'nora', 'mantra', 'reddy', 'mepha', 'axapharm',
32
- 'helvepharm', 'zentiva', 'spirig', 'aliud', 'puren', 'stada', 'eg ', '- gé', 'biogaran', 'arrow', 'viatris', 'zydus']
 
 
 
 
 
 
33
 
34
  if b_lower.startswith(i_lower) or i_lower in b_lower: return True
35
  if any(gk in b_lower or gk in c_lower for gk in generic_keywords): return True
@@ -37,7 +43,6 @@ def is_generic(brand_name, company_name, ingredient):
37
  return False
38
 
39
  def clean_brand_name(raw_name):
40
- """切除劑量、劑型等後綴,保留純商品名"""
41
  return re.split(r'(皮下注|錠|カプセル|顆粒|シロップ|OD|細粒|液|\d+)', raw_name)[0].replace('®', '').strip()
42
 
43
  # ==========================================
@@ -47,66 +52,113 @@ def get_usa_originator(ingredient):
47
  log, brands, companies = [], set(), set()
48
  try:
49
  session = curl_req.Session(impersonate="chrome120")
 
 
 
 
 
50
  payload = {"drugname": ingredient, "discontinued": "RX,OTC,DISCN", "submit": "Search"}
51
  res = session.post("https://www.accessdata.fda.gov/scripts/cder/ob/search_product.cfm", data=payload, timeout=30, verify=False)
 
 
52
  soup = BeautifulSoup(res.text, 'html.parser')
53
  table = soup.find('table', id='example')
54
  if table:
 
 
55
  headers = [th.get_text(strip=True).lower() for th in table.find_all('th')]
56
  brand_idx = next((i for i, h in enumerate(headers) if 'proprietary name' in h), 2)
57
  rld_idx = next((i for i, h in enumerate(headers) if 'rld' in h), 8)
58
  mfg_idx = next((i for i, h in enumerate(headers) if 'applicant holder' in h), 10)
59
- for tr in (table.find('tbody') or table).find_all('tr'):
 
60
  tds = tr.find_all('td')
61
  if len(tds) > max(rld_idx, brand_idx) and "RLD" in tds[rld_idx].get_text(strip=True).upper():
62
  brands.add(tds[brand_idx].get_text(strip=True))
63
  if len(tds) > mfg_idx: companies.add(tds[mfg_idx].get_text(strip=True))
64
- if brands: return ", ".join(brands), ", ".join(companies), "✅ 成功找到 RLD"
65
- return "查無原廠", "-", "查無 RLD未核准"
66
- except Exception as e: return "執行失敗", "-", str(e)
 
 
 
67
 
68
  def get_belgium_originator(ingredient):
 
69
  log, brands = [], set()
70
  try:
71
  session = curl_req.Session(impersonate="chrome120")
 
72
  calc_id, ds = "5b55a713-dc8e-46bd-a82a-cea4641fb7b1", "c9852b22-1d58-4471-b8cc-a821763e821e"
73
  url = f"https://www.test-achats.be/CalculatorsAPI/Medicine/GetSearchResults/{calc_id}?datasource={ds}&term={quote(ingredient)}"
74
  res = session.get(url, headers={"X-Requested-With": "XMLHttpRequest"}, timeout=30, verify=False)
 
 
 
75
  if res.status_code == 200:
76
- for item in res.json():
 
 
77
  label = item.get('label', '')
78
- if not is_generic(label, "", ingredient): brands.add(clean_brand_name(label))
79
- if brands: return ", ".join(brands), "Test-achats 無藥廠資料", "✅ API 抓取成功"
80
- return "查無原廠", "-", "❌ 皆為學名藥"
81
- except Exception as e: return "執行失敗", "-", str(e)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
 
83
  def get_france_originator(ingredient):
84
- brands = set()
85
  try:
86
  session = curl_req.Session(impersonate="chrome120")
 
87
  res = session.get(f"https://base-donnees-publique.medicaments.gouv.fr/medicament/recherche/resultat?contains={quote(ingredient)}", timeout=30, verify=False)
 
88
  soup = BeautifulSoup(res.text, 'html.parser')
89
 
90
- # 🟢 確保使用精確的法國 DOM 解析邏輯
91
- for div in soup.find_all('div', class_='result drug'):
 
92
  info_div = div.find('div', class_='infos')
93
  if info_div and info_div.find('a'):
94
  title = info_div.find('a').get_text(strip=True)
95
  if not is_generic(title, "", ingredient) and '- gé' not in title.lower():
96
  brands.add(clean_brand_name(title))
97
 
98
- if brands: return ", ".join(brands), "法國公共資料庫", "✅ 成功找到非 Gé 藥品"
99
- return "查無原廠", "-", "❌ 查無資料或皆為 Gé"
100
- except Exception as e: return "執行失敗", "-", str(e)
101
 
102
  def get_australia_originator(ingredient):
103
- brands = set()
104
  try:
105
  session = curl_req.Session(impersonate="chrome120")
 
106
  res = session.get(f"https://www.tga.gov.au/resources/artg?keywords={ingredient}", timeout=30, verify=False)
 
107
  soup = BeautifulSoup(res.text, 'html.parser')
 
 
 
108
  cands = []
109
- for article in soup.find_all('article', class_='node--artg'):
110
  title_tag, time_tag = article.find('h3'), article.find('time')
111
  if title_tag and time_tag:
112
  full_t = title_tag.get_text(strip=True)
@@ -117,131 +169,176 @@ def get_australia_originator(ingredient):
117
  cands.append({"brand": brand, "date": time_tag.get('datetime')})
118
  if cands:
119
  cands = sorted(cands, key=lambda x: x['date'])
120
- return cands[0]['brand'], "TGA資料庫", f"✅ 最早註冊: {cands[0]['date'][:10]}"
121
- return "查無原廠", "-", "❌ 查無資料"
122
- except Exception as e: return "執行失敗", "-", str(e)
 
123
 
124
  def get_sweden_originator(ingredient):
125
- brands, companies = set(), set()
126
  try:
127
  session = curl_req.Session(impersonate="chrome120")
 
128
  res = session.get(f"https://www.fass.se/LIF/search?query={quote(ingredient)}", timeout=30, verify=False)
 
129
  soup = BeautifulSoup(res.text, 'html.parser')
130
- for item in soup.find_all('li', class_=re.compile(r'search-result-item')):
 
 
 
131
  title_tag, comp_tag = item.find('a', class_='product-name'), item.find('span', class_='company-name')
132
  if title_tag and comp_tag:
133
  title, comp = title_tag.get_text(strip=True), comp_tag.get_text(strip=True)
134
  if not is_generic(title, comp, ingredient):
135
  brands.add(clean_brand_name(title))
136
  companies.add(comp)
137
- if brands: return ", ".join(brands), ", ".join(companies), "✅ 成功 (FASS)"
138
- return "查無原廠", "-", "❌ 查無資料"
139
- except Exception as e: return "執行失敗", "-", str(e)
140
 
141
  # ==========================================
142
  # 🚀 模組 B:使用 Playwright 抓取 (英、加、日、德、瑞)
143
  # ==========================================
144
  def get_uk_originator(ingredient, page):
145
- brands, companies = set(), set()
146
  try:
 
147
  page.goto(f"https://www.medicines.org.uk/emc/search?q={ingredient}", timeout=30000)
148
  page.wait_for_selector('.search-results-product-info-title-link', timeout=15000)
149
  soup = BeautifulSoup(page.content(), 'html.parser')
150
- for link in soup.find_all('a', class_='search-results-product-info-title-link'):
 
 
151
  title = link.get_text(strip=True)
152
  if not title.lower().startswith(ingredient.lower()):
153
  brands.add(clean_brand_name(title))
154
  p_div = link.find_parent(class_='search-results-product-info')
155
  if p_div and p_div.find(class_='search-results-product-info-company'):
156
  companies.add(p_div.find(class_='search-results-product-info-company').get_text(strip=True))
157
- if brands: return ", ".join(brands), ", ".join(companies), "✅ 成功"
158
- return "查無原廠", "-", "❌ 皆以成分名開頭"
159
- except Exception as e: return "執行失敗", "-", str(e)
160
 
161
  def get_canada_originator(ingredient, page):
 
162
  try:
 
163
  page.goto("https://health-products.canada.ca/dpd-bdpp/index-eng.jsp", timeout=45000, wait_until="domcontentloaded")
164
  page.locator('input[id="activeIngredient"]').fill(ingredient)
165
  page.keyboard.press("Enter")
166
- page.wait_for_selector('table#results', timeout=15000)
167
 
168
- soup, all_cands = BeautifulSoup(page.content(), 'html.parser'), []
 
 
 
169
  table = soup.find('table', id='results')
170
- if not table or not table.find('tbody'): return "查無資料", "-", "❌ 表格未生成"
171
 
172
- for tr in table.find('tbody').find_all('tr'):
 
 
 
 
 
 
 
173
  tds = tr.find_all('td')
174
  if len(tds) >= 4:
175
  comp = tds[2].get_text(strip=True)
176
  if is_generic(tds[3].get_text(strip=True), comp, ingredient): continue
177
  m = re.search(r'\d+', tds[1].get_text(strip=True))
178
  if m and tds[1].find('a'):
179
- all_cands.append({"company": comp, "product": tds[3].get_text(strip=True), "din": int(m.group()), "url": "https://health-products.canada.ca" + tds[1].find('a')['href']})
180
 
181
- if not all_cands: return "查無原廠", "-", "❌ 剩餘皆為學名藥"
182
 
183
  all_cands = sorted(all_cands, key=lambda x: x['din'])
184
  orig_comp = all_cands[0]['company']
185
  brands = set([c['product'] for c in all_cands if c['company'] == orig_comp])
186
- return ", ".join(brands), orig_comp, f"✅ 依 DIN 鎖定最古老藥廠"
187
- except Exception as e: return "執行失敗", "-", str(e)
 
188
 
189
  def get_japan_originator(ing_ja, page):
190
- brands, companies = set(), set()
191
  try:
192
- # 🟢 修正:直接前往網頁,等待輸入框,輸入後按 Enter 提交
193
  page.goto("https://www.pmda.go.jp/PmdaSearch/iyakuSearch/", timeout=30000, wait_until="domcontentloaded")
194
- page.wait_for_selector('input#txtName', timeout=15000)
195
- page.fill('input#txtName', ing_ja)
196
- page.press('input#txtName', 'Enter')
197
 
198
- page.wait_for_selector('table#ResultList', timeout=15000)
199
- for tr in BeautifulSoup(page.content(), 'html.parser').find('table', id='ResultList').find_all('tr'):
200
- tds = tr.find_all('td')
201
- if len(tds) >= 3:
202
- title = tds[1].get_text(strip=True)
203
- if not is_generic(title, "", ing_ja):
204
- brands.add(clean_brand_name(title))
205
- companies.add(tds[2].get_text(separator=" ", strip=True).replace('製造販売元/', ''))
206
- if brands: return ", ".join(brands), ", ".join(companies), "✅ 成功"
207
- return "查無原廠", "-", "❌ 皆為學名藥"
208
- except Exception as e: return "執行失敗", "-", str(e)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
209
 
210
  def get_switzerland_originator(ing_de, page):
211
- brands, companies = set(), set()
212
  try:
 
213
  page.goto(f"https://compendium.ch/search?q={ing_de}", timeout=30000)
214
- # 🟢 修正:Vue.js 需要時間渲染,改為等待網路請求靜止
 
215
  page.wait_for_load_state('networkidle', timeout=20000)
216
 
217
- for card in BeautifulSoup(page.content(), 'html.parser').find_all('div', class_=re.compile('medicament-card')):
 
 
 
 
218
  h3, strong = card.find('h3'), card.find('strong', class_='info')
219
  if h3 and strong:
220
  title, comp = h3.get_text(strip=True), strong.get_text(strip=True)
221
  if not is_generic(title, comp, ing_de):
222
  brands.add(clean_brand_name(title))
223
  companies.add(comp)
224
- if brands: return ", ".join(brands), ", ".join(companies), "✅ 成功"
225
- return "查無原廠", "-", "❌ 查無資料"
226
- except Exception as e: return "執行失敗", "-", str(e)
227
 
228
  def get_germany_originator(ing_de, page):
229
- brands, companies = set(), set()
230
  try:
 
231
  page.goto(f"https://www.gelbe-liste.de/profi-suche/results?substance={quote(ing_de)}", timeout=30000)
232
  page.wait_for_selector('.product-list', timeout=15000)
233
- ul = BeautifulSoup(page.content(), 'html.parser').find('ul', class_='product-list')
 
 
234
  if ul:
235
- for li in ul.find_all('li'):
 
 
236
  h5, p_tag = li.find('h5'), li.find('p', class_='small')
237
  if h5:
238
  title, comp = h5.get_text(strip=True), (p_tag.get_text(strip=True) if p_tag else "-")
239
  if not is_generic(title, comp, ing_de):
240
  brands.add(clean_brand_name(title))
241
  companies.add(comp)
242
- if brands: return ", ".join(brands), ", ".join(companies), "✅ 成功"
243
- return "查無原廠", "-", "❌ 查無資料"
244
- except Exception as e: return "執行失敗", "-", str(e)
245
 
246
  # ==========================================
247
  # 🚀 主執行中樞
@@ -253,7 +350,7 @@ def run_all_ten_countries(ing_en, ing_ja_manual, ing_de_manual):
253
  ing_de = ing_de_manual if ing_de_manual else translate_lang(ing_en, 'de')
254
  results = []
255
 
256
- # 1. API 模組 (澳洲與瑞典已移至此處)
257
  usa_b, usa_c, usa_log = get_usa_originator(ing_en)
258
  results.append(["🇺🇸 美國 (FDA)", usa_b, usa_c, usa_log])
259
 
@@ -294,7 +391,7 @@ def run_all_ten_countries(ing_en, ing_ja_manual, ing_de_manual):
294
  # 🎨 UI 介面
295
  # ==========================================
296
  with gr.Blocks(title="十國原廠商品名智能檢索器") as demo:
297
- gr.Markdown("## 🌐 跨國原廠商品名檢索器 (十國無重複修復版)")
298
 
299
  with gr.Row():
300
  ing_en = gr.Textbox(label="🧪 英文成分名 (必填)", placeholder="例如: Semaglutide")
@@ -306,7 +403,7 @@ with gr.Blocks(title="十國原廠商品名智能檢索器") as demo:
306
  search_btn = gr.Button("🚀 啟動十國查詢", variant="primary")
307
 
308
  result_table = gr.Dataframe(
309
- headers=["國家", "🌟 判定為原廠的商品名", "🏭 藥廠名稱", "🛠️ 系統狀態"],
310
  datatype=["str", "str", "str", "str"],
311
  wrap=True, interactive=False
312
  )
 
11
  os.system("playwright install chromium")
12
 
13
  # ==========================================
14
+ # 🛠️ 共用工具:翻譯與進階學名藥濾網
15
  # ==========================================
16
  def translate_lang(text, target_lang):
17
  try:
 
22
  return text
23
 
24
  def is_generic(brand_name, company_name, ingredient):
25
+ """通用學名藥濾網:排除成分名開頭常見學名藥廠、以及歐洲平行輸入商"""
26
  b_lower = brand_name.lower()
27
  c_lower = company_name.lower()
28
  i_lower = ingredient.lower()
29
 
30
+ # 新增德國平行輸入商 (Reimporte) 與法國/歐洲常見學名藥廠
31
+ generic_keywords = [
32
+ 'sandoz', 'teva', 'apotex', 'ratiopharm', 'jamp', 'mint', 'pharmascience', 'sanis', 'sivem',
33
+ 'auro', 'glenmark', 'taro', 'marcan', 'nora', 'mantra', 'reddy', 'mepha', 'axapharm',
34
+ 'helvepharm', 'zentiva', 'spirig', 'aliud', 'puren', 'stada', 'eg ', '- gé', 'biogaran',
35
+ 'arrow', 'viatris', 'zydus', 'kohlpharma', 'eurim', 'abacus', 'emra', 'cc pharma',
36
+ 'orifarm', 'bb farma', 'fd pharma', 'mpa pharma', '1 4 u', '2care4', 'axicorp',
37
+ 'nattermann', '1 0 1 carefarm', 'haemato'
38
+ ]
39
 
40
  if b_lower.startswith(i_lower) or i_lower in b_lower: return True
41
  if any(gk in b_lower or gk in c_lower for gk in generic_keywords): return True
 
43
  return False
44
 
45
  def clean_brand_name(raw_name):
 
46
  return re.split(r'(皮下注|錠|カプセル|顆粒|シロップ|OD|細粒|液|\d+)', raw_name)[0].replace('®', '').strip()
47
 
48
  # ==========================================
 
52
  log, brands, companies = [], set(), set()
53
  try:
54
  session = curl_req.Session(impersonate="chrome120")
55
+ log.append("1. 發送 GET 請求獲取 FDA Session Cookies...")
56
+ res_get = session.get("https://www.accessdata.fda.gov/scripts/cder/ob/index.cfm", timeout=30, verify=False)
57
+ log.append(f" -> 狀態碼: {res_get.status_code}, 內容長度: {len(res_get.text)}")
58
+
59
+ log.append("2. 發送 POST 請求提交搜尋...")
60
  payload = {"drugname": ingredient, "discontinued": "RX,OTC,DISCN", "submit": "Search"}
61
  res = session.post("https://www.accessdata.fda.gov/scripts/cder/ob/search_product.cfm", data=payload, timeout=30, verify=False)
62
+ log.append(f" -> 狀態碼: {res.status_code}, 內容長度: {len(res.text)}")
63
+
64
  soup = BeautifulSoup(res.text, 'html.parser')
65
  table = soup.find('table', id='example')
66
  if table:
67
+ rows = table.find('tbody').find_all('tr') if table.find('tbody') else table.find_all('tr')
68
+ log.append(f"3. 找到結果表格,共 {len(rows)} 列資料。")
69
  headers = [th.get_text(strip=True).lower() for th in table.find_all('th')]
70
  brand_idx = next((i for i, h in enumerate(headers) if 'proprietary name' in h), 2)
71
  rld_idx = next((i for i, h in enumerate(headers) if 'rld' in h), 8)
72
  mfg_idx = next((i for i, h in enumerate(headers) if 'applicant holder' in h), 10)
73
+
74
+ for tr in rows:
75
  tds = tr.find_all('td')
76
  if len(tds) > max(rld_idx, brand_idx) and "RLD" in tds[rld_idx].get_text(strip=True).upper():
77
  brands.add(tds[brand_idx].get_text(strip=True))
78
  if len(tds) > mfg_idx: companies.add(tds[mfg_idx].get_text(strip=True))
79
+ else:
80
+ log.append("❌ HTML 中未發現 table#example,可能是無此藥被阻擋。")
81
+
82
+ if brands: return ", ".join(brands), ", ".join(companies), "\n".join(log)
83
+ return "查無原廠", "-", "\n".join(log)
84
+ except Exception as e: return "執行失敗", "-", "\n".join(log) + f"\n錯誤: {str(e)}"
85
 
86
  def get_belgium_originator(ingredient):
87
+ """還原為您藥價工具中使用的 Test-Achats 後端 API 邏輯"""
88
  log, brands = [], set()
89
  try:
90
  session = curl_req.Session(impersonate="chrome120")
91
+ log.append("1. 連線至 Test-Achats 後端 API...")
92
  calc_id, ds = "5b55a713-dc8e-46bd-a82a-cea4641fb7b1", "c9852b22-1d58-4471-b8cc-a821763e821e"
93
  url = f"https://www.test-achats.be/CalculatorsAPI/Medicine/GetSearchResults/{calc_id}?datasource={ds}&term={quote(ingredient)}"
94
  res = session.get(url, headers={"X-Requested-With": "XMLHttpRequest"}, timeout=30, verify=False)
95
+ log.append(f" -> API 狀態碼: {res.status_code}")
96
+
97
+ exact_label = ingredient
98
  if res.status_code == 200:
99
+ json_data = res.json()
100
+ log.append(f" -> API 回傳 {len(json_data)} 筆候選項目")
101
+ for item in json_data:
102
  label = item.get('label', '')
103
+ if ingredient.lower() in label.lower():
104
+ exact_label = label
105
+ break
106
+
107
+ log.append(f"2. 使用標籤 '{exact_label}' 請求劑型資料 (/dosage)...")
108
+ post_headers = {"Content-Type": "application/x-www-form-urlencoded", "X-Requested-With": "XMLHttpRequest"}
109
+ res_dosage = session.post("https://www.test-achats.be/sante/maladies-et-medicaments/medicaments/calculateur/banque-de-donnees-medicaments/dosage", data=f"LabelValue={quote(exact_label)}", headers=post_headers, timeout=30, verify=False)
110
+ log.append(f" -> Dosage 狀態碼: {res_dosage.status_code}")
111
+
112
+ soup = BeautifulSoup(res_dosage.text, 'html.parser')
113
+ trs = soup.find_all('tr', attrs={'data-galenicform': True})
114
+ log.append(f"3. 找到 {len(trs)} 筆實際藥品資料。")
115
+
116
+ for tr in trs:
117
+ brand_tag = tr.find('a', class_='jq_detailPageLink') or tr.find('th', scope='row')
118
+ if brand_tag:
119
+ b_name = brand_tag.get_text(strip=True)
120
+ if not is_generic(b_name, "", ingredient):
121
+ brands.add(clean_brand_name(b_name))
122
+
123
+ if brands: return ", ".join(brands), "Test-Achats 無藥廠", "\n".join(log)
124
+ return "查無原廠", "-", "\n".join(log)
125
+ except Exception as e: return "執行失敗", "-", "\n".join(log) + f"\n錯誤: {str(e)}"
126
 
127
  def get_france_originator(ingredient):
128
+ log, brands = [], set()
129
  try:
130
  session = curl_req.Session(impersonate="chrome120")
131
+ log.append("1. 發送 GET 至法國公共資料庫...")
132
  res = session.get(f"https://base-donnees-publique.medicaments.gouv.fr/medicament/recherche/resultat?contains={quote(ingredient)}", timeout=30, verify=False)
133
+ log.append(f" -> 狀態碼: {res.status_code}, 內容長度: {len(res.text)}")
134
  soup = BeautifulSoup(res.text, 'html.parser')
135
 
136
+ divs = soup.find_all('div', class_='result drug')
137
+ log.append(f"2. 找到 {len(divs)} 'result drug' 區塊。")
138
+ for div in divs:
139
  info_div = div.find('div', class_='infos')
140
  if info_div and info_div.find('a'):
141
  title = info_div.find('a').get_text(strip=True)
142
  if not is_generic(title, "", ingredient) and '- gé' not in title.lower():
143
  brands.add(clean_brand_name(title))
144
 
145
+ if brands: return ", ".join(brands), "法國系統無藥廠", "\n".join(log)
146
+ return "查無原廠", "-", "\n".join(log)
147
+ except Exception as e: return "執行失敗", "-", "\n".join(log) + f"\n錯誤: {str(e)}"
148
 
149
  def get_australia_originator(ingredient):
150
+ log, brands = [], set()
151
  try:
152
  session = curl_req.Session(impersonate="chrome120")
153
+ log.append("1. 發送 GET 至澳洲 TGA...")
154
  res = session.get(f"https://www.tga.gov.au/resources/artg?keywords={ingredient}", timeout=30, verify=False)
155
+ log.append(f" -> 狀態碼: {res.status_code}, 內容長度: {len(res.text)}")
156
  soup = BeautifulSoup(res.text, 'html.parser')
157
+
158
+ articles = soup.find_all('article', class_='node--artg')
159
+ log.append(f"2. 找到 {len(articles)} 筆 ARTG 紀錄。")
160
  cands = []
161
+ for article in articles:
162
  title_tag, time_tag = article.find('h3'), article.find('time')
163
  if title_tag and time_tag:
164
  full_t = title_tag.get_text(strip=True)
 
169
  cands.append({"brand": brand, "date": time_tag.get('datetime')})
170
  if cands:
171
  cands = sorted(cands, key=lambda x: x['date'])
172
+ log.append(f"✅ 依日期排序鎖定最早註冊: {cands[0]['date'][:10]}")
173
+ return cands[0]['brand'], "TGA資料庫", "\n".join(log)
174
+ return "查無原廠", "-", "\n".join(log)
175
+ except Exception as e: return "執行失敗", "-", "\n".join(log) + f"\n錯誤: {str(e)}"
176
 
177
  def get_sweden_originator(ingredient):
178
+ log, brands, companies = [], set(), set()
179
  try:
180
  session = curl_req.Session(impersonate="chrome120")
181
+ log.append("1. 發送 GET 至瑞典 FASS...")
182
  res = session.get(f"https://www.fass.se/LIF/search?query={quote(ingredient)}", timeout=30, verify=False)
183
+ log.append(f" -> 狀態碼: {res.status_code}, 內容長度: {len(res.text)}")
184
  soup = BeautifulSoup(res.text, 'html.parser')
185
+
186
+ items = soup.find_all('li', class_=re.compile(r'search-result-item'))
187
+ log.append(f"2. 找到 {len(items)} 筆搜尋結果。")
188
+ for item in items:
189
  title_tag, comp_tag = item.find('a', class_='product-name'), item.find('span', class_='company-name')
190
  if title_tag and comp_tag:
191
  title, comp = title_tag.get_text(strip=True), comp_tag.get_text(strip=True)
192
  if not is_generic(title, comp, ingredient):
193
  brands.add(clean_brand_name(title))
194
  companies.add(comp)
195
+ if brands: return ", ".join(brands), ", ".join(companies), "\n".join(log)
196
+ return "查無原廠", "-", "\n".join(log)
197
+ except Exception as e: return "執行失敗", "-", "\n".join(log) + f"\n錯誤: {str(e)}"
198
 
199
  # ==========================================
200
  # 🚀 模組 B:使用 Playwright 抓取 (英、加、日、德、瑞)
201
  # ==========================================
202
  def get_uk_originator(ingredient, page):
203
+ log, brands, companies = [], set(), set()
204
  try:
205
+ log.append("1. 前往 eMC...")
206
  page.goto(f"https://www.medicines.org.uk/emc/search?q={ingredient}", timeout=30000)
207
  page.wait_for_selector('.search-results-product-info-title-link', timeout=15000)
208
  soup = BeautifulSoup(page.content(), 'html.parser')
209
+ links = soup.find_all('a', class_='search-results-product-info-title-link')
210
+ log.append(f"2. 找到 {len(links)} 筆標題,開始篩選。")
211
+ for link in links:
212
  title = link.get_text(strip=True)
213
  if not title.lower().startswith(ingredient.lower()):
214
  brands.add(clean_brand_name(title))
215
  p_div = link.find_parent(class_='search-results-product-info')
216
  if p_div and p_div.find(class_='search-results-product-info-company'):
217
  companies.add(p_div.find(class_='search-results-product-info-company').get_text(strip=True))
218
+ if brands: return ", ".join(brands), ", ".join(companies), "\n".join(log)
219
+ return "查無原廠", "-", "\n".join(log)
220
+ except Exception as e: return "執行失敗", "-", "\n".join(log) + f"\n錯誤: {str(e)}"
221
 
222
  def get_canada_originator(ingredient, page):
223
+ log = []
224
  try:
225
+ log.append("1. 前往 Canada DPD...")
226
  page.goto("https://health-products.canada.ca/dpd-bdpp/index-eng.jsp", timeout=45000, wait_until="domcontentloaded")
227
  page.locator('input[id="activeIngredient"]').fill(ingredient)
228
  page.keyboard.press("Enter")
 
229
 
230
+ log.append("2. 等待結果表格或查無資料訊息...")
231
+ # 🟢 修正:同時等待 表格 或 錯誤提示,避免超時
232
+ page.wait_for_selector('table#results, .alert-info, .alert-warning', timeout=15000)
233
+ soup = BeautifulSoup(page.content(), 'html.parser')
234
  table = soup.find('table', id='results')
 
235
 
236
+ if not table or not table.find('tbody'):
237
+ log.append("❌ 畫面上未出現表格,可能查無此藥。")
238
+ return "查無資料", "-", "\n".join(log)
239
+
240
+ rows = table.find('tbody').find_all('tr')
241
+ log.append(f"3. 找到 {len(rows)} 筆資料,進行黑名單過濾。")
242
+ all_cands = []
243
+ for tr in rows:
244
  tds = tr.find_all('td')
245
  if len(tds) >= 4:
246
  comp = tds[2].get_text(strip=True)
247
  if is_generic(tds[3].get_text(strip=True), comp, ingredient): continue
248
  m = re.search(r'\d+', tds[1].get_text(strip=True))
249
  if m and tds[1].find('a'):
250
+ all_cands.append({"company": comp, "product": tds[3].get_text(strip=True), "din": int(m.group())})
251
 
252
+ if not all_cands: return "查無原廠", "-", "\n".join(log) + "\n❌ 剩餘皆為學名藥"
253
 
254
  all_cands = sorted(all_cands, key=lambda x: x['din'])
255
  orig_comp = all_cands[0]['company']
256
  brands = set([c['product'] for c in all_cands if c['company'] == orig_comp])
257
+ log.append(f"✅ 依 DIN 鎖定最古老藥廠: {orig_comp}")
258
+ return ", ".join(brands), orig_comp, "\n".join(log)
259
+ except Exception as e: return "執行失敗", "-", "\n".join(log) + f"\n錯誤: {str(e)}"
260
 
261
  def get_japan_originator(ing_ja, page):
262
+ log, brands, companies = [], set(), set()
263
  try:
264
+ log.append(f"1. 前往 PMDA (搜尋: {ing_ja})...")
265
  page.goto("https://www.pmda.go.jp/PmdaSearch/iyakuSearch/", timeout=30000, wait_until="domcontentloaded")
 
 
 
266
 
267
+ # 尋找輸入框並按 Enter
268
+ page.locator('input#txtName, input[name="nameWord"], input[name="general_name"]').first.fill(ing_ja)
269
+ page.keyboard.press("Enter")
270
+
271
+ log.append("2. 等待表格 ResultList 或 錯誤訊息...")
272
+ # 🟢 修正:同時等待 表格 或 錯誤訊息,防止超時
273
+ page.wait_for_selector('table#ResultList, .errormsg, .non-result', timeout=15000)
274
+ log.append(f" -> 跳轉後網址: {page.url}")
275
+
276
+ soup = BeautifulSoup(page.content(), 'html.parser')
277
+ table = soup.find('table', id='ResultList')
278
+
279
+ if table:
280
+ rows = table.find_all('tr')
281
+ log.append(f"3. 找到表格,共 {len(rows)} 列。")
282
+ for tr in rows:
283
+ tds = tr.find_all('td')
284
+ if len(tds) >= 3:
285
+ title = tds[1].get_text(strip=True)
286
+ if not is_generic(title, "", ing_ja):
287
+ brands.add(clean_brand_name(title))
288
+ companies.add(tds[2].get_text(separator=" ", strip=True).replace('製造販売元/', ''))
289
+ else:
290
+ log.append("❌ 畫面上未出現 ResultList,查無此藥。")
291
+
292
+ if brands: return ", ".join(brands), ", ".join(companies), "\n".join(log)
293
+ return "查無原廠", "-", "\n".join(log)
294
+ except Exception as e: return "執行失敗", "-", "\n".join(log) + f"\n錯誤: {str(e)}"
295
 
296
  def get_switzerland_originator(ing_de, page):
297
+ log, brands, companies = [], set(), set()
298
  try:
299
+ log.append(f"1. 前往 Compendium (搜尋: {ing_de})...")
300
  page.goto(f"https://compendium.ch/search?q={ing_de}", timeout=30000)
301
+
302
+ log.append("2. 等待網路請求完成 (networkidle)...")
303
  page.wait_for_load_state('networkidle', timeout=20000)
304
 
305
+ soup = BeautifulSoup(page.content(), 'html.parser')
306
+ cards = soup.find_all('div', class_=re.compile('medicament-card'))
307
+ log.append(f"3. 畫面渲染完成,找到 {len(cards)} 筆卡片。")
308
+
309
+ for card in cards:
310
  h3, strong = card.find('h3'), card.find('strong', class_='info')
311
  if h3 and strong:
312
  title, comp = h3.get_text(strip=True), strong.get_text(strip=True)
313
  if not is_generic(title, comp, ing_de):
314
  brands.add(clean_brand_name(title))
315
  companies.add(comp)
316
+ if brands: return ", ".join(brands), ", ".join(companies), "\n".join(log)
317
+ return "查無原廠", "-", "\n".join(log) + "\n❌ 查無資料或全為學名藥"
318
+ except Exception as e: return "執行失敗", "-", "\n".join(log) + f"\n錯誤: {str(e)}"
319
 
320
  def get_germany_originator(ing_de, page):
321
+ log, brands, companies = [], set(), set()
322
  try:
323
+ log.append(f"1. 前往 Gelbe Liste (搜尋: {ing_de})...")
324
  page.goto(f"https://www.gelbe-liste.de/profi-suche/results?substance={quote(ing_de)}", timeout=30000)
325
  page.wait_for_selector('.product-list', timeout=15000)
326
+
327
+ soup = BeautifulSoup(page.content(), 'html.parser')
328
+ ul = soup.find('ul', class_='product-list')
329
  if ul:
330
+ lis = ul.find_all('li')
331
+ log.append(f"2. 找到 {len(lis)} 筆結果,過濾平行輸入商。")
332
+ for li in lis:
333
  h5, p_tag = li.find('h5'), li.find('p', class_='small')
334
  if h5:
335
  title, comp = h5.get_text(strip=True), (p_tag.get_text(strip=True) if p_tag else "-")
336
  if not is_generic(title, comp, ing_de):
337
  brands.add(clean_brand_name(title))
338
  companies.add(comp)
339
+ if brands: return ", ".join(brands), ", ".join(companies), "\n".join(log)
340
+ return "查無原廠", "-", "\n".join(log) + "\n❌ 查無資料"
341
+ except Exception as e: return "執行失敗", "-", "\n".join(log) + f"\n錯誤: {str(e)}"
342
 
343
  # ==========================================
344
  # 🚀 主執行中樞
 
350
  ing_de = ing_de_manual if ing_de_manual else translate_lang(ing_en, 'de')
351
  results = []
352
 
353
+ # 1. API 模組 (不受瀏覽器阻擋)
354
  usa_b, usa_c, usa_log = get_usa_originator(ing_en)
355
  results.append(["🇺🇸 美國 (FDA)", usa_b, usa_c, usa_log])
356
 
 
391
  # 🎨 UI 介面
392
  # ==========================================
393
  with gr.Blocks(title="十國原廠商品名智能檢索器") as demo:
394
+ gr.Markdown("## 🌐 跨國原廠商品名檢索器 (高階診斷版)")
395
 
396
  with gr.Row():
397
  ing_en = gr.Textbox(label="🧪 英文成分名 (必填)", placeholder="例如: Semaglutide")
 
403
  search_btn = gr.Button("🚀 啟動十國查詢", variant="primary")
404
 
405
  result_table = gr.Dataframe(
406
+ headers=["國家", "🌟 判定為原廠的商品名", "🏭 藥廠名稱", "🛠️ 系統狀態與詳細日誌"],
407
  datatype=["str", "str", "str", "str"],
408
  wrap=True, interactive=False
409
  )