import os from urllib.parse import quote_plus, urljoin import httpx from bs4 import BeautifulSoup _PARAM_CACHE: dict[str, tuple[str, str, str]] = {} def _discover_params(base: str) -> tuple[str, str, str]: if base in _PARAM_CACHE: return _PARAM_CACHE[base] cas_param = os.getenv("FEMA_CAS_PARAM", "field_cas_tid_1") name_param = os.getenv("FEMA_NAME_PARAM", "field_chemical_synonym_tid") action = os.getenv("FEMA_FORM_ACTION", base) try: r = httpx.get(base, timeout=15, headers={"User-Agent": "Mozilla/5.0"}) if r.status_code < 400: soup = BeautifulSoup(r.text, "lxml") form = soup.find("form") if form and form.get("action"): action = urljoin(base, form.get("action")) inputs = soup.find_all("input") for inp in inputs: name = (inp.get("name") or "").strip() if not name: continue placeholder = (inp.get("placeholder") or "").lower() lower_name = name.lower() if "cas" in placeholder or lower_name == "cas" or "cas" in lower_name: cas_param = name if ( "synonym" in placeholder or "chemical" in placeholder or "synonym" in lower_name or "chemical" in lower_name ): name_param = name except Exception: pass _PARAM_CACHE[base] = (cas_param, name_param, action) return cas_param, name_param, action def fema_link(cas_or_query: str, name_query: str | None = None) -> dict: """Build the FEMA / Fragrance Materials Safety Resource search URL. Production uses Elsevier's Fragrance Materials Safety Resource with CAS query params. """ q = (cas_or_query or "").strip() name_q = (name_query or "").strip() if not q and not name_q: return {"ok": False, "error": "Empty query"} # NOTE: domain spelling matters; the older '...materialssafety...' variant often 404s. base = os.getenv("FEMA_BASE_URL", "https://fragrancematerialsafetyresource.elsevier.com/") cas_param, name_param, action = _discover_params(base) cas_value = quote_plus(q) if q else "" name_value = quote_plus(name_q or q) cas_url = f"{action}?{cas_param}={cas_value}&{name_param}=" if cas_value else "" name_url = f"{action}?{cas_param}=&{name_param}={name_value}" if name_value else "" combo_url = ( f"{action}?{cas_param}={cas_value}&{name_param}={name_value}" if cas_value and name_value else "" ) # Generic search fallback (some deployments ignore filter params) search_term = name_q or q search_url = f"{base}search/node?keys={quote_plus(search_term)}" if search_term else "" search_api_url = ( f"{base}search/node?search_api_fulltext={quote_plus(search_term)}" if search_term else "" ) return { "ok": True, "cas_url": cas_url, "name_url": name_url, "combo_url": combo_url, "alt_url": search_url, "search_api_url": search_api_url, }