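# NOTE(review): the next three lines look like hosting-page residue rather than
# program text; kept as comments so the module parses. Confirm and remove.
# hchevva's picture
# Upload 4 files
# 02835d5 verified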
import logging
import os
from urllib.parse import quote_plus, urljoin

import httpx
from bs4 import BeautifulSoup
_LOG = logging.getLogger(__name__)

# Per-process memo of discovery results, keyed by the base URL that was probed.
# Each value is (cas_param, name_param, form_action_url).
_PARAM_CACHE: dict[str, tuple[str, str, str]] = {}


def _discover_params(base: str) -> tuple[str, str, str]:
    """Return the search parameter names and form action for the page at *base*.

    Starting values come from the ``FEMA_CAS_PARAM``, ``FEMA_NAME_PARAM`` and
    ``FEMA_FORM_ACTION`` environment variables, falling back to built-in
    defaults (the action defaults to *base* itself). The page is then fetched
    and scanned; any match found in the markup replaces the starting value:

    * the first ``<form>`` with an ``action`` attribute supplies the action
      URL, resolved against *base*;
    * every ``<input>`` with a non-empty ``name`` is checked, and the last
      one whose name or placeholder contains ``"cas"`` becomes the CAS
      parameter; likewise ``"synonym"`` / ``"chemical"`` for the name
      parameter.

    Discovery is best-effort: any failure while fetching or parsing is logged
    and the starting values are returned. The result, including a fallback
    after a failure, is cached per *base* for the life of the process, so the
    page is requested at most once per base URL.

    Args:
        base: URL of the page that hosts the search form.

    Returns:
        A ``(cas_param, name_param, action)`` tuple.
    """
    cached = _PARAM_CACHE.get(base)
    if cached is not None:
        return cached

    cas_param = os.getenv("FEMA_CAS_PARAM", "field_cas_tid_1")
    name_param = os.getenv("FEMA_NAME_PARAM", "field_chemical_synonym_tid")
    action = os.getenv("FEMA_FORM_ACTION", base)

    try:
        r = httpx.get(base, timeout=15, headers={"User-Agent": "Mozilla/5.0"})
        if r.status_code < 400:
            soup = BeautifulSoup(r.text, "lxml")

            form = soup.find("form")
            form_action = form.get("action") if form else None
            if form_action:
                action = urljoin(base, form_action)

            for inp in soup.find_all("input"):
                name = (inp.get("name") or "").strip()
                if not name:
                    continue
                placeholder = (inp.get("placeholder") or "").lower()
                lower_name = name.lower()
                # Later matches overwrite earlier ones (last match wins).
                if "cas" in placeholder or "cas" in lower_name:
                    cas_param = name
                if any(
                    hint in placeholder or hint in lower_name
                    for hint in ("synonym", "chemical")
                ):
                    name_param = name
    except Exception as exc:
        # Deliberately broad: a network error, a missing HTML parser or
        # unexpected markup must never break link building. Log it instead of
        # dropping it silently so a failed discovery can be diagnosed.
        _LOG.warning(
            "FEMA parameter discovery failed for %s; using defaults (%s: %s)",
            base,
            type(exc).__name__,
            exc,
        )

    result = (cas_param, name_param, action)
    _PARAM_CACHE[base] = result
    return result
def fema_link(cas_or_query: str, name_query: str | None = None) -> dict:
"""Build the FEMA / Fragrance Materials Safety Resource search URL.
Production uses Elsevier's Fragrance Materials Safety Resource with CAS query params.
"""
q = (cas_or_query or "").strip()
name_q = (name_query or "").strip()
if not q and not name_q:
return {"ok": False, "error": "Empty query"}
# NOTE: domain spelling matters; the older '...materialssafety...' variant often 404s.
base = os.getenv("FEMA_BASE_URL", "https://fragrancematerialsafetyresource.elsevier.com/")
cas_param, name_param, action = _discover_params(base)
cas_value = quote_plus(q) if q else ""
name_value = quote_plus(name_q or q)
cas_url = f"{action}?{cas_param}={cas_value}&{name_param}=" if cas_value else ""
name_url = f"{action}?{cas_param}=&{name_param}={name_value}" if name_value else ""
combo_url = (
f"{action}?{cas_param}={cas_value}&{name_param}={name_value}"
if cas_value and name_value
else ""
)
# Generic search fallback (some deployments ignore filter params)
search_term = name_q or q
search_url = f"{base}search/node?keys={quote_plus(search_term)}" if search_term else ""
search_api_url = (
f"{base}search/node?search_api_fulltext={quote_plus(search_term)}" if search_term else ""
)
return {
"ok": True,
"cas_url": cas_url,
"name_url": name_url,
"combo_url": combo_url,
"alt_url": search_url,
"search_api_url": search_api_url,
}