hchevva committed on
Commit
426090f
·
verified ·
1 Parent(s): e1f40f0

Upload 5 files

Browse files
core/sources/ctx.py CHANGED
@@ -1,114 +1,146 @@
1
- # core/sources/ctx.py
2
- from __future__ import annotations
3
-
4
  import re
5
- from typing import Any, Dict, Optional
6
  from urllib.parse import quote
7
 
8
- DTXSID_RE = re.compile(r"DTXSID\d+", re.IGNORECASE)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
 
11
- async def resolve_dtxsid_from_dashboard(search: str, http) -> Optional[str]:
12
- search = (search or "").strip()
13
- if not search:
14
  return None
15
 
16
- url = f"https://comptox.epa.gov/dashboard/dsstoxdb/results?search={quote(search, safe='')}"
17
- r = await http.get(url, follow_redirects=True, timeout=30.0)
18
- if r.status_code != 200:
19
- return None
 
 
20
 
21
- m = DTXSID_RE.search(r.text)
22
- return m.group(0).upper() if m else None
 
 
 
 
 
 
 
 
 
 
 
 
23
 
 
 
 
 
 
 
 
 
 
24
 
25
- async def _try_json(http, url: str) -> Optional[Dict[str, Any]]:
26
- try:
27
- r = await http.get(url, follow_redirects=True, timeout=30.0)
28
- if r.status_code != 200:
29
- return None
30
- return r.json()
31
- except Exception:
32
- return None
33
 
34
 
35
- def _extract_counts(raw: Any) -> Dict[str, Any]:
36
- """
37
- Best-effort mapping to production-like fields:
38
- Reports: + / - / other ; Ames ; MN (Micronucleus)
 
 
 
 
 
 
 
 
 
 
39
  """
40
- out = {"pos": None, "neg": None, "other": None, "ames": None, "mn": None}
41
-
42
- if not isinstance(raw, (dict, list)):
43
- return out
44
-
45
- # Search for common keys in whatever JSON we get back.
46
- def scan(obj):
47
- if isinstance(obj, dict):
48
- for k, v in obj.items():
49
- lk = str(k).lower()
50
- if lk in ("positive", "pos", "positivecount", "reportpositive", "positivereports"):
51
- if isinstance(v, int):
52
- out["pos"] = v
53
- if lk in ("negative", "neg", "negativecount", "reportnegative", "negativereports"):
54
- if isinstance(v, int):
55
- out["neg"] = v
56
- if lk in ("other", "othercount", "unknown", "uncertain"):
57
- if isinstance(v, int):
58
- out["other"] = v
59
- if "ames" in lk and out["ames"] is None:
60
- if isinstance(v, (str, bool, int)):
61
- out["ames"] = v
62
- if ("micronucleus" in lk or lk == "mn") and out["mn"] is None:
63
- if isinstance(v, (str, bool, int)):
64
- out["mn"] = v
65
- scan(v)
66
- elif isinstance(obj, list):
67
- for it in obj:
68
- scan(it)
69
-
70
- scan(raw)
71
- return out
72
-
73
-
74
- async def fetch_ctx_genetox(cas: str, http) -> Dict[str, Any]:
75
- cas = (cas or "").strip()
76
- resolve_url = f"https://comptox.epa.gov/dashboard/dsstoxdb/results?search={quote(cas, safe='')}"
77
- dtxsid = await resolve_dtxsid_from_dashboard(cas, http)
78
 
 
79
  if not dtxsid:
80
  return {
81
- "ok": True,
82
- "dtxsid": None,
83
- "message": "No DTXSID found for this query.",
84
- "resolveUrl": resolve_url,
85
- "raw": None,
86
  }
87
 
88
- # Best-effort: CompTox APIs vary; try a few common patterns.
89
- candidates = [
90
- f"https://comptox.epa.gov/dashboard/api/genetox?dtxsid={quote(dtxsid, safe='')}",
91
- f"https://comptox.epa.gov/dashboard/api/genetox/{quote(dtxsid, safe='')}",
92
- f"https://comptox.epa.gov/dashboard/api/assay/genetox?dtxsid={quote(dtxsid, safe='')}",
93
- f"https://comptox.epa.gov/dashboard/api/chemical/{quote(dtxsid, safe='')}",
94
- ]
95
-
96
- raw = None
97
- used = None
98
- for u in candidates:
99
- j = await _try_json(http, u)
100
- if j is not None:
101
- raw = j
102
- used = u
103
- break
104
-
105
- counts = _extract_counts(raw) if raw is not None else {"pos": None, "neg": None, "other": None, "ames": None, "mn": None}
106
-
107
- return {
108
- "ok": True,
109
- "dtxsid": dtxsid,
110
- "resolveUrl": resolve_url,
111
- "apiUrl": used,
112
- "counts": counts,
113
- "raw": raw,
114
- }
 
1
+ import os
 
 
2
  import re
3
+ from typing import Any, Dict, List, Optional
4
  from urllib.parse import quote
5
 
6
+ import httpx
7
+
8
+ # Matches production worker default
9
+ CTX_BASE_URL = os.getenv("CTX_BASE_URL", "https://comptox.epa.gov/ctx-api")
10
+ CTX_API_KEY = os.getenv("CTX_API_KEY") or os.getenv("COMPTOX_API_KEY") or os.getenv("CTX_KEY")
11
+
12
# A CAS Registry Number: 2–7 digits, 2 digits, then a single check digit.
CAS_RE = re.compile(r"^\d{2,7}-\d{2}-\d$")


def is_cas(s: str) -> bool:
    """Return True when *s* (after stripping) looks like a CAS registry number."""
    candidate = (s or "").strip()
    return CAS_RE.match(candidate) is not None
17
+
18
+
19
+ def _pick_dtxsid(rows: List[Any]) -> Optional[str]:
20
+ for r in rows or []:
21
+ if not isinstance(r, dict):
22
+ continue
23
+ id_ = (
24
+ r.get("dtxsid")
25
+ or r.get("DTXSID")
26
+ or r.get("dtxSid")
27
+ or (r.get("identifier") or {}).get("dtxsid")
28
+ or (r.get("chemical") or {}).get("dtxsid")
29
+ or r.get("DTXSIDv2")
30
+ or r.get("dtxsidv2")
31
+ )
32
+ if id_:
33
+ return str(id_).strip()
34
+ return None
35
+
36
+
37
+ def _as_rows(data: Any) -> List[Any]:
38
+ if isinstance(data, list):
39
+ return data
40
+ if isinstance(data, dict):
41
+ for key in ("data", "results", "items"):
42
+ v = data.get(key)
43
+ if isinstance(v, list):
44
+ return v
45
+ return []
46
+
47
+
48
async def _ctx_get(path: str, http: httpx.AsyncClient, params: Dict[str, Any] | None = None) -> Any:
    """GET a CTX API endpoint and return the parsed body.

    Sends the API key header when configured; raises on HTTP error status.
    Some endpoints serve JSON with a text/plain content-type, so parsing is
    attempted regardless and the raw text is returned as {"raw": ...} when
    it is not JSON.
    """
    headers = {"accept": "application/json"}
    if CTX_API_KEY:
        headers["x-api-key"] = CTX_API_KEY

    full_url = CTX_BASE_URL.rstrip("/") + path
    resp = await http.get(full_url, params=params, headers=headers, timeout=25.0, follow_redirects=True)
    resp.raise_for_status()
    try:
        return resp.json()
    except Exception:
        return {"raw": resp.text}
61
 
62
 
63
async def resolve_dtxsid(query: str, http: httpx.AsyncClient) -> Optional[str]:
    """Resolve a CAS number or chemical name to a DTXSID via the CTX API.

    Tries the chemical-search endpoints first, then falls back to the
    genetox summary search endpoints; each attempt is best-effort and
    failures are swallowed. Returns None when nothing matches.
    """
    q = (query or "").strip()
    if not q:
        return None

    # Chemical-search attempts first, then hazard/genetox fallbacks —
    # same order as before.
    attempts = [
        ("/chemical/search", {"casrn": q}),
        ("/chemical/search", {"name": q}),
        (f"/chemical/search/by-cas/{quote(q)}", None),
        (f"/chemical/search/by-name/{quote(q)}", None),
        ("/hazard/genetox/summary/search", {"name": q}),
        (f"/hazard/genetox/summary/search/by-name/{quote(q)}", None),
    ]

    for path, params in attempts:
        try:
            data = await _ctx_get(path, http, params=params)
            dtxsid = _pick_dtxsid(_as_rows(data))
            if dtxsid:
                return dtxsid
        except Exception:
            continue

    return None
 
 
 
 
 
 
 
101
 
102
 
103
def dashboard_search_url(query: str) -> str:
    """CompTox Dashboard search-page URL for *query* (stripped, URL-quoted)."""
    return "https://comptox.epa.gov/dashboard/chemical/search?query=" + quote((query or "").strip())
106
+
107
+
108
def dashboard_details_url(dtxsid: str) -> str:
    """CompTox Dashboard chemical-details page URL for *dtxsid*."""
    sid = (dtxsid or "").strip()
    return f"https://comptox.epa.gov/dashboard/chemical/details/{quote(sid)}"
110
+
111
+
112
async def fetch_ctx_genetox(cas_or_query: str, http: httpx.AsyncClient) -> Dict[str, Any]:
    """Fetch the Genetox summary from EPA CompTox (CTX), like the production Worker.

    Resolves a DTXSID for the query, then pulls the genetox summary for it.

    Returns:
        { ok, dtxsid, summary, dashboard_url } on success, otherwise an
        { ok: False, error, ... } dict describing what failed.
    """
    q = (cas_or_query or "").strip()
    if not q:
        return {"ok": False, "error": "Empty query"}

    dtxsid = await resolve_dtxsid(q, http)
    if not dtxsid:
        return {
            "ok": False,
            "error": "No DTXSID found for this query.",
            "dashboard_search": dashboard_search_url(q),
        }

    try:
        summary = await _ctx_get(
            f"/hazard/genetox/summary/search/by-dtxsid/{quote(dtxsid)}", http
        )
    except Exception as e:
        return {
            "ok": False,
            "dtxsid": dtxsid,
            "error": f"CTX genetox summary fetch failed: {e}",
            "dashboard_url": dashboard_details_url(dtxsid),
        }

    return {
        "ok": True,
        "dtxsid": dtxsid,
        "summary": summary,
        "dashboard_url": dashboard_details_url(dtxsid),
    }
 
 
 
 
 
 
 
 
 
 
core/sources/fema.py CHANGED
@@ -1,15 +1,17 @@
1
- # core/sources/fema.py
2
- from __future__ import annotations
3
 
4
- from urllib.parse import quote
5
 
 
 
6
 
7
- def fema_link(query: str):
8
- q = (query or "").strip()
9
- return {
10
- "ok": True,
11
- "url": (
12
- "https://fragrancematerialssafetyresource.elsevier.com/"
13
- f"?field_cas_tid_1={quote(q, safe='')}&field_chemical_synonym_tid="
14
- ),
15
- }
 
 
 
1
+ from urllib.parse import quote_plus
 
2
 
 
3
 
4
def fema_link(cas_or_query: str) -> dict:
    """Build the FEMA / Fragrance Materials Safety Resource search URL.

    Production uses Elsevier's Fragrance Materials Safety Resource with CAS query params.
    """
    q = (cas_or_query or "").strip()
    if not q:
        return {"ok": False, "error": "Empty query"}

    # NOTE: domain spelling matters; the older '...materialssafety...' variant often 404s.
    base = "https://fragrancematerialsafetyresource.elsevier.com/"
    query_string = f"field_cas_tid_1={quote_plus(q)}&field_chemical_synonym_tid="
    return {"ok": True, "url": f"{base}?{query_string}"}
core/sources/iarc.py CHANGED
@@ -1,13 +1,8 @@
1
- # core/sources/iarc.py
2
  from __future__ import annotations
3
-
4
  from urllib.parse import quote
 
5
 
6
-
7
- def bookshelf_link(query: str):
8
  q = (query or "").strip()
9
- return {
10
- "ok": True,
11
- "label": f'IARC Monographs — results for "{q}" (Bookshelf)',
12
- "url": f"https://www.ncbi.nlm.nih.gov/books/?term={quote(q, safe='')}",
13
- }
 
 
1
  from __future__ import annotations
 
2
  from urllib.parse import quote
3
+ import httpx
4
 
5
def bookshelf_link(query: str) -> dict:
    """Build an NCBI Bookshelf search result link scoped to the IARC Monographs."""
    q = (query or "").strip()
    # Fully percent-encode the term (safe="") so spaces and quotes survive.
    term = quote(f'{q} "IARC Monographs"', safe="")
    entry = {
        "title": f'IARC Monographs — results for “{q}” (Bookshelf)',
        "url": f"https://www.ncbi.nlm.nih.gov/books/?term={term}",
    }
    return {"ok": True, "results": [entry]}
 
 
 
core/sources/ntp.py CHANGED
@@ -1,83 +1,133 @@
1
- # core/sources/ntp.py
2
- from __future__ import annotations
3
-
4
  import re
5
- from typing import Any, Dict, List
6
- from urllib.parse import quote
7
 
8
- TR_RE = re.compile(r"\bTR[-\s]?(\d{2,4})\b", re.IGNORECASE)
9
- URL_RE = re.compile(r'href="([^"]+)"', re.IGNORECASE)
10
 
 
 
11
 
12
- def _abs(url: str) -> str:
13
- if url.startswith("http"):
14
- return url
15
- return "https://ntp.niehs.nih.gov" + (url if url.startswith("/") else "/" + url)
16
 
17
 
18
- async def _get_text(http, url: str) -> str:
19
- r = await http.get(url, follow_redirects=True, timeout=30.0)
20
- return r.text if r.status_code == 200 else ""
 
 
 
 
 
21
 
22
 
23
- def _extract_tr_hits(html: str, cas: str) -> List[Dict[str, Any]]:
24
- """
25
- Parse search result HTML and keep only entries where CAS appears near the TR listing.
26
- This is best-effort but enforces: ONLY TR hits for that CAS.
27
- """
28
- cas = (cas or "").strip()
29
- hits: List[Dict[str, Any]] = []
 
 
30
 
31
- # crude block split: many NTP pages separate results into <article> or <div class="search-result">
32
- blocks = re.split(r"(<article\b|<div[^>]+search[^>]*>)", html, flags=re.IGNORECASE)
33
- if len(blocks) <= 1:
34
- blocks = [html]
35
 
36
- for b in blocks:
37
- if cas and cas not in b:
 
 
 
38
  continue
39
-
40
- urls = URL_RE.findall(b)
41
- tr_nums = TR_RE.findall(b)
42
-
43
- if not tr_nums:
44
  continue
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
- # pick a likely report page URL
47
- report_url = None
48
- for u in urls:
49
- au = _abs(u)
50
- if "/publications/reports/tr/" in au or "/reports/tr/" in au:
51
- report_url = au
52
- break
53
 
54
- # title: best-effort from <a> text
55
- title = re.sub(r"\s+", " ", re.sub(r"<[^>]+>", " ", b)).strip()
56
- title = title[:220]
 
 
 
57
 
58
- tr_label = f"TR-{tr_nums[0]}"
59
- hits.append({"tr": tr_label, "title": title, "url": report_url})
60
 
61
- # de-dupe by TR
62
  seen = set()
63
- out = []
64
- for h in hits:
65
- if h["tr"] in seen:
66
- continue
67
- seen.add(h["tr"])
68
- out.append(h)
69
- return out
70
 
71
-
72
- async def search_technical_reports(cas: str, http, limit: int = 8) -> Dict[str, Any]:
73
- cas = (cas or "").strip()
74
- if not cas:
75
- return {"ok": False, "error": "Missing CAS.", "items": []}
76
-
77
- # Use NTP site search; we filter to TR + exact CAS presence in result blocks.
78
- search_url = f"https://ntp.niehs.nih.gov/search?query={quote(cas, safe='')}"
79
- html = await _get_text(http, search_url)
80
-
81
- items = _extract_tr_hits(html, cas)[: max(1, int(limit or 8))]
82
-
83
- return {"ok": True, "query": cas, "searchUrl": search_url, "items": items}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import html
 
 
2
  import re
3
+ from typing import Any, Dict, List, Optional
4
+ from urllib.parse import urljoin
5
 
6
+ import httpx
 
7
 
8
+ REPORTS_URL = "https://ntp.niehs.nih.gov/publications/reports"
9
+ BASE = "https://ntp.niehs.nih.gov"
10
 
11
+ TR_RE = re.compile(r"\bTR-?(\d{3,4})\b", re.IGNORECASE)
12
+ HREF_RE = re.compile(r"href=[\"']([^\"']+)[\"']", re.IGNORECASE)
13
+ TITLE_RE = re.compile(r"<title[^>]*>(.*?)</title>", re.IGNORECASE | re.DOTALL)
 
14
 
15
 
16
+ def _strip_tags(html_text: str) -> str:
17
+ # crude but robust enough for the NTP index page
18
+ text = re.sub(r"<script[\s\S]*?</script>", " ", html_text, flags=re.IGNORECASE)
19
+ text = re.sub(r"<style[\s\S]*?</style>", " ", text, flags=re.IGNORECASE)
20
+ text = re.sub(r"<[^>]+>", " ", text)
21
+ text = html.unescape(text)
22
+ text = re.sub(r"\s+", " ", text).strip()
23
+ return text
24
 
25
 
26
def _extract_title(page_html: str) -> str:
    """Pull the <title> text from a page, minus the trailing '| NTP ...' suffix."""
    match = TITLE_RE.search(page_html or "")
    if match is None:
        return ""
    title = re.sub(r"\s+", " ", html.unescape(match.group(1))).strip()
    # NTP appends a site-name suffix ("| NTP ...") to every page title; drop it.
    return re.sub(r"\s*\|\s*NTP.*$", "", title, flags=re.IGNORECASE).strip()
35
 
 
 
 
 
36
 
37
def _extract_pdf_url(page_html: str, page_url: str) -> Optional[str]:
    """Return the first PDF link found on the page, resolved against *page_url*.

    Skips in-page anchors; returns None when no PDF href is present.
    """
    for href in HREF_RE.findall(page_html or ""):
        if ".pdf" in href.lower() and not href.startswith("#"):
            return urljoin(page_url, href)
    return None
47
+
48
+
49
async def _fetch_tr_page(num: str, http: httpx.AsyncClient) -> Optional[Dict[str, Any]]:
    """Fetch the TR-<num> report page and summarize it.

    Returns a dict with {num, tr, report_page, title, year, pdf}, or None
    when the page cannot be fetched (HTTP >= 400 or transport error).
    """
    page_url = f"{BASE}/publications/reports/tr{num}"
    try:
        resp = await http.get(page_url, timeout=25, follow_redirects=True)
        if resp.status_code >= 400:
            return None
        page_html = resp.text
    except Exception:
        return None

    title = _extract_title(page_html)

    # Heuristic: the last 19xx/20xx number in the title is usually the report year.
    year = None
    if title:
        year_matches = re.findall(r"\b(19\d{2}|20\d{2})\b", title)
        if year_matches:
            year = year_matches[-1]

    return {
        "num": num,
        "tr": f"TR-{num}",
        "report_page": str(resp.url),
        "title": title,
        "year": year,
        "pdf": _extract_pdf_url(page_html, str(resp.url)),
    }
78
+
79
+
80
async def search_technical_reports(query: str, http: httpx.AsyncClient, limit: int = 8) -> Dict[str, Any]:
    """Search NTP Technical Reports and return ONLY TR hits relevant to the query.

    Implementation mirrors production (Cloudflare worker):
    - download the NTP reports index HTML
    - locate TR-### occurrences
    - keep a TR only if the query appears within ±250 chars of the match
    - fetch each TR page to obtain report page + PDF
    """
    q = (query or "").strip()
    if not q:
        return {"ok": False, "error": "Empty query", "items": []}

    try:
        resp = await http.get(REPORTS_URL, timeout=25, follow_redirects=True)
        resp.raise_for_status()
        index_html = resp.text
    except Exception as e:
        return {"ok": False, "error": f"Failed to fetch NTP index: {e}", "items": []}

    plain = _strip_tags(index_html)
    q_low = q.lower()
    cap = max(1, int(limit))

    matched_nums: List[str] = []
    seen = set()
    for m in TR_RE.finditer(plain):
        num = m.group(1)
        # Neighborhood window similar to production.
        window = plain[max(0, m.start() - 250):min(len(plain), m.end() + 250)].lower()
        if q_low not in window or num in seen:
            continue
        seen.add(num)
        matched_nums.append(num)
        if len(matched_nums) >= cap:
            break

    if not matched_nums:
        return {"ok": True, "query": q, "items": []}

    items: List[Dict[str, Any]] = []
    for num in matched_nums:
        item = await _fetch_tr_page(num, http)
        if item:
            items.append(item)
        if len(items) >= cap:
            break

    return {"ok": True, "query": q, "items": items}
core/sources/pubchem.py CHANGED
@@ -1,13 +1,12 @@
1
- # core/sources/pubchem.py
2
- from __future__ import annotations
3
-
4
  import re
5
  from typing import Any, Dict, List, Optional
6
  from urllib.parse import quote
7
 
8
- PUBCHEM = "https://pubchem.ncbi.nlm.nih.gov"
9
- PUG = f"{PUBCHEM}/rest/pug"
10
- PUG_VIEW = f"{PUBCHEM}/rest/pug_view"
 
11
 
12
  CAS_RE = re.compile(r"^\d{2,7}-\d{2}-\d$")
13
 
@@ -16,199 +15,195 @@ def is_cas(s: str) -> bool:
16
  return bool(CAS_RE.match((s or "").strip()))
17
 
18
 
19
- async def _get_json(http, url: str) -> Dict[str, Any]:
20
- try:
21
- r = await http.get(url, follow_redirects=True, timeout=30.0)
22
- if r.status_code != 200:
23
- return {"ok": False, "status": r.status_code, "url": url, "error": r.text[:500]}
24
- return {"ok": True, "url": url, "data": r.json()}
25
- except Exception as e:
26
- return {"ok": False, "url": url, "error": str(e)}
27
 
28
 
29
- async def _get_text(http, url: str) -> Dict[str, Any]:
30
- try:
31
- r = await http.get(url, follow_redirects=True, timeout=30.0)
32
- if r.status_code != 200:
33
- return {"ok": False, "status": r.status_code, "url": url, "error": r.text[:500]}
34
- return {"ok": True, "url": url, "text": r.text}
35
- except Exception as e:
36
- return {"ok": False, "url": url, "error": str(e)}
37
-
38
-
39
- async def cid_from_cas(cas: str, http) -> Optional[int]:
40
- cas = (cas or "").strip()
41
- url = f"{PUG}/compound/xref/RN/{quote(cas, safe='')}/cids/JSON"
42
- j = await _get_json(http, url)
43
- if not j.get("ok"):
44
- return None
45
- data = j["data"]
46
- cids = (data.get("IdentifierList") or {}).get("CID") or []
47
- return int(cids[0]) if cids else None
48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
- async def cid_from_name(name: str, http) -> Optional[int]:
51
- name = (name or "").strip()
52
- url = f"{PUG}/compound/name/{quote(name, safe='')}/cids/JSON"
53
- j = await _get_json(http, url)
54
- if not j.get("ok"):
55
- return None
56
- data = j["data"]
57
- cids = (data.get("IdentifierList") or {}).get("CID") or []
58
- return int(cids[0]) if cids else None
 
 
59
 
 
 
 
60
 
61
- async def fetch_properties(cid: int, http) -> Dict[str, Any]:
62
- props = "MolecularFormula,MolecularWeight,CanonicalSMILES,IUPACName"
63
- url = f"{PUG}/compound/cid/{cid}/property/{props}/JSON"
64
- j = await _get_json(http, url)
65
- if not j.get("ok"):
66
- return {}
67
- arr = ((j["data"].get("PropertyTable") or {}).get("Properties") or [])
68
- return arr[0] if arr else {}
 
 
 
 
69
 
70
 
71
- async def fetch_synonyms(cid: int, http) -> List[str]:
72
- url = f"{PUG}/compound/cid/{cid}/synonyms/JSON"
73
- j = await _get_json(http, url)
74
- if not j.get("ok"):
75
- return []
76
- info = (((j["data"].get("InformationList") or {}).get("Information")) or [])
77
- syns = (info[0].get("Synonym") if info else []) or []
78
- return [str(s) for s in syns]
79
 
80
 
81
- def _pick_resolved_cas(query: str, synonyms: List[str]) -> Optional[str]:
82
- q = (query or "").strip()
83
- if is_cas(q):
84
- return q
85
- for s in synonyms:
86
- if is_cas(s):
87
- return s
88
  return None
89
 
90
 
91
- def _extract_strings_from_value(value_obj: Any) -> List[str]:
92
- """PUG_VIEW uses Value.StringWithMarkup[].String often."""
93
- out: List[str] = []
94
- if isinstance(value_obj, dict):
95
- swm = value_obj.get("StringWithMarkup")
96
- if isinstance(swm, list):
97
- for item in swm:
98
- if isinstance(item, dict) and item.get("String"):
99
- out.append(str(item["String"]).strip())
100
- # Sometimes direct String
101
- if value_obj.get("String"):
102
- out.append(str(value_obj["String"]).strip())
103
- elif isinstance(value_obj, str):
104
- out.append(value_obj.strip())
105
- return [x for x in out if x]
106
-
107
-
108
- def _walk_sections(section: Any) -> List[dict]:
109
- """Flatten Record.Section tree."""
110
- acc: List[dict] = []
111
- if isinstance(section, dict):
112
- acc.append(section)
113
- kids = section.get("Section")
114
- if isinstance(kids, list):
115
- for k in kids:
116
- acc.extend(_walk_sections(k))
117
- elif isinstance(section, list):
118
- for s in section:
119
- acc.extend(_walk_sections(s))
120
- return acc
121
-
122
-
123
- def _section_heading(sec: dict) -> str:
124
- return str(sec.get("TOCHeading") or sec.get("Heading") or "")
125
-
126
-
127
- def _collect_hazard_paragraphs(pug_view_json: Dict[str, Any]) -> List[str]:
128
- record = pug_view_json.get("Record") or {}
129
- sections = _walk_sections(record.get("Section") or [])
130
-
131
- # Production-like: show all hazard paragraphs under Safety & Hazards / GHS / ECHA
132
- wanted = []
133
- keys = (
134
- "Safety and Hazards",
135
- "Hazards Identification",
136
- "GHS Classification",
137
- "Hazard Statements",
138
- "Precautionary Statement",
139
- "ECHA",
140
- "C&L",
141
- "Classification",
142
- "Label",
143
- "Hazard",
144
  )
 
 
 
 
 
145
 
146
- for sec in sections:
147
- h = _section_heading(sec)
148
- if not h:
149
- continue
150
- if any(k.lower() in h.lower() for k in keys):
151
- info_list = sec.get("Information") or []
152
- if isinstance(info_list, list):
153
- for info in info_list:
154
- if not isinstance(info, dict):
155
- continue
156
- v = info.get("Value")
157
- for s in _extract_strings_from_value(v):
158
- wanted.append(s)
159
-
160
- # De-dup while preserving order
161
- seen = set()
162
- out = []
163
- for p in wanted:
164
- p2 = " ".join(p.split())
165
- if not p2:
166
- continue
167
- if p2 in seen:
168
- continue
169
- seen.add(p2)
170
- out.append(p2)
171
- return out
172
 
 
 
 
 
 
173
 
174
- async def fetch_hazards(cid: int, http) -> Dict[str, Any]:
175
- url = f"{PUG_VIEW}/data/compound/{cid}/JSON"
176
- j = await _get_json(http, url)
177
- if not j.get("ok"):
178
- return {"ok": False, "error": j.get("error"), "url": url, "hazard_paragraphs": []}
179
- data = j["data"]
180
- paragraphs = _collect_hazard_paragraphs(data)
181
- return {"ok": True, "url": url, "hazard_paragraphs": paragraphs, "raw": data}
182
 
 
 
183
 
184
- async def pubchem_by_query(q: str, http) -> Dict[str, Any]:
185
- q = (q or "").strip()
 
186
  if not q:
187
- return {"ok": False, "error": "Empty query."}
188
 
189
- cid = await (cid_from_cas(q, http) if is_cas(q) else cid_from_name(q, http))
190
  if not cid:
191
- return {
192
- "ok": False,
193
- "error": "No PubChem CID found.",
194
- "query": q,
195
- "resolve_url": f"{PUBCHEM}/#query={quote(q)}",
196
- }
197
 
198
- props = await fetch_properties(cid, http)
199
- synonyms = await fetch_synonyms(cid, http)
200
- resolved_cas = _pick_resolved_cas(q, synonyms)
 
 
 
 
 
 
 
201
 
202
- hazards = await fetch_hazards(cid, http)
 
 
 
 
 
 
 
 
203
 
204
  return {
205
  "ok": True,
206
  "query": q,
207
  "cid": cid,
208
  "resolved_cas": resolved_cas,
209
- "url": f"{PUBCHEM}/compound/{cid}",
210
- "structure_png": f"{PUG}/compound/cid/{cid}/PNG?record_type=2d",
211
- "properties": props,
212
- "synonyms": synonyms,
213
- "hazards": hazards,
214
- }
 
1
+ import html
 
 
2
  import re
3
  from typing import Any, Dict, List, Optional
4
  from urllib.parse import quote
5
 
6
+ import httpx
7
+
8
+ PUBCHEM_REST = "https://pubchem.ncbi.nlm.nih.gov/rest/pug"
9
+ PUBCHEM_VIEW = "https://pubchem.ncbi.nlm.nih.gov/rest/pug_view"
10
 
11
  CAS_RE = re.compile(r"^\d{2,7}-\d{2}-\d$")
12
 
 
15
  return bool(CAS_RE.match((s or "").strip()))
16
 
17
 
18
+ def _first_cas_in_text(text: str) -> Optional[str]:
19
+ if not text:
20
+ return None
21
+ m = re.search(r"\b\d{2,7}-\d{2}-\d\b", text)
22
+ return m.group(0) if m else None
 
 
 
23
 
24
 
25
+ def _fmt_value(value: Any) -> str:
26
+ """Port of production `fmtInfoValue()` for PubChem PUG-View values."""
27
+ if value is None:
28
+ return ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
+ # PUG-View Value is usually a dict
31
+ if isinstance(value, dict):
32
+ if "StringWithMarkup" in value and isinstance(value["StringWithMarkup"], list):
33
+ parts: List[str] = []
34
+ for item in value["StringWithMarkup"]:
35
+ if isinstance(item, dict) and item.get("String"):
36
+ parts.append(str(item["String"]))
37
+ elif isinstance(item, str):
38
+ parts.append(item)
39
+ return html.unescape("".join(parts)).strip()
40
+ if "String" in value:
41
+ return html.unescape(str(value["String"])).strip()
42
+ if "Number" in value:
43
+ return str(value["Number"]) # already numeric
44
+ if "Boolean" in value:
45
+ return str(value["Boolean"])
46
+ if "Date" in value:
47
+ return str(value["Date"])
48
+
49
+ # Fallback
50
+ return html.unescape(str(value)).strip()
51
+
52
+
53
def _scan_hazards(section: Dict[str, Any], out: List[Dict[str, str]]):
    """Recursively collect hazard-related entries from a PUG-View section.

    Mirrors production `scanHazards()` semantics: an Information entry is
    kept when its Name contains one of the hazard markers below, and the
    formatted Value is non-empty. Results are appended to *out* as
    {"name", "text"} dicts; child sections are scanned recursively.
    """
    hazard_markers = (
        "ghs hazard statements",
        "echa c&l notifications summary",
        "carcinogenicity",
        "mutagenicity",
        "genotoxicity",
        "toxic",
        "hazard",
    )
    for info in section.get("Information") or []:
        name = (info.get("Name") or "").strip()
        lowered = name.lower()
        if any(marker in lowered for marker in hazard_markers):
            text = _fmt_value(info.get("Value"))
            if text:
                out.append({"name": name or "Hazard information", "text": text})

    for child in section.get("Section") or []:
        _scan_hazards(child, out)
77
+
78
+
79
+ def _extract_synonyms(record: Dict[str, Any]) -> List[str]:
80
+ """Best-effort extraction of synonyms list from PubChem PUG-View record."""
81
+ if not record:
82
+ return []
83
 
84
+ def walk(sec: Dict[str, Any], acc: List[str]):
85
+ # Synonyms often appear under Names and Identifiers
86
+ if (sec.get("TOCHeading") or "").lower() == "synonyms":
87
+ for info in sec.get("Information") or []:
88
+ val = info.get("Value")
89
+ if isinstance(val, dict) and isinstance(val.get("StringWithMarkup"), list):
90
+ for item in val["StringWithMarkup"]:
91
+ if isinstance(item, dict) and item.get("String"):
92
+ acc.append(str(item["String"]))
93
+ for sub in sec.get("Section") or []:
94
+ walk(sub, acc)
95
 
96
+ out: List[str] = []
97
+ for top in record.get("Section") or []:
98
+ walk(top, out)
99
 
100
+ # De-dupe preserve order
101
+ seen = set()
102
+ uniq: List[str] = []
103
+ for s in out:
104
+ s = s.strip()
105
+ if not s:
106
+ continue
107
+ if s.lower() in seen:
108
+ continue
109
+ seen.add(s.lower())
110
+ uniq.append(s)
111
+ return uniq
112
 
113
 
114
def _structure_png_url(cid: int) -> str:
    """2-D structure image URL for a PubChem CID."""
    base = f"{PUBCHEM_REST}/compound/cid/{cid}/PNG"
    return base + "?record_type=2d"
 
 
 
 
 
 
116
 
117
 
118
+ def _compound_url(cid: int) -> str:
119
+ return f"https://pubchem.ncbi.nlm.nih.gov/compound/{cid}"
120
+
121
+
122
+ def _safe_first(items: Any) -> Optional[Any]:
123
+ if isinstance(items, list) and items:
124
+ return items[0]
125
  return None
126
 
127
 
128
async def _cid_from_query(q: str, http: httpx.AsyncClient) -> Optional[int]:
    """Resolve a name/CAS query to a PubChem CID via PUG REST.

    Returns None on any failure (HTTP error, bad JSON, missing CID).
    """
    url = f"{PUBCHEM_REST}/compound/name/{quote(q)}/cids/JSON"
    try:
        resp = await http.get(url, timeout=20)
        resp.raise_for_status()
        cid = _safe_first(resp.json().get("IdentifierList", {}).get("CID"))
        return int(cid) if cid is not None else None
    except Exception:
        return None
138
+
139
+
140
async def _props_from_cid(cid: int, http: httpx.AsyncClient) -> Dict[str, Any]:
    """Fetch the property set production needs for *cid*.

    Raises on HTTP errors; returns {} when the property table is empty.
    """
    props = "MolecularFormula,MolecularWeight,CanonicalSMILES,IUPACName"
    url = f"{PUBCHEM_REST}/compound/cid/{cid}/property/{props}/JSON"
    resp = await http.get(url, timeout=20)
    resp.raise_for_status()
    first = _safe_first(resp.json().get("PropertyTable", {}).get("Properties"))
    return first or {}
151
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
 
153
async def _view_record(cid: int, http: httpx.AsyncClient) -> Dict[str, Any]:
    """Fetch the full PUG-View record JSON for *cid* (raises on HTTP errors)."""
    resp = await http.get(f"{PUBCHEM_VIEW}/data/compound/{cid}/JSON", timeout=25)
    resp.raise_for_status()
    return resp.json()
158
 
 
 
 
 
 
 
 
 
159
 
160
async def pubchem_by_query(query: str, http: httpx.AsyncClient) -> Dict[str, Any]:
    """Query PubChem by CAS or name.

    Returns a dict compatible with app.py renderers:
    { ok, query, cid, resolved_cas, props, structure_png, url, synonyms, hazards }
    on success, or { ok: False, error } when the query is empty or no CID
    resolves.
    """
    q = (query or "").strip()
    if not q:
        return {"ok": False, "error": "Empty query"}

    cid = await _cid_from_query(q, http)
    if not cid:
        return {"ok": False, "error": "No PubChem CID found"}

    props = await _props_from_cid(cid, http)
    record = (await _view_record(cid, http)).get("Record") or {}

    synonyms = _extract_synonyms(record)
    # A CAS query resolves to itself; otherwise look for a CAS among the synonyms.
    resolved_cas = q if is_cas(q) else _first_cas_in_text("\n".join(synonyms))

    hazards: List[Dict[str, str]] = []
    for top_section in record.get("Section") or []:
        _scan_hazards(top_section, hazards)

    # De-dupe hazards by (name, text) while preserving order.
    seen = set()
    deduped: List[Dict[str, str]] = []
    for h in hazards:
        key = (h.get("name", "").lower(), h.get("text", "").strip())
        if key not in seen:
            seen.add(key)
            deduped.append(h)

    return {
        "ok": True,
        "query": q,
        "cid": cid,
        "resolved_cas": resolved_cas,
        "props": props,
        "structure_png": _structure_png_url(cid),
        "url": _compound_url(cid),
        "synonyms": synonyms[:50],
        "hazards": deduped,
    }