Upload 14 files
Browse files- core/.DS_Store +0 -0
- core/__init__.py +1 -0
- core/config.py +34 -12
- core/pdf_report.py +72 -60
- core/rate_limit.py +56 -43
- core/sources/__init__.py +1 -0
- core/sources/ai_summary.py +24 -16
- core/sources/cdc.py +268 -329
- core/sources/ctx.py +136 -16
- core/sources/ntp.py +47 -1
- core/sources/pubchem.py +159 -170
- core/sources/scholar.py +9 -5
core/.DS_Store
ADDED
|
Binary file (6.15 kB). View file
|
|
|
core/__init__.py
CHANGED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
"""Core package for the HF demo app."""
|
core/config.py
CHANGED
|
@@ -1,17 +1,39 @@
|
|
| 1 |
-
import
|
| 2 |
-
from pydantic import BaseModel
|
| 3 |
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
openai_model: str = os.getenv("OPENAI_MODEL", "gpt-4o")
|
| 7 |
|
| 8 |
-
ctx_api_key: str | None = os.getenv("CTX_API_KEY")
|
| 9 |
-
ctx_base_url: str = os.getenv("CTX_BASE_URL", "https://comptox.epa.gov/ctx-api")
|
| 10 |
|
| 11 |
-
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
|
|
|
| 2 |
|
| 3 |
+
from pydantic import Field, AliasChoices
|
| 4 |
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
|
|
| 5 |
|
|
|
|
|
|
|
| 6 |
|
| 7 |
+
class Settings(BaseSettings):
    """Application settings, populated from environment variables.

    Fields that declare a ``validation_alias`` are read from the listed
    environment variable names (the first one that is set wins). Lookup is
    case-insensitive and no prefix is applied (see ``model_config``).
    """

    # App behavior
    # Global daily ceiling on AI summaries; the rate limiter treats a value
    # <= 0 as "no cap".
    max_ai_summaries_per_day: int = 25
    # Cache lifetime in seconds. NOTE(review): the consumer of this value is
    # not visible in this module - confirm where it is applied.
    cache_ttl_seconds: int = 3600

    # OpenAI
    # Empty string means "not configured"; callers check truthiness.
    openai_api_key: str = Field(
        default="",
        validation_alias=AliasChoices("OPENAI_API_KEY", "OPENAI_KEY"),
    )
    openai_model: str = Field(
        default="gpt-4o-mini",
        validation_alias=AliasChoices("OPENAI_MODEL"),
    )
    # Passed as the client timeout (seconds) when the OpenAI client is built.
    openai_timeout_seconds: int = Field(default=30, validation_alias=AliasChoices("OPENAI_TIMEOUT"))

    # Optional Redis backing for rate limits
    # Empty string selects the in-memory counter instead.
    redis_url: str = Field(default="", validation_alias=AliasChoices("REDIS_URL"))

    # CTX / CompTox
    ctx_base_url: str = Field(
        default="https://comptox.epa.gov/ctx-api",
        validation_alias=AliasChoices("CTX_BASE_URL"),
    )
    ctx_api_key: str = Field(
        default="",
        validation_alias=AliasChoices("CTX_API_KEY", "COMPTOX_API_KEY", "CTX_KEY"),
    )

    model_config = SettingsConfigDict(env_prefix="", case_sensitive=False)
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
settings = Settings()
|
core/pdf_report.py
CHANGED
|
@@ -1,65 +1,77 @@
|
|
| 1 |
from __future__ import annotations
|
|
|
|
| 2 |
import json
|
| 3 |
-
|
| 4 |
-
|
|
|
|
|
|
|
| 5 |
from typing import Any, Dict, Tuple
|
| 6 |
|
| 7 |
from reportlab.lib.pagesizes import letter
|
| 8 |
-
from reportlab.
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from __future__ import annotations
|
| 2 |
+
|
| 3 |
import json
|
| 4 |
+
import os
|
| 5 |
+
import tempfile
|
| 6 |
+
import textwrap
|
| 7 |
+
from datetime import datetime, timezone
|
| 8 |
from typing import Any, Dict, Tuple
|
| 9 |
|
| 10 |
from reportlab.lib.pagesizes import letter
|
| 11 |
+
from reportlab.pdfgen import canvas
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def _safe_text(value: Any) -> str:
|
| 15 |
+
if value is None:
|
| 16 |
+
return ""
|
| 17 |
+
return str(value).encode("ascii", "replace").decode("ascii")
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def _wrap_lines(text: str, width: int = 95) -> list[str]:
|
| 21 |
+
lines: list[str] = []
|
| 22 |
+
for raw in (text or "").splitlines():
|
| 23 |
+
if not raw:
|
| 24 |
+
lines.append("")
|
| 25 |
+
continue
|
| 26 |
+
lines.extend(textwrap.wrap(raw, width=width))
|
| 27 |
+
return lines
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def build_pdf(cas: str, evidence: Dict[str, Any], ai_summary: str | None = None) -> Tuple[str, str]:
    """Generate a PDF + JSON evidence packet and return file paths.

    Both files are written to ``<system temp dir>/toxrai_reports`` and share
    the stem ``toxrai_<sanitized cas>_<UTC timestamp>``.

    Args:
        cas: CAS number or free-text query; a falsy value is reported as
            ``"unknown"``.
        evidence: Evidence mapping. Values ``json`` cannot serialize natively
            are stringified (``default=str``).
        ai_summary: Optional summary text, rendered above the evidence
            snapshot when non-empty.

    Returns:
        ``(pdf_path, json_path)``.
    """
    # Read the clock once so the file-name stamp and the "Generated" line agree.
    now = datetime.now(timezone.utc)
    stamp = now.strftime("%Y%m%d_%H%M%S")

    query_text = _safe_text(cas or "unknown")
    # The query is caller-supplied and ends up in a file name: keep only
    # characters that are valid in file names on every platform.
    safe_cas = "".join(ch if (ch.isalnum() or ch in "._-") else "_" for ch in query_text)

    out_dir = os.path.join(tempfile.gettempdir(), "toxrai_reports")
    os.makedirs(out_dir, exist_ok=True)

    pdf_path = os.path.join(out_dir, f"toxrai_{safe_cas}_{stamp}.pdf")
    json_path = os.path.join(out_dir, f"toxrai_{safe_cas}_{stamp}.json")

    # JSON evidence: serialize once, reuse the text for the PDF snapshot.
    evidence_text = json.dumps(evidence, ensure_ascii=False, indent=2, default=str)
    with open(json_path, "w", encoding="utf-8") as f:
        f.write(evidence_text)

    # PDF
    c = canvas.Canvas(pdf_path, pagesize=letter)
    _, height = letter
    y = height - 40

    def draw_line(line: str) -> None:
        """Draw one text line at the cursor, starting a new page when full."""
        nonlocal y
        if y < 50:
            c.showPage()
            y = height - 40
        c.drawString(40, y, _safe_text(line))
        y -= 14

    draw_line("ToxRAI Evidence Report")
    draw_line(f"CAS / Query: {query_text}")
    draw_line(f"Generated: {now.isoformat()} UTC")
    draw_line("")

    if ai_summary:
        draw_line("AI Summary")
        for line in _wrap_lines(ai_summary, width=90):
            draw_line(line)
        draw_line("")

    # Include a compact evidence snapshot; the full data lives in the JSON file.
    truncated = len(evidence_text) > 8000
    if truncated:
        evidence_text = evidence_text[:8000] + "\n... (truncated)"
    # Only label the section as truncated when something was actually cut.
    draw_line("Evidence JSON (truncated)" if truncated else "Evidence JSON")
    for line in _wrap_lines(evidence_text, width=95):
        draw_line(line)

    c.save()
    return pdf_path, json_path
|
core/rate_limit.py
CHANGED
|
@@ -1,50 +1,63 @@
|
|
| 1 |
from __future__ import annotations
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
from
|
|
|
|
|
|
|
| 5 |
from .config import settings
|
| 6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
def _today_key() -> str:
|
| 8 |
-
return datetime.
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
limit = settings.max_ai_summaries_per_day
|
| 21 |
-
day = _today_key()
|
| 22 |
-
|
| 23 |
-
r = _redis_client()
|
| 24 |
-
if r:
|
| 25 |
-
key = f"toxrai:ai_summaries:{day}"
|
| 26 |
-
val = r.incr(key)
|
| 27 |
-
if val == 1:
|
| 28 |
-
r.expire(key, 60 * 60 * 24 * 2)
|
| 29 |
-
return (val <= limit), {"store": "redis", "day": day, "count": int(val), "limit": limit}
|
| 30 |
-
|
| 31 |
-
# fallback: file (may reset on restart if no persistent volume)
|
| 32 |
-
path = Path("/data/toxrai_limits.json") if os.path.isdir("/data") else Path(".toxrai_limits.json")
|
| 33 |
-
data = {}
|
| 34 |
-
if path.exists():
|
| 35 |
-
try:
|
| 36 |
-
data = json.loads(path.read_text(encoding="utf-8") or "{}")
|
| 37 |
-
except Exception:
|
| 38 |
-
data = {}
|
| 39 |
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
|
| 44 |
-
try:
|
| 45 |
-
path.write_text(json.dumps(data, indent=2), encoding="utf-8")
|
| 46 |
-
except Exception:
|
| 47 |
-
pass
|
| 48 |
|
| 49 |
-
|
| 50 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import threading
|
| 4 |
+
from datetime import datetime, timezone
|
| 5 |
+
from typing import Dict, Tuple, Any
|
| 6 |
+
|
| 7 |
from .config import settings
|
| 8 |
|
| 9 |
+
_lock = threading.Lock()
|
| 10 |
+
_state: Dict[str, Any] = {"date": None, "count": 0}
|
| 11 |
+
|
| 12 |
+
|
| 13 |
def _today_key() -> str:
|
| 14 |
+
return datetime.now(timezone.utc).strftime("%Y%m%d")
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def _in_memory_increment(limit: int) -> Tuple[bool, Dict[str, Any]]:
    """Count one AI-summary request against the module-level daily counter.

    The counter lives in the module-level ``_state`` dict and is reset the
    first time this is called on a new UTC date. The count is incremented on
    every call, including calls that end up over the limit.

    Args:
        limit: Maximum number of allowed requests per day.

    Returns:
        ``(allowed, info)`` where ``allowed`` is ``count <= limit`` and
        ``info`` holds ``limit``, ``count`` and ``remaining``.
    """
    info: Dict[str, Any] = {"limit": limit}
    # Hold the lock for the whole read-reset-increment sequence.
    with _lock:
        today = _today_key()
        if _state["date"] != today:
            _state["date"] = today
            _state["count"] = 0
        _state["count"] += 1
        count = int(_state["count"])

    info["count"] = count
    info["remaining"] = max(0, limit - count)
    return count <= limit, info
|
| 30 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
+
def _redis_increment(limit: int) -> Tuple[bool, Dict[str, Any]]:
    """Count one AI-summary request against a per-day Redis counter.

    The key is ``toxrai:ai_cap:<YYYYMMDD>`` (UTC). The increment and the TTL
    are sent in a single MULTI/EXEC transaction, so a counter key can never
    be left behind without an expiry.

    Args:
        limit: Maximum number of allowed requests per day.

    Returns:
        ``(allowed, info)`` where ``allowed`` is ``count <= limit`` and
        ``info`` holds ``limit``, ``count`` and ``remaining``.

    Raises:
        Exception: Whatever the ``redis`` import or client raises (missing
            package, connection failure, ...); the caller handles fallback.
    """
    info: Dict[str, Any] = {"limit": limit}
    import redis  # local import to keep optional

    client = redis.from_url(settings.redis_url, decode_responses=True)
    key = f"toxrai:ai_cap:{_today_key()}"

    # Expire well after the day ends to avoid unbounded key growth. Keys are
    # date-specific, so refreshing the TTL on each hit is harmless.
    with client.pipeline() as pipe:
        pipe.incr(key)
        pipe.expire(key, 60 * 60 * 48)
        raw_count, _ = pipe.execute()
    count = int(raw_count)

    info["count"] = count
    info["remaining"] = max(0, limit - count)
    return count <= limit, info
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def check_and_increment_global_ai_cap() -> Tuple[bool, Dict[str, Any]]:
    """Global daily cap to prevent overuse in public demos.

    Reads the limit from ``settings.max_ai_summaries_per_day``; a limit of
    zero or less disables the cap. When ``settings.redis_url`` is set the
    Redis counter is used, falling back to the in-memory counter on any
    Redis error.

    Returns:
        ``(allowed, info)``. ``info`` holds ``limit``, ``count`` and
        ``remaining`` (``None`` when the cap is disabled), plus
        ``redis_error`` when the Redis path failed and the fallback was used.
    """
    limit = int(settings.max_ai_summaries_per_day or 0)
    if limit <= 0:
        return True, {"limit": limit, "count": 0, "remaining": None}

    if settings.redis_url:
        try:
            return _redis_increment(limit)
        except Exception as exc:
            # Fall back to in-memory, but report the error for debugging.
            allowed, info = _in_memory_increment(limit)
            info["redis_error"] = str(exc)
            return allowed, info

    return _in_memory_increment(limit)
|
core/sources/__init__.py
CHANGED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
"""Source adapters for external datasets and services."""
|
core/sources/ai_summary.py
CHANGED
|
@@ -1,23 +1,31 @@
|
|
| 1 |
from __future__ import annotations
|
| 2 |
-
from openai import OpenAI
|
| 3 |
-
from ..config import settings
|
| 4 |
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
|
| 9 |
-
|
| 10 |
|
| 11 |
-
resp = client.responses.create(
|
| 12 |
-
model=settings.openai_model,
|
| 13 |
-
input=[{"role": "user", "content": prompt}],
|
| 14 |
-
max_output_tokens=900,
|
| 15 |
-
)
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
try:
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from __future__ import annotations
|
|
|
|
|
|
|
| 2 |
|
| 3 |
+
from typing import Dict, Any
|
| 4 |
+
|
| 5 |
+
from openai import OpenAI
|
| 6 |
|
| 7 |
+
from core.config import settings
|
| 8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
+
def generate_ai_summary(prompt: str) -> Dict[str, Any]:
    """Ask the configured OpenAI chat model for a summary of *prompt*.

    Never raises: every failure is reported in the returned dict.

    Args:
        prompt: User prompt text. Empty or whitespace-only prompts are
            rejected without calling the API.

    Returns:
        ``{"ok": True, "text": <summary>}`` on success, otherwise
        ``{"ok": False, "error": <message>}``.
    """
    # Reject blank prompts up front so they never cost an API call.
    if not prompt or not prompt.strip():
        return {"ok": False, "error": "Empty prompt"}
    if not settings.openai_api_key:
        return {"ok": False, "error": "Missing OPENAI_API_KEY"}

    try:
        client = OpenAI(api_key=settings.openai_api_key, timeout=settings.openai_timeout_seconds)
        resp = client.chat.completions.create(
            model=settings.openai_model,
            messages=[
                {"role": "system", "content": "You are a careful, concise toxicology assistant."},
                {"role": "user", "content": prompt},
            ],
            temperature=0.2,
        )
        # Guard the index so an empty choices list gives a readable error
        # instead of "list index out of range".
        if not resp.choices:
            return {"ok": False, "error": "Empty response from model"}
        text = (resp.choices[0].message.content or "").strip()
        if not text:
            return {"ok": False, "error": "Empty response from model"}
        return {"ok": True, "text": text}
    except Exception as e:
        # Boundary handler: the caller gets the message rather than a crash.
        return {"ok": False, "error": str(e)}
|
core/sources/cdc.py
CHANGED
|
@@ -1,343 +1,282 @@
|
|
| 1 |
-
"""CDC/ATSDR ToxProfiles local index + search.
|
| 2 |
-
|
| 3 |
-
Why local?
|
| 4 |
-
- CDC ToxProfiles don't have a simple public search API.
|
| 5 |
-
- Production code typically uses a prebuilt index (CAS/name -> URL).
|
| 6 |
-
|
| 7 |
-
This file mirrors that approach: you maintain TOXPROFILES as a list of dicts.
|
| 8 |
-
|
| 9 |
-
Each item MUST look like:
|
| 10 |
-
{"name": "Acetone", "cas": "67-64-1", "url": "https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=5&tid=1"}
|
| 11 |
-
|
| 12 |
-
Return shape is stable for Gradio rendering:
|
| 13 |
-
{"ok": True, "query": "...", "cas": "...", "matches": [...], "total": N}
|
| 14 |
-
"""
|
| 15 |
-
|
| 16 |
from __future__ import annotations
|
| 17 |
|
| 18 |
import re
|
| 19 |
-
from typing import
|
| 20 |
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
{"name": "Zinc (elemental)", "cas": "7440-66-6", "url": "https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=302&tid=54" }
|
| 268 |
-
# --- SNIP ---
|
| 269 |
-
# Paste the full list exactly as provided in the generated file I based this on.
|
| 270 |
]
|
| 271 |
|
| 272 |
-
_CAS_RE = re.compile(r"^\d{2,7}-\d{2}-\d$")
|
| 273 |
|
| 274 |
-
def
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
def is_cas(s: str) -> bool:
    """Return True when *s*, stripped of surrounding whitespace, matches ``_CAS_RE``.

    ``None`` and empty strings return False.
    """
    return bool(_CAS_RE.match((s or "").strip()))
|
| 279 |
|
| 280 |
|
| 281 |
-
def
|
| 282 |
-
"""
|
| 283 |
-
|
| 284 |
-
we skip them instead of crashing.
|
| 285 |
-
"""
|
| 286 |
-
if not isinstance(item, dict):
|
| 287 |
-
return None
|
| 288 |
-
name = (item.get("name") or "").strip()
|
| 289 |
-
cas = (item.get("cas") or "").strip()
|
| 290 |
-
url = (item.get("url") or "").strip()
|
| 291 |
-
if not name and not cas and not url:
|
| 292 |
-
return None
|
| 293 |
-
return {"name": name or "ToxProfile", "cas": cas, "url": url}
|
| 294 |
|
| 295 |
|
| 296 |
-
def search(query: str
|
| 297 |
-
"""
|
| 298 |
-
Search the local toxprofile index.
|
| 299 |
-
|
| 300 |
-
Behavior:
|
| 301 |
-
1) If resolved CAS exists -> try exact CAS match.
|
| 302 |
-
2) If query itself is CAS -> try exact CAS match.
|
| 303 |
-
3) If no CAS matches -> fall back to case-insensitive substring name match.
|
| 304 |
-
4) Always cap results at `limit` (default 8).
|
| 305 |
-
"""
|
| 306 |
q = (query or "").strip()
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
items.append(item)
|
| 314 |
-
|
| 315 |
-
matches: List[Dict[str, str]] = []
|
| 316 |
|
| 317 |
-
# CAS-first
|
| 318 |
-
cas_key = cas_q if is_cas(cas_q) else (q if is_cas(q) else "")
|
| 319 |
-
if cas_key:
|
| 320 |
-
for item in items:
|
| 321 |
-
if (item.get("cas") or "").strip() == cas_key:
|
| 322 |
-
matches.append(item)
|
| 323 |
|
| 324 |
-
|
| 325 |
-
if
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
if matches and q:
|
| 333 |
-
qn = _norm(q)
|
| 334 |
-
matches.sort(key=lambda m: 0 if _norm(m.get("name", "")) == qn else 1)
|
| 335 |
-
|
| 336 |
-
total = len(matches)
|
| 337 |
-
return {
|
| 338 |
-
"ok": True,
|
| 339 |
-
"query": q,
|
| 340 |
-
"cas": cas_key or (cas_q if is_cas(cas_q) else ""),
|
| 341 |
-
"total": total,
|
| 342 |
-
"matches": matches[: max(0, int(limit))],
|
| 343 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from __future__ import annotations
|
| 2 |
|
| 3 |
import re
|
| 4 |
+
from typing import List, Dict, Any
|
| 5 |
|
| 6 |
+
CDC_TOXPROFILES: List[Dict[str, str]] = [
|
| 7 |
+
{'name': 'Acetone', 'cas': '67-64-1', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=5&tid=1'},
|
| 8 |
+
{'name': 'Acrolein', 'cas': '107-02-8', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=557&tid=102'},
|
| 9 |
+
{'name': 'Acrylamide', 'cas': '79-06-1', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=1112&tid=236'},
|
| 10 |
+
{'name': 'Acrylonitrile', 'cas': '107-13-1', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=447&tid=78'},
|
| 11 |
+
{'name': 'Aldrin', 'cas': '309-00-2', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=317&tid=56'},
|
| 12 |
+
{'name': 'Aluminum', 'cas': '7429-90-5', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=191&tid=34'},
|
| 13 |
+
{'name': 'Americium', 'cas': '7440-35-9', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=811&tid=158'},
|
| 14 |
+
{'name': 'Ammonia', 'cas': '7664-41-7', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=11&tid=2'},
|
| 15 |
+
{'name': 'Antimony', 'cas': '7440-36-0', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=332&tid=58'},
|
| 16 |
+
{'name': 'Arsenic', 'cas': '7440-38-2', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=22&tid=3'},
|
| 17 |
+
{'name': 'Asbestos', 'cas': '1332-21-4', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=30&tid=4'},
|
| 18 |
+
{'name': 'Atrazine', 'cas': '1912-24-9', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=338&tid=59'},
|
| 19 |
+
{'name': '1,3-Butadiene', 'cas': '106-99-0', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=459&tid=81'},
|
| 20 |
+
{'name': '1-Bromopropane', 'cas': 'NA', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=1471&tid=285'},
|
| 21 |
+
{'name': '2,3-Benzofuran', 'cas': '271-89-6', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=915&tid=187'},
|
| 22 |
+
{'name': '2-Butanone', 'cas': '78-93-3', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=343&tid=60'},
|
| 23 |
+
{'name': '2-Butoxyethanol', 'cas': '111-76-2', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=347&tid=61'},
|
| 24 |
+
{'name': '2-Butoxyethanol Acetate', 'cas': '112-07-2', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=347&tid=61'},
|
| 25 |
+
{'name': 'Barium', 'cas': '7440-39-3', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=327&tid=57'},
|
| 26 |
+
{'name': 'Benzene', 'cas': '71-43-2', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=40&tid=14'},
|
| 27 |
+
{'name': 'Benzidine', 'cas': '92-87-5', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=569&tid=105'},
|
| 28 |
+
{'name': 'Beryllium', 'cas': '7440-41-7', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=1441&tid=33'},
|
| 29 |
+
{'name': 'Bis(2-chloroethyl) Ether (BCEE)', 'cas': '111-44-4', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=817&tid=159'},
|
| 30 |
+
{'name': 'Bis(chloromethyl) Ether', 'cas': '542-88-1', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=919&tid=188'},
|
| 31 |
+
{'name': 'Boron', 'cas': '7440-42-8', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=453&tid=80'},
|
| 32 |
+
{'name': 'Bromodichloromethane', 'cas': '75-27-4', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=708&tid=127'},
|
| 33 |
+
{'name': 'Bromoform', 'cas': '75-25-2', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=713&tid=128'},
|
| 34 |
+
{'name': 'Dibromochloromethane', 'cas': '124-48-1', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=713&tid=128'},
|
| 35 |
+
{'name': 'Bromomethane', 'cas': '74-83-9', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=822&tid=160'},
|
| 36 |
+
{'name': 'Cadmium', 'cas': '7440-43-9', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=48&tid=15'},
|
| 37 |
+
{'name': 'Carbon Disulfide', 'cas': '75-15-0', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=474&tid=84'},
|
| 38 |
+
{'name': 'Carbon Monoxide', 'cas': '630-08-0', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=1145&tid=253'},
|
| 39 |
+
{'name': 'Carbon Tetrachloride', 'cas': '56-23-5', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=196&tid=35'},
|
| 40 |
+
{'name': 'Cesium', 'cas': '7440-46-2', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=578&tid=107'},
|
| 41 |
+
{'name': 'Chlordane', 'cas': '12789-03-6', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=355&tid=62'},
|
| 42 |
+
{'name': 'Chlordecone', 'cas': '143-50-0', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=1190&tid=276'},
|
| 43 |
+
{'name': 'Chlorfenvinphos', 'cas': '470-90-6', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=932&tid=193'},
|
| 44 |
+
{'name': 'Chlorinated Dibenzo-p-dioxins (CDDs)', 'cas': 'NA', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=366&tid=63'},
|
| 45 |
+
{'name': 'Chlorine', 'cas': '7782-50-5', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=1079&tid=36'},
|
| 46 |
+
{'name': 'Chlorine Dioxide', 'cas': '10049-04-4', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=582&tid=108'},
|
| 47 |
+
{'name': 'Chlorite', 'cas': '7758-19-2', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=582&tid=108'},
|
| 48 |
+
{'name': 'Chlorobenzene', 'cas': '108-90-7', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=489&tid=87'},
|
| 49 |
+
{'name': 'Chlorodibenzofurans (CDFs)', 'cas': 'NA', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=938&tid=194'},
|
| 50 |
+
{'name': 'Chloroethane', 'cas': '75-00-3', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=827&tid=161'},
|
| 51 |
+
{'name': 'Chloroform', 'cas': '67-66-3', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=53&tid=16'},
|
| 52 |
+
{'name': 'Chloromethane', 'cas': '74-87-3', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=587&tid=109'},
|
| 53 |
+
{'name': 'Chlorophenols', 'cas': '58-90-2, 88-06-2, 95-57-8, 95-95-4, 106-48-9, 120-83-2', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=941&tid=195'},
|
| 54 |
+
{'name': 'Chlorpyrifos', 'cas': '2921-88-2', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=495&tid=88'},
|
| 55 |
+
{'name': 'Chromium', 'cas': '7440-47-3', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=62&tid=17'},
|
| 56 |
+
{'name': 'Cobalt', 'cas': '7440-48-4', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=373&tid=64'},
|
| 57 |
+
{'name': 'Copper', 'cas': '7440-50-8', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=206&tid=37'},
|
| 58 |
+
{'name': 'Wood Creosote', 'cas': '8021-39-4', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=66&tid=18'},
|
| 59 |
+
{'name': 'Coal Tar Creosote', 'cas': '8001-58-9', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=66&tid=18'},
|
| 60 |
+
{'name': 'Coal Tar', 'cas': '8007-45-2', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=66&tid=18'},
|
| 61 |
+
{'name': 'Cresols', 'cas': '1319-77-3', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=946&tid=196'},
|
| 62 |
+
{'name': 'Cyanide', 'cas': '74-90-8; 143-33-9; 151-50-8; 592-01-8; 544-92-3; 506-61-6; 460-19-5; 506-77-4', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=72&tid=19'},
|
| 63 |
+
{'name': '1,1-Dichloroethane', 'cas': '75-34-3', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=718&tid=129'},
|
| 64 |
+
{'name': '1,1-Dichloroethene', 'cas': '75-35-4', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=722&tid=130'},
|
| 65 |
+
{'name': '1,2-Dibromo-3-chloropropane', 'cas': '96-12-8', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=852&tid=166'},
|
| 66 |
+
{'name': '1,2-Dibromoethane', 'cas': '106-93-4', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=726&tid=131'},
|
| 67 |
+
{'name': '1,2-Dichloroethane', 'cas': '107-06-2', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=592&tid=110'},
|
| 68 |
+
{'name': '1,2-Dichloropropane', 'cas': '78-87-5', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=831&tid=162'},
|
| 69 |
+
{'name': '1,2-Diphenylhydrazine', 'cas': '122-66-7', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=952&tid=198'},
|
| 70 |
+
{'name': '1,3-Dinitrobenzene', 'cas': '99-65-0', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=842&tid=164'},
|
| 71 |
+
{'name': '1,3,5-Trinitrobenzene', 'cas': '99-35-4', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=842&tid=164'},
|
| 72 |
+
{'name': '1,4 Dioxane', 'cas': '123-91-1', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=955&tid=199'},
|
| 73 |
+
{'name': '2,4-Dichlorophenoxyacetic Acid (2,4-D)', 'cas': 'NA', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=1481&tid=288'},
|
| 74 |
+
{'name': "3,3'-Dichlorobenzidine", 'cas': '91-94-1', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=959&tid=200'},
|
| 75 |
+
{'name': 'DDT', 'cas': '50-29-3', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=81&tid=20'},
|
| 76 |
+
{'name': 'DDE', 'cas': '72-55-9', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=81&tid=20'},
|
| 77 |
+
{'name': 'DDD', 'cas': '72-54-8', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=81&tid=20'},
|
| 78 |
+
{'name': 'DEET (N,N-diethyl-meta-toluamide)', 'cas': 'NA', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=1451&tid=201'},
|
| 79 |
+
{'name': 'Di(2-Ethylhexyl)Phthalate (DEHP)', 'cas': '117-81-7', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=684&tid=65'},
|
| 80 |
+
{'name': 'Di-n-butyl Phthalate', 'cas': '84-74-2', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=859&tid=167'},
|
| 81 |
+
{'name': 'Di-n-octylphthalate (DNOP)', 'cas': '117-84-0', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=973&tid=204'},
|
| 82 |
+
{'name': 'Diazinon', 'cas': '333-41-5', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=512&tid=90'},
|
| 83 |
+
{'name': '1,2-Dichlorobenzene', 'cas': '95-50-1', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=704&tid=126'},
|
| 84 |
+
{'name': '1,3-Dichlorobenzene', 'cas': '541-73-1', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=704&tid=126'},
|
| 85 |
+
{'name': '1,4-Dichlorobenzene', 'cas': '106-46-7', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=704&tid=126'},
|
| 86 |
+
{'name': 'Dichloropropenes', 'cas': '26952-23-8', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=836&tid=163'},
|
| 87 |
+
{'name': 'Dichlorvos', 'cas': '62-73-7', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=597&tid=111'},
|
| 88 |
+
{'name': 'Diethyl phthalate', 'cas': '84-66-2', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=603&tid=112'},
|
| 89 |
+
{'name': 'Diisopropyl Methylphosphonate (DIMP)', 'cas': '1445-75-6', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=969&tid=203'},
|
| 90 |
+
{'name': 'Dinitrocresols (4,6-DNOC)', 'cas': '534-52-1', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=1025&tid=218'},
|
| 91 |
+
{'name': 'Dinitrophenols (2,4-DNP)', 'cas': '51-28-5', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=729&tid=132'},
|
| 92 |
+
{'name': 'Dinitrotoluenes (2,4-)', 'cas': '121-14-2', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=847&tid=165'},
|
| 93 |
+
{'name': 'Dinitrotoluenes (2,6-)', 'cas': '606-20-2', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=847&tid=165'},
|
| 94 |
+
{'name': 'Dinitrotoluenes (Mixture)', 'cas': '25321-14-6', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=847&tid=165'},
|
| 95 |
+
{'name': 'Dinitrotoluenes (Other isomers)', 'cas': '', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=847&tid=165'},
|
| 96 |
+
{'name': 'Disulfoton', 'cas': '298-04-4', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=978&tid=205'},
|
| 97 |
+
{'name': 'Dieldrin', 'cas': '60-57-1', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=317&tid=56'},
|
| 98 |
+
{'name': 'Endosulfan (mixture)', 'cas': '115-29-7', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=609&tid=113'},
|
| 99 |
+
{'name': 'Endosulfan (alpha-isomer)', 'cas': '959-98-8', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=609&tid=113'},
|
| 100 |
+
{'name': 'Endosulfan (beta-isomer)', 'cas': '33213-65-9', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=609&tid=113'},
|
| 101 |
+
{'name': 'Endrin', 'cas': '72-20-8', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=617&tid=114'},
|
| 102 |
+
{'name': 'Ethion', 'cas': '563-12-2', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=983&tid=206'},
|
| 103 |
+
{'name': 'Ethylbenzene', 'cas': '100-41-4', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=383&tid=66'},
|
| 104 |
+
{'name': 'Ethylene Glycol', 'cas': '107-21-1', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=86&tid=21'},
|
| 105 |
+
{'name': 'Ethylene Oxide', 'cas': '75-21-8', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=734&tid=133'},
|
| 106 |
+
{'name': 'Fluorides, Hydrogen Fluoride, and Fluorine', 'cas': '7664-39-3', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=212&tid=38'},
|
| 107 |
+
{'name': 'Fluorides, Hydrogen Fluoride, and Fluorine', 'cas': '7782-41-4', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=212&tid=38'},
|
| 108 |
+
{'name': 'Fluorides, Hydrogen Fluoride, and Fluorine', 'cas': '7681-49-4', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=212&tid=38'},
|
| 109 |
+
{'name': 'Formaldehyde', 'cas': '50-00-0', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=220&tid=39'},
|
| 110 |
+
{'name': 'Fuel Oils / Kerosene (Kerosene/Fuel Oil #1)', 'cas': '8008-20-6', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=516&tid=91'},
|
| 111 |
+
{'name': 'Fuel Oils / Kerosene (Fuel Oil #2)', 'cas': '68476-30-2', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=516&tid=91'},
|
| 112 |
+
{'name': 'Fuel Oils / Kerosene (Fuel Oil #3)', 'cas': '70892-10-3', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=516&tid=91'},
|
| 113 |
+
{'name': 'Fuel Oils / Kerosene (Fuel Oil #4)', 'cas': '68476-34-6', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=516&tid=91'},
|
| 114 |
+
{'name': 'Fuel Oils / Kerosene (Fuel Oil #5)', 'cas': '68476-31-3', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=516&tid=91'},
|
| 115 |
+
{'name': 'Gasoline, Automotive (mixture)', 'cas': '8006-61-9', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=468&tid=83'},
|
| 116 |
+
{'name': 'Glutaraldehyde', 'cas': '111-30-8', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=1467&tid=284'},
|
| 117 |
+
{'name': 'Glyphosate', 'cas': '1071-83-6', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=1488&tid=293'},
|
| 118 |
+
{'name': 'Guthion (Azinphos-methyl)', 'cas': '86-50-0', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=1037&tid=207'},
|
| 119 |
+
{'name': '2-Hexanone', 'cas': '591-78-6', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=738&tid=134'},
|
| 120 |
+
{'name': 'Heptachlor/Heptachlor Epoxide', 'cas': '76-44-8', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=746&tid=135'},
|
| 121 |
+
{'name': 'Heptachlor/Heptachlor Epoxide', 'cas': '1024-57-3', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=746&tid=135'},
|
| 122 |
+
{'name': 'Hexachlorobenzene', 'cas': '118-74-1', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=627&tid=115'},
|
| 123 |
+
{'name': 'Hexachlorobutadiene', 'cas': '87-68-3', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=865&tid=168'},
|
| 124 |
+
{'name': 'Hexachlorocyclohexane (HCH) (Technical Grade)', 'cas': '608-73-1', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=754&tid=138'},
|
| 125 |
+
{'name': 'Hexachlorocyclohexane (HCH) (alpha-isomer)', 'cas': '319-84-6', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=754&tid=138'},
|
| 126 |
+
{'name': 'Hexachlorocyclohexane (HCH) (beta-isomer)', 'cas': '319-85-7', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=754&tid=138'},
|
| 127 |
+
{'name': 'Hexachlorocyclohexane (HCH) (gamma-isomer/Lindane)', 'cas': '58-89-9', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=754&tid=138'},
|
| 128 |
+
{'name': 'Hexachlorocyclohexane (HCH) (delta-isomer)', 'cas': '319-86-8', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=754&tid=138'},
|
| 129 |
+
{'name': 'Hexachlorocyclopentadiene (HCCPD)', 'cas': '77-47-4', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=992&tid=208'},
|
| 130 |
+
{'name': 'Hexachloroethane', 'cas': '67-72-1', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=870&tid=169'},
|
| 131 |
+
{'name': 'Hexamethylene Diisocyanate (HDI)', 'cas': '822-06-0', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=874&tid=170'},
|
| 132 |
+
{'name': 'HMX (Octogen)', 'cas': '2691-41-0', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=877&tid=171'},
|
| 133 |
+
{'name': 'Hydraulic Fluids (Mineral oil base example)', 'cas': '55957-10-3', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=757&tid=141'},
|
| 134 |
+
{'name': 'Hydraulic Fluids (Mineral oil base example)', 'cas': '68937-40-6', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=757&tid=141'},
|
| 135 |
+
{'name': 'Hydraulic Fluids (Mineral oil base example)', 'cas': '50815-84-4', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=757&tid=141'},
|
| 136 |
+
{'name': 'Hydraulic Fluids (Mineral oil base example)', 'cas': '55962-27-1', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=757&tid=141'},
|
| 137 |
+
{'name': 'Hydraulic Fluids (Mineral oil base example)', 'cas': '66594-31-8', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=757&tid=141'},
|
| 138 |
+
{'name': 'Hydraulic Fluids (Mineral oil base example)', 'cas': '63848-94-2', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=757&tid=141'},
|
| 139 |
+
{'name': 'Hydraulic Fluids (Mineral oil base example)', 'cas': '107028-44-4', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=757&tid=141'},
|
| 140 |
+
{'name': 'Hydraulic Fluids (Mineral oil base example)', 'cas': '28777-70-0', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=757&tid=141'},
|
| 141 |
+
{'name': 'Hydrazines (Hydrazine)', 'cas': '302-01-2', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=502&tid=89'},
|
| 142 |
+
{'name': 'Hydrazines (1,1-Dimethylhydrazine)', 'cas': '57-14-7', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=502&tid=89'},
|
| 143 |
+
{'name': 'Hydrazines (1,2-Dimethylhydrazine)', 'cas': '540-73-8', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=502&tid=89'},
|
| 144 |
+
{'name': 'Hydrogen Sulfide', 'cas': '7783-06-4', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=389&tid=67'},
|
| 145 |
+
{'name': 'Carbonyl Sulfide', 'cas': '463-58-1', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=389&tid=67'},
|
| 146 |
+
{'name': 'n-Hexane', 'cas': '110-54-3', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=393&tid=68'},
|
| 147 |
+
{'name': 'Iodine (elemental)', 'cas': '7553-56-2', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=479&tid=85'},
|
| 148 |
+
{'name': 'Ionizing Radiation', 'cas': 'N/A', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=484&tid=86'},
|
| 149 |
+
{'name': 'Isophorone', 'cas': '78-59-1', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=763&tid=148'},
|
| 150 |
+
{'name': 'Jet Fuels JP-4 and JP-7 (Mixture)', 'cas': '50815-00-4', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=768&tid=149'},
|
| 151 |
+
{'name': 'JP-5, JP-8, and Jet A (Kerosene base)', 'cas': '8008-20-6', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=773&tid=150'},
|
| 152 |
+
{'name': 'Lead (elemental)', 'cas': '7439-92-1', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=96&tid=22'},
|
| 153 |
+
{'name': "4,4'-Methylenebis(2-Chloroaniline) (MBOCA)", 'cas': '101-14-4', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=997&tid=209'},
|
| 154 |
+
{'name': "4,4'-Methylenedianiline", 'cas': '101-77-9', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=1001&tid=210'},
|
| 155 |
+
{'name': 'Malathion', 'cas': '121-75-5', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=522&tid=92'},
|
| 156 |
+
{'name': 'Manganese (elemental)', 'cas': '7439-96-5', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=102&tid=23'},
|
| 157 |
+
{'name': 'Mercury (elemental)', 'cas': '7439-97-6', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=115&tid=24'},
|
| 158 |
+
{'name': 'Methoxychlor', 'cas': '72-43-5', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=778&tid=151'},
|
| 159 |
+
{'name': 'Methyl Mercaptan', 'cas': '74-93-1', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=224&tid=40'},
|
| 160 |
+
{'name': 'Methyl Parathion', 'cas': '298-00-0', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=636&tid=117'},
|
| 161 |
+
{'name': 'Methyl tert-Butyl Ether (MTBE)', 'cas': '1634-04-4', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=228&tid=41'},
|
| 162 |
+
{'name': 'Methylene Chloride', 'cas': '75-09-2', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=234&tid=42'},
|
| 163 |
+
{'name': 'Mirex', 'cas': '2385-85-5', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=1190&tid=276'},
|
| 164 |
+
{'name': 'Chlordecone', 'cas': '143-50-0', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=1190&tid=276'},
|
| 165 |
+
{'name': 'Molybdenum (elemental)', 'cas': '7439-98-7', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=1482&tid=289'},
|
| 166 |
+
{'name': 'n-Nitrosodi-n-propylamine', 'cas': '621-64-7', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=1005&tid=211'},
|
| 167 |
+
{'name': 'N-Nitrosodimethylamine (NDMA)', 'cas': '62-75-9', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=884&tid=173'},
|
| 168 |
+
{'name': 'n-Nitrosodiphenylamine', 'cas': '86-30-6', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=1009&tid=212'},
|
| 169 |
+
{'name': 'Naphthalene, 1-Methylnaphthalene, 2-Methylnaphthalene', 'cas': '91-20-3', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=240&tid=43'},
|
| 170 |
+
{'name': 'Naphthalene, 1-Methylnaphthalene, 2-Methylnaphthalene', 'cas': '90-12-0', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=240&tid=43'},
|
| 171 |
+
{'name': 'Naphthalene, 1-Methylnaphthalene, 2-Methylnaphthalene', 'cas': '91-57-6', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=240&tid=43'},
|
| 172 |
+
{'name': 'Nickel (elemental)', 'cas': '7440-02-0', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=245&tid=44'},
|
| 173 |
+
{'name': 'Nitrate and Nitrite (Nitrate ion)', 'cas': '14797-55-8', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=1452&tid=258'},
|
| 174 |
+
{'name': 'Nitrate and Nitrite (Nitrite ion)', 'cas': '14797-65-0', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=1452&tid=258'},
|
| 175 |
+
{'name': 'Nitrobenzene', 'cas': '98-95-3', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=532&tid=95'},
|
| 176 |
+
{'name': 'Nitrophenols (Group - e.g., 2-Nitrophenol)', 'cas': '88-75-5', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=880&tid=172'},
|
| 177 |
+
{'name': 'Nitrophenols (Group - e.g., 4-Nitrophenol)', 'cas': '100-02-7', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=880&tid=172'},
|
| 178 |
+
{'name': 'Otto Fuel II and its Components (Mixture)', 'cas': '6423-43-4', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=781&tid=152'},
|
| 179 |
+
{'name': 'Parathion', 'cas': '56-38-2', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=1425&tid=246'},
|
| 180 |
+
{'name': 'Pentachlorophenol', 'cas': '87-86-5', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=402&tid=70'},
|
| 181 |
+
{'name': 'Perchlorates (Group - e.g., Ammonium)', 'cas': '7790-98-9', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=895&tid=181'},
|
| 182 |
+
{'name': 'Perchlorates (Group - e.g., Potassium)', 'cas': '7778-74-7', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=895&tid=181'},
|
| 183 |
+
{'name': 'Perfluoroalkyls (Group - e.g., PFOA)', 'cas': '335-67-1', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=1117&tid=237'},
|
| 184 |
+
{'name': 'Perfluoroalkyls (Group - e.g., PFOS)', 'cas': '1763-23-1', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=1117&tid=237'},
|
| 185 |
+
{'name': 'Phenol', 'cas': '108-95-2', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=148&tid=27'},
|
| 186 |
+
{'name': 'Phosphate Ester Flame Retardants (Group - e.g., TBP)', 'cas': '126-73-8', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=1119&tid=239'},
|
| 187 |
+
{'name': 'Phosphate Ester Flame Retardants (Group - e.g., TDCPP)', 'cas': '13674-87-8', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=1119&tid=239'},
|
| 188 |
+
{'name': 'Phosphorus, White (elemental)', 'cas': '7723-14-0', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=285&tid=52'},
|
| 189 |
+
{'name': 'Plutonium (elemental)', 'cas': '7440-07-5', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=648&tid=119'},
|
| 190 |
+
{'name': 'Polybrominated Biphenyls (PBBs) (Group - e.g., Hexa-)', 'cas': '36355-01-8', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=529&tid=94'},
|
| 191 |
+
{'name': 'Polybrominated Diphenyl Ethers (PBDEs) (Group - e.g., Deca-)', 'cas': '1163-19-5', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=901&tid=183'},
|
| 192 |
+
{'name': 'Polybrominated Diphenyl Ethers (PBDEs) (Group - Penta- mixture)', 'cas': '32534-81-9', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=901&tid=183'},
|
| 193 |
+
{'name': 'Polybrominated Diphenyl Ethers (PBDEs) (Group - Octa- mixture)', 'cas': '32536-52-0', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=901&tid=183'},
|
| 194 |
+
{'name': 'Polychlorinated Biphenyls (PCBs) (Mixture)', 'cas': '1336-36-3', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=142&tid=26'},
|
| 195 |
+
{'name': 'Polycyclic Aromatic Hydrocarbons (PAHs) (Group - e.g., BaP)', 'cas': '50-32-8', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=122&tid=25'},
|
| 196 |
+
{'name': 'Polycyclic Aromatic Hydrocarbons (PAHs) (Group - e.g., Naphthalene)', 'cas': '91-20-3', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=122&tid=25'},
|
| 197 |
+
{'name': 'Propylene Glycol', 'cas': '57-55-6', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=1122&tid=240'},
|
| 198 |
+
{'name': 'Pyrethrins and Pyrethroids (Pyrethrins mixture)', 'cas': '8003-34-7', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=787&tid=153'},
|
| 199 |
+
{'name': 'Pyrethrins and Pyrethroids (e.g., Permethrin)', 'cas': '52645-53-1', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=787&tid=153'},
|
| 200 |
+
{'name': 'Pyridine', 'cas': '110-86-1', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=535&tid=96'},
|
| 201 |
+
{'name': 'Radiation, Ionizing', 'cas': 'N/A', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=484&tid=86'},
|
| 202 |
+
{'name': 'Radium (elemental)', 'cas': '7440-14-4', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=791&tid=154'},
|
| 203 |
+
{'name': 'Radon (elemental)', 'cas': '10043-92-2', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=407&tid=71'},
|
| 204 |
+
{'name': 'RDX (Cyclonite)', 'cas': '121-82-4', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=412&tid=72'},
|
| 205 |
+
{'name': 'Selenium (elemental)', 'cas': '7782-49-2', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=153&tid=28'},
|
| 206 |
+
{'name': 'Silica (Silicon Dioxide)', 'cas': '7631-86-9', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=1483&tid=290'},
|
| 207 |
+
{'name': 'Silica (Crystalline - Quartz)', 'cas': '14808-60-7', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=1483&tid=290'},
|
| 208 |
+
{'name': 'Silver (elemental)', 'cas': '7440-22-4', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=539&tid=97'},
|
| 209 |
+
{'name': 'Stoddard Solvent (mixture)', 'cas': '8052-41-3', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=416&tid=73'},
|
| 210 |
+
{'name': 'Strontium (elemental)', 'cas': '7440-24-6', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=656&tid=120'},
|
| 211 |
+
{'name': 'Styrene', 'cas': '100-42-5', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=421&tid=74'},
|
| 212 |
+
{'name': 'Sulfur Dioxide', 'cas': '7446-09-5', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=253&tid=46'},
|
| 213 |
+
{'name': 'Sulfur Mustard', 'cas': '505-60-2', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=905&tid=184'},
|
| 214 |
+
{'name': 'Sulfur Trioxide & Sulfuric Acid', 'cas': '7446-11-9', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=256&tid=47'},
|
| 215 |
+
{'name': 'Sulfur Trioxide & Sulfuric Acid', 'cas': '7664-93-9', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=256&tid=47'},
|
| 216 |
+
{'name': 'Synthetic Vitreous Fibers (Group - e.g., Glass wool)', 'cas': '65997-17-3', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=908&tid=185'},
|
| 217 |
+
{'name': '1,1,1-Trichloroethane', 'cas': '71-55-6', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=432&tid=76'},
|
| 218 |
+
{'name': '1,1,2,2-Tetrachloroethane', 'cas': '79-34-5', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=801&tid=156'},
|
| 219 |
+
{'name': '1,1,2-Trichloroethane', 'cas': '79-00-5', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=796&tid=155'},
|
| 220 |
+
{'name': '1,2,3-Trichloropropane', 'cas': '96-18-4', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=912&tid=186'},
|
| 221 |
+
{'name': '2,4,6-Trinitrotoluene (TNT)', 'cas': '118-96-7', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=677&tid=125'},
|
| 222 |
+
{'name': 'Tetrachloroethylene (PERC)', 'cas': '127-18-4', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=265&tid=48'},
|
| 223 |
+
{'name': 'Tetryl', 'cas': '479-45-8', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=1019&tid=216'},
|
| 224 |
+
{'name': 'Thallium (elemental)', 'cas': '7440-28-0', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=309&tid=49'},
|
| 225 |
+
{'name': 'Thorium (elemental)', 'cas': '7440-29-1', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=660&tid=121'},
|
| 226 |
+
{'name': 'Tin and Compounds (Tin - elemental)', 'cas': '7440-31-5', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=543&tid=98'},
|
| 227 |
+
{'name': 'Titanium Tetrachloride', 'cas': '7550-45-0', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=664&tid=122'},
|
| 228 |
+
{'name': 'Toluene', 'cas': '108-88-3', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=161&tid=29'},
|
| 229 |
+
{'name': 'Toluene Diisocyanate / Methylenediphenyl Diisocyanate (TDI mixture)', 'cas': '26471-62-5', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=1453&tid=245'},
|
| 230 |
+
{'name': 'Toluene Diisocyanate / Methylenediphenyl Diisocyanate (2,4-TDI)', 'cas': '584-84-9', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=1453&tid=245'},
|
| 231 |
+
{'name': 'Toluene Diisocyanate / Methylenediphenyl Diisocyanate (2,6-TDI)', 'cas': '91-08-7', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=1453&tid=245'},
|
| 232 |
+
{'name': 'Toluene Diisocyanate / Methylenediphenyl Diisocyanate (MDI)', 'cas': '101-68-8', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=1453&tid=245'},
|
| 233 |
+
{'name': 'Total Petroleum Hydrocarbons (TPH) (Mixture)', 'cas': 'N/A', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=424&tid=75'},
|
| 234 |
+
{'name': 'Toxaphene (mixture)', 'cas': '8001-35-2', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=548&tid=99'},
|
| 235 |
+
{'name': 'S,S,S-Tributyl Phosphorotrithioate (Tribufos)', 'cas': '78-48-8', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=1487&tid=292'},
|
| 236 |
+
{'name': 'Trichlorobenzenes (1,2,3-)', 'cas': '87-61-6', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=1168&tid=255'},
|
| 237 |
+
{'name': 'Trichlorobenzenes (1,2,4-)', 'cas': '120-82-1', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=1168&tid=255'},
|
| 238 |
+
{'name': 'Trichlorobenzenes (1,3,5-)', 'cas': '108-70-3', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=1168&tid=255'},
|
| 239 |
+
{'name': 'Trichloroethylene (TCE)', 'cas': '79-01-6', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=173&tid=30'},
|
| 240 |
+
{'name': 'Tungsten (elemental)', 'cas': '7440-33-7', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=806&tid=157'},
|
| 241 |
+
{'name': 'Uranium (elemental)', 'cas': '7440-61-1', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=440&tid=77'},
|
| 242 |
+
{'name': 'Used Mineral-based Crankcase Oil (Mixture)', 'cas': '64742-65-0', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=667&tid=123'},
|
| 243 |
+
{'name': 'Vanadium (elemental)', 'cas': '7440-62-2', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=276&tid=50'},
|
| 244 |
+
{'name': 'Vinyl Acetate', 'cas': '0108-05-04', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=671&tid=124'},
|
| 245 |
+
{'name': 'Vinyl Chloride', 'cas': '75-01-4', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=282&tid=51'},
|
| 246 |
+
{'name': 'White Phosphorus (elemental)', 'cas': '7723-14-0', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=285&tid=52'},
|
| 247 |
+
{'name': 'Xylenes (Mixture)', 'cas': '1330-20-7', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=296&tid=53'},
|
| 248 |
+
{'name': 'Xylenes (o-Xylene)', 'cas': '95-47-6', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=296&tid=53'},
|
| 249 |
+
{'name': 'Xylenes (m-Xylene)', 'cas': '108-38-3', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=296&tid=53'},
|
| 250 |
+
{'name': 'Xylenes (p-Xylene)', 'cas': '106-42-3', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=296&tid=53'},
|
| 251 |
+
{'name': 'Zinc (elemental)', 'cas': '7440-66-6', 'url': 'https://wwwn.cdc.gov/TSP/ToxProfiles/ToxProfiles.aspx?id=302&tid=54'},
|
|
|
|
|
|
|
|
|
|
| 252 |
]
|
| 253 |
|
|
|
|
| 254 |
|
| 255 |
+
def _is_cas(value: str) -> bool:
|
| 256 |
+
v = (value or "").strip()
|
| 257 |
+
return bool(re.match(r"^\d{2,7}-\d{2}-\d$", v))
|
|
|
|
|
|
|
| 258 |
|
| 259 |
|
| 260 |
+
def lookup(query: str):
    """Return the CDC ToxProfile entries that match *query*.

    Thin alias for ``search``: a CAS-shaped query is compared exactly with
    each entry's ``cas`` field, anything else is a case-insensitive substring
    match on the entry's ``name``.
    """
    matches = search(query)
    return matches
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 263 |
|
| 264 |
|
| 265 |
+
def search(query: str):
    """Find entries of ``CDC_TOXPROFILES`` matching *query*.

    A blank or falsy query yields an empty list. A query shaped like a CAS
    number is compared for equality with each entry's stripped ``cas`` value;
    any other query is matched as a case-insensitive substring of ``name``.
    """
    needle = (query or "").strip()
    if not needle:
        return []

    hits = []
    if _is_cas(needle):
        # CAS lookups are exact so "71-43-2" never matches a longer number.
        for entry in CDC_TOXPROFILES:
            if (entry.get("cas") or "").strip() == needle:
                hits.append(entry)
        return hits

    lowered = needle.lower()
    for entry in CDC_TOXPROFILES:
        if lowered in (entry.get("name") or "").lower():
            hits.append(entry)
    return hits
|
|
|
|
|
|
|
|
|
|
| 273 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 274 |
|
| 275 |
+
def toxprofile_for(query: str):
    """Return the ToxProfile match(es) for *query*.

    Returns ``None`` when ``search`` finds nothing, the single entry itself
    when exactly one matches, and the full list of entries otherwise.
    """
    matches = search(query)
    if len(matches) == 1:
        return matches[0]
    # An empty result collapses to None; several results are returned as-is.
    return matches or None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
core/sources/ctx.py
CHANGED
|
@@ -1,15 +1,13 @@
|
|
| 1 |
-
import os
|
| 2 |
import re
|
| 3 |
from typing import Any, Dict, List, Optional
|
| 4 |
from urllib.parse import quote
|
| 5 |
|
| 6 |
import httpx
|
| 7 |
|
| 8 |
-
|
| 9 |
-
CTX_BASE_URL = os.getenv("CTX_BASE_URL", "https://comptox.epa.gov/ctx-api")
|
| 10 |
-
CTX_API_KEY = os.getenv("CTX_API_KEY") or os.getenv("COMPTOX_API_KEY") or os.getenv("CTX_KEY")
|
| 11 |
|
| 12 |
CAS_RE = re.compile(r"^\d{2,7}-\d{2}-\d$")
|
|
|
|
| 13 |
|
| 14 |
|
| 15 |
def is_cas(s: str) -> bool:
|
|
@@ -46,10 +44,10 @@ def _as_rows(data: Any) -> List[Any]:
|
|
| 46 |
|
| 47 |
|
| 48 |
async def _ctx_get(path: str, http: httpx.AsyncClient, params: Dict[str, Any] | None = None) -> Any:
|
| 49 |
-
url =
|
| 50 |
headers = {"accept": "application/json"}
|
| 51 |
-
if
|
| 52 |
-
headers["x-api-key"] =
|
| 53 |
|
| 54 |
r = await http.get(url, params=params, headers=headers, timeout=25.0, follow_redirects=True)
|
| 55 |
r.raise_for_status()
|
|
@@ -60,18 +58,64 @@ async def _ctx_get(path: str, http: httpx.AsyncClient, params: Dict[str, Any] |
|
|
| 60 |
return {"raw": r.text}
|
| 61 |
|
| 62 |
|
| 63 |
-
async def
|
| 64 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
if not q:
|
| 66 |
return None
|
| 67 |
|
| 68 |
chem_tries = [
|
| 69 |
-
("/chemical/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
("/chemical/search", {"name": q}),
|
| 71 |
-
(f"/chemical/search/by-cas/{quote(q)}", None),
|
| 72 |
-
(f"/chemical/search/by-name/{quote(q)}", None),
|
| 73 |
]
|
| 74 |
-
|
| 75 |
for path, params in chem_tries:
|
| 76 |
try:
|
| 77 |
data = await _ctx_get(path, http, params=params)
|
|
@@ -86,7 +130,6 @@ async def resolve_dtxsid(query: str, http: httpx.AsyncClient) -> Optional[str]:
|
|
| 86 |
("/hazard/genetox/summary/search", {"name": q}),
|
| 87 |
(f"/hazard/genetox/summary/search/by-name/{quote(q)}", None),
|
| 88 |
]
|
| 89 |
-
|
| 90 |
for path, params in haz_tries:
|
| 91 |
try:
|
| 92 |
data = await _ctx_get(path, http, params=params)
|
|
@@ -100,13 +143,90 @@ async def resolve_dtxsid(query: str, http: httpx.AsyncClient) -> Optional[str]:
|
|
| 100 |
return None
|
| 101 |
|
| 102 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
def dashboard_search_url(query: str) -> str:
|
| 104 |
q = quote((query or "").strip())
|
| 105 |
-
return f"https://comptox.epa.gov/dashboard/
|
| 106 |
|
| 107 |
|
| 108 |
def dashboard_details_url(dtxsid: str) -> str:
|
| 109 |
-
|
|
|
|
| 110 |
|
| 111 |
|
| 112 |
async def fetch_ctx_genetox(cas_or_query: str, http: httpx.AsyncClient) -> Dict[str, Any]:
|
|
|
|
|
|
|
| 1 |
import re
|
| 2 |
from typing import Any, Dict, List, Optional
|
| 3 |
from urllib.parse import quote
|
| 4 |
|
| 5 |
import httpx
|
| 6 |
|
| 7 |
+
from core.config import settings
|
|
|
|
|
|
|
| 8 |
|
| 9 |
CAS_RE = re.compile(r"^\d{2,7}-\d{2}-\d$")
|
| 10 |
+
# Matches an EPA DSSTox substance identifier: "DTXSID" followed by at least
# seven digits. The pattern is a raw string, so the digit class takes a single
# backslash; a doubled backslash would demand a literal "\" followed by "d"
# characters and would never match a real identifier.
DTXSID_RE = re.compile(r"DTXSID\d{7,}")
|
| 11 |
|
| 12 |
|
| 13 |
def is_cas(s: str) -> bool:
|
|
|
|
| 44 |
|
| 45 |
|
| 46 |
async def _ctx_get(path: str, http: httpx.AsyncClient, params: Dict[str, Any] | None = None) -> Any:
|
| 47 |
+
url = settings.ctx_base_url.rstrip("/") + path
|
| 48 |
headers = {"accept": "application/json"}
|
| 49 |
+
if settings.ctx_api_key:
|
| 50 |
+
headers["x-api-key"] = settings.ctx_api_key
|
| 51 |
|
| 52 |
r = await http.get(url, params=params, headers=headers, timeout=25.0, follow_redirects=True)
|
| 53 |
r.raise_for_status()
|
|
|
|
| 58 |
return {"raw": r.text}
|
| 59 |
|
| 60 |
|
| 61 |
+
async def _resolve_from_cas(cas: str, http: httpx.AsyncClient) -> Optional[str]:
    """Resolve a CAS number to a DTXSID by probing CTX API routes in order.

    The chemical identifier/search routes are tried first, followed by the
    genetox hazard-summary routes. The first response whose rows yield a
    DTXSID (via ``_pick_dtxsid``) is returned. A failing attempt is ignored
    and the next route is tried. Returns ``None`` for blank input or when no
    route produces an identifier.
    """
    clean = (cas or "").strip()
    if not clean:
        return None

    encoded = quote(clean)
    attempts = [
        # Chemical identifier / search routes.
        (f"/chemical/identifiers/by-cas/{encoded}", None),
        (f"/chemical/identifiers/search/by-cas/{encoded}", None),
        ("/chemical/identifiers", {"cas": clean}),
        ("/chemical/search/equal", {"word": clean}),
        ("/chemical/search/contains", {"word": clean}),
        ("/chemical/search", {"matchType": "equal", "word": clean}),
        ("/chemical/search", {"matchType": "contains", "word": clean}),
        ("/chemical/search", {"casrn": clean}),
        # Hazard (genetox summary) routes, consulted last.
        ("/hazard/genetox/summary/search", {"cas": clean}),
        (f"/hazard/genetox/summary/search/by-cas/{encoded}", None),
    ]

    for path, params in attempts:
        try:
            payload = await _ctx_get(path, http, params=params)
            found = _pick_dtxsid(_as_rows(payload))
        except Exception:
            # Best-effort probing: any failure simply moves on to the next route.
            continue
        if found:
            return found

    return None
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
async def _resolve_from_name(name: str, http: httpx.AsyncClient) -> Optional[str]:
|
| 105 |
+
q = (name or "").strip()
|
| 106 |
if not q:
|
| 107 |
return None
|
| 108 |
|
| 109 |
chem_tries = [
|
| 110 |
+
(f"/chemical/identifiers/by-name/{quote(q)}", None),
|
| 111 |
+
(f"/chemical/identifiers/search/by-name/{quote(q)}", None),
|
| 112 |
+
("/chemical/identifiers", {"name": q}),
|
| 113 |
+
("/chemical/search/equal", {"word": q}),
|
| 114 |
+
("/chemical/search/contains", {"word": q}),
|
| 115 |
+
("/chemical/search", {"matchType": "equal", "word": q}),
|
| 116 |
+
("/chemical/search", {"matchType": "contains", "word": q}),
|
| 117 |
("/chemical/search", {"name": q}),
|
|
|
|
|
|
|
| 118 |
]
|
|
|
|
| 119 |
for path, params in chem_tries:
|
| 120 |
try:
|
| 121 |
data = await _ctx_get(path, http, params=params)
|
|
|
|
| 130 |
("/hazard/genetox/summary/search", {"name": q}),
|
| 131 |
(f"/hazard/genetox/summary/search/by-name/{quote(q)}", None),
|
| 132 |
]
|
|
|
|
| 133 |
for path, params in haz_tries:
|
| 134 |
try:
|
| 135 |
data = await _ctx_get(path, http, params=params)
|
|
|
|
| 143 |
return None
|
| 144 |
|
| 145 |
|
| 146 |
+
async def _resolve_dtxsid_via_pubchem(term: str, http: httpx.AsyncClient) -> Optional[str]:
    """Try to find a DTXSID for *term* through PubChem.

    Looks up the first PubChem CID for the term by name, then scans the raw
    PUG-View JSON text of that compound for the first ``DTXSID_RE`` match.
    Blank input, an HTTP status >= 400, an empty CID list or any exception
    results in ``None``.
    """
    q = (term or "").strip()
    if not q:
        return None

    try:
        cid_response = await http.get(
            f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{quote(q)}/cids/JSON",
            timeout=20,
        )
        if cid_response.status_code >= 400:
            return None
        cids = (cid_response.json().get("IdentifierList") or {}).get("CID") or []
        if not cids:
            return None

        # Only the first CID is inspected.
        view_response = await http.get(
            f"https://pubchem.ncbi.nlm.nih.gov/rest/pug_view/data/compound/{cids[0]}/JSON",
            timeout=25,
        )
        if view_response.status_code >= 400:
            return None
        hit = DTXSID_RE.search(view_response.text)
    except Exception:
        return None

    return hit.group(0) if hit else None
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
async def _resolve_dtxsid_via_dashboard(term: str, http: httpx.AsyncClient) -> Optional[str]:
    """Try to find a DTXSID for *term* by fetching CompTox Dashboard pages.

    Two dashboard URLs are requested in order with browser-like headers; the
    first ``DTXSID_RE`` match found in a response body is returned. Responses
    with status >= 400 and requests that raise are skipped. Returns ``None``
    for blank input or when neither page yields a match.
    """
    q = (term or "").strip()
    if not q:
        return None

    encoded = quote(q)
    for url in (
        f"https://comptox.epa.gov/dashboard/dsstoxdb/results?search={encoded}",
        f"https://comptox.epa.gov/dashboard/dsstoxdb/chemical/details?search={encoded}",
    ):
        try:
            response = await http.get(
                url,
                timeout=20,
                headers={
                    "accept": "text/html,application/xhtml+xml",
                    "user-agent": "Mozilla/5.0",
                },
            )
            if response.status_code < 400:
                hit = DTXSID_RE.search(response.text)
                if hit:
                    return hit.group(0)
        except Exception:
            # Best effort: fall through to the next candidate URL.
            continue

    return None
|
| 199 |
+
|
| 200 |
+
|
| 201 |
+
async def resolve_dtxsid(query: str, http: httpx.AsyncClient) -> Optional[str]:
    """Resolve *query* (CAS number or chemical name) to a DTXSID.

    The CTX API resolver matching the query type is tried first
    (``_resolve_from_cas`` for CAS-shaped input, ``_resolve_from_name``
    otherwise). If that finds nothing, PubChem and then the CompTox Dashboard
    are used as fallbacks. Returns ``None`` for blank input.
    """
    q = (query or "").strip()
    if not q:
        return None

    primary = _resolve_from_cas if is_cas(q) else _resolve_from_name
    dtxsid = await primary(q, http)
    if dtxsid:
        return dtxsid

    # Fallbacks: PubChem first, Dashboard only when PubChem yields nothing.
    return (await _resolve_dtxsid_via_pubchem(q, http)) or (
        await _resolve_dtxsid_via_dashboard(q, http)
    )
|
| 220 |
+
|
| 221 |
+
|
| 222 |
def dashboard_search_url(query: str) -> str:
    """Build the CompTox Dashboard search-results URL for *query*.

    The query is stripped of surrounding whitespace (a falsy query becomes an
    empty string) and percent-encoded with ``urllib.parse.quote``.
    """
    term = (query or "").strip()
    return "https://comptox.epa.gov/dashboard/dsstoxdb/results?search=" + quote(term)
|
| 225 |
|
| 226 |
|
| 227 |
def dashboard_details_url(dtxsid: str) -> str:
    """Build a CompTox Dashboard URL for the given DTXSID.

    The identifier is stripped (a falsy value becomes an empty string) and
    percent-encoded, then placed in the dashboard's ``results?search=`` URL.
    NOTE(review): this is the search-results URL pattern, not a dedicated
    details path -- confirm that is intended.
    """
    identifier = (dtxsid or "").strip()
    encoded = quote(identifier)
    return f"https://comptox.epa.gov/dashboard/dsstoxdb/results?search={encoded}"
|
| 230 |
|
| 231 |
|
| 232 |
async def fetch_ctx_genetox(cas_or_query: str, http: httpx.AsyncClient) -> Dict[str, Any]:
|
core/sources/ntp.py
CHANGED
|
@@ -7,6 +7,7 @@ import httpx
|
|
| 7 |
|
| 8 |
REPORTS_URL = "https://ntp.niehs.nih.gov/publications/reports"
|
| 9 |
BASE = "https://ntp.niehs.nih.gov"
|
|
|
|
| 10 |
|
| 11 |
TR_RE = re.compile(r"\bTR-?(\d{3,4})\b", re.IGNORECASE)
|
| 12 |
HREF_RE = re.compile(r"href=[\"']([^\"']+)[\"']", re.IGNORECASE)
|
|
@@ -120,7 +121,52 @@ async def search_technical_reports(query: str, http: httpx.AsyncClient, limit: i
|
|
| 120 |
break
|
| 121 |
|
| 122 |
if not nums:
|
| 123 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 124 |
|
| 125 |
items: List[Dict[str, Any]] = []
|
| 126 |
for num in nums:
|
|
|
|
| 7 |
|
| 8 |
REPORTS_URL = "https://ntp.niehs.nih.gov/publications/reports"
|
| 9 |
BASE = "https://ntp.niehs.nih.gov"
|
| 10 |
+
INDEX_URL = "https://ntp.niehs.nih.gov/data/tr"
|
| 11 |
|
| 12 |
TR_RE = re.compile(r"\bTR-?(\d{3,4})\b", re.IGNORECASE)
|
| 13 |
HREF_RE = re.compile(r"href=[\"']([^\"']+)[\"']", re.IGNORECASE)
|
|
|
|
| 121 |
break
|
| 122 |
|
| 123 |
if not nums:
|
| 124 |
+
# Fallback: scan the TR index page (data/tr)
|
| 125 |
+
try:
|
| 126 |
+
r2 = await http.get(INDEX_URL, timeout=25, follow_redirects=True)
|
| 127 |
+
if r2.status_code >= 400:
|
| 128 |
+
return {"ok": True, "query": q, "items": []}
|
| 129 |
+
idx_html = r2.text
|
| 130 |
+
except Exception:
|
| 131 |
+
return {"ok": True, "query": q, "items": []}
|
| 132 |
+
|
| 133 |
+
idx_lines = idx_html.splitlines()
|
| 134 |
+
items: List[Dict[str, Any]] = []
|
| 135 |
+
seen = set()
|
| 136 |
+
|
| 137 |
+
for i, line in enumerate(idx_lines):
|
| 138 |
+
if not TR_RE.search(line):
|
| 139 |
+
continue
|
| 140 |
+
block = " ".join(idx_lines[i : i + 6])
|
| 141 |
+
block_text = _strip_tags(block)
|
| 142 |
+
if q_low not in block_text.lower():
|
| 143 |
+
continue
|
| 144 |
+
m = TR_RE.search(block_text)
|
| 145 |
+
if not m:
|
| 146 |
+
continue
|
| 147 |
+
num = m.group(1)
|
| 148 |
+
if num in seen:
|
| 149 |
+
continue
|
| 150 |
+
seen.add(num)
|
| 151 |
+
|
| 152 |
+
# Derive a best-effort title from the block text
|
| 153 |
+
title = re.sub(TR_RE, "", block_text).strip()
|
| 154 |
+
title = re.sub(r"\\b\\d{2,7}-\\d{2}-\\d\\b", "", title).strip()
|
| 155 |
+
|
| 156 |
+
items.append(
|
| 157 |
+
{
|
| 158 |
+
"num": num,
|
| 159 |
+
"tr": f"TR-{num}",
|
| 160 |
+
"report_page": INDEX_URL,
|
| 161 |
+
"title": title,
|
| 162 |
+
"year": None,
|
| 163 |
+
"pdf": None,
|
| 164 |
+
}
|
| 165 |
+
)
|
| 166 |
+
if len(items) >= max(1, int(limit)):
|
| 167 |
+
break
|
| 168 |
+
|
| 169 |
+
return {"ok": True, "query": q, "items": items}
|
| 170 |
|
| 171 |
items: List[Dict[str, Any]] = []
|
| 172 |
for num in nums:
|
core/sources/pubchem.py
CHANGED
|
@@ -1,220 +1,209 @@
|
|
| 1 |
-
|
| 2 |
-
from __future__ import annotations
|
| 3 |
-
|
| 4 |
import re
|
| 5 |
from typing import Any, Dict, List, Optional
|
| 6 |
from urllib.parse import quote
|
| 7 |
|
| 8 |
import httpx
|
| 9 |
|
|
|
|
|
|
|
|
|
|
| 10 |
CAS_RE = re.compile(r"^\d{2,7}-\d{2}-\d$")
|
| 11 |
|
| 12 |
|
| 13 |
-
def
|
| 14 |
return bool(CAS_RE.match((s or "").strip()))
|
| 15 |
|
| 16 |
|
| 17 |
-
def
|
| 18 |
-
if
|
| 19 |
-
return
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
return None
|
| 23 |
|
| 24 |
|
| 25 |
-
def
|
| 26 |
-
"""
|
| 27 |
-
Mirrors your production index.html fmtInfoValue():
|
| 28 |
-
- StringWithMarkup -> join strings
|
| 29 |
-
- primitive string/number -> stringify
|
| 30 |
-
- otherwise JSON-ish fallback
|
| 31 |
-
"""
|
| 32 |
if value is None:
|
| 33 |
-
return "
|
|
|
|
|
|
|
| 34 |
if isinstance(value, dict):
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
if "String" in value
|
| 44 |
-
return value["String"]
|
| 45 |
if "Number" in value:
|
| 46 |
-
return str(value["Number"])
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
except Exception:
|
| 52 |
-
return "Not available"
|
| 53 |
|
|
|
|
|
|
|
| 54 |
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
- "echa c&l notifications summary"
|
| 61 |
-
(You explicitly asked to keep only these blocks.)
|
| 62 |
"""
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
|
| 67 |
-
|
| 68 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
continue
|
|
|
|
|
|
|
|
|
|
| 70 |
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
async def _cid_from_query(query: str, http: httpx.AsyncClient) -> Optional[int]:
|
| 96 |
-
q = (query or "").strip()
|
| 97 |
-
if not q:
|
| 98 |
-
return None
|
| 99 |
-
url = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{quote(q)}/cids/JSON"
|
| 100 |
-
r = await http.get(url, timeout=30)
|
| 101 |
-
if r.status_code != 200:
|
| 102 |
return None
|
| 103 |
-
js = r.json()
|
| 104 |
-
cids = js.get("IdentifierList", {}).get("CID", [])
|
| 105 |
-
first = _safe_first(cids)
|
| 106 |
-
return int(first) if isinstance(first, int) else None
|
| 107 |
|
| 108 |
|
| 109 |
async def _props_from_cid(cid: int, http: httpx.AsyncClient) -> Dict[str, Any]:
|
|
|
|
| 110 |
url = (
|
| 111 |
-
"
|
| 112 |
-
|
| 113 |
)
|
| 114 |
-
r = await http.get(url, timeout=
|
| 115 |
r.raise_for_status()
|
| 116 |
js = r.json()
|
| 117 |
-
props = js.get("PropertyTable", {}).get("Properties"
|
| 118 |
-
|
| 119 |
-
return first if isinstance(first, dict) else {}
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
async def _synonyms_from_cid(cid: int, http: httpx.AsyncClient) -> List[str]:
|
| 123 |
-
url = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/{cid}/synonyms/JSON"
|
| 124 |
-
r = await http.get(url, timeout=30)
|
| 125 |
-
if r.status_code != 200:
|
| 126 |
-
return []
|
| 127 |
-
js = r.json()
|
| 128 |
-
info = _safe_first(js.get("InformationList", {}).get("Information", []))
|
| 129 |
-
syns = info.get("Synonym", []) if isinstance(info, dict) else []
|
| 130 |
-
return [s for s in syns if isinstance(s, str)]
|
| 131 |
|
| 132 |
|
| 133 |
-
async def
|
| 134 |
-
url = f"
|
| 135 |
-
r = await http.get(url, timeout=
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
try:
|
| 139 |
-
return r.json()
|
| 140 |
-
except Exception:
|
| 141 |
-
return {}
|
| 142 |
|
| 143 |
|
| 144 |
async def pubchem_by_query(query: str, http: httpx.AsyncClient) -> Dict[str, Any]:
|
| 145 |
-
"""
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
ghs_paragraphs (list[str]), echa_summary (str), raw (dict)
|
| 149 |
"""
|
| 150 |
q = (query or "").strip()
|
| 151 |
if not q:
|
| 152 |
-
return {"ok": False, "error": "Empty query
|
| 153 |
|
| 154 |
cid = await _cid_from_query(q, http)
|
| 155 |
if not cid:
|
| 156 |
-
return {"ok": False, "error": "No PubChem CID found
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
syn_json = {"error": str(e)}
|
| 181 |
-
|
| 182 |
-
# hazards via PUG-View (production-equivalent scan)
|
| 183 |
-
view_json = await _pug_view_from_cid(cid, http)
|
| 184 |
-
hazard_items = []
|
| 185 |
-
try:
|
| 186 |
-
sections = view_json.get("Record", {}).get("Section", [])
|
| 187 |
-
hazard_items = _scan_hazards(sections)
|
| 188 |
-
except Exception:
|
| 189 |
-
hazard_items = []
|
| 190 |
-
|
| 191 |
-
# Build display blocks exactly as strings (avoid the "char-per-line" join bug)
|
| 192 |
-
ghs_paragraphs: List[str] = []
|
| 193 |
-
echa_summary = ""
|
| 194 |
-
for it in hazard_items:
|
| 195 |
-
nm = it.get("name", "")
|
| 196 |
-
val = it.get("value", "")
|
| 197 |
-
if not nm:
|
| 198 |
continue
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
if "echa c&l notifications summary" in nm.lower():
|
| 202 |
-
echa_summary = f"{nm}: {val}".strip()
|
| 203 |
|
| 204 |
-
|
| 205 |
"ok": True,
|
| 206 |
"query": q,
|
| 207 |
"cid": cid,
|
| 208 |
"resolved_cas": resolved_cas,
|
| 209 |
"props": props,
|
| 210 |
-
"
|
| 211 |
-
"
|
| 212 |
-
"
|
| 213 |
-
"
|
| 214 |
-
"raw": {
|
| 215 |
-
"props_json": props_json,
|
| 216 |
-
"synonyms_json": syn_json,
|
| 217 |
-
"pug_view_json": view_json,
|
| 218 |
-
},
|
| 219 |
}
|
| 220 |
-
return out
|
|
|
|
| 1 |
+
import html
|
|
|
|
|
|
|
| 2 |
import re
|
| 3 |
from typing import Any, Dict, List, Optional
|
| 4 |
from urllib.parse import quote
|
| 5 |
|
| 6 |
import httpx
|
| 7 |
|
| 8 |
+
PUBCHEM_REST = "https://pubchem.ncbi.nlm.nih.gov/rest/pug"
|
| 9 |
+
PUBCHEM_VIEW = "https://pubchem.ncbi.nlm.nih.gov/rest/pug_view"
|
| 10 |
+
|
| 11 |
CAS_RE = re.compile(r"^\d{2,7}-\d{2}-\d$")
|
| 12 |
|
| 13 |
|
| 14 |
+
def is_cas(s: str) -> bool:
    """Report whether *s*, ignoring surrounding whitespace, matches ``CAS_RE``.

    A falsy value is treated as an empty string and therefore is not a CAS.
    """
    text = (s or "").strip()
    return CAS_RE.match(text) is not None
|
| 16 |
|
| 17 |
|
| 18 |
+
def _first_cas_in_text(text: str) -> Optional[str]:
|
| 19 |
+
if not text:
|
| 20 |
+
return None
|
| 21 |
+
m = re.search(r"\b\d{2,7}-\d{2}-\d\b", text)
|
| 22 |
+
return m.group(0) if m else None
|
|
|
|
| 23 |
|
| 24 |
|
| 25 |
+
def _fmt_value(value: Any) -> str:
|
| 26 |
+
"""Port of production `fmtInfoValue()` for PubChem PUG-View values."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
if value is None:
|
| 28 |
+
return ""
|
| 29 |
+
|
| 30 |
+
# PUG-View Value is usually a dict
|
| 31 |
if isinstance(value, dict):
|
| 32 |
+
if "StringWithMarkup" in value and isinstance(value["StringWithMarkup"], list):
|
| 33 |
+
parts: List[str] = []
|
| 34 |
+
for item in value["StringWithMarkup"]:
|
| 35 |
+
if isinstance(item, dict) and item.get("String"):
|
| 36 |
+
parts.append(str(item["String"]))
|
| 37 |
+
elif isinstance(item, str):
|
| 38 |
+
parts.append(item)
|
| 39 |
+
return html.unescape("".join(parts)).strip()
|
| 40 |
+
if "String" in value:
|
| 41 |
+
return html.unescape(str(value["String"])).strip()
|
| 42 |
if "Number" in value:
|
| 43 |
+
return str(value["Number"]) # already numeric
|
| 44 |
+
if "Boolean" in value:
|
| 45 |
+
return str(value["Boolean"])
|
| 46 |
+
if "Date" in value:
|
| 47 |
+
return str(value["Date"])
|
|
|
|
|
|
|
| 48 |
|
| 49 |
+
# Fallback
|
| 50 |
+
return html.unescape(str(value)).strip()
|
| 51 |
|
| 52 |
+
|
| 53 |
+
def _scan_hazards(section: Dict[str, Any], out: List[Dict[str, str]]):
    """Collect hazard-related entries from a PUG-View section tree into *out*.

    For every ``Information`` item of *section* whose lower-cased ``Name``
    contains one of the hazard keywords, the item's ``Value`` is formatted
    with ``_fmt_value``; non-empty text is appended to *out* as
    ``{"name": ..., "text": ...}``. Nested ``Section`` entries are then
    scanned recursively. *out* is mutated in place; nothing is returned.
    """
    keywords = (
        "ghs hazard statements",
        "echa c&l notifications summary",
        "carcinogenicity",
        "mutagenicity",
        "genotoxicity",
        "toxic",
        "hazard",
    )

    for info in section.get("Information") or []:
        name = (info.get("Name") or "").strip()
        lowered = name.lower()
        if not any(keyword in lowered for keyword in keywords):
            continue
        text = _fmt_value(info.get("Value"))
        if text:
            out.append({"name": name or "Hazard information", "text": text})

    for child in section.get("Section") or []:
        _scan_hazards(child, out)
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
def _extract_synonyms(record: Dict[str, Any]) -> List[str]:
|
| 80 |
+
"""Best-effort extraction of synonyms list from PubChem PUG-View record."""
|
| 81 |
+
if not record:
|
| 82 |
+
return []
|
| 83 |
|
| 84 |
+
def walk(sec: Dict[str, Any], acc: List[str]):
|
| 85 |
+
# Synonyms often appear under Names and Identifiers
|
| 86 |
+
if (sec.get("TOCHeading") or "").lower() == "synonyms":
|
| 87 |
+
for info in sec.get("Information") or []:
|
| 88 |
+
val = info.get("Value")
|
| 89 |
+
if isinstance(val, dict) and isinstance(val.get("StringWithMarkup"), list):
|
| 90 |
+
for item in val["StringWithMarkup"]:
|
| 91 |
+
if isinstance(item, dict) and item.get("String"):
|
| 92 |
+
acc.append(str(item["String"]))
|
| 93 |
+
for sub in sec.get("Section") or []:
|
| 94 |
+
walk(sub, acc)
|
| 95 |
+
|
| 96 |
+
out: List[str] = []
|
| 97 |
+
for top in record.get("Section") or []:
|
| 98 |
+
walk(top, out)
|
| 99 |
+
|
| 100 |
+
# De-dupe preserve order
|
| 101 |
+
seen = set()
|
| 102 |
+
uniq: List[str] = []
|
| 103 |
+
for s in out:
|
| 104 |
+
s = s.strip()
|
| 105 |
+
if not s:
|
| 106 |
+
continue
|
| 107 |
+
if s.lower() in seen:
|
| 108 |
continue
|
| 109 |
+
seen.add(s.lower())
|
| 110 |
+
uniq.append(s)
|
| 111 |
+
return uniq
|
| 112 |
|
| 113 |
+
|
| 114 |
+
def _structure_png_url(cid: int) -> str:
    """Build the PUG REST URL of the 2D structure PNG for compound *cid*."""
    compound_base = f"{PUBCHEM_REST}/compound/cid/{cid}"
    return f"{compound_base}/PNG?record_type=2d"
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
def _compound_url(cid: int) -> str:
|
| 119 |
+
return f"https://pubchem.ncbi.nlm.nih.gov/compound/{cid}"
|
| 120 |
+
|
| 121 |
+
|
| 122 |
+
def _safe_first(items: Any) -> Optional[Any]:
|
| 123 |
+
if isinstance(items, list) and items:
|
| 124 |
+
return items[0]
|
| 125 |
+
return None
|
| 126 |
+
|
| 127 |
+
|
| 128 |
+
async def _cid_from_query(q: str, http: httpx.AsyncClient) -> Optional[int]:
    """Look up the first PubChem CID for the name (or CAS) *q*.

    Calls the PUG REST ``compound/name/.../cids/JSON`` route and returns the
    first entry of ``IdentifierList.CID`` as an ``int``. Returns ``None`` when
    the list is missing or empty, or when the request, JSON decoding or
    conversion raises.
    """
    url = f"{PUBCHEM_REST}/compound/name/{quote(q)}/cids/JSON"
    try:
        response = await http.get(url, timeout=20)
        response.raise_for_status()
        cid_list = response.json().get("IdentifierList", {}).get("CID")
        first = _safe_first(cid_list)
        if first is None:
            return None
        return int(first)
    except Exception:
        # Best effort: any failure is reported as "no CID".
        return None
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
|
| 139 |
|
| 140 |
async def _props_from_cid(cid: int, http: httpx.AsyncClient) -> Dict[str, Any]:
    """Fetch basic computed properties for compound *cid* from PUG REST.

    Requests MolecularFormula, MolecularWeight, CanonicalSMILES and IUPACName
    and returns the first entry of ``PropertyTable.Properties``, or an empty
    dict when that entry is missing or falsy. HTTP error statuses propagate
    via ``raise_for_status()``.
    """
    endpoint = f"{PUBCHEM_REST}/compound/cid/{cid}/property/"
    endpoint += "MolecularFormula,MolecularWeight,CanonicalSMILES,IUPACName/JSON"

    response = await http.get(endpoint, timeout=20)
    response.raise_for_status()

    table = response.json().get("PropertyTable", {})
    return _safe_first(table.get("Properties")) or {}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 151 |
|
| 152 |
|
| 153 |
+
async def _view_record(cid: int, http: httpx.AsyncClient) -> Dict[str, Any]:
    """Fetch the PUG-View JSON document for compound *cid*.

    Returns the decoded JSON body. HTTP error statuses propagate via
    ``raise_for_status()``.
    """
    response = await http.get(f"{PUBCHEM_VIEW}/data/compound/{cid}/JSON", timeout=25)
    response.raise_for_status()
    payload = response.json()
    return payload
|
|
|
|
|
|
|
|
|
|
|
|
|
| 158 |
|
| 159 |
|
| 160 |
async def pubchem_by_query(query: str, http: httpx.AsyncClient) -> Dict[str, Any]:
    """Query PubChem by CAS number or name and summarise the compound.

    Returns ``{"ok": False, "error": ...}`` for a blank query or when no CID
    is found. Otherwise returns a dict with ``ok``, ``query``, ``cid``,
    ``resolved_cas`` (the query itself when CAS-shaped, else the first
    CAS-shaped synonym or ``None``), ``props``, ``structure_png``, ``url``,
    ``synonyms`` (at most 50) and ``hazards`` (de-duplicated by lower-cased
    name and stripped text). Errors raised while fetching the properties or
    the PUG-View record are not caught here.
    """
    q = (query or "").strip()
    if not q:
        return {"ok": False, "error": "Empty query"}

    cid = await _cid_from_query(q, http)
    if not cid:
        return {"ok": False, "error": "No PubChem CID found"}

    props = await _props_from_cid(cid, http)
    record = (await _view_record(cid, http)).get("Record") or {}

    synonyms = _extract_synonyms(record)
    resolved_cas = q if is_cas(q) else _first_cas_in_text("\n".join(synonyms))

    collected: List[Dict[str, str]] = []
    for top in record.get("Section") or []:
        _scan_hazards(top, collected)

    # Keep the first hazard seen for each (name, text) pair, in original order.
    first_by_key: Dict[Any, Dict[str, str]] = {}
    for hazard in collected:
        key = (hazard.get("name", "").lower(), hazard.get("text", "").strip())
        first_by_key.setdefault(key, hazard)
    unique_hazards = list(first_by_key.values())

    return {
        "ok": True,
        "query": q,
        "cid": cid,
        "resolved_cas": resolved_cas,
        "props": props,
        "structure_png": _structure_png_url(cid),
        "url": _compound_url(cid),
        "synonyms": synonyms[:50],
        "hazards": unique_hazards,
    }
|
|
|
core/sources/scholar.py
CHANGED
|
@@ -1,7 +1,11 @@
|
|
| 1 |
-
import
|
| 2 |
-
|
|
|
|
|
|
|
| 3 |
|
| 4 |
def scholar_link(query: str) -> str:
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from urllib.parse import quote_plus
|
| 4 |
+
|
| 5 |
|
| 6 |
def scholar_link(query: str) -> str:
    """Build a Google Scholar search URL for *query*.

    The stripped query is suffixed with " genotoxicity" as a search hint and
    encoded with ``quote_plus``. A blank or falsy query returns an empty
    string.
    """
    term = (query or "").strip()
    if term:
        hinted = term + ' genotoxicity'
        return f"https://scholar.google.com/scholar?q={quote_plus(hinted)}"
    return ""
|