Spaces:
Sleeping
Sleeping
| import numpy as np | |
| from toxra_core.nlp_pipeline import ( | |
| expand_regulatory_queries, | |
| extract_evidence_span, | |
| hybrid_rank_text_items, | |
| ) | |
def test_expand_regulatory_queries_adds_families():
    """Check that query expansion returns extra queries and an "endpoint" family.

    A single base query is expanded together with one endpoint module and
    one framework; the result must contain more than one query, and the
    families mapping must hold a non-empty entry under "endpoint".
    """
    seed_queries = ["genotoxicity risk"]
    modules = ["Genotoxicity (OECD TG)"]
    regulatory_frameworks = ["FDA CTP"]

    expanded, family_map = expand_regulatory_queries(
        base_queries=seed_queries,
        endpoint_modules=modules,
        frameworks=regulatory_frameworks,
    )

    # More than one query back means something was added beyond the seed.
    assert len(expanded) > 1
    assert "endpoint" in family_map
    assert family_map["endpoint"]
def test_extract_evidence_span_hit_and_fallback():
    """Check span extraction for a matching term and for a term that is absent.

    When the term occurs in the passage, the returned span text must contain
    it. When the term does not occur, the returned span text must still be
    non-empty.
    """
    passage = "Sentence one. AMES test showed equivocal response. Sentence three. Sentence four."

    # Hit: the search term appears in the second sentence of the passage.
    matched_span = extract_evidence_span(passage, "AMES")
    assert "AMES" in matched_span["text"]

    # Fallback: the search term appears nowhere in the input text.
    fallback_span = extract_evidence_span("Alpha. Beta.", "nonexistenttoken")
    assert fallback_span["text"]
def test_hybrid_rank_text_items_lexical_only():
    """Check ranking when no embeddings are supplied.

    The call must return a non-empty selection, and the diagnostics must
    report one of the two accepted ranking methods.
    """
    on_topic = {"text": "This section discusses liver toxicity and NOAEL values."}
    off_topic = {"text": "Completely unrelated formulation text."}

    ranked, diagnostics = hybrid_rank_text_items(
        [on_topic, off_topic],
        query="NOAEL liver",
    )

    assert ranked
    # Either method name is accepted here; the test does not pin which one.
    assert diagnostics["ranking_method"] in {"lexical_only", "hybrid_rrf"}
def test_hybrid_rank_text_items_with_embeddings():
    """Check ranking when item and query embeddings are both supplied.

    Three items with 2-component float32 vectors are ranked against a query
    vector; the selection must be non-empty and the diagnostics must report
    the "hybrid_rrf" ranking method.
    """
    docs = [{"text": label} for label in ("A", "B", "C")]
    doc_vectors = np.array(
        [[1.0, 0.0], [0.5, 0.5], [0.0, 1.0]],
        dtype=np.float32,
    )
    query_vector = np.array([1.0, 0.0], dtype=np.float32)

    ranked, diagnostics = hybrid_rank_text_items(
        docs,
        query="A",
        item_embeddings=doc_vectors,
        query_embedding=query_vector,
    )

    assert ranked
    assert diagnostics["ranking_method"] == "hybrid_rrf"