File size: 3,284 Bytes
e7fd66f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
from __future__ import annotations

from pathlib import Path

import numpy as np
import pytest

from agent.runner import AgentRunner
from researchmind.config import ResearchMindConfig
from researchmind.extract import ExtractedDocument
from researchmind.store import MemRAGStore


class MockBackend:
    def load(self) -> None:
        return None

    def chat(self, messages, *, max_tokens=512, temperature=0.7):
        user = messages[-1]["content"]
        if "Topic:" in user:
            return '["https://example.com/a", "https://example.com/b"]'
        return "Plants use photosynthesis [1]."

    def generate(self, prompt, *, max_tokens=512, temperature=0.7):
        return self.chat([{"role": "user", "content": prompt}], max_tokens=max_tokens)


@pytest.fixture
def research_env(tmp_path, monkeypatch):
    cfg = ResearchMindConfig(
        data_dir=tmp_path / "rm",
        embed_model="test",
        auto_search=False,
        top_k=2,
        max_context_chunks=8,
        chunk_size=50,
        chunk_overlap=10,
    )
    monkeypatch.setenv("RESEARCHMIND_DATA_DIR", str(cfg.data_dir))

    def fake_embed(texts, *, model_name):
        vecs = []
        for t in texts:
            vecs.append(np.array([1.0, 0.0, 0.0], dtype=np.float32))
        return np.stack(vecs) if vecs else np.zeros((0, 3), dtype=np.float32)

    monkeypatch.setattr("researchmind.ingest.embed_texts", fake_embed)
    monkeypatch.setattr("researchmind.retrieve.embed_texts", fake_embed)

    def fake_scrape(url: str):
        return ExtractedDocument(
            source_type="web",
            uri=url,
            title="Example",
            text="Photosynthesis converts light to energy in plants.",
        )

    monkeypatch.setattr("agent.tools.research_tools.fetch_and_extract", fake_scrape)

    def fake_search(topic, *, n=5, check_reachable=True):
        return [f"https://example.com/{topic.replace(' ', '-')}"]

    monkeypatch.setattr("agent.tools.research_tools.search_urls", fake_search)

    def fake_validate(url, *, check_reachable=True):
        normalized = url if url.startswith("http") else f"https://{url}"
        return True, "ok", normalized

    monkeypatch.setattr("researchmind.url_validate.validate_url", fake_validate)
    return cfg


def test_discover_urls(research_env):
    runner = AgentRunner()
    result = runner.run_researchmind_discover(
        topic="photosynthesis",
        auto_search=False,
        session_id=None,
        model_key="test",
        backend=MockBackend(),
    )
    assert len(result.suggested_urls) >= 1
    assert result.session_id


def test_ingest_and_chat(research_env):
    runner = AgentRunner()
    ingest = runner.run_researchmind_ingest(
        topic=None,
        urls=["https://example.com/a"],
        files=[],
        auto_search=False,
        session_id=None,
        model_key="test",
        backend=MockBackend(),
    )
    assert ingest.doc_count >= 1
    assert ingest.chunk_count >= 1

    chat = runner.run_researchmind_chat(
        question="How do plants make energy?",
        session_id=ingest.session_id,
        model_key="test",
        backend=MockBackend(),
    )
    assert "photosynthesis" in chat.answer.lower() or "[1]" in chat.answer
    assert chat.session_id == ingest.session_id