File size: 6,325 Bytes
8ad59ed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6c0aeb9
6a6d57d
 
8ad59ed
 
0d89931
 
8ad59ed
 
 
 
 
6a6d57d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8ad59ed
892c58b
 
 
 
8ad59ed
 
 
 
 
 
 
 
892c58b
bd41508
 
8ad59ed
 
 
892c58b
 
 
 
 
 
 
 
 
6c0aeb9
 
 
 
892c58b
8ad59ed
 
 
 
 
 
 
ddb6686
 
 
 
 
 
 
 
8ad59ed
 
 
 
5978528
 
bd41508
8ad59ed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fe6be00
 
 
 
8ad59ed
 
 
 
 
 
892c58b
5978528
892c58b
 
 
6a6d57d
 
 
 
 
 
 
 
8ad59ed
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
from deepengineer.deepsearch.scawl_web_agent import (
    create_web_search_agent,
    SearchTool,
    ArxivSearchTool,
    PubmedSearchTool,
    ScientificSearchTool,
    GetTableOfContentsTool,
    GetMarkdownTool,
    GetPagesContentTool,
    FindInMarkdownTool,
)
from deepengineer.deepsearch.draw_agent import SaveMatplotlibFigTool
from deepengineer.webcrawler.crawl_database import DataBase
from deepengineer.common_path import DATA_DIR
from smolagents import CodeAgent, LiteLLMModel
import random
from pathlib import Path
import queue
import json
import datetime


def create_output_image_path(random_name_images: int | None = None) -> Path:
    """Create and return a directory for the images produced by one run.

    The directory is ``DATA_DIR / "images_<suffix>"``. Missing parents are
    created and an already-existing directory is reused as-is.

    Args:
        random_name_images: Suffix used in the directory name. When ``None``
            a random 7-digit integer is drawn instead.

    Returns:
        The path of the (now existing) image directory.
    """
    # Compare against None explicitly: a plain truthiness test would discard
    # an explicitly requested suffix of 0 and replace it with a random one.
    if random_name_images is None:
        random_name_images = random.randint(1000000, 9999999)
    output_image_path = Path(DATA_DIR) / f"images_{random_name_images}"
    output_image_path.mkdir(parents=True, exist_ok=True)
    return output_image_path


def save_log(task: str, answer: str, model_id: str, success: bool = True, error: str = None):
    """Simple function to save request logs with timestamps."""
    logs_dir = Path(DATA_DIR) / "logs"
    logs_dir.mkdir(parents=True, exist_ok=True)
    
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    log_file = logs_dir / f"request_{timestamp}.json"
    
    log_data = {
        "timestamp": datetime.datetime.now().isoformat(),
        "task": task,
        "model_id": model_id,
        "success": success,
        "answer": answer,
        "error": error
    }
    
    try:
        with open(log_file, 'w', encoding='utf-8') as f:
            json.dump(log_data, f, indent=2, ensure_ascii=False)
    except Exception as e:
        print(f"Failed to save log: {e}")


def create_main_search_agent(
    model_id: str = "deepseek/deepseek-reasoner",
    database: DataBase | None = None,
    log_queue: queue.Queue | None = None,
    output_image_path: Path | None = None,
) -> CodeAgent:
    """Build the code agent used to research and answer a task.

    The agent is a ``CodeAgent`` backed by a ``LiteLLMModel`` and equipped
    with the search, document-reading and figure-saving tools listed below.
    It is limited to 20 steps and may import matplotlib, numpy, pandas,
    seaborn, scipy and sympy in the code it writes.

    Args:
        model_id: Model identifier passed to ``LiteLLMModel``.
        database: Database shared by the document tools. A new ``DataBase``
            is created when ``None``.
        log_queue: Optional queue handed to every tool.
        output_image_path: Directory handed to ``SaveMatplotlibFigTool``.
            Defaults to ``DATA_DIR / "images"``. The directory is created
            if it does not exist.

    Returns:
        The configured ``CodeAgent``.
    """
    model = LiteLLMModel(model_id=model_id)
    if database is None:
        database = DataBase()

    # Normalize through Path so this works whether DATA_DIR (or the caller's
    # argument) is a str or a Path, matching how the other helpers in this
    # module build paths from DATA_DIR.
    if output_image_path is None:
        output_image_path = Path(DATA_DIR) / "images"
    else:
        output_image_path = Path(output_image_path)
    output_image_path.mkdir(parents=True, exist_ok=True)

    # Web search and crawling tools, plus the figure-saving tool.
    # NOTE(review): PubmedSearchTool is imported by this module but is not
    # registered here, although the prompt in main_search mentions PubMed --
    # confirm whether that omission is intentional.
    tools = [
        SearchTool(log_queue=log_queue),
        ArxivSearchTool(log_queue=log_queue),
        ScientificSearchTool(log_queue=log_queue),
        GetTableOfContentsTool(log_queue=log_queue, database=database),
        GetMarkdownTool(log_queue=log_queue, database=database),
        GetPagesContentTool(log_queue=log_queue, database=database),
        FindInMarkdownTool(log_queue=log_queue, database=database),
        SaveMatplotlibFigTool(log_queue=log_queue, output_dir=output_image_path),
    ]

    search_agent = CodeAgent(
        model=model,
        tools=tools,
        max_steps=20,
        verbosity_level=2,
        additional_authorized_imports=[
            "matplotlib.*",
            "numpy.*",
            "pandas.*",
            "seaborn.*",
            "scipy.*",
            "sympy.*",
        ],
    )
    return search_agent


def main_search(task: str, log_queue: queue.Queue | None = None, model_id: str = "mistral/mistral-medium-latest") -> tuple[str, Path]:
    """Run the research agent on ``task`` and log the outcome.

    A fresh image directory is created for the run, the agent is built with
    ``create_main_search_agent`` and executed on the prompt below with the
    task substituted in. The outcome is recorded with ``save_log``.

    Args:
        task: The question or task to research.
        log_queue: Optional queue forwarded to the agent's tools.
        model_id: Model identifier used for the agent.

    Returns:
        A tuple ``(answer, output_image_path)``: whatever ``agent.run``
        returned and the directory created for this run's images.

    Raises:
        Exception: Any exception raised by ``agent.run`` is logged as a
            failed request and then re-raised unchanged.
    """
    print(f"Using model: {model_id}")
    output_image_path = create_output_image_path()
    MAIN_PROMPT = """
You are DeepDraft, an advanced research and analysis agent specialized in deep technical research, data visualization, and comprehensive information synthesis. You have access to powerful tools for web search, document analysis, and data visualization.

You will be given a task to complete. This task is related to engineering, science, and technology. I want you to answer the question with a very detailed answer. The output should be written in markdown and include sources and images.

## Your Capabilities

### **Python and scientific reasoning**
You will have to answer the question as an engineer. Make hypothesis, test them, and draw conclusions. When in doubt, go back to the basic equations and laws of physics. Make simple models that you can test using python.
You are a coding agent so you can use python to test your hypothesis.
You have access to these libraries: matplotlib, numpy, pandas, seaborn, scipy, sympy.
You do not have access to any other library.

### Web searching:
You have the tools to search the web using Linkup API for comprehensive research with sourced answers. You can also search arXiv, PubMed, and ScienceDirect, download the documents and extract the relevant information.

### **Data Visualization Tools**
- You can always use the tool `SaveMatplotlibFigTool` to save a figure at the end of a matplotlib code block. You can then include the figure in your final answer.

## Answer format
You must answer the question in markdown format. Remember, you are a coding agent and you can pass the answer only by using the `final_answer("your markdown answer")` tool.
Also, if you want to include images in your answer, you MUST include the image in the markdown like this: ![title](image_name.png).

You have one question to answer. It is paramount that you provide a correct answer.
Give it all you can: I know for a fact that you have access to all the relevant tools to solve it and find the correct answer (the answer does exist).
Failure or 'I cannot answer' or 'None found' will not be tolerated, success will be rewarded.
Run verification steps if that's needed, you must make sure you find the correct answer! Here is the task:
{task}
"""
    agent = create_main_search_agent(
        model_id=model_id,
        log_queue=log_queue,
        output_image_path=output_image_path,
    )

    # Keep the try body limited to the agent run: an error raised while
    # logging a successful run must not be recorded as a failed run.
    try:
        answer = agent.run(MAIN_PROMPT.format(task=task))
    except Exception as e:
        save_log(task, "", model_id, success=False, error=str(e))
        # Bare raise re-raises the active exception with its traceback intact.
        raise

    save_log(task, answer, model_id, success=True)
    return answer, output_image_path


if __name__ == "__main__":
    # Manual smoke run: execute the agent once on a fixed example task.
    example_task = """
    Search a paper called "High Energy Physics Opportunities Using Reactor Antineutrinos" on arXiv, download it and extract the table of contents
    """
    main_search(example_task)