Spaces:
Runtime error
Runtime error
Charles Azam
committed on
Commit
·
84c66cd
1
Parent(s):
e003639
feat: start writing tools
Browse files
src/deepengineer/webcrawler/async_search.py
CHANGED
|
@@ -93,7 +93,7 @@ def get_linkup_balance():
|
|
| 93 |
return balance
|
| 94 |
|
| 95 |
|
| 96 |
-
async def async_linkup_search(
|
| 97 |
search_query: str,
|
| 98 |
depth: Literal["standard", "deep"] = "standard",
|
| 99 |
output_type: Literal['searchResults', 'sourcedAnswer', 'structured'] = "sourcedAnswer",
|
|
@@ -138,24 +138,24 @@ async def async_linkup_search(
|
|
| 138 |
async def arxiv_search_async(
|
| 139 |
search_query: str,
|
| 140 |
) -> SearchResponse:
|
| 141 |
-
response = await async_linkup_search(search_query, include_domains=[ScientificDomains.arxiv])
|
| 142 |
return response
|
| 143 |
|
| 144 |
|
| 145 |
async def pubmed_search_async(
|
| 146 |
search_query: str,
|
| 147 |
) -> SearchResponse:
|
| 148 |
-
response = await async_linkup_search(search_query, include_domains=[ScientificDomains.pubmed])
|
| 149 |
return response
|
| 150 |
|
| 151 |
async def sciencedirect_search_async(
|
| 152 |
search_query: str,
|
| 153 |
) -> SearchResponse:
|
| 154 |
-
response = await async_linkup_search(search_query, include_domains=[ScientificDomains.sciencedirect])
|
| 155 |
return response
|
| 156 |
|
| 157 |
async def scientific_search_async(
|
| 158 |
search_query: str,
|
| 159 |
) -> SearchResponse:
|
| 160 |
-
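response = await async_linkup_search(search_query, include_domains=[ScientificDomains.wikipedia, ScientificDomains.arxiv, ScientificDomains.pubmed, ScientificDomains.sciencedirect])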
|
| 161 |
return response
|
|
|
|
| 93 |
return balance
|
| 94 |
|
| 95 |
|
| 96 |
+
async def linkup_search_async(
|
| 97 |
search_query: str,
|
| 98 |
depth: Literal["standard", "deep"] = "standard",
|
| 99 |
output_type: Literal['searchResults', 'sourcedAnswer', 'structured'] = "sourcedAnswer",
|
|
|
|
| 138 |
async def arxiv_search_async(
|
| 139 |
search_query: str,
|
| 140 |
) -> SearchResponse:
|
| 141 |
+
response = await linkup_search_async(search_query, include_domains=[ScientificDomains.arxiv])
|
| 142 |
return response
|
| 143 |
|
| 144 |
|
| 145 |
async def pubmed_search_async(
|
| 146 |
search_query: str,
|
| 147 |
) -> SearchResponse:
|
| 148 |
+
response = await linkup_search_async(search_query, include_domains=[ScientificDomains.pubmed])
|
| 149 |
return response
|
| 150 |
|
| 151 |
async def sciencedirect_search_async(
|
| 152 |
search_query: str,
|
| 153 |
) -> SearchResponse:
|
| 154 |
+
response = await linkup_search_async(search_query, include_domains=[ScientificDomains.sciencedirect])
|
| 155 |
return response
|
| 156 |
|
| 157 |
async def scientific_search_async(
|
| 158 |
search_query: str,
|
| 159 |
) -> SearchResponse:
|
| 160 |
+
response = await linkup_search_async(search_query, include_domains=[ScientificDomains.wikipedia, ScientificDomains.arxiv, ScientificDomains.pubmed, ScientificDomains.sciencedirect])
|
| 161 |
return response
|
src/deepengineer/webcrawler/tools.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from deepengineer.webcrawler.async_search import linkup_search_async, tavily_search_async, arxiv_search_async, pubmed_search_async, sciencedirect_search_async, scientific_search_async
|
| 2 |
+
from deepengineer.webcrawler.async_crawl import crawl4ai_extract_markdown_of_url_async, arxiv_download_pdf_async, download_pdf_async
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
|
tests/webcrawler/test_async_crawl.py
CHANGED
|
@@ -5,7 +5,6 @@ from deepengineer.webcrawler.async_crawl import (
|
|
| 5 |
arxiv_download_pdf_async,
|
| 6 |
)
|
| 7 |
from mistralai import OCRResponse
|
| 8 |
-
from deepengineer.webcrawler.pdf_tools import convert_pdf_to_markdown_async
|
| 9 |
from deepengineer.webcrawler.testing import URL_WIKIPEDIA, URL_PDF, ARXIV_URL
|
| 10 |
from deepengineer.common_path import DATA_DIR
|
| 11 |
|
|
@@ -31,14 +30,3 @@ async def test_arxiv_download_pdf_async():
|
|
| 31 |
pdf_path = await arxiv_download_pdf_async(ARXIV_URL, output_path=output_path)
|
| 32 |
assert pdf_path == output_path
|
| 33 |
assert output_path.exists()
|
| 34 |
-
|
| 35 |
-
@pytest.mark.expensive
|
| 36 |
-
@pytest.mark.asyncio
|
| 37 |
-
async def test_convert_pdf_to_markdown_async():
|
| 38 |
-
pdf_path = DATA_DIR / "report_thermal_neutron.pdf"
|
| 39 |
-
assert pdf_path.exists()
|
| 40 |
-
markdown, ocr_response = await convert_pdf_to_markdown_async(pdf_path)
|
| 41 |
-
assert isinstance(ocr_response, OCRResponse)
|
| 42 |
-
assert len(ocr_response.pages) == 16
|
| 43 |
-
assert isinstance(markdown, str)
|
| 44 |
-
assert "where each cylinder represent" in markdown
|
|
|
|
| 5 |
arxiv_download_pdf_async,
|
| 6 |
)
|
| 7 |
from mistralai import OCRResponse
|
|
|
|
| 8 |
from deepengineer.webcrawler.testing import URL_WIKIPEDIA, URL_PDF, ARXIV_URL
|
| 9 |
from deepengineer.common_path import DATA_DIR
|
| 10 |
|
|
|
|
| 30 |
pdf_path = await arxiv_download_pdf_async(ARXIV_URL, output_path=output_path)
|
| 31 |
assert pdf_path == output_path
|
| 32 |
assert output_path.exists()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tests/webcrawler/test_async_search.py
CHANGED
|
@@ -4,7 +4,7 @@ from deepengineer.webcrawler.async_search import (
|
|
| 4 |
tavily_search_async,
|
| 5 |
SearchResponse,
|
| 6 |
get_tavily_usage,
|
| 7 |
-
async_linkup_search,
|
| 8 |
get_linkup_balance
|
| 9 |
)
|
| 10 |
|
|
@@ -47,7 +47,7 @@ async def test_linkup_search_async():
|
|
| 47 |
balance_before = get_linkup_balance()
|
| 48 |
print(balance_before)
|
| 49 |
|
| 50 |
-
response = await async_linkup_search(
|
| 51 |
search_query="Would it be possible to make a thermal reactor with graphite and lead?",
|
| 52 |
)
|
| 53 |
print(response.answer)
|
|
|
|
| 4 |
tavily_search_async,
|
| 5 |
SearchResponse,
|
| 6 |
get_tavily_usage,
|
| 7 |
+
linkup_search_async,
|
| 8 |
get_linkup_balance
|
| 9 |
)
|
| 10 |
|
|
|
|
| 47 |
balance_before = get_linkup_balance()
|
| 48 |
print(balance_before)
|
| 49 |
|
| 50 |
+
response = await linkup_search_async(
|
| 51 |
search_query="Would it be possible to make a thermal reactor with graphite and lead?",
|
| 52 |
)
|
| 53 |
print(response.answer)
|
tests/webcrawler/test_pdfs_tools.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from deepengineer.webcrawler.pdf_tools import convert_pdf_to_markdown_async
|
| 2 |
+
from mistralai import OCRResponse
|
| 3 |
+
from deepengineer.common_path import DATA_DIR
|
| 4 |
+
import pytest
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
@pytest.mark.expensive
|
| 8 |
+
@pytest.mark.asyncio
|
| 9 |
+
async def test_convert_pdf_to_markdown_async():
|
| 10 |
+
pdf_path = DATA_DIR / "report_thermal_neutron.pdf"
|
| 11 |
+
assert pdf_path.exists()
|
| 12 |
+
markdown, ocr_response = await convert_pdf_to_markdown_async(pdf_path)
|
| 13 |
+
assert isinstance(ocr_response, OCRResponse)
|
| 14 |
+
assert len(ocr_response.pages) == 16
|
| 15 |
+
assert isinstance(markdown, str)
|
| 16 |
+
assert "where each cylinder represent" in markdown
|