File size: 3,731 Bytes
9c400b9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
from dotenv import load_dotenv

import httpx
import urllib.parse
from typing import Self
from src.libs.helper_functions import convert_to_snakecase
from src.databases.redis import REDIS_CACHED
from src.libs.constants import ONE_MINUTE_IN_SECONDS
from src.libs.constants import JINA_READER_BASE_ENDPOINT, JINA_SEARCH_BASE_ENDPOINT, ONE_MINUTE_IN_SECONDS

# Runs at import time: python-dotenv's load_dotenv() reads a .env file (when one
# is found) into the process environment.
load_dotenv()

# Local alias for the project's Redis caching decorator factory; it is called
# with a `ttl` argument and applied to the JinaAI request methods below.
redis_cache = REDIS_CACHED

class JinaAI:
    """
    A class for interacting with Jina AI's search and reader services.

    Both public methods issue a blocking HTTP GET with an
    ``Accept: application/json`` header and are wrapped by the ``redis_cache``
    decorator with a TTL of ``ONE_MINUTE_IN_SECONDS``.

    Attributes:
        JINA_SEARCH_BASE_ENDPOINT (str): The base URL for the Jina AI search service.
        JINA_READER_BASE_ENDPOINT (str): The base URL for the Jina AI reader service.

    Methods:
        __init__(self, search_base_url: str | None = None, reader_base_url: str | None = None) -> None:
            Initialize the JinaAI instance with optional search and reader base URLs.

        search_web_with_jina(self, search_query: str) -> dict | None:
            Search the web using Jina AI and return the search results as a dictionary.

        read_website_with_jina(self, website_url: str) -> dict | None:
            Read a website using Jina AI and return the website content as a dictionary.
    """

    # Timeout, in seconds, applied to every outgoing request.
    _REQUEST_TIMEOUT_SECONDS = 30.0

    def __init__(self, search_base_url: str | None = None, reader_base_url: str | None = None) -> None:
        """
        Store the endpoints used by this instance.

        Args:
            search_base_url (str | None): Override for the search endpoint. A falsy
                value (None or "") falls back to the module-level
                JINA_SEARCH_BASE_ENDPOINT constant.
            reader_base_url (str | None): Override for the reader endpoint. A falsy
                value (None or "") falls back to the module-level
                JINA_READER_BASE_ENDPOINT constant.
        """
        self.JINA_SEARCH_BASE_ENDPOINT = search_base_url or JINA_SEARCH_BASE_ENDPOINT
        self.JINA_READER_BASE_ENDPOINT = reader_base_url or JINA_READER_BASE_ENDPOINT

    def _fetch_json(self, request_url: str) -> dict | None:
        """
        GET ``request_url`` and return its JSON body run through ``convert_to_snakecase``.

        Args:
            request_url (str): The fully assembled URL to request.

        Returns:
            dict | None: The converted JSON payload, or None when the request
            fails, the server answers with an error status, or the body is not
            valid JSON.
        """
        headers = {"Accept": "application/json"}

        try:
            with httpx.Client(timeout=self._REQUEST_TIMEOUT_SECONDS) as client:
                response = client.get(request_url, headers=headers)
                response.raise_for_status()
                payload = response.json()
        except (httpx.HTTPError, ValueError) as e:
            # ValueError covers json.JSONDecodeError: a non-JSON body is treated
            # like any other failed request instead of escaping to the caller.
            print(f"An error occurred: {e}")
            return None

        # Converted outside the try block so that an error raised by the
        # conversion helper is not mistaken for a request failure.
        return convert_to_snakecase(payload)

    @redis_cache(ttl=ONE_MINUTE_IN_SECONDS)
    def search_web_with_jina(self, search_query: str) -> dict | None:
        """
        Search the web using Jina AI.

        Args:
            search_query (str): The query to be searched on the web.

        Returns:
            dict | None: A dictionary containing the search results if successful, otherwise None.

        Note:
            httpx.HTTPError and JSON decoding errors are not raised to the caller;
            they are printed and reported as a None return value.

        Usage:
            jina_ai_instance.search_web_with_jina(search_query)

        Example:
            jina_ai_instance.search_web_with_jina("example search query")
        """
        # Percent-encode the query so it can be appended to the endpoint path
        # (urllib.parse.quote leaves "/" unescaped by default).
        encoded_search_query = urllib.parse.quote(search_query)
        return self._fetch_json(f"{self.JINA_SEARCH_BASE_ENDPOINT}{encoded_search_query}")

    @redis_cache(ttl=ONE_MINUTE_IN_SECONDS)
    def read_website_with_jina(self, website_url: str) -> dict | None:
        """
        Read a website using Jina AI.

        Args:
            website_url (str): The URL of the website to be read.

        Returns:
            dict | None: A dictionary containing the content of the website if successful, otherwise None.

        Note:
            httpx.HTTPError and JSON decoding errors are not raised to the caller;
            they are printed and reported as a None return value.

        Usage:
            jina_ai_instance.read_website_with_jina(website_url)

        Example:
            jina_ai_instance.read_website_with_jina("https://example.com")
        """
        # The target URL is appended to the reader endpoint verbatim (no encoding).
        return self._fetch_json(f"{self.JINA_READER_BASE_ENDPOINT}{website_url}")