File size: 4,456 Bytes
9c400b9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import os
from dotenv import load_dotenv

from scrapegraphai.graphs import SearchGraph
from scrapegraphai.graphs import SmartScraperMultiGraph
from scrapegraphai.graphs import ScriptCreatorGraph

from src.databases.redis import REDIS_CACHED
from src.libs.constants import ONE_HOUR_IN_SECONDS
from src.libs.logger import logger


# Pull API keys and other settings from a local .env file into the environment.
load_dotenv()

# Alias the shared Redis caching decorator for use on the graph functions below.
redis_cache = REDIS_CACHED

# Preset LLM configurations keyed by provider name; selected by passing the
# key ("openai" or "groq") as the `config` argument of the functions below.
# API keys are read from the environment populated by load_dotenv() above.
default_graph_config: dict = {
    "openai": {
        "llm": {
            "api_key": os.getenv("OPENAI_API_KEY"),
            "model": "gpt-3.5-turbo",
            "temperature": 0,
        }
    },
    "groq": {
        "llm": {
            "model": "groq/llama3-70b-8192",
            "api_key": os.getenv("GROQ_API_KEY"),
            "temperature": 0,
        },
        # Groq has no embedding endpoint here; embeddings go through a local
        # Ollama instance instead.
        "embeddings": {
            "model": "ollama/nomic-embed-text",
            "base_url": "http://localhost:11434",
        },
    },
}

@redis_cache(ttl=ONE_HOUR_IN_SECONDS)
@logger.instrument()
def scrape_graph(prompt: str, sources: str | list[str], config: dict | str | None = None, cache_ttl: int | None = None) -> str:
    """
    Scrape one or more web sources with a SmartScraperMultiGraph.

    Args:
        prompt (str): The prompt or query to be used for scraping.
        sources (str | list[str]): The source(s) from which to scrape data. It can be a single string or a list of strings.
        config (dict | str, optional): Either a complete graph config dict, or the
            name of a preset in ``default_graph_config`` ("openai" or "groq").
            Defaults to the "openai" preset.
        cache_ttl (int, optional): The time-to-live (in seconds) for the cached results.
            NOTE(review): not read in this body — presumably consumed by the
            ``redis_cache`` decorator; confirm.

    Returns:
        str: The scraped data as a string.

    Raises:
        KeyError: If ``config`` is a string that is not a known preset name.
    """
    logger.info({'prompt': prompt, 'sources': sources, 'config': config, 'cache_ttl': cache_ttl})

    # Explicit branching replaces the fragile `cond and a or b` chain, and
    # isinstance() replaces the `type(x) == str` anti-pattern.
    if isinstance(config, str):
        config = default_graph_config[config]
    else:
        config = config or default_graph_config["openai"]
    logger.debug(f"Config: {config}")

    smart_scraper_graph = SmartScraperMultiGraph(
        prompt=prompt,
        source=sources,
        config=config
    )

    result = smart_scraper_graph.run()
    logger.debug(f"Result: {result}")

    return result

@redis_cache(ttl=ONE_HOUR_IN_SECONDS)
@logger.instrument()
def search_graph(prompt: str, config: dict | str | None = None, cache_ttl: int | None = None) -> str:
    """
    Search the web for relevant information with a SearchGraph.

    Args:
        prompt (str): The prompt or query to be used for searching.
        config (dict | str, optional): Either a complete graph config dict, or the
            name of a preset in ``default_graph_config`` ("openai" or "groq").
            Defaults to the "openai" preset.
        cache_ttl (int, optional): The time-to-live (in seconds) for the cached results.
            NOTE(review): not read in this body — presumably consumed by the
            ``redis_cache`` decorator; confirm.

    Returns:
        str: The search results as a string.

    Raises:
        KeyError: If ``config`` is a string that is not a known preset name.
    """
    logger.info({'prompt': prompt, 'config': config, 'cache_ttl': cache_ttl})

    # Explicit branching replaces the fragile `cond and a or b` chain, and
    # isinstance() replaces the `type(x) == str` anti-pattern.
    if isinstance(config, str):
        config = default_graph_config[config]
    else:
        config = config or default_graph_config["openai"]
    logger.debug(f"Config: {config}")

    search_graph = SearchGraph(
        prompt=prompt,
        config=config,
    )

    result = search_graph.run()
    logger.debug(f"Result: {result}")

    return result

@redis_cache(ttl=ONE_HOUR_IN_SECONDS)
@logger.instrument()
def _create_script_graph(prompt: str, source: str, library: str, config: dict | str | None = None, cache_ttl: int | None = None) -> str:
    """
    Generate a scraping script for a source with a ScriptCreatorGraph.

    Args:
        prompt (str): The prompt or query to be used for generating the script.
        source (str): The source from which to generate the script.
        library (str): The library to be used for generating the script.
        config (dict | str, optional): Either a complete graph config dict, or the
            name of a preset in ``default_graph_config`` ("openai" or "groq").
            Defaults to the "openai" preset.
        cache_ttl (int, optional): The time-to-live (in seconds) for the cached results.
            NOTE(review): not read in this body — presumably consumed by the
            ``redis_cache`` decorator; confirm.

    Returns:
        str: The generated script as a string.

    Raises:
        KeyError: If ``config`` is a string that is not a known preset name.
    """
    # Explicit branching replaces the fragile `cond and a or b` chain, and
    # isinstance() replaces the `type(x) == str` anti-pattern.
    if isinstance(config, str):
        config = default_graph_config[config]
    else:
        config = config or default_graph_config["openai"]
    # Debug logging added for consistency with scrape_graph / search_graph.
    logger.debug(f"Config: {config}")

    script_creator_graph = ScriptCreatorGraph(
        prompt=prompt,
        source=source,
        config=config,
        library=library
    )

    result = script_creator_graph.run()
    logger.debug(f"Result: {result}")

    return result