import os

from dotenv import load_dotenv
from scrapegraphai.graphs import (
    ScriptCreatorGraph,
    SearchGraph,
    SmartScraperMultiGraph,
)

from src.databases.redis import REDIS_CACHED
from src.libs.constants import ONE_HOUR_IN_SECONDS
from src.libs.logger import logger

load_dotenv()

redis_cache = REDIS_CACHED

# Named presets for graph configuration; selected by name via _resolve_config.
default_graph_config: dict = {
    "openai": {
        "llm": {
            "api_key": os.getenv("OPENAI_API_KEY"),
            "model": "gpt-3.5-turbo",
            "temperature": 0,
        }
    },
    "groq": {
        "llm": {
            "model": "groq/llama3-70b-8192",
            "api_key": os.getenv("GROQ_API_KEY"),
            "temperature": 0,
        },
        "embeddings": {
            "model": "ollama/nomic-embed-text",
            "base_url": "http://localhost:11434",
        },
    },
}


def _resolve_config(config: dict | str | None) -> dict:
    """Resolve a config argument to a concrete graph-config dict.

    Args:
        config (dict | str | None): Either a ready-made config dict, the name
            of a preset in ``default_graph_config`` (e.g. ``"openai"`` or
            ``"groq"``), or a falsy value to fall back to the ``"openai"``
            preset.

    Returns:
        dict: The resolved configuration.

    Raises:
        KeyError: If *config* is a string naming an unknown preset.
    """
    if isinstance(config, str):
        return default_graph_config[config]
    # Falsy (None or empty dict) falls back to the default preset, matching
    # the original `... or config or default_graph_config["openai"]` chain.
    return config if config else default_graph_config["openai"]


@redis_cache(ttl=ONE_HOUR_IN_SECONDS)
@logger.instrument()
def scrape_graph(prompt: str, sources: str | list[str],
                 config: dict | str | None = None,
                 cache_ttl: int | None = None) -> str:
    """Scrape the web using a multi-source smart-scraper graph.

    Args:
        prompt (str): The prompt or query to be used for scraping.
        sources (str | list[str]): The source(s) from which to scrape data.
            It can be a single string or a list of strings.
        config (dict | str, optional): The configuration for the scraping
            process, or the name of a preset in ``default_graph_config``.
            Defaults to the "openai" preset.
        cache_ttl (int, optional): The time-to-live (in seconds) for the
            cached results. Defaults to one hour (consumed by the
            ``redis_cache`` decorator).

    Returns:
        str: The scraped data as a string.
    """
    logger.info({'prompt': prompt, 'sources': sources, 'config': config,
                 'cache_ttl': cache_ttl})
    config = _resolve_config(config)
    logger.debug(f"Config: {config}")
    smart_scraper_graph = SmartScraperMultiGraph(
        prompt=prompt,
        source=sources,
        config=config,
    )
    result = smart_scraper_graph.run()
    logger.debug(f"Result: {result}")
    return result


@redis_cache(ttl=ONE_HOUR_IN_SECONDS)
@logger.instrument()
def search_graph(prompt: str, config: dict | str | None = None,
                 cache_ttl: int | None = None) -> str:
    """Search the web for relevant information based on the given prompt.

    Args:
        prompt (str): The prompt or query to be used for searching.
        config (dict | str, optional): The configuration for the search
            process, or the name of a preset in ``default_graph_config``.
            Defaults to the "openai" preset.
        cache_ttl (int, optional): The time-to-live (in seconds) for the
            cached results. Defaults to one hour (consumed by the
            ``redis_cache`` decorator).

    Returns:
        str: The search results as a string.
    """
    logger.info({'prompt': prompt, 'config': config, 'cache_ttl': cache_ttl})
    config = _resolve_config(config)
    logger.debug(f"Config: {config}")
    # Local renamed from `search_graph` to avoid shadowing the function name.
    graph = SearchGraph(
        prompt=prompt,
        config=config,
    )
    result = graph.run()
    logger.debug(f"Result: {result}")
    return result


@redis_cache(ttl=ONE_HOUR_IN_SECONDS)
@logger.instrument()
def _create_script_graph(prompt: str, source: str, library: str,
                         config: dict | str | None = None,
                         cache_ttl: int | None = None) -> str:
    """Generate a scraping script for the given source and library.

    Args:
        prompt (str): The prompt or query to be used for generating the
            script.
        source (str): The source from which to generate the script.
        library (str): The library to be used for generating the script.
        config (dict | str, optional): The configuration for the script
            creation process, or the name of a preset in
            ``default_graph_config``. Defaults to the "openai" preset.
        cache_ttl (int, optional): The time-to-live (in seconds) for the
            cached results. Defaults to one hour (consumed by the
            ``redis_cache`` decorator).

    Returns:
        str: The generated script as a string.
    """
    config = _resolve_config(config)
    script_creator_graph = ScriptCreatorGraph(
        prompt=prompt,
        source=source,
        config=config,
        library=library,
    )
    result = script_creator_graph.run()
    return result