File size: 4,456 Bytes
9c400b9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import os
from dotenv import load_dotenv

from scrapegraphai.graphs import SearchGraph
from scrapegraphai.graphs import SmartScraperMultiGraph
from scrapegraphai.graphs import ScriptCreatorGraph

from src.databases.redis import REDIS_CACHED
from src.libs.constants import ONE_HOUR_IN_SECONDS
from src.libs.logger import logger


# Pull API keys and other settings from a local .env file into the environment.
load_dotenv()

# Alias the shared Redis caching decorator for use on the graph functions below.
redis_cache = REDIS_CACHED

# Preset LLM configurations keyed by provider name; selected by passing the
# key ("openai" or "groq") as the `config` argument of the functions below.
# API keys are read from the environment populated by load_dotenv() above.
default_graph_config: dict = {
    "openai": {
        "llm": {
            "api_key": os.getenv("OPENAI_API_KEY"),
            "model": "gpt-3.5-turbo",
            "temperature": 0,
        }
    },
    "groq": {
        "llm": {
            "model": "groq/llama3-70b-8192",
            "api_key": os.getenv("GROQ_API_KEY"),
            "temperature": 0,
        },
        # Groq has no embedding endpoint here; embeddings go through a local
        # Ollama instance instead.
        "embeddings": {
            "model": "ollama/nomic-embed-text",
            "base_url": "http://localhost:11434",
        },
    },
}

@redis_cache(ttl=ONE_HOUR_IN_SECONDS)
@logger.instrument()
def scrape_graph(prompt: str, sources: str | list[str], config: dict | str | None = None, cache_ttl: int | None = None) -> str:
    """
    Scrape one or more web sources with a SmartScraperMultiGraph.

    Args:
        prompt (str): The prompt or query to be used for scraping.
        sources (str | list[str]): The source(s) from which to scrape data. It can be a single string or a list of strings.
        config (dict | str, optional): Either a complete graph config dict, or the
            name of a preset in ``default_graph_config`` ("openai" or "groq").
            Defaults to the "openai" preset.
        cache_ttl (int, optional): The time-to-live (in seconds) for the cached results.
            NOTE(review): not read in this body — presumably consumed by the
            ``redis_cache`` decorator; confirm.

    Returns:
        str: The scraped data as a string.

    Raises:
        KeyError: If ``config`` is a string that is not a known preset name.
    """
    logger.info({'prompt': prompt, 'sources': sources, 'config': config, 'cache_ttl': cache_ttl})

    # Explicit branching replaces the fragile `cond and a or b` chain, and
    # isinstance() replaces the `type(x) == str` anti-pattern.
    if isinstance(config, str):
        config = default_graph_config[config]
    else:
        config = config or default_graph_config["openai"]
    logger.debug(f"Config: {config}")

    smart_scraper_graph = SmartScraperMultiGraph(
        prompt=prompt,
        source=sources,
        config=config
    )

    result = smart_scraper_graph.run()
    logger.debug(f"Result: {result}")

    return result

@redis_cache(ttl=ONE_HOUR_IN_SECONDS)
@logger.instrument()
def search_graph(prompt: str, config: dict | str | None = None, cache_ttl: int | None = None) -> str:
    """
    Search the web for relevant information with a SearchGraph.

    Args:
        prompt (str): The prompt or query to be used for searching.
        config (dict | str, optional): Either a complete graph config dict, or the
            name of a preset in ``default_graph_config`` ("openai" or "groq").
            Defaults to the "openai" preset.
        cache_ttl (int, optional): The time-to-live (in seconds) for the cached results.
            NOTE(review): not read in this body — presumably consumed by the
            ``redis_cache`` decorator; confirm.

    Returns:
        str: The search results as a string.

    Raises:
        KeyError: If ``config`` is a string that is not a known preset name.
    """
    logger.info({'prompt': prompt, 'config': config, 'cache_ttl': cache_ttl})

    # Explicit branching replaces the fragile `cond and a or b` chain, and
    # isinstance() replaces the `type(x) == str` anti-pattern.
    if isinstance(config, str):
        config = default_graph_config[config]
    else:
        config = config or default_graph_config["openai"]
    logger.debug(f"Config: {config}")

    search_graph = SearchGraph(
        prompt=prompt,
        config=config,
    )

    result = search_graph.run()
    logger.debug(f"Result: {result}")

    return result

@redis_cache(ttl=ONE_HOUR_IN_SECONDS)
@logger.instrument()
def _create_script_graph(prompt: str, source: str, library: str, config: dict | str | None = None, cache_ttl: int | None = None) -> str:
    """
    Generate a scraping script for a source with a ScriptCreatorGraph.

    Args:
        prompt (str): The prompt or query to be used for generating the script.
        source (str): The source from which to generate the script.
        library (str): The library to be used for generating the script.
        config (dict | str, optional): Either a complete graph config dict, or the
            name of a preset in ``default_graph_config`` ("openai" or "groq").
            Defaults to the "openai" preset.
        cache_ttl (int, optional): The time-to-live (in seconds) for the cached results.
            NOTE(review): not read in this body — presumably consumed by the
            ``redis_cache`` decorator; confirm.

    Returns:
        str: The generated script as a string.

    Raises:
        KeyError: If ``config`` is a string that is not a known preset name.
    """
    # Explicit branching replaces the fragile `cond and a or b` chain, and
    # isinstance() replaces the `type(x) == str` anti-pattern.
    if isinstance(config, str):
        config = default_graph_config[config]
    else:
        config = config or default_graph_config["openai"]
    # Debug logging added for consistency with scrape_graph / search_graph.
    logger.debug(f"Config: {config}")

    script_creator_graph = ScriptCreatorGraph(
        prompt=prompt,
        source=source,
        config=config,
        library=library
    )

    result = script_creator_graph.run()
    logger.debug(f"Result: {result}")

    return result