|
|
import os |
|
|
from dotenv import load_dotenv |
|
|
|
|
|
from scrapegraphai.graphs import SearchGraph |
|
|
from scrapegraphai.graphs import SmartScraperMultiGraph |
|
|
from scrapegraphai.graphs import ScriptCreatorGraph |
|
|
|
|
|
from src.databases.redis import REDIS_CACHED |
|
|
from src.libs.constants import ONE_HOUR_IN_SECONDS |
|
|
from src.libs.logger import logger |
|
|
|
|
|
|
|
|
# Load environment variables from a local .env file into os.environ so the
# API keys below can be resolved via os.getenv.
load_dotenv()

# Redis-backed caching decorator applied to every graph function in this module.
redis_cache = REDIS_CACHED

# Named provider presets. Callers may pass one of these keys ("openai",
# "groq") as the `config` argument, or supply a complete config dict instead.
default_graph_config: dict = {
    "openai": {
        "llm": {
            "api_key": os.getenv("OPENAI_API_KEY"),
            "model": "gpt-3.5-turbo",
            "temperature": 0,
        },
    },
    "groq": {
        "llm": {
            "model": "groq/llama3-70b-8192",
            "api_key": os.getenv("GROQ_API_KEY"),
            "temperature": 0,
        },
        "embeddings": {
            "model": "ollama/nomic-embed-text",
            "base_url": "http://localhost:11434",
        },
    },
}
|
|
|
|
|
@redis_cache(ttl=ONE_HOUR_IN_SECONDS)
@logger.instrument()
def scrape_graph(prompt: str, sources: str | list[str], config: dict | str | None = None, cache_ttl: int | None = None) -> str:
    """Scrape one or more web sources using a SmartScraperMultiGraph.

    Args:
        prompt: The prompt or query driving the scrape.
        sources: A single source or a list of sources to scrape from.
        config: Either a complete graph-config dict, or the name of a preset
            in ``default_graph_config`` (e.g. ``"openai"``, ``"groq"``).
            Defaults to the ``"openai"`` preset.
        cache_ttl: Time-to-live in seconds for the cached result. Not read in
            this body — presumably consumed by the ``redis_cache`` decorator;
            verify against the decorator's implementation.

    Returns:
        str: The scraped data.

    Raises:
        KeyError: If ``config`` is a string that is not a known preset name.
    """
    logger.info({'prompt': prompt, 'sources': sources, 'config': config, 'cache_ttl': cache_ttl})

    # Resolve the config explicitly: a string selects a preset, a truthy dict
    # is used as-is, anything falsy falls back to the "openai" preset.
    # (The previous `A and B or C` chain would have returned the raw preset
    # *name* if a preset dict were ever falsy; explicit branching avoids that,
    # and isinstance() replaces the `type(x) == str` comparison.)
    if isinstance(config, str):
        config = default_graph_config[config]
    elif not config:
        config = default_graph_config["openai"]
    logger.debug(f"Config: {config}")

    smart_scraper_graph = SmartScraperMultiGraph(
        prompt=prompt,
        source=sources,
        config=config,
    )

    result = smart_scraper_graph.run()
    logger.debug(f"Result: {result}")

    return result
|
|
|
|
|
@redis_cache(ttl=ONE_HOUR_IN_SECONDS)
@logger.instrument()
def search_graph(prompt: str, config: dict | str | None = None, cache_ttl: int | None = None) -> str:
    """Search the web for information relevant to *prompt* via a SearchGraph.

    Args:
        prompt: The prompt or query driving the search.
        config: Either a complete graph-config dict, or the name of a preset
            in ``default_graph_config`` (e.g. ``"openai"``, ``"groq"``).
            Defaults to the ``"openai"`` preset.
        cache_ttl: Time-to-live in seconds for the cached result. Not read in
            this body — presumably consumed by the ``redis_cache`` decorator;
            verify against the decorator's implementation.

    Returns:
        str: The search results.

    Raises:
        KeyError: If ``config`` is a string that is not a known preset name.
    """
    logger.info({'prompt': prompt, 'config': config, 'cache_ttl': cache_ttl})

    # Resolve the config explicitly: a string selects a preset, a truthy dict
    # is used as-is, anything falsy falls back to the "openai" preset.
    # (Replaces the fragile `A and B or C` chain and `type(x) == str` check.)
    if isinstance(config, str):
        config = default_graph_config[config]
    elif not config:
        config = default_graph_config["openai"]
    logger.debug(f"Config: {config}")

    # Renamed local so it no longer shadows this function's own name.
    graph = SearchGraph(
        prompt=prompt,
        config=config,
    )

    result = graph.run()
    logger.debug(f"Result: {result}")

    return result
|
|
|
|
|
@redis_cache(ttl=ONE_HOUR_IN_SECONDS)
@logger.instrument()
def _create_script_graph(prompt: str, source: str, library: str, config: dict | str | None = None, cache_ttl: int | None = None) -> str:
    """Generate a scraping script for *source* using a ScriptCreatorGraph.

    Args:
        prompt: The prompt or query describing the script to generate.
        source: The source the generated script should target.
        library: The library the generated script should use.
        config: Either a complete graph-config dict, or the name of a preset
            in ``default_graph_config`` (e.g. ``"openai"``, ``"groq"``).
            Defaults to the ``"openai"`` preset.
        cache_ttl: Time-to-live in seconds for the cached result. Not read in
            this body — presumably consumed by the ``redis_cache`` decorator;
            verify against the decorator's implementation.

    Returns:
        str: The generated script.

    Raises:
        KeyError: If ``config`` is a string that is not a known preset name.
    """
    # Log inputs and intermediate state, for consistency with the sibling
    # scrape_graph/search_graph functions (this one previously logged nothing).
    logger.info({'prompt': prompt, 'source': source, 'library': library, 'config': config, 'cache_ttl': cache_ttl})

    # Resolve the config explicitly: a string selects a preset, a truthy dict
    # is used as-is, anything falsy falls back to the "openai" preset.
    # (Replaces the fragile `A and B or C` chain and `type(x) == str` check.)
    if isinstance(config, str):
        config = default_graph_config[config]
    elif not config:
        config = default_graph_config["openai"]
    logger.debug(f"Config: {config}")

    script_creator_graph = ScriptCreatorGraph(
        prompt=prompt,
        source=source,
        config=config,
        library=library,
    )

    result = script_creator_graph.run()
    logger.debug(f"Result: {result}")

    return result
|
|
|