| | import argparse |
| | import logging |
| |
|
| | from autogpt.commands.file_operations import ingest_file, search_files |
| | from autogpt.config import Config |
| | from autogpt.memory import get_memory |
| |
|
# Configuration object created once at import time; main() passes it to get_memory().
cfg = Config()
| |
|
| |
|
def configure_logging():
    """
    Configure the root logger for the ingestion script and return the script's logger.

    The root logger is asked to append DEBUG-and-above records to
    "log-ingestion.txt". Note that logging.basicConfig() does nothing when the
    root logger already has handlers attached.

    :return: The logger named "AutoGPT-Ingestion"
    """
    root_settings = {
        "filename": "log-ingestion.txt",
        "filemode": "a",  # append, so earlier runs are kept
        "format": "%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s",
        "datefmt": "%H:%M:%S",
        "level": logging.DEBUG,
    }
    logging.basicConfig(**root_settings)

    ingestion_logger = logging.getLogger("AutoGPT-Ingestion")
    return ingestion_logger
| |
|
| |
|
def ingest_directory(directory, memory, args):
    """
    Ingest all files in a directory by calling the ingest_file function for each file.

    Error handling is best-effort: the first exception raised while listing or
    ingesting files stops the run, is written to the "AutoGPT-Ingestion" logger,
    is echoed to stdout, and is NOT re-raised to the caller.

    :param directory: The directory containing the files to ingest
    :param memory: An object with an add() method to store the chunks in memory
    :param args: Parsed command-line options; only ``max_length`` and ``overlap``
        are read here and forwarded to ingest_file
    """
    try:
        for file in search_files(directory):
            ingest_file(file, memory, args.max_length, args.overlap)
    except Exception as e:
        message = f"Error while ingesting directory '{directory}': {e!s}"
        # Record the failure in the ingestion log as well as on the console, so
        # directory errors are reported the same way main() reports its errors.
        logging.getLogger("AutoGPT-Ingestion").error(message)
        print(message)
| |
|
| |
|
def main() -> None:
    """
    Command-line entry point for ingestion.

    Parses the options, obtains a memory object from get_memory(), and then
    ingests either the single file given by --file or the directory given by
    --dir. Exceptions raised during ingestion are logged and printed here;
    they are not re-raised.
    """
    logger = configure_logging()

    def report_failure(message):
        # Every failure goes both to the log and to the console.
        logger.error(message)
        print(message)

    description = (
        "Ingest a file or a directory with multiple files into memory. "
        "Make sure to set your .env before running this script."
    )
    parser = argparse.ArgumentParser(description=description)

    # --file and --dir are mutually exclusive, and one of them is required.
    source = parser.add_mutually_exclusive_group(required=True)
    source.add_argument("--file", type=str, help="The file to ingest.")
    source.add_argument(
        "--dir", type=str, help="The directory containing the files to ingest."
    )

    parser.add_argument(
        "--init",
        action="store_true",
        default=False,
        help="Init the memory and wipe its content (default: False)",
    )
    parser.add_argument(
        "--overlap",
        type=int,
        default=200,
        help="The overlap size between chunks when ingesting files (default: 200)",
    )
    parser.add_argument(
        "--max_length",
        type=int,
        default=4000,
        help="The max_length of each chunk when ingesting files (default: 4000)",
    )
    args = parser.parse_args()

    memory = get_memory(cfg, init=args.init)
    print(f"Using memory of type: {memory.__class__.__name__}")

    if args.file:
        try:
            ingest_file(args.file, memory, args.max_length, args.overlap)
            print(f"File '{args.file}' ingested successfully.")
        except Exception as e:
            report_failure(f"Error while ingesting file '{args.file}': {e!s}")
        return

    if args.dir:
        try:
            ingest_directory(args.dir, memory, args)
            print(f"Directory '{args.dir}' ingested successfully.")
        except Exception as e:
            report_failure(f"Error while ingesting directory '{args.dir}': {e!s}")
        return

    # Reached only when the chosen option carries an empty value.
    print(
        "Please provide either a file path (--file) or a directory name (--dir)"
        " inside the auto_gpt_workspace directory as input."
    )
| |
|
| |
|
# Run the ingestion CLI only when this file is executed as a script.
if __name__ == "__main__":
    main()
| |
|