Spaces:
Runtime error
Runtime error
| title: Configuration Template | |
| navtitle: Configuration Template | |
| layout: page | |
| tags: [post] | |
| date: 2024-04-04 | |
| The following template can be saved as a `.env` file in the directory that the `--root` parameter | |
| points to when you run the Indexing Pipeline. | |
| For details about how to run the Indexing Pipeline, refer to the [Index CLI](../../index/2-cli) documentation. | |
| ## .env File Template | |
| Required variables are uncommented. All the optional configuration can be turned on or off as needed. | |
| ### Minimal Configuration | |
| ```bash | |
| # Base LLM Settings | |
| GRAPHRAG_API_KEY="your_api_key" | |
| GRAPHRAG_API_BASE="https://<domain>.openai.azure.com" # For Azure OpenAI Users | |
| GRAPHRAG_API_VERSION="api_version" # For Azure OpenAI Users | |
| # Text Generation Settings | |
| GRAPHRAG_LLM_TYPE="azure_openai_chat" # or openai_chat | |
| GRAPHRAG_LLM_DEPLOYMENT_NAME="gpt-4-turbo-preview" | |
| GRAPHRAG_LLM_MODEL_SUPPORTS_JSON=True | |
| # Text Embedding Settings | |
| GRAPHRAG_EMBEDDING_TYPE="azure_openai_embedding" # or openai_embedding | |
| GRAPHRAG_EMBEDDING_DEPLOYMENT_NAME="text-embedding-3-small" | |
| # Data Mapping Settings | |
| GRAPHRAG_INPUT_FILE_TYPE="text" | |
| ``` | |
| ### Full Configuration | |
| ```bash | |
| # Required LLM Config | |
| # Input Data Configuration | |
| GRAPHRAG_INPUT_TYPE="file" | |
| # Plaintext Input Data Configuration | |
| # GRAPHRAG_INPUT_FILE_PATTERN=.*\.txt | |
| # Text Input Data Configuration | |
| GRAPHRAG_INPUT_FILE_TYPE="text" | |
| GRAPHRAG_INPUT_FILE_PATTERN=".*\.txt$" | |
| GRAPHRAG_INPUT_SOURCE_COLUMN=source | |
| # GRAPHRAG_INPUT_TIMESTAMP_COLUMN=None | |
| # GRAPHRAG_INPUT_TIMESTAMP_FORMAT=None | |
| # GRAPHRAG_INPUT_TEXT_COLUMN="text" | |
| # GRAPHRAG_INPUT_ATTRIBUTE_COLUMNS=id | |
| # GRAPHRAG_INPUT_TITLE_COLUMN="title" | |
| # GRAPHRAG_INPUT_TYPE="file" | |
| # GRAPHRAG_INPUT_CONNECTION_STRING=None | |
| # GRAPHRAG_INPUT_CONTAINER_NAME=None | |
| # GRAPHRAG_INPUT_BASE_DIR=None | |
| # Base LLM Settings | |
| GRAPHRAG_API_KEY="your_api_key" | |
| GRAPHRAG_API_BASE="https://<domain>.openai.azure.com" # For Azure OpenAI Users | |
| GRAPHRAG_API_VERSION="api_version" # For Azure OpenAI Users | |
| # GRAPHRAG_API_ORGANIZATION=None | |
| # GRAPHRAG_API_PROXY=None | |
| # Text Generation Settings | |
| # GRAPHRAG_LLM_TYPE=openai_chat | |
| GRAPHRAG_LLM_API_KEY="your_api_key" # If GRAPHRAG_API_KEY is not set | |
| GRAPHRAG_LLM_API_BASE="https://<domain>.openai.azure.com" # For Azure OpenAI Users and if GRAPHRAG_API_BASE is not set | |
| GRAPHRAG_LLM_API_VERSION="api_version" # For Azure OpenAI Users and if GRAPHRAG_API_VERSION is not set | |
| GRAPHRAG_LLM_MODEL_SUPPORTS_JSON=True # Suggested by default | |
| # GRAPHRAG_LLM_API_ORGANIZATION=None | |
| # GRAPHRAG_LLM_API_PROXY=None | |
| # GRAPHRAG_LLM_DEPLOYMENT_NAME=None | |
| # GRAPHRAG_LLM_MODEL=gpt-4-turbo-preview | |
| # GRAPHRAG_LLM_MAX_TOKENS=4000 | |
| # GRAPHRAG_LLM_REQUEST_TIMEOUT=180 | |
| # GRAPHRAG_LLM_THREAD_COUNT=50 | |
| # GRAPHRAG_LLM_THREAD_STAGGER=0.3 | |
| # GRAPHRAG_LLM_CONCURRENT_REQUESTS=25 | |
| # GRAPHRAG_LLM_TPM=0 | |
| # GRAPHRAG_LLM_RPM=0 | |
| # GRAPHRAG_LLM_MAX_RETRIES=10 | |
| # GRAPHRAG_LLM_MAX_RETRY_WAIT=10 | |
| # GRAPHRAG_LLM_SLEEP_ON_RATE_LIMIT_RECOMMENDATION=True | |
| # Text Embedding Settings | |
| # GRAPHRAG_EMBEDDING_TYPE=openai_embedding | |
| GRAPHRAG_EMBEDDING_API_KEY="your_api_key" # If GRAPHRAG_API_KEY is not set | |
| GRAPHRAG_EMBEDDING_API_BASE="https://<domain>.openai.azure.com" # For Azure OpenAI Users and if GRAPHRAG_API_BASE is not set | |
| GRAPHRAG_EMBEDDING_API_VERSION="api_version" # For Azure OpenAI Users and if GRAPHRAG_API_VERSION is not set | |
| # GRAPHRAG_EMBEDDING_API_ORGANIZATION=None | |
| # GRAPHRAG_EMBEDDING_API_PROXY=None | |
| # GRAPHRAG_EMBEDDING_DEPLOYMENT_NAME=None | |
| # GRAPHRAG_EMBEDDING_MODEL=text-embedding-3-small | |
| # GRAPHRAG_EMBEDDING_BATCH_SIZE=16 | |
| # GRAPHRAG_EMBEDDING_BATCH_MAX_TOKENS=8191 | |
| # GRAPHRAG_EMBEDDING_TARGET=required | |
| # GRAPHRAG_EMBEDDING_SKIP=None | |
| # GRAPHRAG_EMBEDDING_THREAD_COUNT=50 | |
| # GRAPHRAG_EMBEDDING_THREAD_STAGGER=0.3 | |
| # GRAPHRAG_EMBEDDING_CONCURRENT_REQUESTS=25 | |
| # GRAPHRAG_EMBEDDING_TPM=0 | |
| # GRAPHRAG_EMBEDDING_RPM=0 | |
| # GRAPHRAG_EMBEDDING_MAX_RETRIES=10 | |
| # GRAPHRAG_EMBEDDING_MAX_RETRY_WAIT=10 | |
| # GRAPHRAG_EMBEDDING_SLEEP_ON_RATE_LIMIT_RECOMMENDATION=True | |
| # Data Mapping Settings | |
| # GRAPHRAG_INPUT_ENCODING=utf-8 | |
| # Data Chunking | |
| # GRAPHRAG_CHUNK_SIZE=1200 | |
| # GRAPHRAG_CHUNK_OVERLAP=100 | |
| # GRAPHRAG_CHUNK_BY_COLUMNS=id | |
| # Prompting Overrides | |
| # GRAPHRAG_ENTITY_EXTRACTION_PROMPT_FILE=None | |
| # GRAPHRAG_ENTITY_EXTRACTION_MAX_GLEANINGS=1 | |
| # GRAPHRAG_ENTITY_EXTRACTION_ENTITY_TYPES=organization,person,event,geo | |
| # GRAPHRAG_SUMMARIZE_DESCRIPTIONS_PROMPT_FILE=None | |
| # GRAPHRAG_SUMMARIZE_DESCRIPTIONS_MAX_LENGTH=500 | |
| # GRAPHRAG_CLAIM_EXTRACTION_DESCRIPTION="Any claims or facts that could be relevant to threat analysis." | |
| # GRAPHRAG_CLAIM_EXTRACTION_PROMPT_FILE=None | |
| # GRAPHRAG_CLAIM_EXTRACTION_MAX_GLEANINGS=1 | |
| # GRAPHRAG_COMMUNITY_REPORT_PROMPT_FILE=None | |
| # GRAPHRAG_COMMUNITY_REPORT_MAX_LENGTH=1500 | |
| # Storage | |
| # GRAPHRAG_STORAGE_TYPE=file | |
| # GRAPHRAG_STORAGE_CONNECTION_STRING=None | |
| # GRAPHRAG_STORAGE_CONTAINER_NAME=None | |
| # GRAPHRAG_STORAGE_BASE_DIR=None | |
| # Cache | |
| # GRAPHRAG_CACHE_TYPE=file | |
| # GRAPHRAG_CACHE_CONNECTION_STRING=None | |
| # GRAPHRAG_CACHE_CONTAINER_NAME=None | |
| # GRAPHRAG_CACHE_BASE_DIR=None | |
| # Reporting | |
| # GRAPHRAG_REPORTING_TYPE=file | |
| # GRAPHRAG_REPORTING_CONNECTION_STRING=None | |
| # GRAPHRAG_REPORTING_CONTAINER_NAME=None | |
| # GRAPHRAG_REPORTING_BASE_DIR=None | |
| # Node2Vec Parameters | |
| # GRAPHRAG_NODE2VEC_ENABLED=False | |
| # GRAPHRAG_NODE2VEC_NUM_WALKS=10 | |
| # GRAPHRAG_NODE2VEC_WALK_LENGTH=40 | |
| # GRAPHRAG_NODE2VEC_WINDOW_SIZE=2 | |
| # GRAPHRAG_NODE2VEC_ITERATIONS=3 | |
| # GRAPHRAG_NODE2VEC_RANDOM_SEED=597832 | |
| # Data Snapshotting | |
| # GRAPHRAG_SNAPSHOT_GRAPHML=False | |
| # GRAPHRAG_SNAPSHOT_RAW_ENTITIES=False | |
| # GRAPHRAG_SNAPSHOT_TOP_LEVEL_NODES=False | |
| # Miscellaneous Settings | |
| # GRAPHRAG_ASYNC_MODE=asyncio | |
| # GRAPHRAG_ENCODING_MODEL=cl100k_base | |
| # GRAPHRAG_MAX_CLUSTER_SIZE=10 | |
| # GRAPHRAG_ENTITY_RESOLUTION_ENABLED=False | |
| # GRAPHRAG_SKIP_WORKFLOWS=None | |
| # GRAPHRAG_UMAP_ENABLED=False | |
| ``` | |