# SOCAR_Hackathon / .env.example
# IsmatS's picture
# init
# fd93806
# Azure OpenAI Configuration
# Replace the placeholder key and endpoint with the values for your own Azure resource.
AZURE_OPENAI_API_KEY=your_azure_openai_api_key_here
AZURE_OPENAI_ENDPOINT=https://your-resource.services.ai.azure.com/
# API version to target; note that this is a preview version.
AZURE_OPENAI_API_VERSION=2024-08-01-preview
# Azure OpenAI Embedding Configuration (separate resource for embeddings)
# IMPORTANT: set the key and endpoint below only when embeddings are served by a
# different Azure resource; otherwise the main AZURE_OPENAI credentials will be used.
# NOTE(review): presumably the two placeholder lines must be removed or left unset
# for that fallback to apply - confirm against the loading code.
AZURE_EMBEDDING_API_KEY=your_embedding_api_key_here
AZURE_EMBEDDING_ENDPOINT=https://your-embedding-resource.cognitiveservices.azure.com/
AZURE_EMBEDDING_MODEL=text-embedding-3-small
# NOTE(review): presumably the embedding vector length, which would have to equal
# the dimension of the vector index - confirm against the indexing code.
AZURE_EMBEDDING_DIMS=1024
# Azure Document Intelligence (using same credentials as OpenAI for hackathon)
# The placeholder endpoint is the same as AZURE_OPENAI_ENDPOINT; replace both placeholders.
AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT=https://your-resource.services.ai.azure.com/
AZURE_DOCUMENT_INTELLIGENCE_KEY=your_document_intelligence_key_here
# Azure AI Foundry Models
# Access to LLaMA and other models via Azure AI Foundry
# https://azure.microsoft.com/en-us/products/ai-foundry/models
# VM Configuration (Optional)
# Host name and login user of the VM.
VM_HOST=your-vm-host.cloudapp.azure.com
VM_USER=hackathon
# NOTE(review): unclear whether this expects a path to a private-key file or the
# key material itself - confirm with the code that reads it.
VM_SSH_KEY=your_ssh_key
# HuggingFace Resources
# HUGGINGFACE_ORG is a full URL, while DATASET_NAME is an "org/name" identifier.
HUGGINGFACE_ORG=https://huggingface.co/SOCARAI
DATASET_NAME=SOCARAI/ai_track_data
# GitHub Code Samples
# Repository URL of the sample code.
CODE_SAMPLES_REPO=https://github.com/neaorin/foundry-models-samples
# Application Configuration
# All paths are relative and nested under DATA_DIR.
# NOTE(review): presumably resolved against the process working directory - confirm.
DATA_DIR=./data
PDF_DIR=./data/pdfs
VECTOR_DB_PATH=./data/vector_db
PROCESSED_DIR=./data/processed
# LLM Model Configuration
# Available open-source models: DeepSeek-R1, Llama-4-Maverick-17B-128E-Instruct-FP8
# Llama-4-Maverick is used for its speed/quality balance and to earn the
# open-source architecture score (presumably a hackathon judging criterion).
LLM_MODEL=Llama-4-Maverick-17B-128E-Instruct-FP8
# Pinecone Configuration (Cloud Vector Database)
# Replace the placeholder API key; the index name defaults to "hackathon" here.
PINECONE_API_KEY=your_pinecone_api_key_here
PINECONE_INDEX_NAME=hackathon
# Cloud provider and region for the index.
PINECONE_CLOUD=aws
PINECONE_REGION=us-east-1
# Selects the vector store backend.
# NOTE(review): VECTOR_DB_PATH is presumably only relevant for a local backend - confirm.
VECTOR_DB_TYPE=pinecone
# API Configuration
# NOTE(review): if API_HOST is used as the bind address, 0.0.0.0 listens on all
# network interfaces; use 127.0.0.1 to restrict to local access - confirm usage.
API_HOST=0.0.0.0
API_PORT=8000
# OCR Configuration
# Maximum number of pages to OCR; 0 = unlimited pages (set to limit if needed).
# The explanation is kept on its own line: some env-file loaders do not strip
# trailing comments and would read the value as "0 # 0 = unlimited pages ...".
OCR_MAX_PAGES=0
# Disable telemetry and warnings
# NOTE(review): these are presumably read by third-party libraries rather than by
# the application itself (TOKENIZERS_PARALLELISM by Hugging Face tokenizers,
# ANONYMIZED_TELEMETRY by a dependency that reports usage data) - confirm.
TOKENIZERS_PARALLELISM=false
ANONYMIZED_TELEMETRY=false