import os
import shutil

from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# --- Paths ---
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
UPLOAD_DIR = os.path.join(BASE_DIR, "uploads")
STATIC_DIR = os.path.join(BASE_DIR, "static")

# Create uploads directory if it doesn't exist
os.makedirs(UPLOAD_DIR, exist_ok=True)

# --- File Upload Settings ---
MAX_FILE_SIZE_MB = 50
MAX_FILE_SIZE_BYTES = MAX_FILE_SIZE_MB * 1024 * 1024

# Maps each accepted file extension (without the dot) to its MIME type.
ALLOWED_EXTENSIONS = {
    "pdf": "application/pdf",
    "docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    "png": "image/png",
    "jpg": "image/jpeg",
    "jpeg": "image/jpeg",
    "tiff": "image/tiff",
    "bmp": "image/bmp",
    "webp": "image/webp",
}

# --- OCR Configuration ---
# EasyOCR settings
EASYOCR_LANGS = ["en"]  # Languages to support
EASYOCR_GPU = False  # Set to True if NVIDIA GPU is available and CUDA is installed

# Keep Tesseract as fallback if needed, but prioritize EasyOCR for accuracy


def find_tesseract():
    """Locate the Tesseract executable.

    The executable search path is consulted first (this works on any
    platform); if nothing is found there, a few conventional Windows
    install locations are probed.

    Returns:
        The path to the Tesseract executable as a string, or ``None`` when
        no candidate exists.
    """
    tesseract_in_path = shutil.which("tesseract")
    if tesseract_in_path:
        return tesseract_in_path

    common_paths = [
        r"C:\Program Files\Tesseract-OCR\tesseract.exe",
        r"C:\Program Files (x86)\Tesseract-OCR\tesseract.exe",
        # Per-user install; with USERNAME unset this yields a path that
        # simply fails the isfile() check below.
        r"C:\Users\{}\AppData\Local\Tesseract-OCR\tesseract.exe".format(
            os.getenv("USERNAME", "")
        ),
    ]
    for path in common_paths:
        if os.path.isfile(path):
            return path
    return None


TESSERACT_CMD = find_tesseract()
TESSERACT_LANG = "eng"


def check_ocr_availability():
    """Check if any OCR engine is available.

    Returns:
        ``"available"`` when ``easyocr`` can be imported,
        ``"tesseract-only"`` when it cannot but ``TESSERACT_CMD`` is set,
        and ``"not-found"`` otherwise.
    """
    try:
        # Imported only to probe for the package; the module is not used here.
        import easyocr  # noqa: F401
    except ImportError:
        if TESSERACT_CMD:
            return "tesseract-only"
        return "not-found"
    return "available"


# --- Summarization Settings ---
SUMMARY_SENTENCE_COUNT = 5
SUMMARY_ALGORITHM = "lex-rank"  # Options: lex-rank, lsa, luhn, edmundson

# --- NER Settings ---
SPACY_MODEL = "en_core_web_sm"
NER_ENTITY_TYPES = [
    "PERSON",
    "ORG",
    "DATE",
    "MONEY",
    "GPE",
    "EVENT",
    "PRODUCT",
    "LAW",
    "NORP",
]
# --- Sentiment Settings ---
SENTIMENT_THRESHOLDS = {
    "positive": 0.05,
    "negative": -0.05,
}

# --- Gemini AI Configuration ---
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
# The GEMINI_MODEL environment variable overrides the default model name.
GEMINI_MODEL_NAME = os.getenv("GEMINI_MODEL", "gemini-2.5-flash")

# API access key for external clients.
# The first non-empty value among these environment variables is used.
API_ACCESS_KEY = (
    os.getenv("API_ACCESS_KEY")
    or os.getenv("VALID_API_KEY")
    or os.getenv("API_KEY")
)

# Competition/deployment toggle:
# When false, endpoints are public and evaluators can call APIs without auth headers.
# Only the literal value "true" (case-insensitive, surrounding whitespace
# ignored) enables the check; any other value leaves it disabled.
REQUIRE_API_KEY = os.getenv("REQUIRE_API_KEY", "false").strip().lower() == "true"


def is_api_key_valid(key: str) -> bool:
    """Return whether *key* grants access to the API.

    Args:
        key: The key supplied by the client. Surrounding whitespace is
            ignored. Falsy values (``None``, ``""``) are rejected whenever
            key checking is enabled.

    Returns:
        ``True`` when ``REQUIRE_API_KEY`` is off, or when *key* matches
        ``API_ACCESS_KEY``; ``False`` otherwise, including when no access
        key is configured.
    """
    import hmac

    if not REQUIRE_API_KEY:
        return True
    if not API_ACCESS_KEY or not key:
        return False
    # Constant-time comparison so response timing does not leak how much of
    # the key matched. Compare as bytes: compare_digest() raises TypeError
    # for non-ASCII str arguments.
    return hmac.compare_digest(
        key.strip().encode("utf-8"),
        API_ACCESS_KEY.encode("utf-8"),
    )


# Flag to check if Gemini is configured
def is_gemini_available():
    """Return ``True`` when a non-empty ``GEMINI_API_KEY`` is configured."""
    return bool(GEMINI_API_KEY)