# text-extraction-api / config.py
# krishnachoudhary-hclguvi
# Add toggleable API auth for hackathon bot evaluation
# a181751 unverified
import os
import shutil
from dotenv import load_dotenv
# Load environment variables from .env file.
# This runs at import time, ahead of the os.getenv() lookups later in this module.
load_dotenv()
# --- Paths ---
# BASE_DIR is the absolute directory that contains this module; the upload and
# static directories are resolved relative to it.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
UPLOAD_DIR = os.path.join(BASE_DIR, "uploads")
STATIC_DIR = os.path.join(BASE_DIR, "static")
# Create uploads directory if it doesn't exist (import-time side effect;
# exist_ok=True means an already existing directory is not an error).
# STATIC_DIR is not created here.
os.makedirs(UPLOAD_DIR, exist_ok=True)
# --- File Upload Settings ---
# Size limit expressed in megabytes and, derived from it, in bytes
# (1 MB is taken as 1024 * 1024 bytes).
MAX_FILE_SIZE_MB = 50
MAX_FILE_SIZE_BYTES = MAX_FILE_SIZE_MB * 1024 * 1024
# Maps each accepted file extension (lowercase, no leading dot) to its MIME type.
# "jpg" and "jpeg" both map to image/jpeg.
# NOTE(review): presumably the upload handler checks incoming files against
# these keys -- confirm at the call site.
ALLOWED_EXTENSIONS = {
    "pdf": "application/pdf",
    "docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    "png": "image/png",
    "jpg": "image/jpeg",
    "jpeg": "image/jpeg",
    "tiff": "image/tiff",
    "bmp": "image/bmp",
    "webp": "image/webp",
}
# --- OCR Configuration ---
# EasyOCR settings
EASYOCR_LANGS = ["en"]  # Language codes EasyOCR should support
EASYOCR_GPU = False  # Set to True if an NVIDIA GPU is available and CUDA is installed
# Tesseract is kept as a fallback if needed; EasyOCR is prioritized for accuracy.
def find_tesseract():
    """Locate the Tesseract executable.

    The system PATH is searched first, which works on every platform. If that
    finds nothing, a few conventional Windows install locations are probed.

    Returns:
        The path of the first match as a string, or None when no Tesseract
        executable is found.
    """
    on_path = shutil.which("tesseract")
    if on_path:
        return on_path

    candidates = [
        r"C:\Program Files\Tesseract-OCR\tesseract.exe",
        r"C:\Program Files (x86)\Tesseract-OCR\tesseract.exe",
    ]

    # Per-user installs. An entry is only added when its environment variable
    # is set, so no malformed path (empty user name) is ever probed.
    username = os.getenv("USERNAME")
    if username:
        candidates.append(
            r"C:\Users\{}\AppData\Local\Tesseract-OCR\tesseract.exe".format(username)
        )
    local_app_data = os.getenv("LOCALAPPDATA")
    if local_app_data:
        # Covers user profiles that are not located under C:\Users\<USERNAME>.
        # Kept last so every path found previously is still preferred.
        candidates.append(
            os.path.join(local_app_data, "Tesseract-OCR", "tesseract.exe")
        )

    return next((path for path in candidates if os.path.isfile(path)), None)
# Resolved once at import time; None when find_tesseract() locates no executable.
TESSERACT_CMD = find_tesseract()
# Tesseract language code.
# NOTE(review): presumably passed as the `lang` option by the OCR code -- confirm at the call site.
TESSERACT_LANG = "eng"
def check_ocr_availability():
    """Report which OCR engine can be used.

    Returns:
        "available" when the easyocr package can be imported,
        "tesseract-only" when it cannot but TESSERACT_CMD is set,
        "not-found" when neither engine is usable.
    """
    try:
        import easyocr  # noqa: F401 -- imported only to probe availability
    except ImportError:
        # EasyOCR is missing: fall back to whatever Tesseract lookup produced.
        return "tesseract-only" if TESSERACT_CMD else "not-found"
    return "available"
# --- Summarization Settings ---
# NOTE(review): presumably the number of sentences kept in a generated summary -- confirm in the summarizer.
SUMMARY_SENTENCE_COUNT = 5
SUMMARY_ALGORITHM = "lex-rank"  # Options: lex-rank, lsa, luhn, edmundson
# --- NER Settings ---
# Name of the spaCy model and the entity labels of interest.
# NOTE(review): presumably entities with other labels are dropped by the NER code -- confirm.
SPACY_MODEL = "en_core_web_sm"
NER_ENTITY_TYPES = ["PERSON", "ORG", "DATE", "MONEY", "GPE", "EVENT", "PRODUCT", "LAW", "NORP"]
# --- Sentiment Settings ---
# Score cut-offs for the positive and negative labels.
# NOTE(review): presumably scores between the two values count as neutral, and
# whether the bounds are inclusive is decided by the consumer -- confirm.
SENTIMENT_THRESHOLDS = {
    "positive": 0.05,
    "negative": -0.05,
}
# --- Gemini AI Configuration ---
# None when GEMINI_API_KEY is not set in the environment.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
# Model name is read from GEMINI_MODEL and defaults to "gemini-2.5-flash".
GEMINI_MODEL_NAME = os.getenv("GEMINI_MODEL", "gemini-2.5-flash")
# API access key for external clients.
# The first non-empty value among API_ACCESS_KEY, VALID_API_KEY and API_KEY is
# used; the result is falsy (None or "") when none of them holds a value.
API_ACCESS_KEY = (
    os.getenv("API_ACCESS_KEY") or
    os.getenv("VALID_API_KEY") or
    os.getenv("API_KEY")
)
# Competition/deployment toggle:
# When false, endpoints are public and evaluators can call APIs without auth headers.
# Defaults to false when the variable is unset. The common truthy spellings are
# accepted (case-insensitive, surrounding whitespace ignored) so that a value
# such as REQUIRE_API_KEY=1 does not silently leave the API public.
REQUIRE_API_KEY = os.getenv("REQUIRE_API_KEY", "false").strip().lower() in {
    "1",
    "true",
    "yes",
    "on",
}
def is_api_key_valid(key: str) -> bool:
    """Return True when *key* grants access to the API.

    When REQUIRE_API_KEY is false every caller is accepted, whatever *key* is.
    Otherwise *key* (surrounding whitespace ignored) must equal API_ACCESS_KEY;
    a missing or empty *key*, or an unconfigured API_ACCESS_KEY, is rejected.
    """
    import hmac  # local import: only needed for the comparison below

    if not REQUIRE_API_KEY:
        return True
    if not API_ACCESS_KEY or not key:
        return False
    # Constant-time comparison, so response timing does not reveal how much of
    # the supplied key matched. Bytes are compared because compare_digest()
    # rejects str values that contain non-ASCII characters.
    return hmac.compare_digest(
        key.strip().encode("utf-8"),
        API_ACCESS_KEY.encode("utf-8"),
    )
def is_gemini_available():
    """Report whether Gemini is configured, i.e. GEMINI_API_KEY holds a non-empty value."""
    if GEMINI_API_KEY:
        return True
    return False