File size: 3,048 Bytes
52a0fe9
 
a2aa7c3
 
 
 
52a0fe9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a2aa7c3
 
 
 
 
38365d2
 
 
 
 
 
 
a181751
 
 
 
38365d2
a181751
 
38365d2
 
a2aa7c3
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import os
import shutil
from dotenv import load_dotenv

# Load environment variables from .env file.
# This runs before the os.getenv() lookups further down in this module,
# which read their values from the process environment.
load_dotenv()

# --- Paths ---
# Every path below is anchored at the directory that contains this module.
BASE_DIR = os.path.split(os.path.abspath(__file__))[0]
STATIC_DIR = os.path.join(BASE_DIR, "static")
UPLOAD_DIR = os.path.join(BASE_DIR, "uploads")

# Make sure the uploads directory is present; exist_ok avoids an error
# when it has already been created.
os.makedirs(UPLOAD_DIR, exist_ok=True)

# --- File Upload Settings ---
# Upload size limit, expressed in megabytes and (derived) in bytes.
MAX_FILE_SIZE_MB = 50
MAX_FILE_SIZE_BYTES = MAX_FILE_SIZE_MB * 1024 ** 2

# Accepted file extensions (without the dot) mapped to their MIME type.
ALLOWED_EXTENSIONS = dict(
    pdf="application/pdf",
    docx="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    png="image/png",
    jpg="image/jpeg",
    jpeg="image/jpeg",
    tiff="image/tiff",
    bmp="image/bmp",
    webp="image/webp",
)

# --- OCR Configuration ---
# EasyOCR settings
# NOTE(review): presumably consumed where the EasyOCR reader is created —
# confirm against the caller; nothing in this module uses them.
EASYOCR_LANGS = ["en"]  # Languages to support
EASYOCR_GPU = False      # Set to True if NVIDIA GPU is available and CUDA is installed

# Keep Tesseract as fallback if needed, but prioritize EasyOCR for accuracy
def find_tesseract():
    """Locate the Tesseract executable.

    The PATH is searched first (this works on any operating system); if
    nothing is found there, the usual Windows install locations are probed.

    Returns:
        The path of the executable as a string, or None when no candidate
        exists.
    """
    on_path = shutil.which("tesseract")
    if on_path:
        return on_path

    candidates = [
        r"C:\Program Files\Tesseract-OCR\tesseract.exe",
        r"C:\Program Files (x86)\Tesseract-OCR\tesseract.exe",
    ]
    # Only probe the per-user location when the user name is known: an
    # empty USERNAME would otherwise produce a malformed path under C:\Users.
    username = os.getenv("USERNAME")
    if username:
        candidates.append(
            r"C:\Users\{}\AppData\Local\Tesseract-OCR\tesseract.exe".format(username)
        )

    # First existing candidate wins; None when none of them is a file.
    return next((path for path in candidates if os.path.isfile(path)), None)

# Resolved once when this module is imported; None when find_tesseract()
# could not locate an executable.
TESSERACT_CMD = find_tesseract()
# NOTE(review): looks like the Tesseract language code handed to the OCR
# call — confirm at the call site.
TESSERACT_LANG = "eng"

def check_ocr_availability():
    """Report which OCR engine can be used.

    Returns:
        "available" when the easyocr package can be imported,
        "tesseract-only" when it cannot but TESSERACT_CMD is set,
        "not-found" otherwise.
    """
    try:
        import easyocr  # noqa: F401 -- imported only to probe for its presence
    except ImportError:
        easyocr_importable = False
    else:
        easyocr_importable = True

    if easyocr_importable:
        return "available"
    return "tesseract-only" if TESSERACT_CMD else "not-found"

# --- Summarization Settings ---
SUMMARY_SENTENCE_COUNT = 5
# Options: lex-rank, lsa, luhn, edmundson
SUMMARY_ALGORITHM = "lex-rank"

# --- NER Settings ---
SPACY_MODEL = "en_core_web_sm"
# Entity labels of interest, in the order they are listed here.
NER_ENTITY_TYPES = [
    "PERSON",
    "ORG",
    "DATE",
    "MONEY",
    "GPE",
    "EVENT",
    "PRODUCT",
    "LAW",
    "NORP",
]

# --- Sentiment Settings ---
# Score cut-offs keyed by sentiment label.
SENTIMENT_THRESHOLDS = dict(positive=0.05, negative=-0.05)

# --- Gemini AI Configuration ---
# API key taken from the environment; None when GEMINI_API_KEY is unset.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
# Model name; the GEMINI_MODEL environment variable overrides the default.
GEMINI_MODEL_NAME = os.environ.get("GEMINI_MODEL", "gemini-2.5-flash")

# API access key for external clients.
# The first non-empty variable wins, checked in this order:
# API_ACCESS_KEY, VALID_API_KEY, API_KEY.
API_ACCESS_KEY = (
    os.environ.get("API_ACCESS_KEY")
    or os.environ.get("VALID_API_KEY")
    or os.environ.get("API_KEY")
)

# Competition/deployment toggle:
# When false, endpoints are public and evaluators can call APIs without auth headers.
# Only the value "true" (any letter case, surrounding whitespace ignored)
# switches the check on; the default is off.
REQUIRE_API_KEY = (
    os.environ.get("REQUIRE_API_KEY", "false").strip().lower() == "true"
)

def is_api_key_valid(key: str) -> bool:
    """Check a client-supplied API key against the configured one.

    Args:
        key: The key presented by the client; may be None or empty.

    Returns:
        True when REQUIRE_API_KEY is off (every caller is accepted), or when
        the key matches API_ACCESS_KEY after surrounding whitespace has been
        removed from both. False when the key is missing, no access key is
        configured, or the two differ.
    """
    # Local import keeps this function self-contained.
    import hmac

    if not REQUIRE_API_KEY:
        return True
    if not API_ACCESS_KEY or not key:
        return False

    # Normalise both sides: the supplied key was already being stripped, so
    # a configured key with stray whitespace could never have matched.
    expected = API_ACCESS_KEY.strip()
    if not expected:
        return False

    # Constant-time comparison avoids leaking how many leading characters of
    # a guess were correct; bytes are used because compare_digest rejects
    # non-ASCII str arguments.
    return hmac.compare_digest(
        key.strip().encode("utf-8"), expected.encode("utf-8")
    )

# Flag to check if Gemini is configured
def is_gemini_available():
    """Return True when GEMINI_API_KEY holds a non-empty value, else False."""
    if GEMINI_API_KEY:
        return True
    return False