File size: 1,641 Bytes
3736c33
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import os
from pathlib import Path
from dotenv import load_dotenv

# Load environment variables.
# This call is made before any of the os.getenv() lookups further down so that
# whatever values it loads are already in the environment when the settings
# below are read.
load_dotenv()

# Filesystem layout
# PROJECT_ROOT is the directory that contains this file. The data folder sits
# next to that directory (one level up), i.e. the project root's data folder
# rather than backend/data; the models folder lives inside PROJECT_ROOT.
PROJECT_ROOT = Path(__file__).parent
MODELS_DIR = PROJECT_ROOT.joinpath("models")
DATA_DIR = PROJECT_ROOT.parent.joinpath("data")
UPLOADS_DIR = DATA_DIR.joinpath("uploads")
VECTOR_DB_DIR = DATA_DIR.joinpath("vector_db")
CHATS_DIR = DATA_DIR.joinpath("chats")

# Make sure every directory above is present as soon as this module is
# imported; missing parents are created and existing directories are left alone.
for dir_path in (DATA_DIR, MODELS_DIR, UPLOADS_DIR, VECTOR_DB_DIR, CHATS_DIR):
    dir_path.mkdir(parents=True, exist_ok=True)

# Model configuration
# RAG runs directly on a pre-trained model, so no training step is required.
# EMBEDDING_MODEL is fixed in code; the other three values can be overridden
# through environment variables of the same name.
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"  # Used for document embeddings
# Pre-trained model identifier.
MODEL_NAME = os.environ.get("MODEL_NAME", "microsoft/phi-2")
# True only when the value, lower-cased, is exactly "true" (the default).
USE_PRETRAINED = os.environ.get("USE_PRETRAINED", "true").lower() == "true"
# Location of a fine-tuned model; only relevant if one has been produced.
MODEL_PATH = os.environ.get("MODEL_PATH", str(MODELS_DIR.joinpath("trained_model")))

# API keys
# Both keys are read from the environment; an unset variable yields "".
HUGGINGFACE_API_KEY = os.environ.get("HUGGINGFACE_API_KEY", "")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")

# Application settings
# The first three values can be overridden through environment variables of the
# same name; CHUNK_SIZE and CHUNK_OVERLAP are fixed in code.


def _env_number(name, default, cast):
    """Return environment variable *name* converted with *cast*.

    Args:
        name: Name of the environment variable to read.
        default: String used when the variable is unset or blank.
        cast: Conversion callable, e.g. ``int`` or ``float``.

    Returns:
        ``cast(default)`` when the variable is unset, empty or whitespace-only
        (for example a bare ``NAME=`` line in a .env file), otherwise the
        converted value.

    Raises:
        ValueError: If the variable holds a non-blank value that *cast*
            rejects. The message names the offending variable.
    """
    raw = os.getenv(name, "").strip()
    if not raw:
        # A blank value is treated as "not configured" instead of crashing
        # the import with an anonymous int("")/float("") error.
        return cast(default)
    try:
        return cast(raw)
    except ValueError as err:
        raise ValueError(
            "Environment variable {} must be a valid {}, got {!r}".format(
                name, cast.__name__, raw
            )
        ) from err


MAX_UPLOAD_SIZE = _env_number("MAX_UPLOAD_SIZE", "200", int)  # MB
TEMPERATURE = _env_number("TEMPERATURE", "0.7", float)
MAX_TOKENS = _env_number("MAX_TOKENS", "2048", int)
# NOTE(review): the unit of the two chunk values (characters vs. tokens) is not
# visible in this file — confirm against the code that splits documents.
CHUNK_SIZE = 512
CHUNK_OVERLAP = 50

# Use cases
# Maps each short use-case identifier to a one-line description of it.
USE_CASES = dict(
    explanation="Provide detailed explanation of concepts",
    summary="Generate concise summary of content",
    qa="Answer questions based on content",
    notes="Create structured study notes",
)