ayushmodi001 committed
Commit · caff036
Parent(s): 65509a0

final fixes
Files changed:
- Dockerfile +7 -2
- app/__pycache__/__init__.cpython-312.pyc +0 -0
- app/__pycache__/api.cpython-312.pyc +0 -0
- app/__pycache__/config.cpython-312.pyc +0 -0
- app/__pycache__/context_generator.cpython-312.pyc +0 -0
- app/__pycache__/mcq_pipeline.cpython-312.pyc +0 -0
- app/__pycache__/router.cpython-312.pyc +0 -0
- app/__pycache__/utils.cpython-312.pyc +0 -0
- app/api.py +25 -17
- app/config.py +17 -15
- app/question_generator.py +1 -4
- scripts/OPTIMIZATION.md +22 -1
- scripts/optimize_model.py +35 -8
- scripts/setup_optimization.py +26 -0
- space.json +0 -0
Dockerfile
CHANGED

@@ -4,6 +4,11 @@ FROM python:3.11-slim
 # Set the working directory in the container
 WORKDIR /app
 
+# Set environment variables for optimization
+ENV CACHE_BASE_DIR="/tmp/opportunity_t5_model_cache"
+ENV ENABLE_LAZY_LOADING="True"
+ENV USE_ONNX_RUNTIME="True"
+
 # Create a virtual environment
 RUN python -m venv /opt/venv
 
@@ -17,9 +22,9 @@ COPY requirements.txt .
 RUN pip install --upgrade -r requirements.txt
 
 # Create cache directories for models
-RUN mkdir -p ./cache/transformers/optimized_model
-RUN mkdir -p ./cache/transformers/optimized_tokenizer
 # Create a temp cache directory that will be writable by appuser
+RUN mkdir -p /tmp/opportunity_t5_model_cache/transformers/optimized_model
+RUN mkdir -p /tmp/opportunity_t5_model_cache/transformers/optimized_tokenizer
 RUN mkdir -p /tmp/opportunity_t5_model_cache/contexts
 RUN chmod 777 /tmp/opportunity_t5_model_cache -R
 
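These ENV defaults are read back by app/config.py (diffed below), which treats any of "true", "1", or "t" (case-insensitive) as enabled. A minimal sketch of that round trip; `env_flag` is an illustrative helper, not a name from the repository:

```python
import os

# Same truthy parse that app/config.py in this commit uses for these flags.
def env_flag(name: str, default: str = "True") -> bool:
    return os.getenv(name, default).lower() in ("true", "1", "t")

os.environ["ENABLE_LAZY_LOADING"] = "0"  # e.g. overridden via `docker run -e ENABLE_LAZY_LOADING=0`
print(env_flag("ENABLE_LAZY_LOADING"))   # False
print(env_flag("USE_ONNX_RUNTIME"))      # True (falls back to the default)
```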
app/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (155 Bytes)

app/__pycache__/api.cpython-312.pyc
ADDED
Binary file (7.54 kB)

app/__pycache__/config.cpython-312.pyc
ADDED
Binary file (1.93 kB)

app/__pycache__/context_generator.cpython-312.pyc
ADDED
Binary file (17.9 kB)

app/__pycache__/mcq_pipeline.cpython-312.pyc
ADDED
Binary file (8.96 kB)

app/__pycache__/router.cpython-312.pyc
ADDED
Binary file (7.71 kB)

app/__pycache__/utils.cpython-312.pyc
ADDED
Binary file (3.46 kB)
app/api.py
CHANGED

@@ -7,34 +7,42 @@ from .utils import get_logger
 from .question_generator import load_t5_pipeline
 import asyncio # Add asyncio import for request queue
 import time # Add time import for timing requests
-from .config import WARMUP_SAMPLE_SIZE
+from .config import WARMUP_SAMPLE_SIZE, ENABLE_LAZY_LOADING
 
 logger = get_logger(__name__)
 
 @asynccontextmanager
-async def lifespan(app: FastAPI):
+async def lifespan(app: FastAPI):
+    # --- Startup ---
     with ProcessPoolExecutor(max_workers=2) as executor: # Use both cores
         app.state.process_executor = executor
         # Process one request at a time but fully utilize both cores
         app.state.request_semaphore = asyncio.Semaphore(1) # Process one request at a time for T5
         app.state.request_queue = asyncio.Queue() # Create a request queue
         logger.info("Application startup: ProcessPoolExecutor created with 2 workers.")
-        # Load models in the main process during startup
-        logger.info("Application startup: Loading models in main process...")
 
-
-
-
-
-
-
-
-
-
-
-
-
-
+        # Load models in the main process during startup
+        logger.info("Application startup: Loading model with optimized startup...")
+
+        # With lazy loading, this will only load the tokenizer at startup
+        load_t5_pipeline()
+        logger.info("Application startup: Initial model loading complete.")
+
+        # For lazy loading, skip the warm-up at startup to reduce startup time
+        # The warm-up will happen naturally on the first request
+        if not ENABLE_LAZY_LOADING and WARMUP_SAMPLE_SIZE > 0:
+            logger.info("Warming up model with sample inference...")
+            from .question_generator import generate_questions
+            # Create a simplified warm-up with minimal examples
+            warm_up_candidates = [
+                {"answer": "compiler", "context": "A compiler translates code.", "context_id": "warm_up"}
+            ]
+            # Run warm-up in the executor to ensure it's done in the same way as actual requests
+            loop = asyncio.get_event_loop()
+            await loop.run_in_executor(executor, generate_questions, warm_up_candidates)
+            logger.info("Model warm-up completed successfully.")
+        else:
+            logger.info("Model warm-up skipped with lazy loading enabled.")
 
         # Start the request processor
         app.state.processor_task = asyncio.create_task(process_request_queue(app))
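`load_t5_pipeline` itself is outside this diff; the comments above say that with lazy loading it only loads the tokenizer at startup and defers the model to first use. A minimal sketch of that pattern, where everything except `load_t5_pipeline` and the model name (taken from app/config.py) is hypothetical, not the repository's actual code:

```python
from transformers import T5ForConditionalGeneration, T5Tokenizer

T5_MODEL_NAME = "Ayush472/Technical_mcq_model"  # value from app/config.py
_tokenizer = None
_model = None

def load_t5_pipeline():
    """Startup path: load only the comparatively cheap tokenizer."""
    global _tokenizer
    if _tokenizer is None:
        _tokenizer = T5Tokenizer.from_pretrained(T5_MODEL_NAME)
    return _tokenizer

def _get_model():
    """First-request path: load the expensive model on demand."""
    global _model
    if _model is None:
        _model = T5ForConditionalGeneration.from_pretrained(T5_MODEL_NAME)
    return _model
```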
app/config.py
CHANGED

@@ -23,10 +23,17 @@ T5_MODEL_NAME = "Ayush472/Technical_mcq_model"
 GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
 GEMINI_MODEL_NAME = os.getenv("GEMINI_MODEL_NAME", "gemini-2.5-flash")
 
-# -
-
-
-
+# Model warm-up sample size (smaller is faster for startup)
+WARMUP_SAMPLE_SIZE = 1
+
+# Enable parallel processing within a request
+ENABLE_PARALLEL_PROCESSING = True
+
+# Enable lazy loading to reduce startup time - only loads tokenizer at startup, loads model on first use
+ENABLE_LAZY_LOADING = os.getenv("ENABLE_LAZY_LOADING", "True").lower() in ("true", "1", "t")
+
+# Use ONNX Runtime for faster inference when available
+USE_ONNX_RUNTIME = os.getenv("USE_ONNX_RUNTIME", "True").lower() in ("true", "1", "t")
 
 QUESTION_GENERATION_PARAMS = {
     "num_beams": 6, # Reduced for faster processing

@@ -59,17 +66,12 @@ CONTEXT_CACHE_EXPIRY = 60 * 60 * 24 * 7 # 7 days in seconds
 # In production, this will use a temp directory that the container user has access to
 CACHE_BASE_DIR = os.getenv("CACHE_BASE_DIR", settings.cache_base_dir)
 
-#
-
-
-
-
-
-# Enable lazy loading to reduce startup time - only loads tokenizer at startup, loads model on first use
-ENABLE_LAZY_LOADING = os.getenv("ENABLE_LAZY_LOADING", "True").lower() in ("true", "1", "t")
-
-# Use ONNX Runtime for faster inference when available
-USE_ONNX_RUNTIME = os.getenv("USE_ONNX_RUNTIME", "True").lower() in ("true", "1", "t")
+# Define paths based on the CACHE_BASE_DIR for consistency
+CACHE_DIR = os.path.join(CACHE_BASE_DIR, "transformers")
+OPTIMIZED_TOKENIZER_PATH = os.path.join(CACHE_DIR, "optimized_tokenizer")
+OPTIMIZED_MODEL_PATH = os.path.join(CACHE_DIR, "optimized_model")
+TRACED_MODEL_PATH = os.path.join(CACHE_DIR, "optimized_model", "traced_model.pt")
+ONNX_MODEL_PATH = os.path.join(CACHE_DIR, "optimized_model", "model.onnx")
 
 # --- Distractor Generator Configuration ---
 # Number of QA pairs per batch for distractor generation.
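With every artifact path derived from `CACHE_BASE_DIR`, the Python side and the `mkdir -p` lines in the Dockerfile stay in sync by construction. A quick standalone check under the default base directory:

```python
import os

CACHE_BASE_DIR = os.getenv("CACHE_BASE_DIR", "/tmp/opportunity_t5_model_cache")
CACHE_DIR = os.path.join(CACHE_BASE_DIR, "transformers")

# These should match the `RUN mkdir -p ...` paths added to the Dockerfile above.
print(os.path.join(CACHE_DIR, "optimized_tokenizer"))
# /tmp/opportunity_t5_model_cache/transformers/optimized_tokenizer
print(os.path.join(CACHE_DIR, "optimized_model", "model.onnx"))
# /tmp/opportunity_t5_model_cache/transformers/optimized_model/model.onnx
```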
app/question_generator.py
CHANGED

@@ -2,7 +2,7 @@ import torch
 from transformers import T5ForConditionalGeneration, T5Tokenizer, AutoModelForSeq2SeqLM
 from .config import (
     T5_MODEL_NAME, OPTIMIZED_TOKENIZER_PATH, OPTIMIZED_MODEL_PATH, TRACED_MODEL_PATH,
-    QUESTION_GENERATION_PARAMS, ENABLE_LAZY_LOADING, USE_ONNX_RUNTIME
+    QUESTION_GENERATION_PARAMS, ENABLE_LAZY_LOADING, USE_ONNX_RUNTIME, ONNX_MODEL_PATH
 )
 from .utils import get_logger
 import re

@@ -27,9 +27,6 @@ if USE_ONNX_RUNTIME:
 else:
     ONNX_AVAILABLE = False
 
-# Define path for ONNX model
-ONNX_MODEL_PATH = os.path.join(OPTIMIZED_MODEL_PATH, "model.onnx")
-
 logger = get_logger(__name__)
 
 # --- Global Model and Tokenizer ---
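Moving `ONNX_MODEL_PATH` into app/config.py removes the locally derived duplicate, so the optimizer script and the runtime now agree on a single path. The hunk context shows an `if USE_ONNX_RUNTIME: ... else: ONNX_AVAILABLE = False` guard whose body is outside this diff; one common shape for such a guard (an assumption, not the file's verbatim code):

```python
USE_ONNX_RUNTIME = True  # in the real module this comes from app.config

try:
    if USE_ONNX_RUNTIME:
        import onnxruntime  # noqa: F401 -- only probing availability
        ONNX_AVAILABLE = True
    else:
        ONNX_AVAILABLE = False
except ImportError:
    # onnxruntime is optional; fall back to the standard PyTorch path
    ONNX_AVAILABLE = False
```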
scripts/OPTIMIZATION.md
CHANGED

@@ -18,7 +18,8 @@ python -m scripts.setup_optimization
 
 This script will:
 1. Install required dependencies for ONNX runtime
-2. Run model optimization to create:
+2. Create necessary directories with appropriate permissions
+3. Run model optimization to create:
    - An optimized standard model
    - A JIT traced model
    - An ONNX model (if ONNX runtime is available)

@@ -29,6 +30,7 @@ The following environment variables control optimization behavior:
 
 - `ENABLE_LAZY_LOADING`: Set to "True" (default) to enable lazy loading, "False" to disable
 - `USE_ONNX_RUNTIME`: Set to "True" (default) to use ONNX runtime when available, "False" to disable
+- `CACHE_BASE_DIR`: Set to override the default cache directory (defaults to "/tmp/opportunity_t5_model_cache")
 
 Example:
 ```bash

@@ -37,6 +39,9 @@ $env:ENABLE_LAZY_LOADING="False"
 
 # Disable ONNX runtime
 $env:USE_ONNX_RUNTIME="False"
+
+# Use a custom cache directory
+$env:CACHE_BASE_DIR="/path/to/writable/directory"
 ```
 
 ## Performance Comparison

@@ -49,6 +54,22 @@ $env:USE_ONNX_RUNTIME="False"
 | ONNX | ~20s | ~1s | ~0.6s |
 | Lazy + ONNX | ~5s | ~15s | ~0.6s |
 
+## Hugging Face Spaces Compatibility
+
+When running in Hugging Face Spaces, the optimized models are stored in the `/tmp/opportunity_t5_model_cache` directory, which is created with appropriate permissions during container startup. This directory is writable by the application user and persists across application restarts.
+
+If you encounter permission errors when optimizing models, you can:
+
+1. Make sure the `CACHE_BASE_DIR` environment variable is set to a writable location
+2. Run the setup script with appropriate permissions
+3. Manually create the directories with the right permissions:
+
+```bash
+mkdir -p /tmp/opportunity_t5_model_cache/transformers/optimized_tokenizer
+mkdir -p /tmp/opportunity_t5_model_cache/transformers/optimized_model
+chmod -R 777 /tmp/opportunity_t5_model_cache
+```
+
 ## Troubleshooting
 
 If you encounter issues:
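Before running the optimizer, it can help to confirm from Python that the cache directory is actually writable; a small standalone check (an editor's sketch, not part of the repository):

```python
import os
import tempfile

cache_dir = os.getenv("CACHE_BASE_DIR", "/tmp/opportunity_t5_model_cache")
try:
    os.makedirs(cache_dir, exist_ok=True)
    with tempfile.TemporaryFile(dir=cache_dir):
        pass  # creating and deleting a temp file proves write access
    print(f"{cache_dir} is writable")
except OSError as e:
    print(f"{cache_dir} is not writable: {e}")
```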
scripts/optimize_model.py
CHANGED

@@ -15,12 +15,22 @@ logging.basicConfig(
 )
 logger = logging.getLogger(__name__)
 
-#
-
-
-
-
-
+# Import CACHE_BASE_DIR from config if possible, or use a safe default
+try:
+    # First try to import from the app config
+    sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+    from app.config import CACHE_BASE_DIR
+except ImportError:
+    # Fallback to a safe location if config can't be imported
+    CACHE_BASE_DIR = os.getenv("CACHE_BASE_DIR", "/tmp/opportunity_t5_model_cache")
+    logger.info(f"Could not import from config, using fallback cache directory: {CACHE_BASE_DIR}")
+
+# Paths for optimized models - using the writable CACHE_BASE_DIR from config
+CACHE_DIR = os.path.join(CACHE_BASE_DIR, "transformers")
+OPTIMIZED_TOKENIZER_PATH = os.path.join(CACHE_DIR, "optimized_tokenizer")
+OPTIMIZED_MODEL_PATH = os.path.join(CACHE_DIR, "optimized_model")
+TRACED_MODEL_PATH = os.path.join(CACHE_DIR, "optimized_model", "traced_model.pt")
+ONNX_MODEL_PATH = os.path.join(CACHE_DIR, "optimized_model", "model.onnx")
 
 def create_dummy_input(tokenizer, device):
     """Create a dummy input for tracing"""

@@ -37,10 +47,27 @@ def optimize_t5_model(model_name, output_dir=CACHE_DIR, use_jit=True, use_onnx=T
     3. Creating an ONNX version for even faster inference
     """
     try:
-        #
-
+        # Use output_dir if provided (for CLI compatibility)
+        global CACHE_DIR, OPTIMIZED_TOKENIZER_PATH, OPTIMIZED_MODEL_PATH, TRACED_MODEL_PATH, ONNX_MODEL_PATH
+        if output_dir != CACHE_DIR:
+            CACHE_DIR = output_dir
+            OPTIMIZED_TOKENIZER_PATH = os.path.join(CACHE_DIR, "optimized_tokenizer")
+            OPTIMIZED_MODEL_PATH = os.path.join(CACHE_DIR, "optimized_model")
+            TRACED_MODEL_PATH = os.path.join(CACHE_DIR, "optimized_model", "traced_model.pt")
+            ONNX_MODEL_PATH = os.path.join(CACHE_DIR, "optimized_model", "model.onnx")
+
+        logger.info(f"Creating directories: {CACHE_DIR}, {OPTIMIZED_TOKENIZER_PATH}, {OPTIMIZED_MODEL_PATH}")
+        os.makedirs(CACHE_DIR, exist_ok=True)
         os.makedirs(OPTIMIZED_TOKENIZER_PATH, exist_ok=True)
         os.makedirs(OPTIMIZED_MODEL_PATH, exist_ok=True)
+        try:
+            import stat
+            mode = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO
+            os.chmod(CACHE_DIR, mode)
+            os.chmod(OPTIMIZED_TOKENIZER_PATH, mode)
+            os.chmod(OPTIMIZED_MODEL_PATH, mode)
+        except Exception as e:
+            logger.warning(f"Could not set directory permissions: {e}. This may be normal in some environments.")
 
         # Determine device
         device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
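A design note on this hunk: `output_dir` defaults to `CACHE_DIR` as captured at import time, and the function then rebinds five module globals via `global`, which any other importer of this module will observe. A side-effect-free alternative (an editor's sketch, not the repository's code) would derive the paths locally and return them:

```python
import os

def resolve_optimized_paths(output_dir: str) -> dict:
    """Derive every optimized-model path from one root without mutating globals."""
    model_dir = os.path.join(output_dir, "optimized_model")
    return {
        "tokenizer": os.path.join(output_dir, "optimized_tokenizer"),
        "model": model_dir,
        "traced": os.path.join(model_dir, "traced_model.pt"),
        "onnx": os.path.join(model_dir, "model.onnx"),
    }

# Usage:
# paths = resolve_optimized_paths("/tmp/opportunity_t5_model_cache/transformers")
```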
scripts/setup_optimization.py
CHANGED

@@ -44,6 +44,32 @@ def run_optimization():
     script_dir = os.path.dirname(os.path.abspath(__file__))
     sys.path.insert(0, os.path.dirname(script_dir))
 
+    # First, import config to get the proper cache paths
+    try:
+        from app.config import CACHE_BASE_DIR, CACHE_DIR, OPTIMIZED_TOKENIZER_PATH, OPTIMIZED_MODEL_PATH
+
+        # Create all necessary directories with appropriate permissions
+        os.makedirs(CACHE_BASE_DIR, exist_ok=True)
+        os.makedirs(CACHE_DIR, exist_ok=True)
+        os.makedirs(OPTIMIZED_TOKENIZER_PATH, exist_ok=True)
+        os.makedirs(OPTIMIZED_MODEL_PATH, exist_ok=True)
+
+        # Set permissions to ensure they're writable
+        try:
+            import stat
+            # Add write permissions to user, group and others
+            mode = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO
+            os.chmod(CACHE_BASE_DIR, mode)
+            os.chmod(CACHE_DIR, mode)
+            os.chmod(OPTIMIZED_TOKENIZER_PATH, mode)
+            os.chmod(OPTIMIZED_MODEL_PATH, mode)
+            logger.info(f"Set permissions on directories: {CACHE_BASE_DIR}, {CACHE_DIR}, etc.")
+        except Exception as e:
+            logger.warning(f"Could not set directory permissions: {e}. This may be normal in some environments.")
+
+    except ImportError as e:
+        logger.warning(f"Could not import config: {e}. Will rely on optimize_model.py defaults.")
+
     from scripts.optimize_model import optimize_t5_model
 
     # Run optimization with both JIT and ONNX
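Per scripts/OPTIMIZATION.md, this script is run as `python -m scripts.setup_optimization`. The closing comment hints at the call that follows the hunk; a plausible shape, inferred from the `optimize_t5_model` signature visible in the optimize_model.py diff above (the actual call is not shown in this commit):

```python
from scripts.optimize_model import optimize_t5_model

# Hypothetical continuation: the model name matches T5_MODEL_NAME in app/config.py.
optimize_t5_model("Ayush472/Technical_mcq_model", use_jit=True, use_onnx=True)
```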
space.json
ADDED

File without changes