#!/bin/bash
# ==============================================================================
# Start Script for Ollama, FastAPI Backend, and Nginx Frontend
# This script initializes the environment, starts dependent services,
# and then launches the main application components.
# ==============================================================================

# ------------------------------------------------------------------------------
# 1. Environment Setup
# Set essential environment variables for performance and GPU usage.
# ------------------------------------------------------------------------------
# Fail on unset variables and surface failures inside pipelines.
# NOTE: '-e' is deliberately omitted — the model pulls and service probes
# below are best-effort and must not abort the whole container on a
# transient error.
set -uo pipefail

echo "--- Initializing Environment Variables ---"
export OMP_NUM_THREADS=4      # Cap OpenMP threads for predictable CPU load
export MKL_NUM_THREADS=4      # Cap Intel MKL threads likewise
export CUDA_VISIBLE_DEVICES=0 # Specifies which GPU to use, 0 for the first GPU
# ------------------------------------------------------------------------------
# 2. Start Ollama Server, Wait for Readiness, and Pull Models
# 'ollama list' and 'ollama pull' are CLIENT commands that talk to the server,
# so the server must be responsive BEFORE any pull is attempted; the models
# must in turn be present before FastAPI starts serving requests.
# ------------------------------------------------------------------------------
echo "--- Starting Ollama Server ---"
# The 'serve' command runs the Ollama API server; background it so the
# script can continue with the readiness probe below.
ollama serve &
OLLAMA_PID=$!
echo "Ollama server started with PID: $OLLAMA_PID"

# Wait for Ollama to become responsive before issuing any client commands.
# (Previously the pulls ran first, which fails on a cold start while the
# server is still binding its port.)
max_attempts=90 # Maximum attempts (~90 seconds)
attempt=0
echo "Waiting for Ollama API to be ready (max $max_attempts seconds)..."
while ! curl -s http://localhost:11434/api/tags >/dev/null; do
  sleep 1
  attempt=$((attempt + 1))
  if [ "$attempt" -ge "$max_attempts" ]; then
    echo "Error: Ollama failed to start within $max_attempts seconds. Exiting." >&2
    exit 1
  fi
done
echo "Ollama is ready and responsive."

# Models required by the application.
MODEL_TO_PULL="krishna_choudhary/tinyllama:latest"      # LLM for chat/completions
WHISPER_MODEL_TO_PULL="whisper:tiny"                    # Whisper for transcription

# pull_if_missing MODEL LABEL
# Pull an Ollama model unless 'ollama list' already shows it.
pull_if_missing() {
  local model=$1 label=$2
  echo "Checking for Ollama $label model: $model"
  # -F: match the model name literally; -- guards against leading dashes.
  if ! ollama list | grep -qF -- "$model"; then
    echo "Pulling Ollama $label model: $model (This may take some time)..."
    ollama pull "$model"
  else
    echo "Ollama $label model $model already present."
  fi
}

pull_if_missing "$MODEL_TO_PULL" "LLM"
pull_if_missing "$WHISPER_MODEL_TO_PULL" "Whisper"
# ------------------------------------------------------------------------------
# 3. Debugging: List Files
# Useful for verifying that application files are correctly copied into the container.
# ------------------------------------------------------------------------------
echo "--- Files in current directory ($PWD): ---"
# 'ls -la' (long format, include dotfiles). The previous '-e' flag is
# BSD-only and is rejected as an invalid option by GNU coreutils ls,
# which is what Linux container images ship.
ls -la
echo "-------------------------------------------"
# ------------------------------------------------------------------------------
# 4. Start FastAPI Backend Server
# The FastAPI application serves the API endpoints.
# ------------------------------------------------------------------------------
echo "--- Starting FastAPI Server ---"
# Uvicorn serves the FastAPI app defined in main.py:
#   --host 0.0.0.0                   reachable from outside the container
#   --port 7860                      port FastAPI listens on
#   --workers / --limit-concurrency  performance tuning knobs
uvicorn_args=(
  main:app
  --host 0.0.0.0
  --port 7860
  --workers 1
  --limit-concurrency 20
)
uvicorn "${uvicorn_args[@]}" &
FASTAPI_PID=$!
echo "FastAPI server started with PID: $FASTAPI_PID"

# Brief pause so FastAPI can finish initializing before Nginx begins proxying.
sleep 5
# ------------------------------------------------------------------------------
# 5. Start Nginx Web Server
# Nginx acts as a reverse proxy for the FastAPI backend and serves the React frontend.
# ------------------------------------------------------------------------------
echo "--- Starting Nginx Web Server on Port 8501 ---"
# 'daemon off;' keeps Nginx in the foreground of its own process (it must not
# self-daemonize, or its PID would be lost). We background it here and rely
# on the 'wait' below to keep the container alive.
nginx -g 'daemon off;' &
NGINX_PID=$!
echo "Nginx started with PID: $NGINX_PID"

# ------------------------------------------------------------------------------
# 6. Keep Container Alive
# 'wait' blocks until the Nginx process exits, keeping the script (and thus
# the Docker container) running for as long as Nginx is up.
# ------------------------------------------------------------------------------
echo "All services initiated. Keeping container alive by waiting for Nginx..."
wait "$NGINX_PID"
echo "Nginx stopped. Container may exit now."