#!/bin/bash

# ==============================================================================
# Start Script for Ollama, FastAPI Backend, and Nginx Frontend
# This script initializes the environment, starts dependent services,
# and then launches the main application components.
# ==============================================================================

# ------------------------------------------------------------------------------
# 1. Environment Setup
#    Set essential environment variables for performance and GPU usage.
# ------------------------------------------------------------------------------
echo "--- Initializing Environment Variables ---"
export OMP_NUM_THREADS=4
export MKL_NUM_THREADS=4
export CUDA_VISIBLE_DEVICES=0 # Specifies which GPU to use, 0 for the first GPU

# ------------------------------------------------------------------------------
# 2. Start Ollama Server and Pull Models
#    Ollama must be running and the required models available before FastAPI starts.
# ------------------------------------------------------------------------------
echo "--- Starting Ollama Server ---"
# Start Ollama in the background
# The 'serve' command runs the Ollama API server.
ollama serve &

# Wait for the Ollama API to become responsive before touching it; both
# 'ollama list' and 'ollama pull' below talk to the server just launched.
max_attempts=90 # Maximum attempts, polling once per second (~90 seconds)
attempt=0
echo "Waiting for Ollama API to be ready (max $max_attempts seconds)..."
while ! curl -s http://localhost:11434/api/tags >/dev/null; do
    sleep 1
    attempt=$((attempt + 1))
    if [ "$attempt" -eq "$max_attempts" ]; then
        echo "Error: Ollama failed to start within $max_attempts seconds. Exiting."
        exit 1
    fi
done
echo "Ollama is ready and responsive."

# Define the models to be pulled
MODEL_TO_PULL="krishna_choudhary/tinyllama:latest"
WHISPER_MODEL_TO_PULL="whisper:tiny"

# Pull the LLM model if it's not already present
echo "Checking for Ollama LLM model: $MODEL_TO_PULL"
if ! ollama list | grep -q "$MODEL_TO_PULL"; then
    echo "Pulling Ollama LLM model: $MODEL_TO_PULL (This may take some time)..."
    ollama pull "$MODEL_TO_PULL"
else
    echo "Ollama LLM model $MODEL_TO_PULL already present."
fi

# Pull the Whisper model for transcription if it's not already present
echo "Checking for Ollama Whisper model: $WHISPER_MODEL_TO_PULL"
if ! ollama list | grep -q "$WHISPER_MODEL_TO_PULL"; then
    echo "Pulling Ollama Whisper model: $WHISPER_MODEL_TO_PULL (This may take some time)..."
    ollama pull "$WHISPER_MODEL_TO_PULL"
else
    echo "Ollama Whisper model $WHISPER_MODEL_TO_PULL already present."
fi

# ------------------------------------------------------------------------------
# 3. Debugging: List Files
#    Useful for verifying that application files are correctly copied into the container.
# ------------------------------------------------------------------------------
echo "--- Files in current directory ($PWD): ---"
ls -la
echo "-------------------------------------------"

# ------------------------------------------------------------------------------
# 4. Start FastAPI Backend Server
#    The FastAPI application serves the API endpoints.
# ------------------------------------------------------------------------------
echo "--- Starting FastAPI Server ---"
# Run Uvicorn to serve the FastAPI application.
# --host 0.0.0.0 makes it accessible from outside the container (via exposed port).
# --port 7860 is where FastAPI listens.
# --workers 1 runs a single worker process; --limit-concurrency 20 caps
#   concurrent connections/tasks before Uvicorn starts returning HTTP 503.
uvicorn main:app --host 0.0.0.0 --port 7860 --workers 1 --limit-concurrency 20 &
FASTAPI_PID=$!
echo "FastAPI server started with PID: $FASTAPI_PID"

# Give FastAPI a moment to fully initialize (optional but good practice)
sleep 5
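
# A fixed sleep is a heuristic. A more robust alternative (a sketch, assuming
# the app answers on / — any HTTP response, even a 404, means Uvicorn is up):
# until curl -s -o /dev/null http://localhost:7860/; do
#     sleep 1
# done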

# ------------------------------------------------------------------------------
# 5. Start Nginx Web Server
#    Nginx acts as a reverse proxy for the FastAPI backend and serves the React frontend.
# ------------------------------------------------------------------------------
echo "--- Starting Nginx Web Server on Port 8501 ---"
# 'daemon off;' keeps Nginx attached to this shell instead of self-daemonizing;
# the trailing '&' backgrounds it so its PID can be captured, and the 'wait'
# below blocks on that PID to keep the Docker container alive.
# (The listen port, 8501 here, is configured in nginx.conf, not in this script.)
nginx -g 'daemon off;' &
NGINX_PID=$!
echo "Nginx started with PID: $NGINX_PID"

# ------------------------------------------------------------------------------
# 6. Keep Container Alive
#    The 'wait' command will keep the script running as long as Nginx is running.
# ------------------------------------------------------------------------------
echo "All services initiated. Keeping container alive by waiting for Nginx..."
wait $NGINX_PID
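
# Note: waiting only on Nginx means the container keeps running even if the
# FastAPI or Ollama process dies. A minimal alternative (a sketch, assuming
# bash >= 4.3) that returns as soon as *any* background job exits:
# wait -n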

echo "Nginx stopped. Container may exit now."