DaCrow13
committed on
Commit
·
6bd4abe
1
Parent(s):
27bcd6a
Fix monitoring stack configuration for HF Spaces: Add Alertmanager, Pushgateway, and fix Prometheus/Grafana connection
Browse files
- Dockerfile +20 -3
- docker/nginx.conf +26 -0
- docker/scripts/start_space.sh +31 -17
Dockerfile
CHANGED
|
@@ -40,9 +40,26 @@ RUN wget https://github.com/prometheus/prometheus/releases/download/v2.45.0/prom
|
|
| 40 |
mkdir -p /etc/prometheus /var/lib/prometheus && \
|
| 41 |
rm -rf prometheus-*
|
| 42 |
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
|
| 47 |
# Copy requirements first for caching
|
| 48 |
COPY requirements.txt .
|
|
|
|
| 40 |
mkdir -p /etc/prometheus /var/lib/prometheus && \
|
| 41 |
rm -rf prometheus-*
|
| 42 |
|
| 43 |
+
# Alertmanager
|
| 44 |
+
RUN wget https://github.com/prometheus/alertmanager/releases/download/v0.25.0/alertmanager-0.25.0.linux-amd64.tar.gz && \
|
| 45 |
+
tar xvfz alertmanager-*.tar.gz && \
|
| 46 |
+
mv alertmanager-*/alertmanager /usr/local/bin/ && \
|
| 47 |
+
mv alertmanager-*/amtool /usr/local/bin/ && \
|
| 48 |
+
mkdir -p /etc/alertmanager /var/lib/alertmanager && \
|
| 49 |
+
rm -rf alertmanager-*
|
| 50 |
+
|
| 51 |
+
# Pushgateway
|
| 52 |
+
RUN wget https://github.com/prometheus/pushgateway/releases/download/v1.6.0/pushgateway-1.6.0.linux-amd64.tar.gz && \
|
| 53 |
+
tar xvfz pushgateway-*.tar.gz && \
|
| 54 |
+
mv pushgateway-*/pushgateway /usr/local/bin/ && \
|
| 55 |
+
rm -rf pushgateway-*
|
| 56 |
+
|
| 57 |
+
COPY --chown=user monitoring/grafana/provisioning /etc/grafana/provisioning
|
| 58 |
+
COPY --chown=user monitoring/grafana/dashboards /var/lib/grafana/dashboards
|
| 59 |
+
COPY --chown=user monitoring/prometheus/prometheus.yml /etc/prometheus/prometheus.yml
|
| 60 |
+
COPY --chown=user monitoring/prometheus/alert_rules.yml /etc/prometheus/alert_rules.yml
|
| 61 |
+
COPY --chown=user monitoring/alertmanager/config.yml /etc/alertmanager/config.yml
|
| 62 |
+
|
| 63 |
|
| 64 |
# Copy requirements first for caching
|
| 65 |
COPY requirements.txt .
|
docker/nginx.conf
CHANGED
|
@@ -33,6 +33,14 @@ http {
|
|
| 33 |
server 127.0.0.1:9090;
|
| 34 |
}
|
| 35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
server {
|
| 37 |
listen 7860;
|
| 38 |
server_name localhost;
|
|
@@ -108,6 +116,24 @@ http {
|
|
| 108 |
proxy_set_header X-Forwarded-Proto $scheme;
|
| 109 |
}
|
| 110 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
# Streamlit (Catch-all)
|
| 112 |
location / {
|
| 113 |
proxy_pass http://streamlit;
|
|
|
|
| 33 |
server 127.0.0.1:9090;
|
| 34 |
}
|
| 35 |
|
| 36 |
+
upstream alertmanager {
|
| 37 |
+
server 127.0.0.1:9093;
|
| 38 |
+
}
|
| 39 |
+
|
| 40 |
+
upstream pushgateway {
|
| 41 |
+
server 127.0.0.1:9091;
|
| 42 |
+
}
|
| 43 |
+
|
| 44 |
server {
|
| 45 |
listen 7860;
|
| 46 |
server_name localhost;
|
|
|
|
| 116 |
proxy_set_header X-Forwarded-Proto $scheme;
|
| 117 |
}
|
| 118 |
|
| 119 |
+
# Alertmanager UI
|
| 120 |
+
location /alertmanager/ {
|
| 121 |
+
proxy_pass http://alertmanager;
|
| 122 |
+
proxy_set_header Host $host;
|
| 123 |
+
proxy_set_header X-Real-IP $remote_addr;
|
| 124 |
+
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
| 125 |
+
proxy_set_header X-Forwarded-Proto $scheme;
|
| 126 |
+
}
|
| 127 |
+
|
| 128 |
+
# Pushgateway UI
|
| 129 |
+
location /pushgateway/ {
|
| 130 |
+
proxy_pass http://pushgateway;
|
| 131 |
+
proxy_set_header Host $host;
|
| 132 |
+
proxy_set_header X-Real-IP $remote_addr;
|
| 133 |
+
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
| 134 |
+
proxy_set_header X-Forwarded-Proto $scheme;
|
| 135 |
+
}
|
| 136 |
+
|
| 137 |
# Streamlit (Catch-all)
|
| 138 |
location / {
|
| 139 |
proxy_pass http://streamlit;
|
docker/scripts/start_space.sh
CHANGED
|
@@ -53,23 +53,37 @@ for i in {1..30}; do
|
|
| 53 |
done
|
| 54 |
|
| 55 |
echo "$(date) - Configuring and starting Prometheus..."
|
| 56 |
-
#
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
|
| 74 |
# Determine Prometheus External URL
|
| 75 |
# Always use relative path so it works on both huggingface.co and .hf.space domains
|
|
|
|
| 53 |
done
|
| 54 |
|
| 55 |
echo "$(date) - Configuring and starting Prometheus..."
|
| 56 |
+
# Patch Grafana Datasource for Localhost (HF Space)
|
| 57 |
+
# Replace prometheus:9090 with 127.0.0.1:9090 in all datasource configs
|
| 58 |
+
find /etc/grafana/provisioning/datasources -name '*.yml' -exec sed -i 's/prometheus:9090/127.0.0.1:9090/g' {} +
|
| 59 |
+
|
| 60 |
+
# Copy production configs to /tmp for modification
|
| 61 |
+
cp /etc/prometheus/prometheus.yml /tmp/prometheus.yml
|
| 62 |
+
cp /etc/prometheus/alert_rules.yml /tmp/alert_rules.yml
|
| 63 |
+
cp /etc/alertmanager/config.yml /tmp/alertmanager.yml
|
| 64 |
+
|
| 65 |
+
# Modify Prometheus config for local execution (replace docker-compose service names with localhost)
|
| 66 |
+
# hopcroft-api:8080 -> 127.0.0.1:8000 (API runs on 8000 in Space)
|
| 67 |
+
sed -i 's/hopcroft-api:8080/127.0.0.1:8000/g' /tmp/prometheus.yml
|
| 68 |
+
# alertmanager:9093 -> 127.0.0.1:9093
|
| 69 |
+
sed -i 's/alertmanager:9093/127.0.0.1:9093/g' /tmp/prometheus.yml
|
| 70 |
+
# pushgateway:9091 -> 127.0.0.1:9091
|
| 71 |
+
sed -i 's/pushgateway:9091/127.0.0.1:9091/g' /tmp/prometheus.yml
|
| 72 |
+
# Rewrite the "alert_rules.yml" reference to the absolute path of the copy in /tmp
|
| 73 |
+
sed -i 's|"alert_rules.yml"|"/tmp/alert_rules.yml"|g' /tmp/prometheus.yml
|
| 74 |
+
|
| 75 |
+
echo "$(date) - Starting Alertmanager..."
|
| 76 |
+
alertmanager \
|
| 77 |
+
--config.file=/tmp/alertmanager.yml \
|
| 78 |
+
--storage.path=/tmp/alertmanager_data \
|
| 79 |
+
--web.route-prefix=/alertmanager/ \
|
| 80 |
+
>> /tmp/alertmanager.log 2>&1 &
|
| 81 |
+
|
| 82 |
+
echo "$(date) - Starting Pushgateway..."
|
| 83 |
+
pushgateway \
|
| 84 |
+
--persistence.file=/tmp/pushgateway_data \
|
| 85 |
+
--web.route-prefix=/pushgateway/ \
|
| 86 |
+
>> /tmp/pushgateway.log 2>&1 &
|
| 87 |
|
| 88 |
# Determine Prometheus External URL
|
| 89 |
# Always use relative path so it works on both huggingface.co and .hf.space domains
|