deployment 16
Browse files
- Dockerfile +8 -2
- backend/pipeline.py +10 -4
- start-backend.sh +25 -0
Dockerfile
CHANGED
|
@@ -11,6 +11,8 @@ RUN apt-get update && apt-get install -y \
|
|
| 11 |
build-essential \
|
| 12 |
curl \
|
| 13 |
ca-certificates \
|
|
|
|
|
|
|
| 14 |
libopenblas-dev \
|
| 15 |
&& rm -rf /var/lib/apt/lists/*
|
| 16 |
|
|
@@ -36,6 +38,10 @@ RUN pip install --no-cache-dir --force-reinstall 'numpy<2.0.0'
|
|
| 36 |
# Create storage directory
|
| 37 |
RUN mkdir -p /app/storage && chmod 777 /app/storage
|
| 38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
# Expose HF Spaces port
|
| 40 |
EXPOSE 7860
|
| 41 |
|
|
@@ -52,5 +58,5 @@ ENV USE_FAKE_REDIS=true
|
|
| 52 |
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
|
| 53 |
CMD curl -f http://localhost:7860/health || exit 1
|
| 54 |
|
| 55 |
-
# Start
|
| 56 |
-
CMD ["
|
|
|
|
| 11 |
build-essential \
|
| 12 |
curl \
|
| 13 |
ca-certificates \
|
| 14 |
+
dnsutils \
|
| 15 |
+
iputils-ping \
|
| 16 |
libopenblas-dev \
|
| 17 |
&& rm -rf /var/lib/apt/lists/*
|
| 18 |
|
|
|
|
| 38 |
# Create storage directory
|
| 39 |
RUN mkdir -p /app/storage && chmod 777 /app/storage
|
| 40 |
|
| 41 |
+
# Copy and make startup script executable
|
| 42 |
+
COPY start-backend.sh /app/start-backend.sh
|
| 43 |
+
RUN chmod +x /app/start-backend.sh
|
| 44 |
+
|
| 45 |
# Expose HF Spaces port
|
| 46 |
EXPOSE 7860
|
| 47 |
|
|
|
|
| 58 |
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
|
| 59 |
CMD curl -f http://localhost:7860/health || exit 1
|
| 60 |
|
| 61 |
+
# Start with diagnostic script
|
| 62 |
+
CMD ["/app/start-backend.sh"]
|
backend/pipeline.py
CHANGED
|
@@ -139,6 +139,7 @@ class TranscriptionPipeline:
|
|
| 139 |
"""Download audio from YouTube URL using yt-dlp."""
|
| 140 |
output_path = self.temp_dir / "audio.wav"
|
| 141 |
|
|
|
|
| 142 |
cmd = [
|
| 143 |
"yt-dlp",
|
| 144 |
"-x", # Extract audio
|
|
@@ -147,17 +148,22 @@ class TranscriptionPipeline:
|
|
| 147 |
"--output", str(output_path.with_suffix('')), # yt-dlp adds .wav
|
| 148 |
"--force-ipv4", # Force IPv4 to avoid DNS issues
|
| 149 |
"--socket-timeout", "30",
|
| 150 |
-
"--
|
| 151 |
-
"--
|
| 152 |
-
#
|
| 153 |
-
"--extractor-args", "youtube:player_client=android,web",
|
| 154 |
"--no-check-certificates",
|
|
|
|
|
|
|
| 155 |
self.youtube_url
|
| 156 |
]
|
| 157 |
|
| 158 |
result = subprocess.run(cmd, capture_output=True, text=True)
|
| 159 |
|
| 160 |
if result.returncode != 0:
|
|
|
|
|
|
|
|
|
|
| 161 |
raise RuntimeError(f"yt-dlp failed: {result.stderr}")
|
| 162 |
|
| 163 |
if not output_path.exists():
|
|
|
|
| 139 |
"""Download audio from YouTube URL using yt-dlp."""
|
| 140 |
output_path = self.temp_dir / "audio.wav"
|
| 141 |
|
| 142 |
+
# Try with different extractors and network options
|
| 143 |
cmd = [
|
| 144 |
"yt-dlp",
|
| 145 |
"-x", # Extract audio
|
|
|
|
| 148 |
"--output", str(output_path.with_suffix('')), # yt-dlp adds .wav
|
| 149 |
"--force-ipv4", # Force IPv4 to avoid DNS issues
|
| 150 |
"--socket-timeout", "30",
|
| 151 |
+
"--retries", "10",
|
| 152 |
+
"--fragment-retries", "10",
|
| 153 |
+
# Try alternative extractors
|
| 154 |
+
"--extractor-args", "youtube:player_client=android,ios,web",
|
| 155 |
"--no-check-certificates",
|
| 156 |
+
# Add verbose output for debugging
|
| 157 |
+
"--verbose",
|
| 158 |
self.youtube_url
|
| 159 |
]
|
| 160 |
|
| 161 |
result = subprocess.run(cmd, capture_output=True, text=True)
|
| 162 |
|
| 163 |
if result.returncode != 0:
|
| 164 |
+
# Log the full error for debugging
|
| 165 |
+
print(f"yt-dlp stderr: {result.stderr}")
|
| 166 |
+
print(f"yt-dlp stdout: {result.stdout}")
|
| 167 |
raise RuntimeError(f"yt-dlp failed: {result.stderr}")
|
| 168 |
|
| 169 |
if not output_path.exists():
|
start-backend.sh
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
set -e
|
| 3 |
+
|
| 4 |
+
echo "🔍 Checking DNS configuration..."
|
| 5 |
+
|
| 6 |
+
# Try to resolve youtube.com
|
| 7 |
+
if ! nslookup youtube.com > /dev/null 2>&1; then
|
| 8 |
+
echo "⚠️ DNS resolution failed, attempting fixes..."
|
| 9 |
+
|
| 10 |
+
# Check if we can ping Google's DNS
|
| 11 |
+
if ping -c 1 8.8.8.8 > /dev/null 2>&1; then
|
| 12 |
+
echo "✓ Network connectivity exists"
|
| 13 |
+
echo "⚠️ DNS resolution issue - this may be a HF Spaces network policy"
|
| 14 |
+
else
|
| 15 |
+
echo "✗ No network connectivity"
|
| 16 |
+
fi
|
| 17 |
+
|
| 18 |
+
# Show current DNS config
|
| 19 |
+
echo "Current DNS configuration:"
|
| 20 |
+
cat /etc/resolv.conf || echo "Cannot read /etc/resolv.conf"
|
| 21 |
+
fi
|
| 22 |
+
|
| 23 |
+
echo ""
|
| 24 |
+
echo "🚀 Starting backend server..."
|
| 25 |
+
cd /app/backend && python -u main.py
|