calebhan committed on
Commit
a478677
·
1 Parent(s): ee9cbc1

deployment 16

Browse files
Files changed (3) hide show
  1. Dockerfile +8 -2
  2. backend/pipeline.py +10 -4
  3. start-backend.sh +25 -0
Dockerfile CHANGED
@@ -11,6 +11,8 @@ RUN apt-get update && apt-get install -y \
11
  build-essential \
12
  curl \
13
  ca-certificates \
 
 
14
  libopenblas-dev \
15
  && rm -rf /var/lib/apt/lists/*
16
 
@@ -36,6 +38,10 @@ RUN pip install --no-cache-dir --force-reinstall 'numpy<2.0.0'
36
  # Create storage directory
37
  RUN mkdir -p /app/storage && chmod 777 /app/storage
38
 
 
 
 
 
39
  # Expose HF Spaces port
40
  EXPOSE 7860
41
 
@@ -52,5 +58,5 @@ ENV USE_FAKE_REDIS=true
52
  HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
53
  CMD curl -f http://localhost:7860/health || exit 1
54
 
55
- # Start FastAPI server
56
- CMD ["python", "-m", "uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
 
11
  build-essential \
12
  curl \
13
  ca-certificates \
14
+ dnsutils \
15
+ iputils-ping \
16
  libopenblas-dev \
17
  && rm -rf /var/lib/apt/lists/*
18
 
 
38
  # Create storage directory
39
  RUN mkdir -p /app/storage && chmod 777 /app/storage
40
 
41
+ # Copy and make startup script executable
42
+ COPY start-backend.sh /app/start-backend.sh
43
+ RUN chmod +x /app/start-backend.sh
44
+
45
  # Expose HF Spaces port
46
  EXPOSE 7860
47
 
 
58
  HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
59
  CMD curl -f http://localhost:7860/health || exit 1
60
 
61
+ # Start with diagnostic script
62
+ CMD ["/app/start-backend.sh"]
backend/pipeline.py CHANGED
@@ -139,6 +139,7 @@ class TranscriptionPipeline:
139
  """Download audio from YouTube URL using yt-dlp."""
140
  output_path = self.temp_dir / "audio.wav"
141
 
 
142
  cmd = [
143
  "yt-dlp",
144
  "-x", # Extract audio
@@ -147,17 +148,22 @@ class TranscriptionPipeline:
147
  "--output", str(output_path.with_suffix('')), # yt-dlp adds .wav
148
  "--force-ipv4", # Force IPv4 to avoid DNS issues
149
  "--socket-timeout", "30",
150
- "--source-address", "0.0.0.0", # Bind to all interfaces
151
- "--legacy-server-connect", # Use legacy connection method
152
- # Workarounds for YouTube restrictions
153
- "--extractor-args", "youtube:player_client=android,web",
154
  "--no-check-certificates",
 
 
155
  self.youtube_url
156
  ]
157
 
158
  result = subprocess.run(cmd, capture_output=True, text=True)
159
 
160
  if result.returncode != 0:
 
 
 
161
  raise RuntimeError(f"yt-dlp failed: {result.stderr}")
162
 
163
  if not output_path.exists():
 
139
  """Download audio from YouTube URL using yt-dlp."""
140
  output_path = self.temp_dir / "audio.wav"
141
 
142
+ # Try with different extractors and network options
143
  cmd = [
144
  "yt-dlp",
145
  "-x", # Extract audio
 
148
  "--output", str(output_path.with_suffix('')), # yt-dlp adds .wav
149
  "--force-ipv4", # Force IPv4 to avoid DNS issues
150
  "--socket-timeout", "30",
151
+ "--retries", "10",
152
+ "--fragment-retries", "10",
153
+ # Try alternative extractors
154
+ "--extractor-args", "youtube:player_client=android,ios,web",
155
  "--no-check-certificates",
156
+ # Add verbose output for debugging
157
+ "--verbose",
158
  self.youtube_url
159
  ]
160
 
161
  result = subprocess.run(cmd, capture_output=True, text=True)
162
 
163
  if result.returncode != 0:
164
+ # Log the full error for debugging
165
+ print(f"yt-dlp stderr: {result.stderr}")
166
+ print(f"yt-dlp stdout: {result.stdout}")
167
  raise RuntimeError(f"yt-dlp failed: {result.stderr}")
168
 
169
  if not output_path.exists():
start-backend.sh ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ set -e
3
+
4
+ echo "🔍 Checking DNS configuration..."
5
+
6
+ # Try to resolve youtube.com
7
+ if ! nslookup youtube.com > /dev/null 2>&1; then
8
+ echo "⚠️ DNS resolution failed, attempting fixes..."
9
+
10
+ # Check if we can ping Google's DNS
11
+ if ping -c 1 8.8.8.8 > /dev/null 2>&1; then
12
+ echo "✓ Network connectivity exists"
13
+ echo "⚠️ DNS resolution issue - this may be a HF Spaces network policy"
14
+ else
15
+ echo "✗ No network connectivity"
16
+ fi
17
+
18
+ # Show current DNS config
19
+ echo "Current DNS configuration:"
20
+ cat /etc/resolv.conf || echo "Cannot read /etc/resolv.conf"
21
+ fi
22
+
23
+ echo ""
24
+ echo "🚀 Starting backend server..."
25
+ cd /app/backend && python -u main.py