DARKWICK commited on
Commit
8f3321a
·
verified ·
1 Parent(s): 44d27d8

Upload 4 files

Browse files
Files changed (4) hide show
  1. Dockerfile +26 -0
  2. README.md +146 -0
  3. app.py +194 -0
  4. requirements.txt +7 -0
Dockerfile ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.10-slim

# System dependencies (FFmpeg is required for audio processing via yt_dlp)
# NOTE(review): curl is installed but not used in this Dockerfile — presumably
# intended for a Space healthcheck; confirm before removing.
RUN apt-get update \
    && apt-get install -y --no-install-recommends ffmpeg curl \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Copy and install dependencies first so this layer is cached across code edits
COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application code
COPY app.py ./

# Environment variables for Gradio (bind on all interfaces, port 7860)
ENV GRADIO_SERVER_NAME=0.0.0.0 \
    GRADIO_SERVER_PORT=7860 \
    PYTHONUNBUFFERED=1

# Expose the default Gradio port
EXPOSE 7860

# Start the app
CMD ["python", "app.py"]
README.md ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # YouTube Translator & Speaker (Hugging Face — Docker Space)
2
+
3
+
4
+
5
+ A fully automated app that:
6
+
7
+ ✅ Extracts video transcript using YouTube captions
8
+
9
+ ✅ Falls back to OpenAI Whisper if no captions exist (requires `OPENAI_API_KEY`)
10
+
11
+ ✅ Translates to any selected language (OpenAI GPT model)
12
+
13
+ ✅ Optional: Converts translation to speech using `edge-tts` (free, no key required)
14
+
15
+
16
+
17
+ ---
18
+
19
+
20
+
21
+ \## 🚀 Deploy on Hugging Face (Docker)
22
+
23
+
24
+
25
+ \### 1️⃣ Create a new Space
26
+
27
+ \- Go to Hugging Face → New Space → Choose \*\*Docker\*\*
28
+
29
+ \- Give it a name (e.g., `youtube-translator`)
30
+
31
+ \- Visibility: Public or Private (your choice)
32
+
33
+
34
+
35
+ \### 2️⃣ Upload the following files:
36
+
37
+ 📦 `app.py`
38
+
39
+ 📦 `requirements.txt`
40
+
41
+ 📦 `Dockerfile`
42
+
43
+ 📦 `README.md` (optional but recommended)
44
+
45
+
46
+
47
+ \### 3️⃣ Add your OpenAI API Key
48
+
49
+ Go to \*\*Settings → Secrets → Add New Secret\*\*
50
+
51
+ | Name | Value |
52
+
53
+ |------------------|--------------------------------|
54
+
55
+ | `OPENAI_API_KEY` | `sk-xxxxxxxxxxxxxxxxxxxxxx` |
56
+
57
+
58
+
59
+ 💡 Without this key:
60
+
61
+ \- Whisper transcription fallback will NOT work
62
+
63
+ \- Translation will NOT work (only original captions will show)
64
+
65
+
66
+
67
+ \### 4️⃣ Restart the Space
68
+
69
+ The app will build and launch on port 7860 automatically.
70
+
71
+
72
+
73
+ ---
74
+
75
+
76
+
77
+ \## 📍 Usage
78
+
79
+ 1\. Paste YouTube \*\*URL\*\* or \*\*11-char ID\*\*
80
+
81
+ 2\. Choose a language (`en`, `es`, `hi`, `ha`, `fr`, etc.)
82
+
83
+ 3\. (Optional) Keep "Generate Speech" ON
84
+
85
+ 4\. Click \*\*Submit\*\* → Done ✅
86
+
87
+
88
+
89
+ ---
90
+
91
+
92
+
93
+ \## ⚠ Common Errors \& Fixes
94
+
95
+
96
+
97
+ | Error Message | Solution |
98
+
99
+ |--------------|----------|
100
+
101
+ | Invalid video ID | Use a proper YouTube URL or 11-char ID |
102
+
103
+ | No captions found + no API key | Add `OPENAI_API_KEY` or use a video with subtitles |
104
+
105
+ | Translation failed | Check your API key / rate limit |
106
+
107
+ | TTS not playing | Retry or uncheck “Generate Speech” |
108
+
109
+
110
+
111
+ ---
112
+
113
+
114
+
115
+ \## 🛡 Security \& Best Practices
116
+
117
+ ✅ Do NOT hardcode API keys
118
+
119
+ ✅ Use Hugging Face Secrets
120
+
121
+ ✅ Rotate your OpenAI key regularly
122
+
123
+ ✅ Avoid public exposure of keys in screenshots or code
124
+
125
+
126
+
127
+ ---
128
+
129
+
130
+
131
+ \## 💻 Local Development
132
+
133
+
134
+
135
+ ```bash
+ python -m venv .venv && source .venv/bin/activate
+ pip install -r requirements.txt
+ export OPENAI_API_KEY=sk-...
+ python app.py
+ ```
144
+
145
+
146
+
app.py ADDED
@@ -0,0 +1,194 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import asyncio
4
+ import tempfile
5
+ from typing import Tuple, Optional
6
+
7
+ import gradio as gr
8
+ from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound, CouldNotRetrieveTranscript
9
+ import yt_dlp
10
+ from openai import OpenAI
11
+ from langcodes import Language
12
+ import edge_tts
13
+
14
# OpenAI key read from the environment (set via HF Spaces "Secrets").
# An empty value disables the Whisper fallback and translation downstream.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")

# ---------------------------
# Helpers
# ---------------------------
20
def extract_video_id(url_or_id: str) -> Optional[str]:
    """Accept a full YouTube URL or bare ID and return the 11-char video id.

    Supports watch (?v=), /v/, youtu.be, /embed/, /shorts/ and /live/ URL
    forms. Returns None when no id can be extracted.
    """
    if not url_or_id:
        return None
    candidate = url_or_id.strip()
    # Bare 11-character id, e.g. "dQw4w9WgXcQ"
    if re.fullmatch(r"[A-Za-z0-9_-]{11}", candidate):
        return candidate
    patterns = [
        # Generalized to also match Shorts and Live URLs
        r"(?:v=|/v/|youtu\.be/|/embed/|/shorts/|/live/)([A-Za-z0-9_-]{11})",
    ]
    for p in patterns:
        m = re.search(p, candidate)
        if m:
            return m.group(1)
    return None
34
+
35
+
36
def get_transcript_text(video_id: str) -> Tuple[Optional[str], Optional[str]]:
    """Return (transcript_text, source_lang) via YouTube captions, or (None, None).

    Prefers manually created transcripts over auto-generated ones. Uses only
    the public Transcript API (is_generated) instead of the library's private
    _manually_created_transcripts / _generated_transcripts attributes, which
    the original touched and which may change between releases.
    """
    try:
        listing = YouTubeTranscriptApi.list_transcripts(video_id)
        transcripts = list(listing)
        if not transcripts:
            return None, None
        # Preference order: human-made captions, then auto-generated, then anything.
        preferred = (
            next((t for t in transcripts if not t.is_generated), None)
            or next((t for t in transcripts if t.is_generated), None)
            or transcripts[0]
        )
        parts = preferred.fetch()
        text = "\n".join(p["text"].strip() for p in parts if p.get("text"))
        src_lang = preferred.language_code or ""
        return text, src_lang
    except (TranscriptsDisabled, NoTranscriptFound, CouldNotRetrieveTranscript):
        # Video exists but captions are disabled/unavailable.
        return None, None
    except Exception:
        # Best-effort: any other failure (network, parsing) is treated as
        # "no captions" so the caller can fall back to Whisper.
        return None, None
63
+
64
+
65
def download_audio(video_id: str) -> str:
    """Download the video's audio track as MP3 and return the file path.

    Uses yt-dlp with an FFmpeg post-processor (FFmpeg must be installed —
    see the Dockerfile). Raises FileNotFoundError if the expected MP3 was
    not produced, instead of returning a dangling path.
    """
    tmpdir = tempfile.mkdtemp()
    outfile = os.path.join(tmpdir, f"{video_id}.mp3")
    ydl_opts = {
        "format": "bestaudio/best",
        # Plain template string — yt-dlp expands %(id)s / %(ext)s itself
        # (the original wrapped this in an f-string with nothing to format).
        "outtmpl": os.path.join(tmpdir, "%(id)s.%(ext)s"),
        "postprocessors": [
            {
                "key": "FFmpegExtractAudio",
                "preferredcodec": "mp3",
                "preferredquality": "192",
            }
        ],
        "quiet": True,
        "noprogress": True,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([f"https://www.youtube.com/watch?v={video_id}"])
    if not os.path.exists(outfile):
        # Fail loudly here rather than letting open() fail later with a
        # less informative error in the transcription step.
        raise FileNotFoundError(f"Audio extraction did not produce {outfile}")
    return outfile
84
+
85
+
86
def transcribe_with_openai(mp3_path: str) -> str:
    """Transcribe the audio file at *mp3_path* with OpenAI's hosted model.

    Sends the file to the "gpt-4o-transcribe" endpoint and returns the
    plain-text transcript. Requires OPENAI_API_KEY to be set.
    """
    api = OpenAI(api_key=OPENAI_API_KEY)
    with open(mp3_path, "rb") as audio_file:
        result = api.audio.transcriptions.create(
            file=audio_file,
            model="gpt-4o-transcribe",
            response_format="text",
        )
    return str(result)
95
+
96
+
97
def openai_translate(text: str, target_lang_code: str) -> str:
    """Translate *text* into the language named by *target_lang_code* via GPT.

    The code is expanded to an English display name when langcodes can
    resolve it, so the model receives an unambiguous target; otherwise the
    raw code is passed through. Returns only the translated text.
    """
    api = OpenAI(api_key=OPENAI_API_KEY)
    try:
        display_name = Language.make(target_lang_code).display_name("en")
    except Exception:
        display_name = target_lang_code
    system_prompt = (
        "You are a professional media translator. Translate the user's transcript into the target language. "
        "Keep meaning, style, and names. Remove timestamps. Output ONLY the translation without commentary."
    )
    user_prompt = f"Target language: {display_name} ({target_lang_code}).\n\nTranscript to translate:\n{text}"
    completion = api.chat.completions.create(
        model="gpt-4o-mini",
        temperature=0.2,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
    )
    return completion.choices[0].message.content.strip()
116
+
117
+
118
# Maps a primary language subtag to an edge-tts neural voice name.
# Consumed by synthesize_edge_tts; unmapped languages fall back to "en".
VOICE_MAP = {
    "en": "en-US-JennyNeural",
    "es": "es-ES-ElviraNeural",
    "fr": "fr-FR-DeniseNeural",
    "de": "de-DE-KatjaNeural",
    "hi": "hi-IN-SwaraNeural",
    "ar": "ar-EG-SalmaNeural",
    "pt": "pt-BR-FranciscaNeural",
    "ru": "ru-RU-SvetlanaNeural",
    "ja": "ja-JP-NanamiNeural",
    "ko": "ko-KR-SunHiNeural",
    "zh": "zh-CN-XiaoxiaoNeural",
    # NOTE(review): "ha" (Hausa) is mapped to an English voice — presumably
    # because no Hausa edge-tts voice was available; verify against the
    # current edge-tts voice list.
    "ha": "en-US-JennyNeural",
}
132
+
133
+
134
async def synthesize_edge_tts(text: str, lang_code: str) -> str:
    """Render *text* to an MP3 file with edge-tts and return the file path.

    The voice is looked up in VOICE_MAP by the primary language subtag
    (e.g. "pt-BR" -> "pt"); unmapped languages fall back to the English voice.
    The caller owns the returned temp file (delete=False).
    """
    primary_tag = lang_code.split("-")[0]
    voice = VOICE_MAP.get(primary_tag, VOICE_MAP["en"])
    out = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False)
    out.close()
    tts = edge_tts.Communicate(text, voice=voice)
    with open(out.name, "wb") as sink:
        async for piece in tts.stream():
            # The stream interleaves audio chunks with metadata events;
            # only audio payloads are written to the file.
            if piece["type"] == "audio":
                sink.write(piece["data"])
    return out.name
144
+
145
+
146
def _run_coro(coro):
    """Run *coro* to completion from sync code, with or without a usable loop.

    asyncio.run() refuses to execute when a loop is already running in this
    thread; the original fallback used the deprecated asyncio.get_event_loop(),
    which also raises in non-main threads with no loop set. A fresh loop that
    is always closed avoids both problems.
    """
    try:
        return asyncio.run(coro)
    except RuntimeError:
        loop = asyncio.new_event_loop()
        try:
            return loop.run_until_complete(coro)
        finally:
            loop.close()


def process(url_or_id: str, target_lang: str, do_tts: bool):
    """Full pipeline: video id -> transcript -> translation -> optional TTS.

    Returns Gradio updates for (translated text, audio player, status message).
    Errors never propagate to Gradio: each stage failure is reported via the
    status message instead.
    """
    vid = extract_video_id(url_or_id)
    if not vid:
        return gr.update(value="", visible=True), gr.update(value=None, visible=do_tts), "Invalid YouTube ID/URL."
    text, src_lang = get_transcript_text(vid)
    # Fall back to Whisper transcription when captions are missing or
    # suspiciously short (< 20 chars) and an API key is available.
    if (not text or len(text) < 20) and OPENAI_API_KEY:
        try:
            mp3 = download_audio(vid)
            text = transcribe_with_openai(mp3)
        except Exception as e:
            return gr.update(value="", visible=True), gr.update(value=None, visible=do_tts), f"Transcription failed: {e}"
    elif not text:
        return gr.update(value="", visible=True), gr.update(value=None, visible=do_tts), (
            "No captions found and no OPENAI_API_KEY provided."
        )
    try:
        # Without an API key, pass the original text through untranslated.
        translated = openai_translate(text, target_lang) if OPENAI_API_KEY else text
        warn = "" if OPENAI_API_KEY else "No translation (API key missing)."
    except Exception as e:
        return gr.update(value="", visible=True), gr.update(value=None, visible=do_tts), f"Translation failed: {e}"
    audio_path = None
    if do_tts and translated:
        try:
            audio_path = _run_coro(synthesize_edge_tts(translated, target_lang))
        except Exception as e:
            # Degrade gracefully: still show the translated text even when
            # speech synthesis fails (the README tells users to retry or
            # uncheck "Generate Speech" in that case).
            warn = (warn + " " if warn else "") + f"Speech generation failed: {e}"
    return gr.update(value=translated, visible=True), gr.update(value=audio_path, visible=do_tts), warn
174
+
175
+
176
# Build the Gradio interface: input controls on the left, results on the right.
with gr.Blocks(theme=gr.themes.Soft(), title="YouTube Translator & Speaker") as demo:
    gr.Markdown("# YouTube Translator and Speaker")
    with gr.Row():
        with gr.Column(scale=1):
            # Inputs: video reference, target language, TTS toggle, actions
            video_in = gr.Textbox(label="YouTube URL or Video ID")
            lang = gr.Dropdown(label="Target Language", choices=["en","es","fr","de","hi","ar","pt","ru","ja","ko","zh","ha"], value="en")
            do_tts = gr.Checkbox(label="Generate Speech", value=True)
            submit = gr.Button("Submit", variant="primary")
            clear = gr.Button("Clear")
        with gr.Column(scale=1):
            # Outputs: translated text, synthesized audio, status/error line
            out_text = gr.Textbox(label="Translated Text", lines=14)
            out_audio = gr.Audio(label="Speech (MP3)", type="filepath")
            status = gr.Markdown(visible=True)

    # Wire the pipeline: process() returns (text update, audio update, status).
    submit.click(fn=process, inputs=[video_in, lang, do_tts], outputs=[out_text, out_audio, status])
    clear.click(lambda: ("", None, ""), outputs=[out_text, out_audio, status])

if __name__ == "__main__":
    # Bind on all interfaces at port 7860 (matches the Dockerfile EXPOSE/ENV).
    demo.launch(server_name="0.0.0.0", server_port=7860)
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ gradio==4.44.0
2
+ youtube-transcript-api==0.6.2
3
+ yt_dlp==2025.1.26
4
+ openai>=1.40.0
5
+ edge-tts==6.1.11
6
+ langcodes==3.4.0
7
+ python-dotenv==1.0.1