adelevett committed on
Commit
bab49e8
·
verified ·
1 Parent(s): 4fc3ae4

Upload 4 files

Browse files
Files changed (4) hide show
  1. README.md +9 -6
  2. app.py +376 -0
  3. packages.txt +2 -0
  4. requirements.txt +12 -0
README.md CHANGED
@@ -1,14 +1,17 @@
1
  ---
2
  title: Flashcard2Audio
3
- emoji: 👀
4
- colorFrom: green
5
  colorTo: purple
6
  sdk: gradio
7
- sdk_version: 6.5.1
8
  app_file: app.py
9
  pinned: false
10
- license: mit
11
- short_description: Add audio files to existing Anki decks
12
  ---
13
 
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
1
  ---
2
  title: Flashcard2Audio
3
+ emoji: 🎴
4
+ colorFrom: blue
5
  colorTo: purple
6
  sdk: gradio
7
+ sdk_version: 4.0.0
8
  app_file: app.py
9
  pinned: false
10
+ python_version: "3.10"
 
11
  ---
12
 
13
+ # Flashcard2Audio
14
+
15
+ Offline Neural TTS Audio Generator for Anki Flashcards
16
+
17
+ Supports CSV and APKG input with smart media preservation.
app.py ADDED
@@ -0,0 +1,376 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import genanki
4
+ import pocket_tts
5
+ import tempfile
6
+ import os
7
+ import shutil
8
+ import random
9
+ import zipfile
10
+ import sqlite3
11
+ import re
12
+ import time
13
+ import json
14
+ from pathlib import Path
15
+ from concurrent.futures import ThreadPoolExecutor, as_completed
16
+ from pydub import AudioSegment
17
+
18
# --- Configuration ---

# Size of the worker pool; kept low for HF Spaces (CPU/RAM constraint).
MAX_WORKERS = 2

# UI safety cap: maximum number of rows shown in the preview table.
PREVIEW_LIMIT = 100

# Minimum number of seconds between two progress updates sent to the UI.
PROGRESS_THROTTLE = 1.0
22
+
23
+ # --- Helpers ---
24
+
25
def clean_text_for_tts(text):
    """Reduce a raw card field to plain text suitable for speech synthesis.

    HTML tags are replaced by a space (so ``foo<br>bar`` is read as two
    words), Anki ``[sound:...]`` tags and ``{{...}}`` template placeholders
    are dropped, HTML entities are decoded, and runs of whitespace are
    collapsed into a single space.

    Args:
        text: Raw field value. ``None``/NaN is treated as missing; any other
            value is converted with ``str()``.

    Returns:
        The cleaned text, or ``""`` when the input is missing.
    """
    import html  # local import keeps this block independent of the file header

    if pd.isna(text):
        return ""
    text = str(text)
    # Tags become a space rather than nothing, otherwise the words on either
    # side of a <br> or </div> are glued together. DOTALL lets a tag span
    # a line break.
    text = re.sub(r'<.*?>', ' ', text, flags=re.DOTALL)
    # Remove Anki sound tags
    text = re.sub(r'\[sound:.*?\]', '', text)
    # Remove mustache templates
    text = re.sub(r'\{\{.*?\}\}', '', text, flags=re.DOTALL)
    # Decode entities such as &amp; / &nbsp; so they are not read out literally.
    text = html.unescape(text)
    # \s also matches the non-breaking space produced by &nbsp;.
    return re.sub(r'\s+', ' ', text).strip()
36
+
37
def has_existing_audio(text):
    """Report whether *text* embeds at least one Anki ``[sound:...]`` tag.

    Missing values (``None``/NaN) never count as having audio.
    """
    if pd.isna(text):
        return False
    sound_tag = re.search(r'\[sound:.*?\]', str(text))
    return sound_tag is not None
41
+
42
# Load the speech model once at import time. TTS_MODEL stays None when
# loading fails, so the failure is only reported on stdout.
# NOTE(review): confirm that pocket_tts exposes a module-level load_model();
# if it does not, the resulting AttributeError is swallowed below.
print("Loading TTS Model...")
TTS_MODEL = None
try:
    TTS_MODEL = pocket_tts.load_model()
    print("Model Loaded Successfully.")
except Exception as load_error:
    TTS_MODEL = None
    print(f"CRITICAL ERROR loading model: {load_error}")
49
+
50
def wav_to_mp3(src_wav, dst_mp3):
    """Transcode the WAV file at *src_wav* into a 64k-bitrate MP3 at *dst_mp3*."""
    segment = AudioSegment.from_wav(src_wav)
    segment.export(dst_mp3, format="mp3", bitrate="64k")
52
+
53
def _synthesize_side(text, wav_path, label, idx, overwrite):
    """Produce the audio for one side of a card.

    Returns ``"SKIP"`` when the text already carries a sound tag and we are
    not overwriting; otherwise writes *wav_path* and returns it.
    """
    if not overwrite and has_existing_audio(text):
        return "SKIP"

    try:
        clean = clean_text_for_tts(text)
        # Explicit None check: do not depend on the model object's truthiness.
        if clean and TTS_MODEL is not None:
            pocket_tts.generate_to_file(TTS_MODEL, clean, wav_path)
        else:
            # Nothing to say (or no model): emit half a second of silence.
            AudioSegment.silent(duration=500).export(wav_path, format="wav")
    except Exception as e:
        print(f"TTS Error {label} row {idx}: {e}")
        # Fallback to silence to keep deck integrity
        AudioSegment.silent(duration=500).export(wav_path, format="wav")
    return wav_path


def generate_audio_for_row(q_text, a_text, idx, tmpdir, mode):
    """Generate question and answer audio for a single card.

    Args:
        q_text: Raw question field.
        a_text: Raw answer field.
        idx: Row identifier; used in the WAV file names and in log messages.
        tmpdir: Directory the WAV files are written to.
        mode: Generation mode label. Only the exact value
            ``"Generate all new audio (Overwrite)"`` regenerates audio for
            fields that already contain a sound tag; every other value
            preserves existing audio.

    Returns:
        ``(path_q, path_a)``. Each element is the path of the WAV file that
        was written, or the string ``"SKIP"`` when the field already had
        audio and it is being preserved.
    """
    overwrite = (mode == "Generate all new audio (Overwrite)")

    q_out = _synthesize_side(
        q_text, os.path.join(tmpdir, f"q_{idx}.wav"), "Q", idx, overwrite
    )
    a_out = _synthesize_side(
        a_text, os.path.join(tmpdir, f"a_{idx}.wav"), "A", idx, overwrite
    )
    return q_out, a_out
104
+
105
def parse_file(file_obj):
    """Load an uploaded CSV or Anki package into a two-column DataFrame.

    CSV input: the first two columns become ``Question`` and ``Answer``.
    APKG/ZIP input: the archive is unpacked into a fresh temporary directory
    and the first two fields of every row of the ``notes`` table are used.

    Args:
        file_obj: Upload object exposing the on-disk path as ``.name``,
            or ``None`` when nothing was uploaded.

    Returns:
        A 6-tuple ``(df, has_media, preview, message, eta, extract_root)``.
        ``preview`` is the first ``PREVIEW_LIMIT`` rows of ``df``;
        ``extract_root`` is the directory holding the unpacked package
        (``None`` for CSV input). On any failure the tuple is
        ``(None, None, None, <error message>, "", None)`` and the temporary
        directory, if one was created, is removed.
    """
    def _failure(message):
        return None, None, None, message, "", None

    if file_obj is None:
        return _failure("No file uploaded")

    ext = Path(file_obj.name).suffix.lower()
    df = pd.DataFrame()
    extract_root = None  # Directory where we keep original media
    has_media = False

    try:
        if ext == ".csv":
            df = pd.read_csv(file_obj.name)
            if len(df.columns) < 2:
                df = pd.read_csv(file_obj.name, header=None)
            if len(df.columns) < 2:
                return _failure("CSV error: Need 2 columns")

            df = df.iloc[:, :2]
            df.columns = ["Question", "Answer"]

        elif ext in (".apkg", ".zip"):
            # Extract to a PERSISTENT temp dir (passed to state)
            extract_root = tempfile.mkdtemp()
            with zipfile.ZipFile(file_obj.name, 'r') as z:
                z.extractall(extract_root)

            # Check for existing media (rough check)
            media_dir = os.path.join(extract_root, "media")
            has_media = os.path.exists(media_dir) or any(
                f.isdigit() for f in os.listdir(extract_root)
            )

            # NOTE(review): some Anki exports appear to keep the real notes in
            # collection.anki21 and ship collection.anki2 only as a
            # compatibility stub - confirm against Anki's exporter. Preferring
            # the former is harmless when it is absent.
            candidates = [
                os.path.join(extract_root, "collection.anki21"),
                os.path.join(extract_root, "collection.anki2"),
            ]
            col_path = next((p for p in candidates if os.path.exists(p)), None)
            if col_path is None:
                shutil.rmtree(extract_root, ignore_errors=True)
                return _failure("Invalid APKG: No collection.anki2")

            conn = sqlite3.connect(col_path)
            try:
                rows = conn.execute("SELECT flds FROM notes").fetchall()
            finally:
                # Always release the database, even if the query fails.
                conn.close()

            data = []
            for (raw_fields,) in rows:
                # Note fields are stored joined by the 0x1f separator.
                flds = raw_fields.split('\x1f')
                q = flds[0]
                a = flds[1] if len(flds) > 1 else ""
                data.append([q, a])

            df = pd.DataFrame(data, columns=["Question", "Answer"])

        else:
            return _failure("Unsupported file type")

        df = df.fillna("")

        msg = f"✅ Loaded {len(df)} cards."
        if has_media:
            msg += " 🎵 Existing media detected."

        return df, has_media, df.head(PREVIEW_LIMIT), msg, estimate_time(len(df)), extract_root

    except Exception as e:
        if extract_root and os.path.exists(extract_root):
            # Best-effort cleanup; never let it mask the original error.
            shutil.rmtree(extract_root, ignore_errors=True)
        return _failure(f"Error: {str(e)}")
170
+
171
def estimate_time(num_cards):
    """Format a rough duration estimate, assuming 2 seconds per card.

    Returns whole seconds (``"~42s"``) below one minute and whole,
    truncated minutes (``"~3 min"``) from one minute upwards.
    """
    total_seconds = 2.0 * num_cards
    if total_seconds >= 60:
        return f"~{int(total_seconds / 60)} min"
    return f"~{int(total_seconds)}s"
176
+
177
def _restore_original_media(extract_root):
    """Rename an unpacked package's media files back to their original names.

    Reads the JSON map stored in ``<extract_root>/media`` (for example
    ``{"0": "my_audio.mp3"}``) and returns the list of renamed paths.
    Problems are logged and whatever was restored so far is returned.
    """
    restored = []
    media_map_path = os.path.join(extract_root, "media")
    if not (os.path.exists(media_map_path) and os.path.getsize(media_map_path) > 0):
        return restored

    try:
        with open(media_map_path, 'r', encoding='utf-8') as f:
            content = f.read().strip()
        if not content:
            print("Warning: Media map file is empty.")
            return restored

        media_map = json.loads(content)  # {"0": "my_audio.mp3", ...}
        for k, v in media_map.items():
            # The map comes from an uploaded file: keep only the final path
            # component so no entry can point outside extract_root.
            src_name = os.path.basename(str(k))
            dst_name = os.path.basename(str(v))
            if not src_name or not dst_name:
                continue
            src = os.path.join(extract_root, src_name)
            dst = os.path.join(extract_root, dst_name)
            if os.path.exists(src):
                # Rename enables genanki to find them by name
                os.rename(src, dst)
                restored.append(dst)
    except Exception as e:
        print(f"Warning: Could not restore existing media: {e}")
    return restored


def _attach_audio(field_text, wav_path, media_files):
    """Convert *wav_path* to MP3, register it, and tag the field with it.

    Existing ``[sound:...]`` tags are removed from the field first so the
    new tag is the only one. Returns the updated field text.
    """
    mp3_path = str(Path(wav_path).with_suffix('.mp3'))
    wav_to_mp3(wav_path, mp3_path)
    os.remove(wav_path)  # clean wav
    media_files.append(mp3_path)

    field_text = re.sub(r'\[sound:.*?\]', '', field_text)
    return field_text.strip() + f"<br>[sound:{os.path.basename(mp3_path)}]"


def process_dataframe(df_full, search_term, extract_root, mode, progress=gr.Progress()):
    """Generate audio for the selected cards and package them as an .apkg.

    Args:
        df_full: DataFrame with ``Question`` and ``Answer`` columns, or None.
        search_term: Optional literal, case-insensitive filter; only rows
            containing it in any column are processed.
        extract_root: Directory of the unpacked source package, or None.
            Its media files are carried over into the new package.
            The directory is deleted before this function returns.
        mode: Generation mode label, passed to ``generate_audio_for_row``.
        progress: Gradio progress callback.

    Returns:
        ``(path_to_apkg, status_message)``; the path is None on failure or
        when there is nothing to process.
    """
    if df_full is None or len(df_full) == 0:
        return None, "No data"

    # Filter logic
    if search_term:
        # regex=False: the filter box holds plain text, so characters such
        # as "(" or "?" must match literally instead of raising a regex error.
        mask = df_full.astype(str).apply(
            lambda col: col.str.contains(search_term, case=False, regex=False)
        ).any(axis=1)
        df = df_full[mask]
    else:
        df = df_full

    if len(df) == 0:
        return None, "No matching cards"

    # Setup
    work_dir = tempfile.mkdtemp()
    media_files = []

    try:
        # --- Media Preservation Logic ---
        if extract_root:
            media_files.extend(_restore_original_media(extract_root))

        # --- Genanki Setup ---
        model_id = random.randrange(1 << 30, 1 << 31)
        my_model = genanki.Model(
            model_id, 'PocketTTS Model',
            fields=[{'name': 'Question'}, {'name': 'Answer'}],
            # The [sound:...] tags live inside the two fields themselves, so
            # the templates must reference only fields the model declares.
            templates=[{
                'name': 'Card 1',
                'qfmt': '{{Question}}',
                'afmt': '{{FrontSide}}<hr id="answer">{{Answer}}',
            }])
        my_deck = genanki.Deck(random.randrange(1 << 30, 1 << 31), 'Pocket TTS Deck')

        # --- Execution ---
        total = len(df)
        completed = 0
        last_update_time = 0
        fields_by_pos = {}

        with ThreadPoolExecutor(max_workers=MAX_WORKERS) as exe:
            futures = {}
            for pos, (idx, row) in enumerate(df.iterrows()):
                q_text = str(row['Question'])
                a_text = str(row['Answer'])
                f = exe.submit(generate_audio_for_row, q_text, a_text, idx, work_dir, mode)
                # Keep the row's own text with the future: after filtering,
                # index labels no longer match positions, so looking the row
                # up again by position would read the wrong card.
                futures[f] = (pos, idx, q_text, a_text)

            for future in as_completed(futures):
                pos, idx, q_field, a_field = futures[future]
                try:
                    q_res, a_res = future.result()

                    if q_res and q_res != "SKIP":
                        q_field = _attach_audio(q_field, q_res, media_files)
                    if a_res and a_res != "SKIP":
                        a_field = _attach_audio(a_field, a_res, media_files)

                    fields_by_pos[pos] = [q_field, a_field]

                except Exception as e:
                    print(f"Row {idx} failed: {e}")

                # --- Throttled Progress ---
                completed += 1
                current_time = time.time()
                if completed == total or (current_time - last_update_time) > PROGRESS_THROTTLE:
                    progress(completed / total, desc=f"Processed {completed}/{total}")
                    last_update_time = current_time

        # Add notes in the original card order rather than in the order the
        # worker threads happened to finish.
        for pos in sorted(fields_by_pos):
            my_deck.add_note(genanki.Note(model=my_model, fields=fields_by_pos[pos]))

        # --- Package ---
        package = genanki.Package(my_deck)
        # Deduplicate media files list
        package.media_files = list(set(media_files))

        raw_out = os.path.join(work_dir, "output.apkg")
        package.write_to_file(raw_out)

        # mkstemp guarantees a unique name; a 4-digit random suffix can collide.
        fd, final_out = tempfile.mkstemp(prefix="pocket_deck_", suffix=".apkg")
        os.close(fd)
        shutil.copy(raw_out, final_out)

        return final_out, f"✅ Done! Packaged {len(package.media_files)} audio files."

    except Exception as e:
        return None, f"Critical Error: {str(e)}"

    finally:
        # --- Guaranteed Cleanup ---
        # ignore_errors: a cleanup failure must not replace the result above.
        if os.path.exists(work_dir):
            shutil.rmtree(work_dir, ignore_errors=True)
        # Also clean up the input extraction root if it exists
        if extract_root and os.path.exists(extract_root):
            shutil.rmtree(extract_root, ignore_errors=True)
317
+
318
+ # --- UI ---
319
+
320
# Gradio UI: upload a CSV/APKG, preview and optionally filter the cards,
# then generate a deck with audio attached.
with gr.Blocks(title="Pocket TTS Anki") as app:
    gr.Markdown("## 🎴 Pocket TTS Anki Generator")
    gr.Markdown("Offline Neural Audio. Supports CSV and APKG (smart media preservation).")

    # State variables
    full_df_state = gr.State()
    extract_root_state = gr.State()  # Holds path to unzipped APKG

    with gr.Row():
        file_input = gr.File(label="Upload (CSV/APKG)", file_types=[".csv", ".apkg", ".zip"])
        status = gr.Textbox(label="Status", interactive=False)
        eta_box = gr.Textbox(label="Est. Time", interactive=False)

    with gr.Row():
        search_box = gr.Textbox(label="Filter (Optional)", placeholder="Process subset...")

    # Generation mode. The first and third choices behave the same: only the
    # "Overwrite" label is treated specially by generate_audio_for_row.
    mode_radio = gr.Radio(
        choices=[
            "Smart Fill (Preserve Existing)",
            "Generate all new audio (Overwrite)",
            "Only generate missing (Same as Smart Fill)"
        ],
        value="Smart Fill (Preserve Existing)",
        label="Generation Mode"
    )

    preview_table = gr.Dataframe(label="Preview (First 100)", interactive=False, height=300)

    with gr.Row():
        btn = gr.Button("🚀 Generate Deck", variant="primary")
        dl = gr.File(label="Download")

    result_lbl = gr.Textbox(label="Result", interactive=False)

    def on_upload(file, previous_root=None):
        """Parse the upload and refresh the session state and preview."""
        # A new upload replaces the previous one; remove the old extraction
        # directory so abandoned uploads do not pile up on disk.
        if previous_root and os.path.exists(previous_root):
            shutil.rmtree(previous_root, ignore_errors=True)
        # Returns: df, has_media, preview, msg, eta, extract_path
        df, _, preview, msg, eta, ext_path = parse_file(file)
        return df, preview, msg, eta, ext_path

    file_input.upload(on_upload, inputs=[file_input, extract_root_state],
                      outputs=[full_df_state, preview_table, status, eta_box, extract_root_state])

    def on_search(term, df):
        """Return the preview rows that contain *term* in any column."""
        if df is None:
            return None
        if not term:
            return df.head(PREVIEW_LIMIT)
        # regex=False: treat the filter as literal text so characters such as
        # "(" cannot raise a regex error while the user is typing.
        mask = df.astype(str).apply(
            lambda col: col.str.contains(term, case=False, regex=False)
        ).any(axis=1)
        return df[mask].head(PREVIEW_LIMIT)

    search_box.change(on_search, inputs=[search_box, full_df_state], outputs=preview_table)

    btn.click(process_dataframe,
              inputs=[full_df_state, search_box, extract_root_state, mode_radio],
              outputs=[dl, result_lbl])

if __name__ == "__main__":
    app.queue(max_size=2).launch(server_name="0.0.0.0", server_port=7860)
packages.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ ffmpeg
2
+ libsndfile1
requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # PyTorch installation with platform-specific versions:
2
+
3
+ --extra-index-url https://download.pytorch.org/whl/cpu
4
+
5
+ # Linux (HF Spaces) - use CPU builds from extra index
6
+ torch>=2.5.0
7
+ gradio>=4.0.0
8
+ pandas
9
+ genanki
10
+ pydub
11
+ # Pocket TTS is not on PyPI - must install from GitHub
12
+ git+https://github.com/kyutai-labs/pocket-tts.git